From ab23dd0fbc632063235a6ad408241dc79a35d3e4 Mon Sep 17 00:00:00 2001 From: Georgios Pinitas Date: Mon, 6 Jul 2020 14:57:36 +0100 Subject: COMPMID-3387: Support memory injection in CLActivationLayer Signed-off-by: Georgios Pinitas Change-Id: I31f9620607b372fc3340c71e748a5ea177d9da62 Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3520 Reviewed-by: Michele Di Giorgio Comments-Addressed: Arm Jenkins Tested-by: Arm Jenkins --- .../core/CL/kernels/CLActivationLayerKernel.h | 19 ++----- .../runtime/CL/functions/CLActivationLayer.h | 47 ++++++++++++++++-- arm_compute/runtime/CL/functions/CLLSTMLayer.h | 16 +++--- arm_compute/runtime/CL/functions/CLRNNLayer.h | 4 +- src/core/CL/kernels/CLActivationLayerKernel.cpp | 43 +++++++--------- src/core/CL/kernels/CLReshapeLayerKernel.cpp | 5 +- src/runtime/CL/functions/CLActivationLayer.cpp | 58 ++++++++++++++++++++-- src/runtime/CL/functions/CLLSTMLayer.cpp | 32 ++++++------ src/runtime/CL/functions/CLRNNLayer.cpp | 8 +-- 9 files changed, 153 insertions(+), 79 deletions(-) diff --git a/arm_compute/core/CL/kernels/CLActivationLayerKernel.h b/arm_compute/core/CL/kernels/CLActivationLayerKernel.h index 1e83a689cd..d8b556a120 100644 --- a/arm_compute/core/CL/kernels/CLActivationLayerKernel.h +++ b/arm_compute/core/CL/kernels/CLActivationLayerKernel.h @@ -45,16 +45,6 @@ public: CLActivationLayerKernel &operator=(CLActivationLayerKernel &&) = default; /** Default destructor */ ~CLActivationLayerKernel() = default; - /** Set the input and output tensor. - * - * @note If the output tensor is a nullptr, the activation function will be performed in-place - * - * @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will store the result - * of the activation function. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM16/F16/F32. - * @param[out] output Destination tensor. Data type supported: same as @p input - * @param[in] act_info Activation layer information. - */ - void configure(ICLTensor *input, ICLTensor *output, ActivationLayerInfo act_info); /** Set the input and output tensor. * * @note If the output tensor is a nullptr, the activation function will be performed in-place @@ -65,7 +55,7 @@ public: * @param[out] output Destination tensor. Data type supported: same as @p input * @param[in] act_info Activation layer information. */ - void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, ActivationLayerInfo act_info); + void configure(const CLCompileContext &compile_context, ITensorInfo *input, ITensorInfo *output, ActivationLayerInfo act_info); /** Static function to check if given info will lead to a valid configuration of @ref CLActivationLayerKernel * * @param[in] input Source tensor info. In case of @p output tensor info = nullptr, this tensor will store the result @@ -78,12 +68,11 @@ public: static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &act_info); // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; + void run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, + const Window &window, cl::CommandQueue &queue) override; private: - ICLTensor *_input; - ICLTensor *_output; - bool _run_in_place; + bool _run_in_place; }; } // namespace arm_compute #endif /*ARM_COMPUTE_CLACTIVATIONLAYERKERNEL_H */ diff --git a/arm_compute/runtime/CL/functions/CLActivationLayer.h b/arm_compute/runtime/CL/functions/CLActivationLayer.h index fbb34e5fb9..7353789e4a 100644 --- a/arm_compute/runtime/CL/functions/CLActivationLayer.h +++ b/arm_compute/runtime/CL/functions/CLActivationLayer.h @@ -24,6 +24,7 @@ #ifndef ARM_COMPUTE_CLACTIVATIONLAYER_H #define ARM_COMPUTE_CLACTIVATIONLAYER_H +#include "arm_compute/runtime/CL/ICLOperator.h" #include "arm_compute/runtime/CL/ICLSimpleFunction.h" #include "arm_compute/core/Types.h" @@ -36,7 +37,7 @@ class ICLTensor; * * @note The function simulates an activation layer with the specified activation function. */ -class CLActivationLayer : public ICLSimpleFunction +class CLActivationLayer : public IFunction { public: /** Constructor @@ -44,14 +45,16 @@ public: * @param[in] ctx Runtime context to be used by the function */ CLActivationLayer(CLRuntimeContext *ctx = nullptr); + /** Destructor */ + ~CLActivationLayer(); /** Prevent instances of this class from being copied (As this class contains pointers) */ CLActivationLayer(const CLActivationLayer &) = delete; /** Default move constructor */ - CLActivationLayer(CLActivationLayer &&) = default; + CLActivationLayer(CLActivationLayer &&); /** Prevent instances of this class from being copied (As this class contains pointers) */ CLActivationLayer &operator=(const CLActivationLayer &) = delete; /** Default move assignment operator */ - CLActivationLayer &operator=(CLActivationLayer &&) = default; + CLActivationLayer &operator=(CLActivationLayer &&); /** Set the input and output tensor. * * @note If the output tensor is a nullptr or is equal to the input, the activation function will be performed in-place @@ -83,6 +86,44 @@ public: * @return a status */ static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &act_info); + + // Inherited methods overridden: + void run() override; + +private: + struct Impl; + std::unique_ptr _impl; +}; + +namespace experimental +{ +/** Basic function to run @ref CLActivationLayerKernel */ +class CLActivationLayer : public ICLOperator +{ +public: + /** Set the input and output tensor. + * + * @param[in] compile_context The compile context to be used. + * @param[in, out] input Source tensor info. In case of @p output tensor = nullptr, this tensor will store the result + * of the activation function. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM16/F16/F32. + * @param[out] output Destination tensor info. Data type supported: same as @p input + * @param[in] act_info Activation layer parameters. + */ + void configure(const CLCompileContext &compile_context, ITensorInfo *input, ITensorInfo *output, ActivationLayerInfo act_info); + /** Static function to check if given info will lead to a valid configuration of @ref CLActivationLayer + * + * @param[in] input Source tensor info. In case of @p output tensor info = nullptr, this tensor will store the result + * of the activation function. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM16/F16/F32. + * @param[in] output Destination tensor info. Data type supported: same as @p input + * @param[in] act_info Activation layer information. + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &act_info); + + // Inherited methods overridden: + MemoryRequirements workspace() const override; }; +} // namespace experimental } // namespace arm_compute #endif /* ARM_COMPUTE_CLACTIVATIONLAYER_H */ diff --git a/arm_compute/runtime/CL/functions/CLLSTMLayer.h b/arm_compute/runtime/CL/functions/CLLSTMLayer.h index a29513aaae..7b8b5135da 100644 --- a/arm_compute/runtime/CL/functions/CLLSTMLayer.h +++ b/arm_compute/runtime/CL/functions/CLLSTMLayer.h @@ -26,7 +26,6 @@ #include "arm_compute/runtime/IFunction.h" -#include "arm_compute/core/CL/kernels/CLActivationLayerKernel.h" #include "arm_compute/core/CL/kernels/CLCopyKernel.h" #include "arm_compute/core/CL/kernels/CLElementwiseOperationKernel.h" #include "arm_compute/core/CL/kernels/CLMemsetKernel.h" @@ -34,6 +33,7 @@ #include "arm_compute/core/CL/kernels/CLWidthConcatenate2TensorsKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/CL/CLTensor.h" +#include "arm_compute/runtime/CL/functions/CLActivationLayer.h" #include "arm_compute/runtime/CL/functions/CLConcatenateLayer.h" #include "arm_compute/runtime/CL/functions/CLElementwiseOperations.h" #include "arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h" @@ -207,28 +207,28 @@ private: CLArithmeticAddition _accum_input_gate1; CLSaturatedArithmeticOperationKernel _subtract_input_gate; CLPixelWiseMultiplicationKernel _pixelwise_mul_input_gate; - CLActivationLayerKernel _activation_input_gate; + CLActivationLayer _activation_input_gate; CLFullyConnectedLayer _fully_connected_forget_gate; CLArithmeticAddition _accum_forget_gate1; CLPixelWiseMultiplicationKernel _pixelwise_mul_forget_gate; - CLActivationLayerKernel _activation_forget_gate; + CLActivationLayer _activation_forget_gate; CLFullyConnectedLayer _fully_connected_cell_state; CLGEMM _gemm_cell_state1; CLTransposeKernel _transpose_cell_state; CLSaturatedArithmeticOperationKernel _accum_cell_state1; CLSaturatedArithmeticOperationKernel _accum_cell_state2; CLPixelWiseMultiplicationKernel _pixelwise_mul_cell_state1; - CLActivationLayerKernel _activation_cell_state; - CLActivationLayerKernel _cell_clip; + CLActivationLayer _activation_cell_state; + CLActivationLayer _cell_clip; CLPixelWiseMultiplicationKernel _pixelwise_mul_cell_state2; CLFullyConnectedLayer _fully_connected_output; CLPixelWiseMultiplicationKernel _pixelwise_mul_output_state1; CLArithmeticAddition _accum_output1; - CLActivationLayerKernel _activation_output; - CLActivationLayerKernel _activation_output_state; + CLActivationLayer _activation_output; + CLActivationLayer _activation_output_state; CLPixelWiseMultiplicationKernel _pixelwise_mul_output_state2; CLFullyConnectedLayer _fully_connected_output_state; - CLActivationLayerKernel _projection_clip; + CLActivationLayer _projection_clip; CLCopyKernel _copy_cell_state; CLCopyKernel _copy_output; CLConcatenateLayer _concat_scratch_buffer; diff --git a/arm_compute/runtime/CL/functions/CLRNNLayer.h b/arm_compute/runtime/CL/functions/CLRNNLayer.h index 0291eb17a9..bd9de2b0c9 100644 --- a/arm_compute/runtime/CL/functions/CLRNNLayer.h +++ b/arm_compute/runtime/CL/functions/CLRNNLayer.h @@ -24,10 +24,10 @@ #ifndef ARM_COMPUTE_CLRNN_LAYER_H #define ARM_COMPUTE_CLRNN_LAYER_H -#include "arm_compute/core/CL/kernels/CLActivationLayerKernel.h" #include "arm_compute/core/CL/kernels/CLCopyKernel.h" #include "arm_compute/core/CL/kernels/CLElementwiseOperationKernel.h" #include "arm_compute/runtime/CL/ICLSimpleFunction.h" +#include "arm_compute/runtime/CL/functions/CLActivationLayer.h" #include "arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h" #include "arm_compute/runtime/CL/functions/CLGEMM.h" @@ -88,7 +88,7 @@ private: MemoryGroup _memory_group; CLGEMM _gemm_state_f; CLSaturatedArithmeticOperationKernel _add_kernel; - CLActivationLayerKernel _activation_kernel; + CLActivationLayer _activation; CLFullyConnectedLayer _fully_connected_kernel; CLCopyKernel _copy_kernel; CLTensor _fully_connected_out; diff --git a/src/core/CL/kernels/CLActivationLayerKernel.cpp b/src/core/CL/kernels/CLActivationLayerKernel.cpp index d40e9a15be..e030177549 100644 --- a/src/core/CL/kernels/CLActivationLayerKernel.cpp +++ b/src/core/CL/kernels/CLActivationLayerKernel.cpp @@ -34,6 +34,7 @@ #include "arm_compute/core/Utils.h" #include "arm_compute/core/Window.h" #include "arm_compute/core/utils/helpers/float_ops.h" +#include "arm_compute/core/utils/misc/Cast.h" #include "support/StringSupport.h" #include @@ -116,16 +117,11 @@ std::pair validate_and_configure_window(ITensorInfo *input, ITen } // namespace CLActivationLayerKernel::CLActivationLayerKernel() - : _input(nullptr), _output(nullptr), _run_in_place(false) + : _run_in_place(false) { } -void CLActivationLayerKernel::configure(ICLTensor *input, ICLTensor *output, ActivationLayerInfo act_info) -{ - configure(CLKernelLibrary::get().get_compile_context(), input, output, act_info); -} - -void CLActivationLayerKernel::configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, ActivationLayerInfo act_info) +void CLActivationLayerKernel::configure(const CLCompileContext &compile_context, ITensorInfo *input, ITensorInfo *output, ActivationLayerInfo act_info) { ARM_COMPUTE_ERROR_ON_NULLPTR(input); @@ -134,14 +130,13 @@ void CLActivationLayerKernel::configure(const CLCompileContext &compile_context, if(output != nullptr) { // Output auto inizialitation if not yet initialized - auto_init_if_empty(*output->info(), - *input->info()->clone()); + auto_init_if_empty(*output, *input->clone()); } - ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), (output != nullptr) ? output->info() : nullptr, act_info)); + ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input, (output != nullptr) ? output : nullptr, act_info)); - const unsigned int num_elems_processed_per_iteration = 16 / input->info()->element_size(); - const DataType dt = input->info()->data_type(); + const unsigned int num_elems_processed_per_iteration = 16 / input->element_size(); + const DataType dt = input->data_type(); float a_const = act_info.a(); float b_const = act_info.b(); @@ -163,7 +158,7 @@ void CLActivationLayerKernel::configure(const CLCompileContext &compile_context, // Set quantization info build options if(is_quantized) { - const UniformQuantizationInfo iq_info = input->info()->quantization_info().uniform(); + const UniformQuantizationInfo iq_info = input->quantization_info().uniform(); if(!perform_activation_in_float) { @@ -214,7 +209,7 @@ void CLActivationLayerKernel::configure(const CLCompileContext &compile_context, // Set scale and offset of the input and output if they have different quantization info if(output != nullptr) { - const UniformQuantizationInfo oq_info = output->info()->quantization_info().uniform(); + const UniformQuantizationInfo oq_info = output->quantization_info().uniform(); if(iq_info != oq_info) { @@ -233,12 +228,8 @@ void CLActivationLayerKernel::configure(const CLCompileContext &compile_context, // Create kernel _kernel = create_kernel(compile_context, kernel_name, build_opts.options()); - // Make sure _kernel is initialized before calling the parent's configure - _input = input; - _output = output; - // Configure kernel window - auto win_config = validate_and_configure_window(input->info(), (_run_in_place) ? nullptr : output->info()); + auto win_config = validate_and_configure_window(input, (_run_in_place) ? nullptr : output); ARM_COMPUTE_ERROR_THROW_ON(win_config.first); ICLKernel::configure_internal(win_config.second); @@ -246,9 +237,9 @@ void CLActivationLayerKernel::configure(const CLCompileContext &compile_context, _config_id = "activation_layer_"; _config_id += lower_string(string_from_data_type(dt)); _config_id += "_"; - _config_id += support::cpp11::to_string(input->info()->dimension(0)); + _config_id += support::cpp11::to_string(input->dimension(0)); _config_id += "_"; - _config_id += support::cpp11::to_string(input->info()->dimension(1)); + _config_id += support::cpp11::to_string(input->dimension(1)); } Status CLActivationLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &act_info) @@ -260,21 +251,25 @@ Status CLActivationLayerKernel::validate(const ITensorInfo *input, const ITensor return Status{}; } -void CLActivationLayerKernel::run(const Window &window, cl::CommandQueue &queue) +void CLActivationLayerKernel::run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, const Window &window, cl::CommandQueue &queue) { ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window); + const auto src = utils::cast::polymorphic_downcast(inputs.at(TensorType::ACL_SRC)); + auto dst = utils::cast::polymorphic_downcast(outputs.at(TensorType::ACL_DST)); + ARM_COMPUTE_ERROR_ON(_run_in_place && src != dst); + Window collapsed = window.collapse_if_possible(ICLKernel::window(), Window::DimZ); Window slice = collapsed.first_slice_window_3D(); do { unsigned int idx = 0; - add_3D_tensor_argument(idx, _input, slice); + add_3D_tensor_argument(idx, src, slice); if(!_run_in_place) { - add_3D_tensor_argument(idx, _output, slice); + add_3D_tensor_argument(idx, dst, slice); } enqueue(queue, *this, slice, lws_hint()); } diff --git a/src/core/CL/kernels/CLReshapeLayerKernel.cpp b/src/core/CL/kernels/CLReshapeLayerKernel.cpp index 97fde8645e..d486b06c8e 100644 --- a/src/core/CL/kernels/CLReshapeLayerKernel.cpp +++ b/src/core/CL/kernels/CLReshapeLayerKernel.cpp @@ -34,6 +34,7 @@ #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/Window.h" +#include "arm_compute/core/utils/misc/Cast.h" #include @@ -107,8 +108,8 @@ void CLReshapeLayerKernel::run_op(const InputTensorMap &inputs, const OutputTens Window window_collapsed = window.collapse_if_possible(ICLKernel::window(), Window::DimZ); Window slice = window_collapsed.first_slice_window_3D(); - const auto src = dynamic_cast(inputs.at(TensorType::ACL_SRC)); - auto dst = dynamic_cast(outputs.at(TensorType::ACL_DST)); + const auto src = utils::cast::polymorphic_downcast(inputs.at(TensorType::ACL_SRC)); + auto dst = utils::cast::polymorphic_downcast(outputs.at(TensorType::ACL_DST)); // Set inputs unsigned int idx = 0; diff --git a/src/runtime/CL/functions/CLActivationLayer.cpp b/src/runtime/CL/functions/CLActivationLayer.cpp index 989603a9df..d6b80200cd 100644 --- a/src/runtime/CL/functions/CLActivationLayer.cpp +++ b/src/runtime/CL/functions/CLActivationLayer.cpp @@ -23,6 +23,7 @@ */ #include "arm_compute/runtime/CL/functions/CLActivationLayer.h" +#include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/CL/kernels/CLActivationLayerKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/CL/CLRuntimeContext.h" @@ -30,11 +31,46 @@ namespace arm_compute { +namespace experimental +{ +void CLActivationLayer::configure(const CLCompileContext &compile_context, ITensorInfo *input, ITensorInfo *output, ActivationLayerInfo act_info) +{ + auto k = arm_compute::support::cpp14::make_unique(); + k->configure(compile_context, input, output, act_info); + _kernel = std::move(k); +} + +Status CLActivationLayer::validate(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &act_info) +{ + return CLActivationLayerKernel::validate(input, output, act_info); +} + +MemoryRequirements CLActivationLayer::workspace() const +{ + return MemoryRequirements{}; +} +} // namespace experimental + +struct CLActivationLayer::Impl +{ + const ICLTensor *src{ nullptr }; + ICLTensor *dst{ nullptr }; + CLRuntimeContext *ctx{ nullptr }; + std::unique_ptr op{ nullptr }; +}; + CLActivationLayer::CLActivationLayer(CLRuntimeContext *ctx) - : ICLSimpleFunction(ctx) + : _impl(support::cpp14::make_unique()) { + _impl->ctx = ctx; } +CLActivationLayer::CLActivationLayer(CLActivationLayer &&) = default; + +CLActivationLayer &CLActivationLayer::operator=(CLActivationLayer &&) = default; + +CLActivationLayer::~CLActivationLayer() = default; + void CLActivationLayer::configure(ICLTensor *input, ICLTensor *output, ActivationLayerInfo act_info) { configure(CLKernelLibrary::get().get_compile_context(), input, output, act_info); @@ -42,13 +78,25 @@ void CLActivationLayer::configure(ICLTensor *input, ICLTensor *output, Activatio void CLActivationLayer::configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, ActivationLayerInfo act_info) { - auto k = arm_compute::support::cpp14::make_unique(); - k->configure(compile_context, input, output, act_info); - _kernel = std::move(k); + ARM_COMPUTE_ERROR_ON_NULLPTR(input); + + _impl->src = input; + _impl->dst = output == nullptr ? input : output; + + _impl->op = arm_compute::support::cpp14::make_unique(); + _impl->op->configure(compile_context, _impl->src->info(), _impl->dst->info(), act_info); } Status CLActivationLayer::validate(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &act_info) { - return CLActivationLayerKernel::validate(input, output, act_info); + return experimental::CLActivationLayer::validate(input, output, act_info); +} + +void CLActivationLayer::run() +{ + const InputTensorMap src{ { TensorType::ACL_SRC, _impl->src } }; + const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; + + _impl->op->run(src, dst, {}); } } // namespace arm_compute diff --git a/src/runtime/CL/functions/CLLSTMLayer.cpp b/src/runtime/CL/functions/CLLSTMLayer.cpp index 56f22e2fe0..e63a9cceb0 100644 --- a/src/runtime/CL/functions/CLLSTMLayer.cpp +++ b/src/runtime/CL/functions/CLLSTMLayer.cpp @@ -499,7 +499,7 @@ Status CLLSTMLayer::validate(const ITensorInfo *input, RoundingPolicy::TO_NEAREST_EVEN)); ARM_COMPUTE_RETURN_ON_ERROR(CLArithmeticAddition::validate(&forget_gate, forget_gate_bias, &forget_gate, ConvertPolicy::SATURATE)); } - ARM_COMPUTE_RETURN_ON_ERROR(CLActivationLayerKernel::validate(&forget_gate, &forget_gate, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC))); + ARM_COMPUTE_RETURN_ON_ERROR(CLActivationLayer::validate(&forget_gate, &forget_gate, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC))); // Validate input gate if(!lstm_params.has_cifg_opt()) @@ -534,7 +534,7 @@ Status CLLSTMLayer::validate(const ITensorInfo *input, ARM_COMPUTE_RETURN_ON_ERROR(CLPixelWiseMultiplicationKernel::validate(&input_gate, lstm_params.input_layer_norm_weights(), &input_gate, 1, ConvertPolicy::SATURATE, RoundingPolicy::TO_NEAREST_EVEN)); ARM_COMPUTE_RETURN_ON_ERROR(CLArithmeticAddition::validate(&input_gate, lstm_params.input_gate_bias(), &input_gate, ConvertPolicy::SATURATE)); } - ARM_COMPUTE_RETURN_ON_ERROR(CLActivationLayerKernel::validate(&input_gate, nullptr, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC))); + ARM_COMPUTE_RETURN_ON_ERROR(CLActivationLayer::validate(&input_gate, nullptr, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC))); } else { @@ -552,14 +552,14 @@ Status CLLSTMLayer::validate(const ITensorInfo *input, RoundingPolicy::TO_NEAREST_EVEN)); ARM_COMPUTE_RETURN_ON_ERROR(CLArithmeticAddition::validate(&cell_state_tmp, cell_bias, &cell_state_tmp, ConvertPolicy::SATURATE)); } - ARM_COMPUTE_RETURN_ON_ERROR(CLActivationLayerKernel::validate(&cell_state_tmp, nullptr, activation_info)); + ARM_COMPUTE_RETURN_ON_ERROR(CLActivationLayer::validate(&cell_state_tmp, nullptr, activation_info)); ARM_COMPUTE_RETURN_ON_ERROR(CLPixelWiseMultiplicationKernel::validate(&cell_state_tmp, &input_gate, &cell_state_tmp, 1, ConvertPolicy::SATURATE, RoundingPolicy::TO_NEAREST_EVEN)); ARM_COMPUTE_RETURN_ON_ERROR(CLPixelWiseMultiplicationKernel::validate(&cell_state_tmp, &forget_gate, &cell_state_tmp, 1, ConvertPolicy::SATURATE, RoundingPolicy::TO_NEAREST_EVEN)); ARM_COMPUTE_RETURN_ON_ERROR(CLArithmeticAddition::validate(&cell_state_tmp, &cell_state_tmp, &cell_state_tmp, ConvertPolicy::SATURATE)); if(cell_threshold != 0.f) { - ARM_COMPUTE_RETURN_ON_ERROR(CLActivationLayerKernel::validate(&cell_state_tmp, nullptr, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, -cell_threshold, - cell_threshold))); + ARM_COMPUTE_RETURN_ON_ERROR(CLActivationLayer::validate(&cell_state_tmp, nullptr, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, -cell_threshold, + cell_threshold))); } std::vector in_out_weights; @@ -584,18 +584,18 @@ Status CLLSTMLayer::validate(const ITensorInfo *input, RoundingPolicy::TO_NEAREST_EVEN)); ARM_COMPUTE_RETURN_ON_ERROR(CLArithmeticAddition::validate(&output_gate_tmp, output_gate_bias, &output_gate_tmp, ConvertPolicy::SATURATE)); } - ARM_COMPUTE_RETURN_ON_ERROR(CLActivationLayerKernel::validate(&output_gate_tmp, nullptr, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC))); + ARM_COMPUTE_RETURN_ON_ERROR(CLActivationLayer::validate(&output_gate_tmp, nullptr, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC))); // Validate output state - ARM_COMPUTE_RETURN_ON_ERROR(CLActivationLayerKernel::validate(&cell_state_tmp, &cell_state_tmp, activation_info)); + ARM_COMPUTE_RETURN_ON_ERROR(CLActivationLayer::validate(&cell_state_tmp, &cell_state_tmp, activation_info)); ARM_COMPUTE_RETURN_ON_ERROR(CLPixelWiseMultiplicationKernel::validate(&cell_state_tmp, &output_gate_tmp, &output_gate_tmp, 1, ConvertPolicy::SATURATE, RoundingPolicy::TO_NEAREST_EVEN)); if(lstm_params.has_projection()) { ARM_COMPUTE_RETURN_ON_ERROR(CLFullyConnectedLayer::validate(&output_gate_tmp, lstm_params.projection_weights(), lstm_params.projection_bias(), output_state_out)); if(projection_threshold != 0.f) { - ARM_COMPUTE_RETURN_ON_ERROR(CLActivationLayerKernel::validate(output_state_out, output_state_out, - ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, -projection_threshold, projection_threshold))); + ARM_COMPUTE_RETURN_ON_ERROR(CLActivationLayer::validate(output_state_out, output_state_out, + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, -projection_threshold, projection_threshold))); } } @@ -638,7 +638,7 @@ void CLLSTMLayer::run() CLScheduler::get().enqueue(_pixelwise_mul_forget_gate_coeff); CLScheduler::get().enqueue(_accum_forget_gate_bias); } - CLScheduler::get().enqueue(_activation_forget_gate); + _activation_forget_gate.run(); if(_run_cifg_opt) { @@ -661,7 +661,7 @@ void CLLSTMLayer::run() CLScheduler::get().enqueue(_pixelwise_mul_input_gate_coeff); CLScheduler::get().enqueue(_accum_input_gate_bias); } - CLScheduler::get().enqueue(_activation_input_gate); + _activation_input_gate.run(); } _fully_connected_cell_state.run(); @@ -674,14 +674,14 @@ void CLLSTMLayer::run() CLScheduler::get().enqueue(_pixelwise_mul_cell_gate_coeff); CLScheduler::get().enqueue(_accum_cell_gate_bias); } - CLScheduler::get().enqueue(_activation_cell_state); + _activation_cell_state.run(); CLScheduler::get().enqueue(_pixelwise_mul_cell_state1); CLScheduler::get().enqueue(_pixelwise_mul_cell_state2); CLScheduler::get().enqueue(_accum_cell_state2); if(_perform_cell_clipping) { - CLScheduler::get().enqueue(_cell_clip); + _cell_clip.run(); } _fully_connected_output.run(); @@ -697,9 +697,9 @@ void CLLSTMLayer::run() CLScheduler::get().enqueue(_pixelwise_mul_output_gate_coeff); CLScheduler::get().enqueue(_accum_output_gate_bias); } - CLScheduler::get().enqueue(_activation_output); + _activation_output.run(); - CLScheduler::get().enqueue(_activation_output_state); + _activation_output_state.run(); CLScheduler::get().enqueue(_pixelwise_mul_output_state2); if(_has_projection_weights) @@ -707,7 +707,7 @@ void CLLSTMLayer::run() _fully_connected_output_state.run(); if(_perform_projection_clipping) { - CLScheduler::get().enqueue(_projection_clip); + _projection_clip.run(); } } diff --git a/src/runtime/CL/functions/CLRNNLayer.cpp b/src/runtime/CL/functions/CLRNNLayer.cpp index 57b8d70089..075f4a4ebd 100644 --- a/src/runtime/CL/functions/CLRNNLayer.cpp +++ b/src/runtime/CL/functions/CLRNNLayer.cpp @@ -35,7 +35,7 @@ using namespace arm_compute; using namespace arm_compute::misc::shape_calculator; CLRNNLayer::CLRNNLayer(std::shared_ptr memory_manager) - : _memory_group(std::move(memory_manager)), _gemm_state_f(), _add_kernel(), _activation_kernel(), _fully_connected_kernel(), _copy_kernel(), _fully_connected_out(), _gemm_output(), _add_output(), + : _memory_group(std::move(memory_manager)), _gemm_state_f(), _add_kernel(), _activation(), _fully_connected_kernel(), _copy_kernel(), _fully_connected_out(), _gemm_output(), _add_output(), _is_prepared(false) { } @@ -60,7 +60,7 @@ Status CLRNNLayer::validate(const ITensorInfo *input, const ITensorInfo *weights ARM_COMPUTE_RETURN_ON_ERROR(CLFullyConnectedLayer::validate(input, weights, bias, &shape_info)); ARM_COMPUTE_RETURN_ON_ERROR(CLGEMM::validate(hidden_state, recurrent_weights, nullptr, &shape_info, 1.f, 0.f)); ARM_COMPUTE_RETURN_ON_ERROR(CLSaturatedArithmeticOperationKernel::validate(ArithmeticOperation::ADD, &shape_info, &shape_info, &shape_info, ConvertPolicy::SATURATE)); - ARM_COMPUTE_RETURN_ON_ERROR(CLActivationLayerKernel::validate(&shape_info, &shape_info, info)); + ARM_COMPUTE_RETURN_ON_ERROR(CLActivationLayer::validate(&shape_info, &shape_info, info)); return Status{}; } @@ -101,7 +101,7 @@ void CLRNNLayer::configure(const CLCompileContext &compile_context, const ICLTen _fully_connected_out.allocator()->allocate(); _gemm_output.allocator()->allocate(); - _activation_kernel.configure(compile_context, &_add_output, hidden_state, info); + _activation.configure(compile_context, &_add_output, hidden_state, info); _add_output.allocator()->allocate(); _copy_kernel.configure(compile_context, hidden_state, output); @@ -116,7 +116,7 @@ void CLRNNLayer::run() _fully_connected_kernel.run(); _gemm_state_f.run(); CLScheduler::get().enqueue(_add_kernel); - CLScheduler::get().enqueue(_activation_kernel); + _activation.run(); // copy hidden out to output CLScheduler::get().enqueue(_copy_kernel); -- cgit v1.2.1