aboutsummaryrefslogtreecommitdiff
path: root/src/runtime/CL/functions/CLLSTMLayer.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/runtime/CL/functions/CLLSTMLayer.cpp')
-rw-r--r--src/runtime/CL/functions/CLLSTMLayer.cpp37
1 files changed, 17 insertions, 20 deletions
diff --git a/src/runtime/CL/functions/CLLSTMLayer.cpp b/src/runtime/CL/functions/CLLSTMLayer.cpp
index 77df917119..5036126aea 100644
--- a/src/runtime/CL/functions/CLLSTMLayer.cpp
+++ b/src/runtime/CL/functions/CLLSTMLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -29,7 +29,6 @@
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
-#include "src/core/CL/kernels/CLCopyKernel.h"
#include "src/core/CL/kernels/CLDepthConvertLayerKernel.h"
#include "src/core/CL/kernels/CLFillBorderKernel.h"
#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h"
@@ -42,7 +41,6 @@
#include "src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h"
#include "src/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h"
#include "src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h"
-#include "src/core/CL/kernels/CLMemsetKernel.h"
#include "src/core/CL/kernels/CLTransposeKernel.h"
namespace arm_compute
@@ -55,15 +53,14 @@ CLLSTMLayer::CLLSTMLayer(std::shared_ptr<IMemoryManager> memory_manager)
_fully_connected_forget_gate(), _accum_forget_gate1(), _pixelwise_mul_forget_gate(), _activation_forget_gate(), _fully_connected_cell_state(), _gemm_cell_state1(),
_transpose_cell_state(std::make_unique<CLTransposeKernel>()), _accum_cell_state1(), _accum_cell_state2(), _pixelwise_mul_cell_state1(), _activation_cell_state(), _cell_clip(),
_pixelwise_mul_cell_state2(), _fully_connected_output(), _pixelwise_mul_output_state1(), _accum_output1(), _activation_output(), _activation_output_state(), _pixelwise_mul_output_state2(),
- _fully_connected_output_state(), _projection_clip(), _copy_cell_state(std::make_unique<CLCopyKernel>()), _copy_output(std::make_unique<CLCopyKernel>()), _concat_scratch_buffer(),
- _concat_inputs_forget_gate(), _concat_weights_forget_gate(), _concat_weights_input_gate(), _concat_weights_output(), _ones_memset_kernel(std::make_unique<CLMemsetKernel>()),
- _mean_std_norm_input_gate(), _pixelwise_mul_input_gate_coeff(), _accum_input_gate_bias(), _mean_std_norm_forget_gate(), _pixelwise_mul_forget_gate_coeff(), _accum_forget_gate_bias(),
- _mean_std_norm_cell_gate(), _pixelwise_mul_cell_gate_coeff(), _accum_cell_gate_bias(), _mean_std_norm_output_gate(), _pixelwise_mul_output_gate_coeff(), _accum_output_gate_bias(), _input_gate_out1(),
- _input_gate_out2(), _input_gate_out3(), _input_gate_out4(), _forget_gate_out1(), _forget_gate_out2(), _forget_gate_out3(), _forget_gate_out4(), _forget_gate_out5(), _forget_gate_out6(),
- _cell_state_out1(), _cell_state_out2(), _cell_state_out3(), _cell_state_out4(), _cell_state_out5(), _output1(), _output2(), _output3(), _output4(), _cell_state_activation(), _output_state1(), _ones(),
- _input_layer_norm_out1(), _input_layer_norm_out2(), _forget_layer_norm_out1(), _forget_layer_norm_out2(), _cell_layer_norm_out1(), _cell_layer_norm_out2(), _output_layer_norm_out1(),
- _output_layer_norm_out2(), _run_peephole_opt(false), _run_cifg_opt(false), _perform_cell_clipping(false), _has_projection_weights(false), _perform_projection_clipping(false), _is_prepared(false),
- _is_layer_norm_lstm(false)
+ _fully_connected_output_state(), _projection_clip(), _copy_cell_state(), _copy_output(), _concat_scratch_buffer(), _concat_inputs_forget_gate(), _concat_weights_forget_gate(),
+ _concat_weights_input_gate(), _concat_weights_output(), _ones_fill(), _mean_std_norm_input_gate(), _pixelwise_mul_input_gate_coeff(), _accum_input_gate_bias(), _mean_std_norm_forget_gate(),
+ _pixelwise_mul_forget_gate_coeff(), _accum_forget_gate_bias(), _mean_std_norm_cell_gate(), _pixelwise_mul_cell_gate_coeff(), _accum_cell_gate_bias(), _mean_std_norm_output_gate(),
+ _pixelwise_mul_output_gate_coeff(), _accum_output_gate_bias(), _input_gate_out1(), _input_gate_out2(), _input_gate_out3(), _input_gate_out4(), _forget_gate_out1(), _forget_gate_out2(),
+ _forget_gate_out3(), _forget_gate_out4(), _forget_gate_out5(), _forget_gate_out6(), _cell_state_out1(), _cell_state_out2(), _cell_state_out3(), _cell_state_out4(), _cell_state_out5(), _output1(),
+ _output2(), _output3(), _output4(), _cell_state_activation(), _output_state1(), _ones(), _input_layer_norm_out1(), _input_layer_norm_out2(), _forget_layer_norm_out1(), _forget_layer_norm_out2(),
+ _cell_layer_norm_out1(), _cell_layer_norm_out2(), _output_layer_norm_out1(), _output_layer_norm_out2(), _run_peephole_opt(false), _run_cifg_opt(false), _perform_cell_clipping(false),
+ _has_projection_weights(false), _perform_projection_clipping(false), _is_prepared(false), _is_layer_norm_lstm(false)
{
}
@@ -190,7 +187,7 @@ void CLLSTMLayer::configure(const CLCompileContext &compile_context, const ICLTe
{
_memory_group.manage(&_input_gate_out1);
_ones.allocator()->init(TensorInfo(cell_state_shape, 1, input->info()->data_type()));
- _ones_memset_kernel->configure(compile_context, &_ones, PixelValue(1, _ones.info()->data_type()));
+ _ones_fill.configure(compile_context, &_ones, PixelValue(1, _ones.info()->data_type()));
_subtract_input_gate.configure(compile_context, &_ones, forget_gate_out, &_input_gate_out1, ConvertPolicy::SATURATE);
_ones.allocator()->allocate();
_run_cifg_opt = true;
@@ -385,8 +382,8 @@ void CLLSTMLayer::configure(const CLCompileContext &compile_context, const ICLTe
}
// Copy cell state and output
- _copy_cell_state->configure(compile_context, &_cell_state_out1, cell_state_out);
- _copy_output->configure(compile_context, output_state_out, output);
+ _copy_cell_state.configure(compile_context, &_cell_state_out1, cell_state_out);
+ _copy_output.configure(compile_context, output_state_out, output);
// Vector for holding the tensors to store in scratch buffer
std::vector<const ICLTensor *> scratch_inputs;
@@ -618,8 +615,8 @@ Status CLLSTMLayer::validate(const ITensorInfo *input,
}
// Validate copy kernel
- ARM_COMPUTE_RETURN_ON_ERROR(CLCopyKernel::validate(&cell_state_tmp, cell_state_out));
- ARM_COMPUTE_RETURN_ON_ERROR(CLCopyKernel::validate(output_state_out, output));
+ ARM_COMPUTE_RETURN_ON_ERROR(CLCopy::validate(&cell_state_tmp, cell_state_out));
+ ARM_COMPUTE_RETURN_ON_ERROR(CLCopy::validate(output_state_out, output));
// Validate scratch concatenation
std::vector<const ITensorInfo *> inputs_vector_info_raw;
@@ -660,7 +657,7 @@ void CLLSTMLayer::run()
if(_run_cifg_opt)
{
- CLScheduler::get().enqueue(*_ones_memset_kernel);
+ _ones_fill.run();
_subtract_input_gate.run();
}
else
@@ -729,8 +726,8 @@ void CLLSTMLayer::run()
}
}
- CLScheduler::get().enqueue(*_copy_cell_state);
- CLScheduler::get().enqueue(*_copy_output);
+ _copy_cell_state.run();
+ _copy_output.run();
_concat_scratch_buffer.run();
}