From 7e20e29904c98adae5a91c6492fd78da88b7a9bf Mon Sep 17 00:00:00 2001 From: Sheri Zhang Date: Tue, 2 Feb 2021 11:49:34 +0000 Subject: Make memset/copy functions state-less Port following functions: - CLCopy - CLFill - CLPermute - CLReshapeLayer - CLCropResize Resolves: COMPMID-4002 Signed-off-by: Sheri Zhang Change-Id: I8392aa515aaeb5b44dab6122be6a795d08376d5f Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5003 Comments-Addressed: Arm Jenkins Reviewed-by: Michele Di Giorgio Tested-by: Arm Jenkins --- src/runtime/CL/functions/CLLSTMLayer.cpp | 37 +++++++++++++++----------------- 1 file changed, 17 insertions(+), 20 deletions(-) (limited to 'src/runtime/CL/functions/CLLSTMLayer.cpp') diff --git a/src/runtime/CL/functions/CLLSTMLayer.cpp b/src/runtime/CL/functions/CLLSTMLayer.cpp index 77df917119..5036126aea 100644 --- a/src/runtime/CL/functions/CLLSTMLayer.cpp +++ b/src/runtime/CL/functions/CLLSTMLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 Arm Limited. + * Copyright (c) 2018-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -29,7 +29,6 @@ #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/core/utils/quantization/AsymmHelpers.h" #include "arm_compute/runtime/CL/CLScheduler.h" -#include "src/core/CL/kernels/CLCopyKernel.h" #include "src/core/CL/kernels/CLDepthConvertLayerKernel.h" #include "src/core/CL/kernels/CLFillBorderKernel.h" #include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h" @@ -42,7 +41,6 @@ #include "src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h" #include "src/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h" #include "src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h" -#include "src/core/CL/kernels/CLMemsetKernel.h" #include "src/core/CL/kernels/CLTransposeKernel.h" namespace arm_compute @@ -55,15 +53,14 @@ CLLSTMLayer::CLLSTMLayer(std::shared_ptr memory_manager) _fully_connected_forget_gate(), _accum_forget_gate1(), _pixelwise_mul_forget_gate(), _activation_forget_gate(), _fully_connected_cell_state(), _gemm_cell_state1(), _transpose_cell_state(std::make_unique()), _accum_cell_state1(), _accum_cell_state2(), _pixelwise_mul_cell_state1(), _activation_cell_state(), _cell_clip(), _pixelwise_mul_cell_state2(), _fully_connected_output(), _pixelwise_mul_output_state1(), _accum_output1(), _activation_output(), _activation_output_state(), _pixelwise_mul_output_state2(), - _fully_connected_output_state(), _projection_clip(), _copy_cell_state(std::make_unique()), _copy_output(std::make_unique()), _concat_scratch_buffer(), - _concat_inputs_forget_gate(), _concat_weights_forget_gate(), _concat_weights_input_gate(), _concat_weights_output(), _ones_memset_kernel(std::make_unique()), - _mean_std_norm_input_gate(), _pixelwise_mul_input_gate_coeff(), _accum_input_gate_bias(), _mean_std_norm_forget_gate(), _pixelwise_mul_forget_gate_coeff(), _accum_forget_gate_bias(), - _mean_std_norm_cell_gate(), _pixelwise_mul_cell_gate_coeff(), _accum_cell_gate_bias(), _mean_std_norm_output_gate(), _pixelwise_mul_output_gate_coeff(), _accum_output_gate_bias(), _input_gate_out1(), - _input_gate_out2(), _input_gate_out3(), _input_gate_out4(), _forget_gate_out1(), _forget_gate_out2(), _forget_gate_out3(), _forget_gate_out4(), _forget_gate_out5(), _forget_gate_out6(), - _cell_state_out1(), _cell_state_out2(), _cell_state_out3(), _cell_state_out4(), _cell_state_out5(), _output1(), _output2(), _output3(), _output4(), _cell_state_activation(), _output_state1(), _ones(), - _input_layer_norm_out1(), _input_layer_norm_out2(), _forget_layer_norm_out1(), _forget_layer_norm_out2(), _cell_layer_norm_out1(), _cell_layer_norm_out2(), _output_layer_norm_out1(), - _output_layer_norm_out2(), _run_peephole_opt(false), _run_cifg_opt(false), _perform_cell_clipping(false), _has_projection_weights(false), _perform_projection_clipping(false), _is_prepared(false), - _is_layer_norm_lstm(false) + _fully_connected_output_state(), _projection_clip(), _copy_cell_state(), _copy_output(), _concat_scratch_buffer(), _concat_inputs_forget_gate(), _concat_weights_forget_gate(), + _concat_weights_input_gate(), _concat_weights_output(), _ones_fill(), _mean_std_norm_input_gate(), _pixelwise_mul_input_gate_coeff(), _accum_input_gate_bias(), _mean_std_norm_forget_gate(), + _pixelwise_mul_forget_gate_coeff(), _accum_forget_gate_bias(), _mean_std_norm_cell_gate(), _pixelwise_mul_cell_gate_coeff(), _accum_cell_gate_bias(), _mean_std_norm_output_gate(), + _pixelwise_mul_output_gate_coeff(), _accum_output_gate_bias(), _input_gate_out1(), _input_gate_out2(), _input_gate_out3(), _input_gate_out4(), _forget_gate_out1(), _forget_gate_out2(), + _forget_gate_out3(), _forget_gate_out4(), _forget_gate_out5(), _forget_gate_out6(), _cell_state_out1(), _cell_state_out2(), _cell_state_out3(), _cell_state_out4(), _cell_state_out5(), _output1(), + _output2(), _output3(), _output4(), _cell_state_activation(), _output_state1(), _ones(), _input_layer_norm_out1(), _input_layer_norm_out2(), _forget_layer_norm_out1(), _forget_layer_norm_out2(), + _cell_layer_norm_out1(), _cell_layer_norm_out2(), _output_layer_norm_out1(), _output_layer_norm_out2(), _run_peephole_opt(false), _run_cifg_opt(false), _perform_cell_clipping(false), + _has_projection_weights(false), _perform_projection_clipping(false), _is_prepared(false), _is_layer_norm_lstm(false) { } @@ -190,7 +187,7 @@ void CLLSTMLayer::configure(const CLCompileContext &compile_context, const ICLTe { _memory_group.manage(&_input_gate_out1); _ones.allocator()->init(TensorInfo(cell_state_shape, 1, input->info()->data_type())); - _ones_memset_kernel->configure(compile_context, &_ones, PixelValue(1, _ones.info()->data_type())); + _ones_fill.configure(compile_context, &_ones, PixelValue(1, _ones.info()->data_type())); _subtract_input_gate.configure(compile_context, &_ones, forget_gate_out, &_input_gate_out1, ConvertPolicy::SATURATE); _ones.allocator()->allocate(); _run_cifg_opt = true; @@ -385,8 +382,8 @@ void CLLSTMLayer::configure(const CLCompileContext &compile_context, const ICLTe } // Copy cell state and output - _copy_cell_state->configure(compile_context, &_cell_state_out1, cell_state_out); - _copy_output->configure(compile_context, output_state_out, output); + _copy_cell_state.configure(compile_context, &_cell_state_out1, cell_state_out); + _copy_output.configure(compile_context, output_state_out, output); // Vector for holding the tensors to store in scratch buffer std::vector scratch_inputs; @@ -618,8 +615,8 @@ Status CLLSTMLayer::validate(const ITensorInfo *input, } // Validate copy kernel - ARM_COMPUTE_RETURN_ON_ERROR(CLCopyKernel::validate(&cell_state_tmp, cell_state_out)); - ARM_COMPUTE_RETURN_ON_ERROR(CLCopyKernel::validate(output_state_out, output)); + ARM_COMPUTE_RETURN_ON_ERROR(CLCopy::validate(&cell_state_tmp, cell_state_out)); + ARM_COMPUTE_RETURN_ON_ERROR(CLCopy::validate(output_state_out, output)); // Validate scratch concatenation std::vector inputs_vector_info_raw; @@ -660,7 +657,7 @@ void CLLSTMLayer::run() if(_run_cifg_opt) { - CLScheduler::get().enqueue(*_ones_memset_kernel); + _ones_fill.run(); _subtract_input_gate.run(); } else @@ -729,8 +726,8 @@ void CLLSTMLayer::run() } } - CLScheduler::get().enqueue(*_copy_cell_state); - CLScheduler::get().enqueue(*_copy_output); + _copy_cell_state.run(); + _copy_output.run(); _concat_scratch_buffer.run(); } -- cgit v1.2.1