From 2788609b8a10306e9eae47543b39812a7b075aaa Mon Sep 17 00:00:00 2001 From: Teresa Charlin Date: Thu, 25 Feb 2021 20:15:01 +0000 Subject: Port ClTranspose to new API Partially Resolves: COMPMID-4277 (1/2) Signed-off-by: Teresa Charlin Change-Id: I704c2303135cbe1ba46d2fd5642c84c562204bc7 Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5194 Tested-by: Arm Jenkins Reviewed-by: Georgios Pinitas Comments-Addressed: Arm Jenkins --- .../runtime/CL/functions/CLFullyConnectedLayer.h | 27 +++- arm_compute/runtime/CL/functions/CLLSTMLayer.h | 177 +++++++++++---------- arm_compute/runtime/CL/functions/CLPermute.h | 4 +- arm_compute/runtime/CL/functions/CLTranspose.h | 33 +++- 4 files changed, 142 insertions(+), 99 deletions(-) (limited to 'arm_compute/runtime/CL') diff --git a/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h b/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h index 3f17e4a921..a640e344d4 100644 --- a/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h +++ b/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -36,15 +36,27 @@ namespace arm_compute { -/** Basic function to reshape the weights of Fully Connected layer with OpenCL. This function calls the following kernels: +/** Function to reshape the weights of Fully Connected layer with OpenCL by transposing input tensor. This function calls the following kernel: * - * -# @ref CLTransposeKernel + * -# @ref opencl::kernels::ClTransposeKernel * * @note The fully connected layer accepts "weights" tensors only with 2 dimensions. */ -class CLFullyConnectedLayerReshapeWeights : public ICLSimpleFunction +class CLFullyConnectedLayerReshapeWeights : public IFunction { public: + /** Constructor */ + CLFullyConnectedLayerReshapeWeights(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLFullyConnectedLayerReshapeWeights(const CLFullyConnectedLayerReshapeWeights &) = delete; + /** Default move constructor */ + CLFullyConnectedLayerReshapeWeights(CLFullyConnectedLayerReshapeWeights &&) = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLFullyConnectedLayerReshapeWeights &operator=(const CLFullyConnectedLayerReshapeWeights &) = delete; + /** Default move assignment operator */ + CLFullyConnectedLayerReshapeWeights &operator=(CLFullyConnectedLayerReshapeWeights &&) = default; + /** Default destructor */ + ~CLFullyConnectedLayerReshapeWeights(); /** Set the input and output tensors. * * @param[in] input Weights tensor. The weights must be 2 dimensional. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. @@ -66,6 +78,13 @@ public: * @return a status */ static Status validate(const ITensorInfo *input, const ITensorInfo *output); + + // Inherited methods overridden + void run() override; + +private: + struct Impl; + std::unique_ptr _impl; }; namespace weights_transformations diff --git a/arm_compute/runtime/CL/functions/CLLSTMLayer.h b/arm_compute/runtime/CL/functions/CLLSTMLayer.h index 20b068316c..38a24d030b 100644 --- a/arm_compute/runtime/CL/functions/CLLSTMLayer.h +++ b/arm_compute/runtime/CL/functions/CLLSTMLayer.h @@ -46,8 +46,14 @@ namespace arm_compute { class CLCompileContext; -class CLTransposeKernel; class ICLTensor; +namespace opencl +{ +namespace kernels +{ +class ClTransposeKernel; +} +} /** This function performs a single time step in a Long Short-Term Memory (LSTM) layer. * @@ -212,90 +218,91 @@ public: void prepare() override; private: - MemoryGroup _memory_group; - CLFullyConnectedLayer _fully_connected_input_gate; - CLArithmeticAddition _accum_input_gate1; - CLArithmeticSubtraction _subtract_input_gate; - CLPixelWiseMultiplication _pixelwise_mul_input_gate; - CLActivationLayer _activation_input_gate; - CLFullyConnectedLayer _fully_connected_forget_gate; - CLArithmeticAddition _accum_forget_gate1; - CLPixelWiseMultiplication _pixelwise_mul_forget_gate; - CLActivationLayer _activation_forget_gate; - CLFullyConnectedLayer _fully_connected_cell_state; - CLGEMM _gemm_cell_state1; - std::unique_ptr _transpose_cell_state; - CLArithmeticAddition _accum_cell_state1; - CLArithmeticAddition _accum_cell_state2; - CLPixelWiseMultiplication _pixelwise_mul_cell_state1; - CLActivationLayer _activation_cell_state; - CLActivationLayer _cell_clip; - CLPixelWiseMultiplication _pixelwise_mul_cell_state2; - CLFullyConnectedLayer _fully_connected_output; - CLPixelWiseMultiplication _pixelwise_mul_output_state1; - CLArithmeticAddition _accum_output1; - CLActivationLayer _activation_output; - CLActivationLayer _activation_output_state; - CLPixelWiseMultiplication _pixelwise_mul_output_state2; - CLFullyConnectedLayer _fully_connected_output_state; - CLActivationLayer _projection_clip; - CLCopy _copy_cell_state; - CLCopy _copy_output; - CLConcatenateLayer _concat_scratch_buffer; - CLConcatenateLayer _concat_inputs_forget_gate; - CLConcatenateLayer _concat_weights_forget_gate; - CLConcatenateLayer _concat_weights_input_gate; - CLConcatenateLayer _concat_weights_output; - CLFill _ones_fill; - CLMeanStdDevNormalizationLayer _mean_std_norm_input_gate; - CLPixelWiseMultiplication _pixelwise_mul_input_gate_coeff; - CLArithmeticAddition _accum_input_gate_bias; - CLMeanStdDevNormalizationLayer _mean_std_norm_forget_gate; - CLPixelWiseMultiplication _pixelwise_mul_forget_gate_coeff; - CLArithmeticAddition _accum_forget_gate_bias; - CLMeanStdDevNormalizationLayer _mean_std_norm_cell_gate; - CLPixelWiseMultiplication _pixelwise_mul_cell_gate_coeff; - CLArithmeticAddition _accum_cell_gate_bias; - CLMeanStdDevNormalizationLayer _mean_std_norm_output_gate; - CLPixelWiseMultiplication _pixelwise_mul_output_gate_coeff; - CLArithmeticAddition _accum_output_gate_bias; - CLTensor _input_gate_out1; - CLTensor _input_gate_out2; - CLTensor _input_gate_out3; - CLTensor _input_gate_out4; - CLTensor _forget_gate_out1; - CLTensor _forget_gate_out2; - CLTensor _forget_gate_out3; - CLTensor _forget_gate_out4; - CLTensor _forget_gate_out5; - CLTensor _forget_gate_out6; - CLTensor _cell_state_out1; - CLTensor _cell_state_out2; - CLTensor _cell_state_out3; - CLTensor _cell_state_out4; - CLTensor _cell_state_out5; - CLTensor _output1; - CLTensor _output2; - CLTensor _output3; - CLTensor _output4; - CLTensor _cell_state_activation; - CLTensor _output_state1; - CLTensor _ones; - CLTensor _input_layer_norm_out1; - CLTensor _input_layer_norm_out2; - CLTensor _forget_layer_norm_out1; - CLTensor _forget_layer_norm_out2; - CLTensor _cell_layer_norm_out1; - CLTensor _cell_layer_norm_out2; - CLTensor _output_layer_norm_out1; - CLTensor _output_layer_norm_out2; - bool _run_peephole_opt; - bool _run_cifg_opt; - bool _perform_cell_clipping; - bool _has_projection_weights; - bool _perform_projection_clipping; - bool _is_prepared; - bool _is_layer_norm_lstm; + MemoryGroup _memory_group; + CLFullyConnectedLayer _fully_connected_input_gate; + CLArithmeticAddition _accum_input_gate1; + CLArithmeticSubtraction _subtract_input_gate; + CLPixelWiseMultiplication _pixelwise_mul_input_gate; + CLActivationLayer _activation_input_gate; + CLFullyConnectedLayer _fully_connected_forget_gate; + CLArithmeticAddition _accum_forget_gate1; + CLPixelWiseMultiplication _pixelwise_mul_forget_gate; + CLActivationLayer _activation_forget_gate; + CLFullyConnectedLayer _fully_connected_cell_state; + CLGEMM _gemm_cell_state1; + std::unique_ptr _transpose_cell_state; + CLArithmeticAddition _accum_cell_state1; + CLArithmeticAddition _accum_cell_state2; + CLPixelWiseMultiplication _pixelwise_mul_cell_state1; + CLActivationLayer _activation_cell_state; + CLActivationLayer _cell_clip; + CLPixelWiseMultiplication _pixelwise_mul_cell_state2; + CLFullyConnectedLayer _fully_connected_output; + CLPixelWiseMultiplication _pixelwise_mul_output_state1; + CLArithmeticAddition _accum_output1; + CLActivationLayer _activation_output; + CLActivationLayer _activation_output_state; + CLPixelWiseMultiplication _pixelwise_mul_output_state2; + CLFullyConnectedLayer _fully_connected_output_state; + CLActivationLayer _projection_clip; + CLCopy _copy_cell_state; + CLCopy _copy_output; + CLConcatenateLayer _concat_scratch_buffer; + CLConcatenateLayer _concat_inputs_forget_gate; + CLConcatenateLayer _concat_weights_forget_gate; + CLConcatenateLayer _concat_weights_input_gate; + CLConcatenateLayer _concat_weights_output; + CLFill _ones_fill; + CLMeanStdDevNormalizationLayer _mean_std_norm_input_gate; + CLPixelWiseMultiplication _pixelwise_mul_input_gate_coeff; + CLArithmeticAddition _accum_input_gate_bias; + CLMeanStdDevNormalizationLayer _mean_std_norm_forget_gate; + CLPixelWiseMultiplication _pixelwise_mul_forget_gate_coeff; + CLArithmeticAddition _accum_forget_gate_bias; + CLMeanStdDevNormalizationLayer _mean_std_norm_cell_gate; + CLPixelWiseMultiplication _pixelwise_mul_cell_gate_coeff; + CLArithmeticAddition _accum_cell_gate_bias; + CLMeanStdDevNormalizationLayer _mean_std_norm_output_gate; + CLPixelWiseMultiplication _pixelwise_mul_output_gate_coeff; + CLArithmeticAddition _accum_output_gate_bias; + CLTensor _input_gate_out1; + CLTensor _input_gate_out2; + CLTensor _input_gate_out3; + CLTensor _input_gate_out4; + CLTensor _forget_gate_out1; + CLTensor _forget_gate_out2; + CLTensor _forget_gate_out3; + CLTensor _forget_gate_out4; + CLTensor _forget_gate_out5; + CLTensor _forget_gate_out6; + CLTensor _cell_state_out1; + CLTensor _cell_state_out2; + CLTensor _cell_state_out3; + CLTensor _cell_state_out4; + CLTensor _cell_state_out5; + CLTensor _output1; + CLTensor _output2; + CLTensor _output3; + CLTensor _output4; + CLTensor _cell_state_activation; + CLTensor _output_state1; + CLTensor _ones; + CLTensor _input_layer_norm_out1; + CLTensor _input_layer_norm_out2; + CLTensor _forget_layer_norm_out1; + CLTensor _forget_layer_norm_out2; + CLTensor _cell_layer_norm_out1; + CLTensor _cell_layer_norm_out2; + CLTensor _output_layer_norm_out1; + CLTensor _output_layer_norm_out2; + bool _run_peephole_opt; + bool _run_cifg_opt; + bool _perform_cell_clipping; + bool _has_projection_weights; + bool _perform_projection_clipping; + bool _is_prepared; + bool _is_layer_norm_lstm; + const ICLTensor *_recurrent_to_cell_weights{ nullptr }; }; } // namespace arm_compute #endif /* ARM_COMPUTE_CLLSTMLAYER_H */ diff --git a/arm_compute/runtime/CL/functions/CLPermute.h b/arm_compute/runtime/CL/functions/CLPermute.h index bcd9566fbf..e59cca2a67 100644 --- a/arm_compute/runtime/CL/functions/CLPermute.h +++ b/arm_compute/runtime/CL/functions/CLPermute.h @@ -46,11 +46,11 @@ public: /** Prevent instances of this class from being copied (As this class contains pointers) */ CLPermute(const CLPermute &) = delete; /** Default move constructor */ - CLPermute(CLPermute &&); + CLPermute(CLPermute &&) = default; /** Prevent instances of this class from being copied (As this class contains pointers) */ CLPermute &operator=(const CLPermute &) = delete; /** Default move assignment operator */ - CLPermute &operator=(CLPermute &&); + CLPermute &operator=(CLPermute &&) = default; /** Set the input and output tensors. * * @note Arbitrary permutation vectors are supported with rank not greater than 4 diff --git a/arm_compute/runtime/CL/functions/CLTranspose.h b/arm_compute/runtime/CL/functions/CLTranspose.h index 2b7a03f23f..43cebeba90 100644 --- a/arm_compute/runtime/CL/functions/CLTranspose.h +++ b/arm_compute/runtime/CL/functions/CLTranspose.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -25,7 +25,9 @@ #define ARM_COMPUTE_CLTRANSPOSE_H #include "arm_compute/core/Error.h" -#include "arm_compute/runtime/CL/ICLSimpleFunction.h" +#include "arm_compute/runtime/IFunction.h" + +#include namespace arm_compute { @@ -33,14 +35,22 @@ class CLCompileContext; class ICLTensor; class ITensorInfo; -/** Basic function to transpose a matrix on OpenCL. This function calls the following OpenCL kernel: - * - * -# @ref CLTransposeKernel - * - */ -class CLTranspose : public ICLSimpleFunction +/** Basic function to execute an @ref opencl::kernels::ClTransposeKernel. */ +class CLTranspose : public IFunction { public: + /** Constructor */ + CLTranspose(); + /** Destructor */ + ~CLTranspose(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLTranspose(const CLTranspose &) = delete; + /** Default move constructor */ + CLTranspose(CLTranspose &&) = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLTranspose &operator=(const CLTranspose &) = delete; + /** Default move assignment operator */ + CLTranspose &operator=(CLTranspose &&) = default; /** Initialise the kernel's inputs and output * * @param[in] input Input tensor. Data types supported: All. @@ -62,6 +72,13 @@ public: * @return a status */ static Status validate(const ITensorInfo *input, const ITensorInfo *output); + + // Inherited methods overridden: + void run() override; + +private: + struct Impl; + std::unique_ptr _impl; }; } -- cgit v1.2.1