aboutsummaryrefslogtreecommitdiff
path: root/arm_compute
diff options
context:
space:
mode:
authorTeresa Charlin <teresa.charlinreyes@arm.com>2021-02-25 20:15:01 +0000
committerGeorgios Pinitas <georgios.pinitas@arm.com>2021-03-29 20:23:11 +0000
commit2788609b8a10306e9eae47543b39812a7b075aaa (patch)
tree81515046e0c06d6a21ecdcebfe083ea5922fea0c /arm_compute
parentf9a611a1fd309bb9a906c99eede5e6b7bceba26b (diff)
downloadComputeLibrary-2788609b8a10306e9eae47543b39812a7b075aaa.tar.gz
Port ClTranspose to new API
Partially Resolves: COMPMID-4277 (1/2) Signed-off-by: Teresa Charlin <teresa.charlinreyes@arm.com> Change-Id: I704c2303135cbe1ba46d2fd5642c84c562204bc7 Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5194 Tested-by: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com> Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'arm_compute')
-rw-r--r--arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h27
-rw-r--r--arm_compute/runtime/CL/functions/CLLSTMLayer.h177
-rw-r--r--arm_compute/runtime/CL/functions/CLPermute.h4
-rw-r--r--arm_compute/runtime/CL/functions/CLTranspose.h33
4 files changed, 142 insertions, 99 deletions
diff --git a/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h b/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h
index 3f17e4a921..a640e344d4 100644
--- a/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h
+++ b/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -36,15 +36,27 @@
namespace arm_compute
{
-/** Basic function to reshape the weights of Fully Connected layer with OpenCL. This function calls the following kernels:
+/** Function to reshape the weights of Fully Connected layer with OpenCL by transposing input tensor. This function calls the following kernel:
*
- * -# @ref CLTransposeKernel
+ * -# @ref opencl::kernels::ClTransposeKernel
*
* @note The fully connected layer accepts "weights" tensors only with 2 dimensions.
*/
-class CLFullyConnectedLayerReshapeWeights : public ICLSimpleFunction
+class CLFullyConnectedLayerReshapeWeights : public IFunction
{
public:
+ /** Constructor */
+ CLFullyConnectedLayerReshapeWeights();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLFullyConnectedLayerReshapeWeights(const CLFullyConnectedLayerReshapeWeights &) = delete;
+ /** Default move constructor */
+ CLFullyConnectedLayerReshapeWeights(CLFullyConnectedLayerReshapeWeights &&) = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLFullyConnectedLayerReshapeWeights &operator=(const CLFullyConnectedLayerReshapeWeights &) = delete;
+ /** Default move assignment operator */
+ CLFullyConnectedLayerReshapeWeights &operator=(CLFullyConnectedLayerReshapeWeights &&) = default;
+ /** Default destructor */
+ ~CLFullyConnectedLayerReshapeWeights();
/** Set the input and output tensors.
*
* @param[in] input Weights tensor. The weights must be 2 dimensional. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
@@ -66,6 +78,13 @@ public:
* @return a status
*/
static Status validate(const ITensorInfo *input, const ITensorInfo *output);
+
+ // Inherited methods overridden
+ void run() override;
+
+private:
+ struct Impl;
+ std::unique_ptr<Impl> _impl;
};
namespace weights_transformations
diff --git a/arm_compute/runtime/CL/functions/CLLSTMLayer.h b/arm_compute/runtime/CL/functions/CLLSTMLayer.h
index 20b068316c..38a24d030b 100644
--- a/arm_compute/runtime/CL/functions/CLLSTMLayer.h
+++ b/arm_compute/runtime/CL/functions/CLLSTMLayer.h
@@ -46,8 +46,14 @@
namespace arm_compute
{
class CLCompileContext;
-class CLTransposeKernel;
class ICLTensor;
+namespace opencl
+{
+namespace kernels
+{
+class ClTransposeKernel;
+}
+}
/** This function performs a single time step in a Long Short-Term Memory (LSTM) layer.
*
@@ -212,90 +218,91 @@ public:
void prepare() override;
private:
- MemoryGroup _memory_group;
- CLFullyConnectedLayer _fully_connected_input_gate;
- CLArithmeticAddition _accum_input_gate1;
- CLArithmeticSubtraction _subtract_input_gate;
- CLPixelWiseMultiplication _pixelwise_mul_input_gate;
- CLActivationLayer _activation_input_gate;
- CLFullyConnectedLayer _fully_connected_forget_gate;
- CLArithmeticAddition _accum_forget_gate1;
- CLPixelWiseMultiplication _pixelwise_mul_forget_gate;
- CLActivationLayer _activation_forget_gate;
- CLFullyConnectedLayer _fully_connected_cell_state;
- CLGEMM _gemm_cell_state1;
- std::unique_ptr<CLTransposeKernel> _transpose_cell_state;
- CLArithmeticAddition _accum_cell_state1;
- CLArithmeticAddition _accum_cell_state2;
- CLPixelWiseMultiplication _pixelwise_mul_cell_state1;
- CLActivationLayer _activation_cell_state;
- CLActivationLayer _cell_clip;
- CLPixelWiseMultiplication _pixelwise_mul_cell_state2;
- CLFullyConnectedLayer _fully_connected_output;
- CLPixelWiseMultiplication _pixelwise_mul_output_state1;
- CLArithmeticAddition _accum_output1;
- CLActivationLayer _activation_output;
- CLActivationLayer _activation_output_state;
- CLPixelWiseMultiplication _pixelwise_mul_output_state2;
- CLFullyConnectedLayer _fully_connected_output_state;
- CLActivationLayer _projection_clip;
- CLCopy _copy_cell_state;
- CLCopy _copy_output;
- CLConcatenateLayer _concat_scratch_buffer;
- CLConcatenateLayer _concat_inputs_forget_gate;
- CLConcatenateLayer _concat_weights_forget_gate;
- CLConcatenateLayer _concat_weights_input_gate;
- CLConcatenateLayer _concat_weights_output;
- CLFill _ones_fill;
- CLMeanStdDevNormalizationLayer _mean_std_norm_input_gate;
- CLPixelWiseMultiplication _pixelwise_mul_input_gate_coeff;
- CLArithmeticAddition _accum_input_gate_bias;
- CLMeanStdDevNormalizationLayer _mean_std_norm_forget_gate;
- CLPixelWiseMultiplication _pixelwise_mul_forget_gate_coeff;
- CLArithmeticAddition _accum_forget_gate_bias;
- CLMeanStdDevNormalizationLayer _mean_std_norm_cell_gate;
- CLPixelWiseMultiplication _pixelwise_mul_cell_gate_coeff;
- CLArithmeticAddition _accum_cell_gate_bias;
- CLMeanStdDevNormalizationLayer _mean_std_norm_output_gate;
- CLPixelWiseMultiplication _pixelwise_mul_output_gate_coeff;
- CLArithmeticAddition _accum_output_gate_bias;
- CLTensor _input_gate_out1;
- CLTensor _input_gate_out2;
- CLTensor _input_gate_out3;
- CLTensor _input_gate_out4;
- CLTensor _forget_gate_out1;
- CLTensor _forget_gate_out2;
- CLTensor _forget_gate_out3;
- CLTensor _forget_gate_out4;
- CLTensor _forget_gate_out5;
- CLTensor _forget_gate_out6;
- CLTensor _cell_state_out1;
- CLTensor _cell_state_out2;
- CLTensor _cell_state_out3;
- CLTensor _cell_state_out4;
- CLTensor _cell_state_out5;
- CLTensor _output1;
- CLTensor _output2;
- CLTensor _output3;
- CLTensor _output4;
- CLTensor _cell_state_activation;
- CLTensor _output_state1;
- CLTensor _ones;
- CLTensor _input_layer_norm_out1;
- CLTensor _input_layer_norm_out2;
- CLTensor _forget_layer_norm_out1;
- CLTensor _forget_layer_norm_out2;
- CLTensor _cell_layer_norm_out1;
- CLTensor _cell_layer_norm_out2;
- CLTensor _output_layer_norm_out1;
- CLTensor _output_layer_norm_out2;
- bool _run_peephole_opt;
- bool _run_cifg_opt;
- bool _perform_cell_clipping;
- bool _has_projection_weights;
- bool _perform_projection_clipping;
- bool _is_prepared;
- bool _is_layer_norm_lstm;
+ MemoryGroup _memory_group;
+ CLFullyConnectedLayer _fully_connected_input_gate;
+ CLArithmeticAddition _accum_input_gate1;
+ CLArithmeticSubtraction _subtract_input_gate;
+ CLPixelWiseMultiplication _pixelwise_mul_input_gate;
+ CLActivationLayer _activation_input_gate;
+ CLFullyConnectedLayer _fully_connected_forget_gate;
+ CLArithmeticAddition _accum_forget_gate1;
+ CLPixelWiseMultiplication _pixelwise_mul_forget_gate;
+ CLActivationLayer _activation_forget_gate;
+ CLFullyConnectedLayer _fully_connected_cell_state;
+ CLGEMM _gemm_cell_state1;
+ std::unique_ptr<opencl::kernels::ClTransposeKernel> _transpose_cell_state;
+ CLArithmeticAddition _accum_cell_state1;
+ CLArithmeticAddition _accum_cell_state2;
+ CLPixelWiseMultiplication _pixelwise_mul_cell_state1;
+ CLActivationLayer _activation_cell_state;
+ CLActivationLayer _cell_clip;
+ CLPixelWiseMultiplication _pixelwise_mul_cell_state2;
+ CLFullyConnectedLayer _fully_connected_output;
+ CLPixelWiseMultiplication _pixelwise_mul_output_state1;
+ CLArithmeticAddition _accum_output1;
+ CLActivationLayer _activation_output;
+ CLActivationLayer _activation_output_state;
+ CLPixelWiseMultiplication _pixelwise_mul_output_state2;
+ CLFullyConnectedLayer _fully_connected_output_state;
+ CLActivationLayer _projection_clip;
+ CLCopy _copy_cell_state;
+ CLCopy _copy_output;
+ CLConcatenateLayer _concat_scratch_buffer;
+ CLConcatenateLayer _concat_inputs_forget_gate;
+ CLConcatenateLayer _concat_weights_forget_gate;
+ CLConcatenateLayer _concat_weights_input_gate;
+ CLConcatenateLayer _concat_weights_output;
+ CLFill _ones_fill;
+ CLMeanStdDevNormalizationLayer _mean_std_norm_input_gate;
+ CLPixelWiseMultiplication _pixelwise_mul_input_gate_coeff;
+ CLArithmeticAddition _accum_input_gate_bias;
+ CLMeanStdDevNormalizationLayer _mean_std_norm_forget_gate;
+ CLPixelWiseMultiplication _pixelwise_mul_forget_gate_coeff;
+ CLArithmeticAddition _accum_forget_gate_bias;
+ CLMeanStdDevNormalizationLayer _mean_std_norm_cell_gate;
+ CLPixelWiseMultiplication _pixelwise_mul_cell_gate_coeff;
+ CLArithmeticAddition _accum_cell_gate_bias;
+ CLMeanStdDevNormalizationLayer _mean_std_norm_output_gate;
+ CLPixelWiseMultiplication _pixelwise_mul_output_gate_coeff;
+ CLArithmeticAddition _accum_output_gate_bias;
+ CLTensor _input_gate_out1;
+ CLTensor _input_gate_out2;
+ CLTensor _input_gate_out3;
+ CLTensor _input_gate_out4;
+ CLTensor _forget_gate_out1;
+ CLTensor _forget_gate_out2;
+ CLTensor _forget_gate_out3;
+ CLTensor _forget_gate_out4;
+ CLTensor _forget_gate_out5;
+ CLTensor _forget_gate_out6;
+ CLTensor _cell_state_out1;
+ CLTensor _cell_state_out2;
+ CLTensor _cell_state_out3;
+ CLTensor _cell_state_out4;
+ CLTensor _cell_state_out5;
+ CLTensor _output1;
+ CLTensor _output2;
+ CLTensor _output3;
+ CLTensor _output4;
+ CLTensor _cell_state_activation;
+ CLTensor _output_state1;
+ CLTensor _ones;
+ CLTensor _input_layer_norm_out1;
+ CLTensor _input_layer_norm_out2;
+ CLTensor _forget_layer_norm_out1;
+ CLTensor _forget_layer_norm_out2;
+ CLTensor _cell_layer_norm_out1;
+ CLTensor _cell_layer_norm_out2;
+ CLTensor _output_layer_norm_out1;
+ CLTensor _output_layer_norm_out2;
+ bool _run_peephole_opt;
+ bool _run_cifg_opt;
+ bool _perform_cell_clipping;
+ bool _has_projection_weights;
+ bool _perform_projection_clipping;
+ bool _is_prepared;
+ bool _is_layer_norm_lstm;
+ const ICLTensor *_recurrent_to_cell_weights{ nullptr };
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_CLLSTMLAYER_H */
diff --git a/arm_compute/runtime/CL/functions/CLPermute.h b/arm_compute/runtime/CL/functions/CLPermute.h
index bcd9566fbf..e59cca2a67 100644
--- a/arm_compute/runtime/CL/functions/CLPermute.h
+++ b/arm_compute/runtime/CL/functions/CLPermute.h
@@ -46,11 +46,11 @@ public:
/** Prevent instances of this class from being copied (As this class contains pointers) */
CLPermute(const CLPermute &) = delete;
/** Default move constructor */
- CLPermute(CLPermute &&);
+ CLPermute(CLPermute &&) = default;
/** Prevent instances of this class from being copied (As this class contains pointers) */
CLPermute &operator=(const CLPermute &) = delete;
/** Default move assignment operator */
- CLPermute &operator=(CLPermute &&);
+ CLPermute &operator=(CLPermute &&) = default;
/** Set the input and output tensors.
*
* @note Arbitrary permutation vectors are supported with rank not greater than 4
diff --git a/arm_compute/runtime/CL/functions/CLTranspose.h b/arm_compute/runtime/CL/functions/CLTranspose.h
index 2b7a03f23f..43cebeba90 100644
--- a/arm_compute/runtime/CL/functions/CLTranspose.h
+++ b/arm_compute/runtime/CL/functions/CLTranspose.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -25,7 +25,9 @@
#define ARM_COMPUTE_CLTRANSPOSE_H
#include "arm_compute/core/Error.h"
-#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
+#include "arm_compute/runtime/IFunction.h"
+
+#include <memory>
namespace arm_compute
{
@@ -33,14 +35,22 @@ class CLCompileContext;
class ICLTensor;
class ITensorInfo;
-/** Basic function to transpose a matrix on OpenCL. This function calls the following OpenCL kernel:
- *
- * -# @ref CLTransposeKernel
- *
- */
-class CLTranspose : public ICLSimpleFunction
+/** Basic function to execute an @ref opencl::kernels::ClTransposeKernel. */
+class CLTranspose : public IFunction
{
public:
+ /** Constructor */
+ CLTranspose();
+ /** Destructor */
+ ~CLTranspose();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLTranspose(const CLTranspose &) = delete;
+ /** Default move constructor */
+ CLTranspose(CLTranspose &&) = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLTranspose &operator=(const CLTranspose &) = delete;
+ /** Default move assignment operator */
+ CLTranspose &operator=(CLTranspose &&) = default;
/** Initialise the kernel's inputs and output
*
* @param[in] input Input tensor. Data types supported: All.
@@ -62,6 +72,13 @@ public:
* @return a status
*/
static Status validate(const ITensorInfo *input, const ITensorInfo *output);
+
+ // Inherited methods overridden:
+ void run() override;
+
+private:
+ struct Impl;
+ std::unique_ptr<Impl> _impl;
};
}