aboutsummaryrefslogtreecommitdiff
path: root/arm_compute
diff options
context:
space:
mode:
author Georgios Pinitas <georgios.pinitas@arm.com> 2019-04-02 12:51:21 +0100
committer Giuseppe Rossini <giuseppe.rossini@arm.com> 2019-04-02 16:23:17 +0000
commit dbfc2dc182f90af5cad6fc283fff817ac7258a19 (patch)
tree 5bf598dc0ddd76f60ce95da369e69300f3300670 /arm_compute
parent 881c6842eadf2d2fd4578b9f62ee6238a83cad65 (diff)
download ComputeLibrary-dbfc2dc182f90af5cad6fc283fff817ac7258a19.tar.gz
COMPMID-2069: Rework CL ML layers to run exclusively on CL.
Change-Id: If6cbf7a2e013d264e5d7f7cb54143ce32ba2687b
Signed-off-by: Georgios Pinitas <georgios.pinitas@arm.com>
Reviewed-on: https://review.mlplatform.org/c/934
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Isabella Gottardi <isabella.gottardi@arm.com>
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'arm_compute')
-rw-r--r-- arm_compute/runtime/CL/functions/CLDeconvolutionLayerUpsample.h 13
-rw-r--r-- arm_compute/runtime/CL/functions/CLDirectDeconvolutionLayer.h 8
-rw-r--r-- arm_compute/runtime/CL/functions/CLLSTMLayer.h 2
3 files changed, 16 insertions, 7 deletions
diff --git a/arm_compute/runtime/CL/functions/CLDeconvolutionLayerUpsample.h b/arm_compute/runtime/CL/functions/CLDeconvolutionLayerUpsample.h
index d2f8a78f87..3751178703 100644
--- a/arm_compute/runtime/CL/functions/CLDeconvolutionLayerUpsample.h
+++ b/arm_compute/runtime/CL/functions/CLDeconvolutionLayerUpsample.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -27,6 +27,7 @@
#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.h"
+#include "arm_compute/core/CL/kernels/CLMemsetKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLMemoryGroup.h"
#include "arm_compute/runtime/IFunction.h"
@@ -34,9 +35,14 @@
namespace arm_compute
{
+// Forward declarations
class ICLTensor;
-/** Basic function to run @ref CLDeconvolutionLayerUpsampleKernel */
+/** Basic function to execute deconvolution upsample on OpenCL. This function calls the following OpenCL kernels and functions:
+ *
+ * -# @ref CLMemsetKernel
+ * -# @ref CLDeconvolutionLayerUpsampleKernel
+ */
class CLDeconvolutionLayerUpsample : public IFunction
{
public:
@@ -79,7 +85,8 @@ public:
private:
CLDeconvolutionLayerUpsampleKernel _upsample;
+ CLMemsetKernel _memset;
ICLTensor *_output;
};
-}
+} // namespace arm_compute
#endif /* __ARM_COMPUTE_CLDECONVOLUTIONLAYERUPSAMPLE_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLDirectDeconvolutionLayer.h b/arm_compute/runtime/CL/functions/CLDirectDeconvolutionLayer.h
index 936263d635..b9a435abb2 100644
--- a/arm_compute/runtime/CL/functions/CLDirectDeconvolutionLayer.h
+++ b/arm_compute/runtime/CL/functions/CLDirectDeconvolutionLayer.h
@@ -26,10 +26,9 @@
#include "arm_compute/runtime/CL/functions/CLConvolutionLayer.h"
#include "arm_compute/runtime/CL/functions/CLDeconvolutionLayerUpsample.h"
+#include "arm_compute/runtime/CL/functions/CLReverse.h"
#include "arm_compute/runtime/CL/functions/CLTranspose.h"
-#include "arm_compute/core/CPP/kernels/CPPFlipWeightsKernel.h"
-
#include "arm_compute/runtime/CL/CLMemoryGroup.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/IFunction.h"
@@ -70,7 +69,7 @@ class ICLTensor;
* -# @ref CLConvolutionLayer
*
* And the following CPP kernels:
- * -# @ref CPPFlipWeightsKernel
+ * -# @ref CLReverse
*
*/
class CLDirectDeconvolutionLayer : public IFunction
@@ -119,11 +118,12 @@ private:
CLMemoryGroup _memory_group;
CLDeconvolutionLayerUpsample _scale_f;
CLConvolutionLayer _conv_f;
- CPPFlipWeightsKernel _flip_weights;
+ CLReverse _flip_weights;
CLTensor _scaled_output;
ICLTensor *_original_weights;
CLTensor _weights_flipped;
+ CLTensor _flip_axis;
bool _is_prepared;
};
diff --git a/arm_compute/runtime/CL/functions/CLLSTMLayer.h b/arm_compute/runtime/CL/functions/CLLSTMLayer.h
index a804a4af5b..8bd47cbf8e 100644
--- a/arm_compute/runtime/CL/functions/CLLSTMLayer.h
+++ b/arm_compute/runtime/CL/functions/CLLSTMLayer.h
@@ -29,6 +29,7 @@
#include "arm_compute/core/CL/kernels/CLActivationLayerKernel.h"
#include "arm_compute/core/CL/kernels/CLCopyKernel.h"
#include "arm_compute/core/CL/kernels/CLElementwiseOperationKernel.h"
+#include "arm_compute/core/CL/kernels/CLMemsetKernel.h"
#include "arm_compute/core/CL/kernels/CLPixelWiseMultiplicationKernel.h"
#include "arm_compute/core/CL/kernels/CLWidthConcatenate2TensorsKernel.h"
#include "arm_compute/core/Types.h"
@@ -188,6 +189,7 @@ private:
CLWidthConcatenate2TensorsKernel _concat_weights_forget_gate;
CLWidthConcatenate2TensorsKernel _concat_weights_input_gate;
CLWidthConcatenate2TensorsKernel _concat_weights_output;
+ CLMemsetKernel _ones_memset_kernel;
CLTensor _input_gate_out1;
CLTensor _input_gate_out2;
CLTensor _input_gate_out3;