about | summary | refs | log | tree | commit | diff
path: root/arm_compute
diff options
context:
space:
mode:
Diffstat (limited to 'arm_compute')
-rw-r--r-- arm_compute/runtime/CL/functions/CLDeconvolutionLayerUpsample.h | 13
-rw-r--r-- arm_compute/runtime/CL/functions/CLDirectDeconvolutionLayer.h | 8
-rw-r--r-- arm_compute/runtime/CL/functions/CLLSTMLayer.h | 2
3 files changed, 16 insertions, 7 deletions
diff --git a/arm_compute/runtime/CL/functions/CLDeconvolutionLayerUpsample.h b/arm_compute/runtime/CL/functions/CLDeconvolutionLayerUpsample.h
index d2f8a78f87..3751178703 100644
--- a/arm_compute/runtime/CL/functions/CLDeconvolutionLayerUpsample.h
+++ b/arm_compute/runtime/CL/functions/CLDeconvolutionLayerUpsample.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -27,6 +27,7 @@
#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.h"
+#include "arm_compute/core/CL/kernels/CLMemsetKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLMemoryGroup.h"
#include "arm_compute/runtime/IFunction.h"
@@ -34,9 +35,14 @@
namespace arm_compute
{
+// Forward declarations
class ICLTensor;
-/** Basic function to run @ref CLDeconvolutionLayerUpsampleKernel */
+/** Basic function to execute deconvolution upsample on OpenCL. This function calls the following OpenCL kernels and functions:
+ *
+ * -# @ref CLMemsetKernel
+ * -# @ref CLDeconvolutionLayerUpsampleKernel
+ */
class CLDeconvolutionLayerUpsample : public IFunction
{
public:
@@ -79,7 +85,8 @@ public:
private:
CLDeconvolutionLayerUpsampleKernel _upsample;
+ CLMemsetKernel _memset;
ICLTensor *_output;
};
-}
+} // namespace arm_compute
#endif /* __ARM_COMPUTE_CLDECONVOLUTIONLAYERUPSAMPLE_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLDirectDeconvolutionLayer.h b/arm_compute/runtime/CL/functions/CLDirectDeconvolutionLayer.h
index 936263d635..b9a435abb2 100644
--- a/arm_compute/runtime/CL/functions/CLDirectDeconvolutionLayer.h
+++ b/arm_compute/runtime/CL/functions/CLDirectDeconvolutionLayer.h
@@ -26,10 +26,9 @@
#include "arm_compute/runtime/CL/functions/CLConvolutionLayer.h"
#include "arm_compute/runtime/CL/functions/CLDeconvolutionLayerUpsample.h"
+#include "arm_compute/runtime/CL/functions/CLReverse.h"
#include "arm_compute/runtime/CL/functions/CLTranspose.h"
-#include "arm_compute/core/CPP/kernels/CPPFlipWeightsKernel.h"
-
#include "arm_compute/runtime/CL/CLMemoryGroup.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/IFunction.h"
@@ -70,7 +69,7 @@ class ICLTensor;
* -# @ref CLConvolutionLayer
*
* And the following CPP kernels:
- * -# @ref CPPFlipWeightsKernel
+ * -# @ref CLReverse
*
*/
class CLDirectDeconvolutionLayer : public IFunction
@@ -119,11 +118,12 @@ private:
CLMemoryGroup _memory_group;
CLDeconvolutionLayerUpsample _scale_f;
CLConvolutionLayer _conv_f;
- CPPFlipWeightsKernel _flip_weights;
+ CLReverse _flip_weights;
CLTensor _scaled_output;
ICLTensor *_original_weights;
CLTensor _weights_flipped;
+ CLTensor _flip_axis;
bool _is_prepared;
};
diff --git a/arm_compute/runtime/CL/functions/CLLSTMLayer.h b/arm_compute/runtime/CL/functions/CLLSTMLayer.h
index a804a4af5b..8bd47cbf8e 100644
--- a/arm_compute/runtime/CL/functions/CLLSTMLayer.h
+++ b/arm_compute/runtime/CL/functions/CLLSTMLayer.h
@@ -29,6 +29,7 @@
#include "arm_compute/core/CL/kernels/CLActivationLayerKernel.h"
#include "arm_compute/core/CL/kernels/CLCopyKernel.h"
#include "arm_compute/core/CL/kernels/CLElementwiseOperationKernel.h"
+#include "arm_compute/core/CL/kernels/CLMemsetKernel.h"
#include "arm_compute/core/CL/kernels/CLPixelWiseMultiplicationKernel.h"
#include "arm_compute/core/CL/kernels/CLWidthConcatenate2TensorsKernel.h"
#include "arm_compute/core/Types.h"
@@ -188,6 +189,7 @@ private:
CLWidthConcatenate2TensorsKernel _concat_weights_forget_gate;
CLWidthConcatenate2TensorsKernel _concat_weights_input_gate;
CLWidthConcatenate2TensorsKernel _concat_weights_output;
+ CLMemsetKernel _ones_memset_kernel;
CLTensor _input_gate_out1;
CLTensor _input_gate_out2;
CLTensor _input_gate_out3;