From dbfc2dc182f90af5cad6fc283fff817ac7258a19 Mon Sep 17 00:00:00 2001 From: Georgios Pinitas Date: Tue, 2 Apr 2019 12:51:21 +0100 Subject: COMPMID-2069: Rework CL ML layers to run exclusively on CL. Change-Id: If6cbf7a2e013d264e5d7f7cb54143ce32ba2687b Signed-off-by: Georgios Pinitas Reviewed-on: https://review.mlplatform.org/c/934 Comments-Addressed: Arm Jenkins Reviewed-by: Isabella Gottardi Reviewed-by: Gian Marco Iodice Tested-by: Arm Jenkins --- .../runtime/CL/functions/CLDeconvolutionLayerUpsample.h | 13 ++++++++++--- .../runtime/CL/functions/CLDirectDeconvolutionLayer.h | 8 ++++---- arm_compute/runtime/CL/functions/CLLSTMLayer.h | 2 ++ 3 files changed, 16 insertions(+), 7 deletions(-) (limited to 'arm_compute') diff --git a/arm_compute/runtime/CL/functions/CLDeconvolutionLayerUpsample.h b/arm_compute/runtime/CL/functions/CLDeconvolutionLayerUpsample.h index d2f8a78f87..3751178703 100644 --- a/arm_compute/runtime/CL/functions/CLDeconvolutionLayerUpsample.h +++ b/arm_compute/runtime/CL/functions/CLDeconvolutionLayerUpsample.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018 ARM Limited. + * Copyright (c) 2017-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -27,6 +27,7 @@ #include "arm_compute/runtime/IFunction.h" #include "arm_compute/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.h" +#include "arm_compute/core/CL/kernels/CLMemsetKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/CL/CLMemoryGroup.h" #include "arm_compute/runtime/IFunction.h" @@ -34,9 +35,14 @@ namespace arm_compute { +// Forward declarations class ICLTensor; -/** Basic function to run @ref CLDeconvolutionLayerUpsampleKernel */ +/** Basic function to execute deconvolution upsample on OpenCL. This function calls the following OpenCL kernels and functions: + * + * -# @ref CLMemsetKernel + * -# @ref CLDeconvolutionLayerUpsampleKernel + */ class CLDeconvolutionLayerUpsample : public IFunction { public: @@ -79,7 +85,8 @@ public: private: CLDeconvolutionLayerUpsampleKernel _upsample; + CLMemsetKernel _memset; ICLTensor *_output; }; -} +} // namespace arm_compute #endif /* __ARM_COMPUTE_CLDECONVOLUTIONLAYERUPSAMPLE_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLDirectDeconvolutionLayer.h b/arm_compute/runtime/CL/functions/CLDirectDeconvolutionLayer.h index 936263d635..b9a435abb2 100644 --- a/arm_compute/runtime/CL/functions/CLDirectDeconvolutionLayer.h +++ b/arm_compute/runtime/CL/functions/CLDirectDeconvolutionLayer.h @@ -26,10 +26,9 @@ #include "arm_compute/runtime/CL/functions/CLConvolutionLayer.h" #include "arm_compute/runtime/CL/functions/CLDeconvolutionLayerUpsample.h" +#include "arm_compute/runtime/CL/functions/CLReverse.h" #include "arm_compute/runtime/CL/functions/CLTranspose.h" -#include "arm_compute/core/CPP/kernels/CPPFlipWeightsKernel.h" - #include "arm_compute/runtime/CL/CLMemoryGroup.h" #include "arm_compute/runtime/CL/CLTensor.h" #include "arm_compute/runtime/IFunction.h" @@ -70,7 +69,7 @@ class ICLTensor; * -# @ref CLConvolutionLayer * * And the following CPP kernels: - * -# @ref CPPFlipWeightsKernel + * -# @ref CLReverse * */ class CLDirectDeconvolutionLayer : public IFunction @@ -119,11 +118,12 @@ private: CLMemoryGroup _memory_group; CLDeconvolutionLayerUpsample _scale_f; CLConvolutionLayer _conv_f; - CPPFlipWeightsKernel _flip_weights; + CLReverse _flip_weights; CLTensor _scaled_output; ICLTensor *_original_weights; CLTensor _weights_flipped; + CLTensor _flip_axis; bool _is_prepared; }; diff --git a/arm_compute/runtime/CL/functions/CLLSTMLayer.h b/arm_compute/runtime/CL/functions/CLLSTMLayer.h index a804a4af5b..8bd47cbf8e 100644 --- a/arm_compute/runtime/CL/functions/CLLSTMLayer.h +++ b/arm_compute/runtime/CL/functions/CLLSTMLayer.h @@ -29,6 +29,7 @@ #include "arm_compute/core/CL/kernels/CLActivationLayerKernel.h" #include "arm_compute/core/CL/kernels/CLCopyKernel.h" #include "arm_compute/core/CL/kernels/CLElementwiseOperationKernel.h" +#include "arm_compute/core/CL/kernels/CLMemsetKernel.h" #include "arm_compute/core/CL/kernels/CLPixelWiseMultiplicationKernel.h" #include "arm_compute/core/CL/kernels/CLWidthConcatenate2TensorsKernel.h" #include "arm_compute/core/Types.h" @@ -188,6 +189,7 @@ private: CLWidthConcatenate2TensorsKernel _concat_weights_forget_gate; CLWidthConcatenate2TensorsKernel _concat_weights_input_gate; CLWidthConcatenate2TensorsKernel _concat_weights_output; + CLMemsetKernel _ones_memset_kernel; CLTensor _input_gate_out1; CLTensor _input_gate_out2; CLTensor _input_gate_out3; -- cgit v1.2.1