From 856f66e6c61b77d03f754cd0fa8439891f0e4aca Mon Sep 17 00:00:00 2001
From: Georgios Pinitas
Date: Thu, 22 Apr 2021 21:13:21 +0100
Subject: Port CLGEMM to memory injecting interface

Moves the following kernels:
- CLGEMMMatrixMultiplyKernel
- CLGEMMMatrixMultiplyNativeKernel
- CLGEMMMatrixMultiplyReshapedKernel
- CLGEMMMatrixMultiplyReshapedOnlyRHSKernel

Moves the following functions:
- CLGEMM

Introduces facilities for easy handling of auxiliary temporary buffers
under the new run interface. These are:
- CLAuxTensorHandler: allows wrapping workspace buffer memory in CLBuffer objects
- Ability to inject a TensorInfo into an allocator without transferring
  ownership, reducing copy overhead where needed.

Resolves: COMPMID-4188
Signed-off-by: Georgios Pinitas
Change-Id: I7055435d831b05b749b26302082e4ac45f26dfb0
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5498
Tested-by: Arm Jenkins
Reviewed-by: Michalis Spyrou
Comments-Addressed: Arm Jenkins
---
 .../runtime/CL/functions/CLFullyConnectedLayer.h |   2 +-
 arm_compute/runtime/CL/functions/CLGEMM.h        | 145 +++------------------
 .../CL/functions/CLGEMMLowpMatrixMultiplyCore.h  |  10 +-
 arm_compute/runtime/CL/functions/CLLogicalAnd.h  |  62 ++++-----
 arm_compute/runtime/CL/functions/CLLogicalOr.h   |  62 ++++-----
 arm_compute/runtime/CL/functions/CLSlice.h       |  78 +++++------
 6 files changed, 128 insertions(+), 231 deletions(-)

(limited to 'arm_compute/runtime/CL/functions')

diff --git a/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h b/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h
index eec01bcebe..075c5d1f45 100644
--- a/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h
+++ b/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h
@@ -98,7 +98,7 @@ private:
  *
  * -# @ref CLIm2ColKernel (called when the input comes from a convolutional layer)
  * -# @ref CLTranspose (if @p are_weights_reshaped is set to false and transpose_weights is set to true ) (called once)
- * -# @ref CLGEMMMatrixMultiplyKernel or @ref CLGEMMLowpMatrixMultiplyCore (if quantized asymmetric)
+ * -# @ref opencl::kernels::ClGemmMatrixMultiplyKernel or @ref CLGEMMLowpMatrixMultiplyCore (if quantized asymmetric)
  *
  * @note The fully connected layer accepts "weights" tensors only with 2 dimensions.
*/ diff --git a/arm_compute/runtime/CL/functions/CLGEMM.h b/arm_compute/runtime/CL/functions/CLGEMM.h index 1e2ae7be64..38a07ef9fb 100644 --- a/arm_compute/runtime/CL/functions/CLGEMM.h +++ b/arm_compute/runtime/CL/functions/CLGEMM.h @@ -35,76 +35,12 @@ namespace arm_compute { +// Forward declarations class CLCompileContext; -class CLGEMMReshapeRHSMatrixKernel; -class CLGEMMMatrixMultiplyKernel; -class CLGEMMMatrixMultiplyReshapedKernel; -class CLGEMMMatrixMultiplyReshapedOnlyRHSKernel; -class CLGEMMReshapeLHSMatrixKernel; class ICLTensor; class ITensorInfo; -namespace weights_transformations -{ -/** Basic function to manage the reshape weights generated from @ref CLGEMMReshapeRHSMatrixKernel */ -class CLGEMMReshapeRHSMatrixKernelManaged : public ITransformWeights -{ -public: - /** Default constructor */ - CLGEMMReshapeRHSMatrixKernelManaged(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLGEMMReshapeRHSMatrixKernelManaged(const CLGEMMReshapeRHSMatrixKernelManaged &) = delete; - /** Default move constructor */ - CLGEMMReshapeRHSMatrixKernelManaged(CLGEMMReshapeRHSMatrixKernelManaged &&) = default; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLGEMMReshapeRHSMatrixKernelManaged &operator=(const CLGEMMReshapeRHSMatrixKernelManaged &) = delete; - /** Default move assignment operator */ - CLGEMMReshapeRHSMatrixKernelManaged &operator=(CLGEMMReshapeRHSMatrixKernelManaged &&) = default; - /** Default desctructor */ - ~CLGEMMReshapeRHSMatrixKernelManaged(); - //Inherited method override - void run() override; - - //Inherited method override - void release() override; - - //Inherited method override - ICLTensor *get_weights() override; - - //Inherited method override - uint32_t uid() override; - - /** Configures the @ref CLGEMMReshapeRHSMatrixKernel kernel - * - * @param[in] input Input tensor. Data types supported: All - * @param[in] info RHS matrix information to be used for reshaping. - */ - void configure(const ICLTensor *input, GEMMRHSMatrixInfo info); - - /** Configures the @ref CLGEMMReshapeRHSMatrixKernel kernel - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Input tensor. Data types supported: All - * @param[in] info RHS matrix information to be used for reshaping. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, GEMMRHSMatrixInfo info); - -private: - static constexpr uint32_t _uid{ 0x15 }; - CLTensor _output{}; - std::unique_ptr _kernel; -}; -} // namespace weights_transformations - -/** Basic function to execute GEMM on OpenCL. This function calls the following OpenCL kernels: - * - * -# @ref CLGEMMReshapeLHSMatrixKernel (only if the RESHAPED_V1 is selected by the heuristic model) - * -# @ref CLGEMMReshapeRHSMatrixKernel (only if either the RESHAPED_V1 or RESHAPED_ONLY_RHS is selected by the select_gemm_kernel method()) - * -# @ref CLGEMMMatrixMultiplyKernel (only if either the NATIVE or RESHAPED_V1 is selected by the select_gemm_kernel method()) - * -# @ref CLGEMMMatrixMultiplyReshapedKernel (only if RESHAPED_V1 is selected by the select_gemm_kernel method()) - * -# @ref CLGEMMMatrixMultiplyReshapedOnlyRHSKernel (only if RESHAPED_ONLY_RHS is selected by the select_gemm_kernel method()) - * - */ +/** Basic function to execute GEMM on OpenCL */ class CLGEMM : public IFunction { public: @@ -114,16 +50,16 @@ public: * @param[in] weights_manager (Optional) Weights manager. 
*/ CLGEMM(std::shared_ptr memory_manager = nullptr, IWeightsManager *weights_manager = nullptr); + /** Default destructor */ + ~CLGEMM(); /** Prevent instances of this class from being copied (As this class contains pointers) */ CLGEMM(const CLGEMM &) = delete; /** Default move constructor */ - CLGEMM(CLGEMM &&) = default; + CLGEMM(CLGEMM &&); /** Prevent instances of this class from being copied (As this class contains pointers) */ CLGEMM &operator=(const CLGEMM &) = delete; /** Default move assignment operator */ - CLGEMM &operator=(CLGEMM &&) = default; - /** Default destructor */ - ~CLGEMM(); + CLGEMM &operator=(CLGEMM &&); /** Initialise the kernel's inputs and output * * Valid data layouts: @@ -134,25 +70,6 @@ public: * |:------------|:-----------|:---------|:--------------| * |F32 |F32 |F32 |F32 | * |F16 |F16 |F16 |F16 | - * - * @note GEMM: General Matrix Multiply - [alpha * A * B + beta * C]. - * - * @note All tensors must have the same data type. - * - * @note Whilst the first input tensor can be a vector, the second input tensor must be at least a matrix - * - * @param[in] a First input tensor (Matrix or Vector A). Data types supported: F16/F32 - * @param[in] b Second input tensor (Matrix B). Data type supported: same as @p a. - * @param[in] c Third input tensor (Matrix C). It can be a nullptr if just the multiplication between @p a and @p b is needed. Data type supported: same as @p a. - * @param[out] output Output tensor. Data type supported: same as @p a - * @param[in] alpha Weight of the matrix product - * @param[in] beta Weight of matrix C - * @param[in] gemm_info (Optional) Specifies if the matrix A and/or matrix B have been reshaped and - * if the reshape of matrix B should happen only for the first run. GEMMInfo also contains information about the reshaping - * in case matrix A and matrix B have been already transformed. - */ - void configure(const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output, float alpha, float beta, const GEMMInfo &gemm_info = GEMMInfo()); - /** Initialise the kernel's inputs and output * * @note GEMM: General Matrix Multiply - [alpha * A * B + beta * C]. * @@ -168,20 +85,20 @@ public: * @param[in] alpha Weight of the matrix product * @param[in] beta Weight of matrix C * @param[in] gemm_info (Optional) Specifies if the matrix A and/or matrix B have been reshaped and - * if the reshape of matrix B should happen only for the first run. GEMMInfo also contains information about the reshaping - * in case matrix A and matrix B have been already transformed. + * if the reshape of matrix B should happen only for the first run. GEMMInfo also contains information about the reshaping + * in case matrix A and matrix B have been already transformed. */ void configure(const CLCompileContext &compile_context, const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output, float alpha, float beta, const GEMMInfo &gemm_info = GEMMInfo()); + + /** Initialise the kernel's inputs and output + * + * Similar to @ref CLGEMM::configure() + */ + void configure(const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output, float alpha, float beta, const GEMMInfo &gemm_info = GEMMInfo()); + /** Static function to check if given info will lead to a valid configuration of @ref CLGEMM. * - * @param[in] a First input tensor info (Matrix or Vector A). Data types supported: F16/F32 - * @param[in] b Second input tensor info (Matrix B). Data type supported: same as @p a. - * @param[in] c Third input tensor info (Matrix C). 
It can be a nullptr if just the multiplication between @p a and @p b is needed. Data type supported: same as @p a. - * @param[in] output Output tensor info. Data type supported: same as @p a - * @param[in] alpha Weight of the matrix product - * @param[in] beta Weight of matrix C - * @param[in] gemm_info (Optional) Specifies if the matrix A and/or matrix B have been reshaped and - * if the reshape of matrix B should happen only for the first run + * Similar to @ref CLGEMM::configure() * * @return a status */ @@ -192,34 +109,8 @@ public: void prepare() override; private: - void configure_native_v1(const CLCompileContext &compile_context, const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output, float alpha, float beta, const GEMMInfo &gemm_info); - void configure_reshaped_v1(const CLCompileContext &compile_context, const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output, float alpha, float beta, const GEMMInfo &gemm_info); - void configure_reshaped_v2(const CLCompileContext &compile_context, const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output, float alpha, float beta, const GEMMInfo &gemm_info); - void configure_reshaped_only_rhs(const CLCompileContext &compile_context, const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output, float alpha, float beta, - const GEMMInfo &gemm_info); - - static Status validate_native_v1(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info); - static Status validate_reshaped_v1(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info); - static Status validate_reshaped(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info); - static Status validate_reshaped_only_rhs(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info); - - MemoryGroup _memory_group; - IWeightsManager *_weights_manager; - std::unique_ptr _mm_kernel; - std::unique_ptr _reshape_lhs_kernel; - std::unique_ptr _reshape_rhs_kernel; - std::unique_ptr _reshape_rhs_kernel_managed; - std::unique_ptr _mm_reshaped_kernel; - std::unique_ptr _mm_reshaped_only_rhs_kernel; - std::unique_ptr _mm_reshaped_only_rhs_fallback_kernel; - CLTensor _tmp_a; - CLTensor _tmp_b; - const ICLTensor *_original_b; - const ICLTensor *_lhs; - ICLTensor *_dst; - bool _reshape_b_only_on_first_run; - bool _is_prepared; - CLGEMMKernelType _gemm_kernel_type; + struct Impl; + std::unique_ptr _impl; }; } // namespace arm_compute diff --git a/arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h b/arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h index e7f4cb9d01..e5de45c34f 100644 --- a/arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h +++ b/arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h @@ -41,7 +41,13 @@ class CLGEMMLowpOffsetContributionKernel; class CLGEMMLowpOffsetContributionOutputStageKernel; class CLGEMMLowpMatrixAReductionKernel; class CLGEMMLowpMatrixBReductionKernel; -class CLGEMMReshapeRHSMatrixKernel; +namespace opencl +{ +namespace kernels +{ +class ClGemmReshapeRhsMatrixKernel; +} // namespace kernels +} // namespace opencl /** Basic function to execute GEMMLowpMatrixMultiplyCore on OpenCL. 
*/ class CLGEMMLowpMatrixMultiplyCore : public IFunction @@ -140,7 +146,7 @@ private: std::unique_ptr _weights_to_qasymm8; std::unique_ptr _mm_native_kernel; std::unique_ptr _mm_reshaped_only_rhs_kernel; - std::unique_ptr _mtx_b_reshape_kernel; + std::unique_ptr _mtx_b_reshape_kernel; std::unique_ptr _mtx_a_reduction_kernel; std::unique_ptr _mtx_b_reduction_kernel; std::unique_ptr _offset_contribution_kernel; diff --git a/arm_compute/runtime/CL/functions/CLLogicalAnd.h b/arm_compute/runtime/CL/functions/CLLogicalAnd.h index 61a15816eb..e3061e1dc3 100644 --- a/arm_compute/runtime/CL/functions/CLLogicalAnd.h +++ b/arm_compute/runtime/CL/functions/CLLogicalAnd.h @@ -34,37 +34,6 @@ class CLCompileContext; class ICLTensor; class ITensorInfo; -namespace experimental -{ -class CLLogicalAnd : public ICLOperator -{ -public: - /** Default Constructor */ - CLLogicalAnd() = default; - /** Initialise the kernel's inputs, output and conversion policy. - * - * @param[in] compile_context The compile context to be used. - * @param[in, out] input1 First tensor input. Data types supported: U8. - * The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0. - * @param[in, out] input2 Second tensor input. Data types supported: same as @p input1. - * The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0. - * @param[out] output Output tensor. Data types supported: same as @p input1. - */ - void configure(const CLCompileContext &compile_context, ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output); - /** Static function to check if given info will lead to a valid configuration of @ref arm_compute::opencl::kernels::ClLogicalBinaryKernel - * - * @param[in] input1 First tensor input info. Data types supported: U8. - * @param[in] input2 Second tensor input info. Data types supported: same as @p input1. - * @param[in] output Output tensor info. Data types supported: same as @p input1. - * - * @return a status - */ - static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output); - // Inherited methods overridden: - void run(ITensorPack &tensors) override; -}; -} // namespace experimental - /** Basic function to run @ref arm_compute::opencl::kernels::ClLogicalBinaryKernel. * * @note The tensor data type for the inputs must be U8. @@ -125,5 +94,36 @@ private: struct Impl; std::unique_ptr _impl; }; + +namespace experimental +{ +class CLLogicalAnd : public ICLOperator +{ +public: + /** Default Constructor */ + CLLogicalAnd() = default; + /** Initialise the kernel's inputs, output and conversion policy. + * + * @param[in] compile_context The compile context to be used. + * @param[in, out] input1 First tensor input. Data types supported: U8. + * The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0. + * @param[in, out] input2 Second tensor input. Data types supported: same as @p input1. + * The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0. + * @param[out] output Output tensor. Data types supported: same as @p input1. 
+ */ + void configure(const CLCompileContext &compile_context, ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output); + /** Static function to check if given info will lead to a valid configuration of @ref arm_compute::opencl::kernels::ClLogicalBinaryKernel + * + * @param[in] input1 First tensor input info. Data types supported: U8. + * @param[in] input2 Second tensor input info. Data types supported: same as @p input1. + * @param[in] output Output tensor info. Data types supported: same as @p input1. + * + * @return a status + */ + static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output); + // Inherited methods overridden: + void run(ITensorPack &tensors) override; +}; +} // namespace experimental } // namespace arm_compute #endif /* ARM_COMPUTE_CLLOGICALAND_H */ diff --git a/arm_compute/runtime/CL/functions/CLLogicalOr.h b/arm_compute/runtime/CL/functions/CLLogicalOr.h index b9ffb4a449..893c22f721 100644 --- a/arm_compute/runtime/CL/functions/CLLogicalOr.h +++ b/arm_compute/runtime/CL/functions/CLLogicalOr.h @@ -34,37 +34,6 @@ class CLCompileContext; class ICLTensor; class ITensorInfo; -namespace experimental -{ -class CLLogicalOr : public ICLOperator -{ -public: - /** Default Constructor */ - CLLogicalOr() = default; - /** Initialise the kernel's inputs, output and conversion policy. - * - * @param[in] compile_context The compile context to be used. - * @param[in, out] input1 First tensor input. Data types supported: U8. - * The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0. - * @param[in, out] input2 Second tensor input. Data types supported: same as @p input1. - * The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0. - * @param[out] output Output tensor. Data types supported: same as @p input1. - */ - void configure(const CLCompileContext &compile_context, ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output); - /** Static function to check if given info will lead to a valid configuration of @ref arm_compute::opencl::kernels::ClLogicalBinaryKernel - * - * @param[in] input1 First tensor input info. Data types supported: U8. - * @param[in] input2 Second tensor input info. Data types supported: same as @p input1. - * @param[in] output Output tensor info. Data types supported: same as @p input1. - * - * @return a status - */ - static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output); - // Inherited methods overridden: - void run(ITensorPack &tensors) override; -}; -} // namespace experimental - /** Basic function to run @ref arm_compute::opencl::kernels::ClLogicalBinaryKernel. * * @note The tensor data type for the inputs must be U8. @@ -125,5 +94,36 @@ private: struct Impl; std::unique_ptr _impl; }; + +namespace experimental +{ +class CLLogicalOr : public ICLOperator +{ +public: + /** Default Constructor */ + CLLogicalOr() = default; + /** Initialise the kernel's inputs, output and conversion policy. + * + * @param[in] compile_context The compile context to be used. + * @param[in, out] input1 First tensor input. Data types supported: U8. + * The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0. + * @param[in, out] input2 Second tensor input. Data types supported: same as @p input1. 
+ * The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0. + * @param[out] output Output tensor. Data types supported: same as @p input1. + */ + void configure(const CLCompileContext &compile_context, ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output); + /** Static function to check if given info will lead to a valid configuration of @ref arm_compute::opencl::kernels::ClLogicalBinaryKernel + * + * @param[in] input1 First tensor input info. Data types supported: U8. + * @param[in] input2 Second tensor input info. Data types supported: same as @p input1. + * @param[in] output Output tensor info. Data types supported: same as @p input1. + * + * @return a status + */ + static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output); + // Inherited methods overridden: + void run(ITensorPack &tensors) override; +}; +} // namespace experimental } // namespace arm_compute #endif /* ARM_COMPUTE_CLLOGICALOR_H */ diff --git a/arm_compute/runtime/CL/functions/CLSlice.h b/arm_compute/runtime/CL/functions/CLSlice.h index 7a7689c528..297bcd86fe 100644 --- a/arm_compute/runtime/CL/functions/CLSlice.h +++ b/arm_compute/runtime/CL/functions/CLSlice.h @@ -34,45 +34,6 @@ class ICLTensor; class CLCompileContext; class ITensorInfo; -namespace experimental -{ -/** Basic function to perform tensor slicing */ -class CLSlice : public ICLOperator -{ -public: - /** Configure kernel - * - * @note Supported tensor rank: up to 4 - * @note Start indices must be non-negative. 0 <= starts[i] - * @note End coordinates can be negative, which represents the number of elements before the end of that dimension. - * @note End indices are not inclusive unless negative. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor info. Data type supported: All. - * @param[out] output Destination tensor info. Data type supported: Same as @p input - * @param[in] starts The starts of the dimensions of the input tensor to be sliced. The length must be of rank(input). - * @param[in] ends The ends of the dimensions of the input tensor to be sliced. The length must be of rank(input). - */ - void configure(const CLCompileContext &compile_context, const ITensorInfo *input, ITensorInfo *output, const Coordinates &starts, const Coordinates &ends); - - /** Static function to check if given info will lead to a valid configuration of @ref CLSlice - * - * @note Supported tensor rank: up to 4 - * @note Start indices must be non-negative. 0 <= starts[i] - * @note End coordinates can be negative, which represents the number of elements before the end of that dimension. - * @note End indices are not inclusive unless negative. - * - * @param[in] input Source tensor info. Data type supported: All - * @param[in] output Destination tensor info. Data type supported: Same as @p input - * @param[in] starts The starts of the dimensions of the input tensor to be sliced. The length must be of rank(input). - * @param[in] ends The ends of the dimensions of the input tensor to be sliced. The length must be of rank(input). 
- * - * @return A status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Coordinates &starts, const Coordinates &ends); -}; -} // namespace experimental - /** Basic function to perform tensor slicing */ class CLSlice : public IFunction { @@ -148,5 +109,44 @@ private: struct Impl; std::unique_ptr _impl; }; + +namespace experimental +{ +/** Basic function to perform tensor slicing */ +class CLSlice : public ICLOperator +{ +public: + /** Configure kernel + * + * @note Supported tensor rank: up to 4 + * @note Start indices must be non-negative. 0 <= starts[i] + * @note End coordinates can be negative, which represents the number of elements before the end of that dimension. + * @note End indices are not inclusive unless negative. + * + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor info. Data type supported: All. + * @param[out] output Destination tensor info. Data type supported: Same as @p input + * @param[in] starts The starts of the dimensions of the input tensor to be sliced. The length must be of rank(input). + * @param[in] ends The ends of the dimensions of the input tensor to be sliced. The length must be of rank(input). + */ + void configure(const CLCompileContext &compile_context, const ITensorInfo *input, ITensorInfo *output, const Coordinates &starts, const Coordinates &ends); + + /** Static function to check if given info will lead to a valid configuration of @ref CLSlice + * + * @note Supported tensor rank: up to 4 + * @note Start indices must be non-negative. 0 <= starts[i] + * @note End coordinates can be negative, which represents the number of elements before the end of that dimension. + * @note End indices are not inclusive unless negative. + * + * @param[in] input Source tensor info. Data type supported: All + * @param[in] output Destination tensor info. Data type supported: Same as @p input + * @param[in] starts The starts of the dimensions of the input tensor to be sliced. The length must be of rank(input). + * @param[in] ends The ends of the dimensions of the input tensor to be sliced. The length must be of rank(input). + * + * @return A status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Coordinates &starts, const Coordinates &ends); +}; +} // namespace experimental } // namespace arm_compute #endif /* ARM_COMPUTE_CL_SLICE_H */ -- cgit v1.2.1
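
For reference, a minimal usage sketch of the function-level CLGEMM API that this patch keeps: configure()/validate()/run() are unchanged for callers, while the internals now forward to the ported operator behind the pimpl (struct Impl). The tensor shapes, alpha/beta values and the default GEMMInfo below are illustrative only, not taken from this patch.

#include "arm_compute/core/Error.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/functions/CLGEMM.h"

using namespace arm_compute;

int main()
{
    CLScheduler::get().default_init(); // create context and queue for the default OpenCL device

    // dst = alpha * A * B + beta * C with example sizes M=64, N=32, K=128 (illustrative)
    const unsigned int M = 64, N = 32, K = 128;
    CLTensor a, b, c, dst;
    a.allocator()->init(TensorInfo(TensorShape(K, M), 1, DataType::F32));
    b.allocator()->init(TensorInfo(TensorShape(N, K), 1, DataType::F32));
    c.allocator()->init(TensorInfo(TensorShape(N, M), 1, DataType::F32));
    dst.allocator()->init(TensorInfo(TensorShape(N, M), 1, DataType::F32));

    // validate() mirrors configure() but works on ITensorInfo
    ARM_COMPUTE_ERROR_THROW_ON(CLGEMM::validate(a.info(), b.info(), c.info(), dst.info(), 1.f, 1.f, GEMMInfo()));

    CLGEMM gemm;
    gemm.configure(&a, &b, &c, &dst, /*alpha=*/1.f, /*beta=*/1.f, GEMMInfo());

    a.allocator()->allocate();
    b.allocator()->allocate();
    c.allocator()->allocate();
    dst.allocator()->allocate();
    // ... fill a, b and c with data here ...

    gemm.run();                // the first run also triggers prepare() internally
    CLScheduler::get().sync(); // wait for the command queue to finish
    return 0;
}

The "memory injecting" run interface named in the subject line is visible in the experimental operators kept at the bottom of CLLogicalAnd.h, CLLogicalOr.h and CLSlice.h: configuration happens on ITensorInfo only, and the concrete tensors are supplied at run time through an ITensorPack. Below is a hedged sketch using experimental::CLSlice; the ACL_SRC/ACL_DST pack ids, the compile-context accessor and the start/end coordinates are assumptions based on the library's experimental API, not shown in this patch.

#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/Coordinates.h"
#include "arm_compute/core/ITensorPack.h"
#include "arm_compute/core/experimental/Types.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/functions/CLSlice.h"

using namespace arm_compute;

void slice_with_injected_tensors(CLTensor &src, CLTensor &dst)
{
    // Configure against metadata only; no device memory is bound at this point
    experimental::CLSlice slice;
    slice.configure(CLKernelLibrary::get().get_compile_context(),
                    src.info(), dst.info(),
                    Coordinates(0, 0), Coordinates(2, 2)); // example start/end coordinates

    // Inject the concrete tensors (and hence their memory) only when running
    ITensorPack pack;
    pack.add_tensor(TensorType::ACL_SRC, &src);
    pack.add_tensor(TensorType::ACL_DST, &dst);
    slice.run(pack);
}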