56 files changed, 0 insertions, 5290 deletions
diff --git a/src/runtime/gpu/cl/IClOperator.h b/src/runtime/gpu/cl/IClOperator.h
deleted file mode 100644
index 049bf05dc1..0000000000
--- a/src/runtime/gpu/cl/IClOperator.h
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_ICL_OPERATOR_H
-#define ARM_COMPUTE_ICL_OPERATOR_H
-
-#include "arm_compute/core/ITensorInfo.h"
-#include "arm_compute/runtime/CL/ICLOperator.h"
-
-namespace arm_compute
-{
-namespace opencl
-{
-using IClOperator = experimental::ICLOperator;
-} // namespace opencl
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_ICL_OPERATOR_H */
diff --git a/src/runtime/gpu/cl/operators/ClActivation.cpp b/src/runtime/gpu/cl/operators/ClActivation.cpp
deleted file mode 100644
index 71aa57bdbd..0000000000
--- a/src/runtime/gpu/cl/operators/ClActivation.cpp
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * Copyright (c) 2016-2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "src/runtime/gpu/cl/operators/ClActivation.h"
-
-#include "src/core/gpu/cl/ClCompileContext.h"
-#include "src/core/gpu/cl/kernels/ClActivationKernel.h"
-
-namespace arm_compute
-{
-namespace opencl
-{
-void ClActivation::configure(const ClCompileContext &compile_context, ITensorInfo *src, ITensorInfo *dst, const ActivationLayerInfo &act_info)
-{
-    auto k = std::make_unique<kernels::ClActivationKernel>();
-    k->configure(compile_context, src, dst, act_info);
-    _kernel = std::move(k);
-}
-
-Status ClActivation::validate(const ITensorInfo *src, const ITensorInfo *dst, const ActivationLayerInfo &act_info)
-{
-    return kernels::ClActivationKernel::validate(src, dst, act_info);
-}
-} // namespace opencl
-} // namespace arm_compute
diff --git a/src/runtime/gpu/cl/operators/ClActivation.h b/src/runtime/gpu/cl/operators/ClActivation.h
deleted file mode 100644
index 235b826b87..0000000000
--- a/src/runtime/gpu/cl/operators/ClActivation.h
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CL_ACTIVATION_H
-#define ARM_COMPUTE_CL_ACTIVATION_H
-
-#include "src/core/gpu/cl/ClCompileContext.h"
-#include "src/runtime/gpu/cl/IClOperator.h"
-
-namespace arm_compute
-{
-namespace opencl
-{
-/** Basic function to run @ref kernels::ClActivationKernel */
-class ClActivation : public IClOperator
-{
-public:
-    /** Constructor */
-    ClActivation() = default;
-    /** Configure operator for a given list of arguments
-     *
-     * @param[in]  compile_context The compile context to be used.
-     * @param[in]  src             Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM16/F16/F32.
-     * @param[out] dst             Destination tensor info. Data type supported: same as @p src
-     * @param[in]  activation_info Activation layer parameters.
-     */
-    void configure(const ClCompileContext &compile_context, ITensorInfo *src, ITensorInfo *dst, const ActivationLayerInfo &activation_info);
-    /** Static function to check if given info will lead to a valid configuration of @ref ClActivation
-     *
-     * @param[in] src      Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM16/F16/F32.
-     * @param[in] dst      Destination tensor info. Data type supported: same as @p src
-     * @param[in] act_info Activation layer information.
-     *
-     * @return a status
-     */
-    static Status validate(const ITensorInfo *src, const ITensorInfo *dst, const ActivationLayerInfo &act_info);
-};
-} // namespace opencl
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CL_ACTIVATION_H */
diff --git a/src/runtime/gpu/cl/operators/ClAdd.cpp b/src/runtime/gpu/cl/operators/ClAdd.cpp
deleted file mode 100644
index 01f550f819..0000000000
--- a/src/runtime/gpu/cl/operators/ClAdd.cpp
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "src/runtime/gpu/cl/operators/ClAdd.h"
-
-#include "src/core/gpu/cl/ClCompileContext.h"
-#include "src/core/gpu/cl/kernels/ClElementwiseKernel.h"
-
-namespace arm_compute
-{
-namespace opencl
-{
-void ClAdd::configure(const ClCompileContext &compile_context, ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst,
-                      ConvertPolicy policy, const ActivationLayerInfo &act_info)
-{
-    auto k = std::make_unique<kernels::ClSaturatedArithmeticKernel>();
-    k->configure(compile_context, ArithmeticOperation::ADD, src1, src2, dst, policy, act_info);
-    _kernel = std::move(k);
-}
-
-Status ClAdd::validate(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst,
-                       ConvertPolicy policy, const ActivationLayerInfo &act_info)
-{
-    return kernels::ClSaturatedArithmeticKernel::validate(ArithmeticOperation::ADD, src1, src2, dst, policy, act_info);
-}
-} // namespace opencl
-} // namespace arm_compute
diff --git a/src/runtime/gpu/cl/operators/ClAdd.h b/src/runtime/gpu/cl/operators/ClAdd.h
deleted file mode 100644
index f751d8dc83..0000000000
--- a/src/runtime/gpu/cl/operators/ClAdd.h
+++ /dev/null
@@ -1,100 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CL_ADD_H
-#define ARM_COMPUTE_CL_ADD_H
-
-#include "src/core/gpu/cl/ClCompileContext.h"
-#include "src/runtime/gpu/cl/IClOperator.h"
-
-namespace arm_compute
-{
-namespace opencl
-{
-/** Basic function to run arithmetic addition
- *
- * @note The tensor data type for the inputs must be U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/F16/F32.
- * @note The function performs an arithmetic addition between two tensors.
- */
-class ClAdd : public IClOperator
-{
-public:
-    /** Default Constructor */
-    ClAdd() = default;
-    /** Configure function for a given list of arguments.
-     *
-     * Valid configurations (src1,src2) -> dst :
-     *
-     *   - (U8,U8)           -> U8
-     *   - (U8,U8)           -> S16
-     *   - (S16,U8)          -> S16
-     *   - (U8,S16)          -> S16
-     *   - (S16,S16)         -> S16
-     *   - (S32,S32)         -> S32
-     *   - (F16,F16)         -> F16
-     *   - (F32,F32)         -> F32
-     *   - (QASYMM8,QASYMM8) -> QASYMM8
-     *   - (QASYMM8_SIGNED,QASYMM8_SIGNED) -> QASYMM8_SIGNED
-     *   - (QSYMM16,QSYMM16) -> QSYMM16
-     *
-     * @param[in]      compile_context The compile context to be used.
-     * @param[in, out] src1            First source tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/F16/F32.
-     *                                 The source tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
-     * @param[in, out] src2            Second source tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/F16/F32.
-     *                                 The source tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
-     * @param[out]     dst             Destination tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/F16/F32.
-     * @param[in]      policy          Policy to use to handle overflow.
-     * @param[in]      act_info        (Optional) Activation layer information in case of a fused activation.
-     */
-    void configure(const ClCompileContext &compile_context, ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst, ConvertPolicy policy,
-                   const ActivationLayerInfo &act_info = ActivationLayerInfo());
-    /** Static function to check if given info will lead to a valid configuration of @ref ClAdd
-     *
-     * Valid configurations (src1,src2) -> dst :
-     *
-     *   - (U8,U8)           -> U8
-     *   - (U8,U8)           -> S16
-     *   - (S16,U8)          -> S16
-     *   - (U8,S16)          -> S16
-     *   - (S16,S16)         -> S16
-     *   - (S32,S32)         -> S32
-     *   - (F16,F16)         -> F16
-     *   - (F32,F32)         -> F32
-     *   - (QASYMM8,QASYMM8) -> QASYMM8
-     *   - (QASYMM8_SIGNED,QASYMM8_SIGNED) -> QASYMM8_SIGNED
-     *   - (QSYMM16,QSYMM16) -> QSYMM16
-     *
-     * @param[in] src1     First source tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/F16/F32.
-     * @param[in] src2     Second source tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/F16/F32.
-     * @param[in] dst      Destination tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/F16/F32.
-     * @param[in] policy   Policy to use to handle overflow.
-     * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
-     *
-     * @return a status
-     */
-    static Status validate(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst, ConvertPolicy policy,
-                           const ActivationLayerInfo &act_info = ActivationLayerInfo());
-};
-} // namespace opencl
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CL_ADD_H */
diff --git a/src/runtime/gpu/cl/operators/ClCast.cpp b/src/runtime/gpu/cl/operators/ClCast.cpp
deleted file mode 100644
index 3f54004aa7..0000000000
--- a/src/runtime/gpu/cl/operators/ClCast.cpp
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "src/runtime/gpu/cl/operators/ClCast.h"
-
-#include "src/core/gpu/cl/ClCompileContext.h"
-#include "src/core/gpu/cl/kernels/ClCastKernel.h"
-
-namespace arm_compute
-{
-namespace opencl
-{
-void ClCast::configure(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst, ConvertPolicy policy)
-{
-    auto k = std::make_unique<kernels::ClCastKernel>();
-    k->configure(compile_context, src, dst, policy);
-    _kernel = std::move(k);
-}
-
-Status ClCast::validate(const ITensorInfo *src, const ITensorInfo *dst, ConvertPolicy policy)
-{
-    return kernels::ClCastKernel::validate(src, dst, policy);
-}
-} // namespace opencl
-} // namespace arm_compute
diff --git a/src/runtime/gpu/cl/operators/ClCast.h b/src/runtime/gpu/cl/operators/ClCast.h
deleted file mode 100644
index 69e028debd..0000000000
--- a/src/runtime/gpu/cl/operators/ClCast.h
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CL_CAST_H
-#define ARM_COMPUTE_CL_CAST_H
-
-#include "src/core/gpu/cl/ClCompileContext.h"
-#include "src/runtime/gpu/cl/IClOperator.h"
-
-namespace arm_compute
-{
-namespace opencl
-{
-/** Basic function to run @ref kernels::ClCastKernel */
-class ClCast : public IClOperator
-{
-public:
-    /** Constructor */
-    ClCast() = default;
-    /** Configure operator for a given list of arguments
-     *
-     * @note Input data type must be different than output data type.
-     *
-     * Valid data layouts:
-     * - All
-     *
-     * Valid data type configurations:
-     * |src            |dst                                    |
-     * |:--------------|:--------------------------------------|
-     * |U8             | S8, U16, S16, U32, S32, F16, F32      |
-     * |U16            | U8, S8, S16, U32, S32, F16, F32       |
-     * |S16            | U8, S8, U16, U32, S32, F16, F32       |
-     * |U32            | U8, S8, U16, S16, S32, F16, F32       |
-     * |S32            | U8, S8, U16, S16, U32, F16, F32       |
-     * |F16            | U8, S8, U16, S16, U32, F32            |
-     * |F32            | U8, S8, U16, S16, U32, F16            |
-     *
-     * @param[in]  compile_context The compile context to be used.
-     * @param[in]  src             The source tensor to convert. Data types supported: U8/S8/U16/S16/U32/S32/F16/F32.
-     * @param[out] dst             The destinatio tensor. Data types supported: U8/S8/U16/S16/U32/S32/F16/F32.
-     * @param[in]  policy          Conversion policy.
-     */
-    void configure(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst, ConvertPolicy policy);
-    /** Static function to check if given info will lead to a valid configuration
-     *
-     * Similar to @ref ClCast::configure()
-     *
-     * @return a status
-     */
-    static Status validate(const ITensorInfo *src, const ITensorInfo *dst, ConvertPolicy policy);
-};
-} // namespace opencl
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CL_CAST_H */
diff --git a/src/runtime/gpu/cl/operators/ClConcatenate.cpp b/src/runtime/gpu/cl/operators/ClConcatenate.cpp
deleted file mode 100644
index 4385fcfaed..0000000000
--- a/src/runtime/gpu/cl/operators/ClConcatenate.cpp
+++ /dev/null
@@ -1,254 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "src/runtime/gpu/cl/operators/ClConcatenate.h"
-
-#include "arm_compute/core/utils/misc/ShapeCalculator.h"
-#include "arm_compute/runtime/CL/CLScheduler.h"
-
-#include "src/core/gpu/cl/kernels/ClBatchConcatenateKernel.h"
-#include "src/core/gpu/cl/kernels/ClDepthConcatenateKernel.h"
-#include "src/core/gpu/cl/kernels/ClHeightConcatenateKernel.h"
-#include "src/core/gpu/cl/kernels/ClWidthConcatenate2TensorsKernel.h"
-#include "src/core/gpu/cl/kernels/ClWidthConcatenate4TensorsKernel.h"
-#include "src/core/gpu/cl/kernels/ClWidthConcatenateKernel.h"
-
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Types.h"
-#include "src/core/helpers/AutoConfiguration.h"
-
-namespace arm_compute
-{
-namespace opencl
-{
-ClConcatenate::ClConcatenate()
-    : _concat_kernels(),
-      _num_inputs(0),
-      _axis(Window::DimX)
-{
-}
-
-void ClConcatenate::configure(const CLCompileContext &compile_context, const std::vector<ITensorInfo *> &src_vector, ITensorInfo *dst, size_t axis)
-{
-    ARM_COMPUTE_ERROR_ON(dst == nullptr);
-    _axis       = axis;
-    _num_inputs = src_vector.size();
-
-    TensorShape                      dst_shape = arm_compute::misc::shape_calculator::calculate_concatenate_shape(src_vector, _axis);
-    std::vector<const ITensorInfo *> const_src_vector(src_vector.size());
-    std::transform(src_vector.begin(), src_vector.end(), const_src_vector.begin(), [](ITensorInfo * t)
-    {
-        ARM_COMPUTE_ERROR_ON_NULLPTR(t);
-        return t;
-    });
-
-    // dst auto inizialitation if not yet initialized
-    auto_init_if_empty(*dst, dst_shape, 1, src_vector[0]->data_type());
-    ARM_COMPUTE_ERROR_THROW_ON(ClConcatenate::validate(const_src_vector, dst, axis));
-
-    unsigned int offset = 0;
-    switch(_axis)
-    {
-        case Window::DimX:
-        {
-            switch(_num_inputs)
-            {
-                case 2:
-                {
-                    // Configure WidthConcatenate2Tensors kernel
-                    auto kernel = std::make_unique<kernels::ClWidthConcatenate2TensorsKernel>();
-                    kernel->configure(compile_context, src_vector.at(0), src_vector.at(1), dst);
-                    _concat_kernels.emplace_back(std::move(kernel));
-                    break;
-                }
-                case 4:
-                {
-                    // Configure WidthConcatenate4Tensors kernel
-                    auto kernel = std::make_unique<kernels::ClWidthConcatenate4TensorsKernel>();
-                    kernel->configure(compile_context, src_vector.at(0), src_vector.at(1), src_vector.at(2), src_vector.at(3), dst);
-                    _concat_kernels.emplace_back(std::move(kernel));
-                    break;
-                }
-                default:
-                {
-                    // Configure generic case WidthConcatenate kernels
-                    for(unsigned int i = 0; i < _num_inputs; ++i)
-                    {
-                        auto kernel = std::make_unique<kernels::ClWidthConcatenateKernel>();
-                        kernel->configure(compile_context, src_vector.at(i), offset, dst);
-                        offset += src_vector.at(i)->dimension(_axis);
-                        _concat_kernels.emplace_back(std::move(kernel));
-                    }
-                    break;
-                }
-            }
-            break;
-        }
-        case Window::DimY:
-        {
-            for(unsigned int i = 0; i < _num_inputs; ++i)
-            {
-                auto kernel = std::make_unique<kernels::ClHeightConcatenateKernel>();
-                kernel->configure(compile_context, src_vector.at(i), offset, dst);
-                offset += src_vector.at(i)->dimension(_axis);
-                _concat_kernels.emplace_back(std::move(kernel));
-            }
-            break;
-        }
-        case Window::DimZ:
-        {
-            for(unsigned int i = 0; i < _num_inputs; ++i)
-            {
-                auto kernel = std::make_unique<kernels::ClDepthConcatenateKernel>();
-                kernel->configure(compile_context, src_vector.at(i), offset, dst);
-                offset += src_vector.at(i)->dimension(_axis);
-                _concat_kernels.emplace_back(std::move(kernel));
-            }
-            break;
-        }
-        case 3:
-        {
-            for(unsigned int i = 0; i < _num_inputs; ++i)
-            {
-                auto kernel = std::make_unique<kernels::ClBatchConcatenateKernel>();
-                kernel->configure(compile_context, src_vector.at(i), offset, dst);
-                offset += src_vector.at(i)->dimension(_axis);
-                _concat_kernels.emplace_back(std::move(kernel));
-            }
-            break;
-        }
-        default:
-            ARM_COMPUTE_ERROR("Axis not supported");
-    }
-}
-
-Status ClConcatenate::validate(const std::vector<const ITensorInfo *> &src_vector, const ITensorInfo *dst, size_t axis)
-{
-    ARM_COMPUTE_RETURN_ERROR_ON(dst == nullptr);
-    const unsigned int num_inputs = src_vector.size();
-
-    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(dst);
-    ARM_COMPUTE_RETURN_ERROR_ON(num_inputs < 2);
-
-    unsigned int offset = 0;
-    switch(axis)
-    {
-        case Window::DimX:
-        {
-            switch(num_inputs)
-            {
-                case 2:
-                    // Validate WidthConcatenate2Tensors kernels if there are 2 inputs
-                    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src_vector[0], src_vector[1]);
-                    ARM_COMPUTE_RETURN_ON_ERROR(kernels::ClWidthConcatenate2TensorsKernel::validate(src_vector[0], src_vector[1], dst));
-                    break;
-                case 4:
-                    // Validate WidthConcatenate4Tensors kernels if there are 4 inputs
-                    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src_vector[0], src_vector[1], src_vector[2], src_vector[3]);
-                    ARM_COMPUTE_RETURN_ON_ERROR(kernels::ClWidthConcatenate4TensorsKernel::validate(src_vector[0], src_vector[1], src_vector[2], src_vector[3], dst));
-                    break;
-                default:
-                    // Validate generic case of WidthConcatenate kernel
-                    for(const auto &src : src_vector)
-                    {
-                        ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src);
-                        ARM_COMPUTE_RETURN_ON_ERROR(kernels::ClWidthConcatenateKernel::validate(src, offset, dst));
-                        offset += src->dimension(axis);
-                    }
-                    break;
-            }
-            break;
-        }
-        case Window::DimY:
-        {
-            for(const auto &src : src_vector)
-            {
-                ARM_COMPUTE_RETURN_ON_ERROR(kernels::ClHeightConcatenateKernel::validate(src, offset, dst));
-                offset += src->dimension(axis);
-            }
-            break;
-        }
-        case Window::DimZ:
-        {
-            for(const auto &src : src_vector)
-            {
-                ARM_COMPUTE_RETURN_ON_ERROR(kernels::ClDepthConcatenateKernel::validate(src, offset, dst));
-                offset += src->dimension(axis);
-            }
-            break;
-        }
-        case 3:
-        {
-            for(const auto &src : src_vector)
-            {
-                ARM_COMPUTE_RETURN_ON_ERROR(kernels::ClBatchConcatenateKernel::validate(src, offset, dst));
-                offset += src->dimension(axis);
-            }
-            break;
-        }
-        default:
-            ARM_COMPUTE_ERROR("Axis not supported");
-    }
-
-    if(dst->total_size() != 0)
-    {
-        TensorShape dst_shape = arm_compute::misc::shape_calculator::calculate_concatenate_shape(src_vector, axis);
-        ARM_COMPUTE_RETURN_ERROR_ON(dst_shape.total_size() != dst->tensor_shape().total_size());
-    }
-
-    return Status{};
-}
-
-void ClConcatenate::run(ITensorPack &tensors)
-{
-    if(tensors.empty())
-    {
-        ARM_COMPUTE_ERROR("No inputs provided");
-    }
-
-    if(static_cast<int>(tensors.size()) - 1 != static_cast<int>(_num_inputs))
-    {
-        ARM_COMPUTE_ERROR("Configured with different number of inputs");
-    }
-
-    if(_axis == Window::DimX && (_num_inputs == 2 || _num_inputs == 4))
-    {
-        ARM_COMPUTE_ERROR_ON(_concat_kernels.empty());
-        CLScheduler::get().enqueue_op(*_concat_kernels.at(0), tensors, true);
-    }
-    else
-    {
-        int i = 0;
-        for(auto &k : _concat_kernels)
-        {
-            ITensorPack pack;
-            pack.add_tensor(TensorType::ACL_SRC, tensors.get_const_tensor(ACL_SRC_VEC + i));
-            pack.add_tensor(TensorType::ACL_DST, tensors.get_tensor(ACL_DST));
-            CLScheduler::get().enqueue_op(*k, pack, true);
-            ++i;
-        }
-    }
-}
-} // namespace opencl
-} // namespace arm_compute
diff --git a/src/runtime/gpu/cl/operators/ClConcatenate.h b/src/runtime/gpu/cl/operators/ClConcatenate.h
deleted file mode 100644
index 0d960a605c..0000000000
--- a/src/runtime/gpu/cl/operators/ClConcatenate.h
+++ /dev/null
@@ -1,86 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLCONCATENATE_H
-#define ARM_COMPUTE_CLCONCATENATE_H
-
-#include "src/core/gpu/cl/ClCompileContext.h"
-#include "src/core/gpu/cl/IClKernel.h"
-#include "src/runtime/gpu/cl/IClOperator.h"
-
-#include <vector>
-
-namespace arm_compute
-{
-namespace opencl
-{
-/** Basic function to execute concatenate tensors along a given axis. This function calls the following kernels:
- *
- * -# @ref kernels::ClWidthConcatenateKernel (if underlying concatenation axis is 0).
- * -# @ref kernels::ClHeightConcatenateKernel (if underlying concatenation axis is 1).
- * -# @ref kernels::ClDepthConcatenateKernel (if underlying concatenation axis is 2).
- * -# @ref kernels::ClBatchConcatenateKernel (if underlying concatenation axis is 3).
- */
-class ClConcatenate : public IClOperator
-{
-public:
-    /** Default constructor */
-    ClConcatenate();
-    /** Initialise the kernel's inputs vector and dst.
-     *
-     * @note Input and dst tensor dimensions preconditions defer depending on the concatenation axis.
-     * @note Preconditions can be found respectively at @ref kernels::ClWidthConcatenateKernel,
-     *       @ref kernels::ClHeightConcatenateKernel and @ref kernels::ClDepthConcatenateKernel.
-     *
-     *
-     * @param[in]     compile_context The compile context to be used.
-     * @param[in,out] src_vector      The vectors containing all the tensors info to concatenate. Data types supported: All
-     * @param[out]    dst             Destination tensor info. Data types supported: same as @p src_vector.
-     * @param[in]     axis            Concatenation axis. Supported underlying concatenation axis are 0, 1, 2 and 3.
-     */
-    void configure(const ClCompileContext &compile_context, const std::vector<ITensorInfo *> &src_vector, ITensorInfo *dst, size_t axis);
-    /** Static function to check if given info will lead to a valid configuration of @ref ClConcatenate
-     *
-     * @note Input and dst tensor dimensions preconditions defer depending on the concatenation axis.
-     * @note Preconditions can be found respectively at @ref kernels::ClWidthConcatenateKernel,
-     *       @ref kernels::ClHeightConcatenateKernel and @ref kernels::ClDepthConcatenateKernel.
-     *
-     * @param[in] src_vector The vectors containing all the tensors info to concatenate. Data types supported: All
-     * @param[in] dst        Destination tensor info. Data types supported: same as @p src_vector.
-     * @param[in] axis       Concatenation axis. Supported underlying concatenation axis are 0, 1, 2 and 3.
-     *
-     * @return a status
-     */
-    static Status validate(const std::vector<const ITensorInfo *> &src_vector, const ITensorInfo *dst, size_t axis);
-
-    // Inherited methods overridden:
-    void run(ITensorPack &tensors) override;
-
-private:
-    std::vector<std::unique_ptr<IClKernel>> _concat_kernels;
-    unsigned int                            _num_inputs;
-    unsigned int                            _axis;
-};
-} // namespace opencl
-} // namespace arm_comPUTE
-#endif /* ARM_COMPUTE_CL_CONCATENATE_H */
diff --git a/src/runtime/gpu/cl/operators/ClConvertFullyConnectedWeights.cpp b/src/runtime/gpu/cl/operators/ClConvertFullyConnectedWeights.cpp
deleted file mode 100644
index 0d2f2925d3..0000000000
--- a/src/runtime/gpu/cl/operators/ClConvertFullyConnectedWeights.cpp
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "src/runtime/gpu/cl/operators/ClConvertFullyConnectedWeights.h"
-
-#include "src/core/gpu/cl/ClCompileContext.h"
-#include "src/core/gpu/cl/kernels/ClConvertFullyConnectedWeightsKernel.h"
-
-namespace arm_compute
-{
-namespace opencl
-{
-void ClConvertFullyConnectedWeights::configure(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst, const TensorShape &original_src_shape, DataLayout data_layout)
-{
-    auto k = std::make_unique<kernels::ClConvertFullyConnectedWeightsKernel>();
-    k->configure(compile_context, src, dst, original_src_shape, data_layout);
-    _kernel = std::move(k);
-}
-
-Status ClConvertFullyConnectedWeights::validate(const ITensorInfo *src, const ITensorInfo *dst, const TensorShape &original_src_shape, DataLayout data_layout)
-{
-    return kernels::ClConvertFullyConnectedWeightsKernel::validate(src, dst, original_src_shape, data_layout);
-}
-} // namespace opencl
-} // namespace arm_compute
-\ No newline at end of file
diff --git a/src/runtime/gpu/cl/operators/ClConvertFullyConnectedWeights.h b/src/runtime/gpu/cl/operators/ClConvertFullyConnectedWeights.h
deleted file mode 100644
index efedc2fcb7..0000000000
--- a/src/runtime/gpu/cl/operators/ClConvertFullyConnectedWeights.h
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CL_CONVERTFULLYCONNECTEDWEIGHTS_H
-#define ARM_COMPUTE_CL_CONVERTFULLYCONNECTEDWEIGHTS_H
-
-#include "src/core/gpu/cl/ClCompileContext.h"
-#include "src/runtime/gpu/cl/IClOperator.h"
-
-namespace arm_compute
-{
-namespace opencl
-{
-/** Basic function to run @ref kernels::ClConvertFullyConnectedWeightsKernel */
-class ClConvertFullyConnectedWeights : public IClOperator
-{
-public:
-    /** Constructor */
-    ClConvertFullyConnectedWeights() = default;
-    /** Initialise the kernel's inputs and outputs
-     *
-     * @param[in] compile_context    The compile context to be used.
-     * @param[in] src                The src tensor info. Data types supported: All.
-     * @param[in] dst                The dst tensor info. Data types supported: Same as @p src
-     * @param[in] original_src_shape Shape of the original src tensor (the one entering fully connected layer).
-     * @param[in] data_layout        The data layout the weights have been trained in.
-     */
-    void configure(const CLCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst, const TensorShape &original_src_shape, DataLayout data_layout);
-    /**  Static function to check if given info will lead to a valid configuration of @ref kernels::ClConvertFullyConnectedWeightsKernel.
-     *
-     * @param[in] src                First tensor src info. Data types supported: All.
-     * @param[in] dst                Output tensor info. Data types supported: same as @p src.
-     * @param[in] original_src_shape Shape of the original src tensor (the one entering fully connected layer).
-     * @param[in] data_layout        The data layout the weights have been trained in.
-     *
-     * @return a status
-     */
-    static Status validate(const ITensorInfo *src, const ITensorInfo *dst, const TensorShape &original_src_shape, DataLayout data_layout);
-};
-} // namespace opencl
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CL_CONVERTFULLYCONNECTEDWEIGHTS_H */
diff --git a/src/runtime/gpu/cl/operators/ClCopy.cpp b/src/runtime/gpu/cl/operators/ClCopy.cpp
deleted file mode 100644
index 2bdb1f5ba1..0000000000
--- a/src/runtime/gpu/cl/operators/ClCopy.cpp
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "src/runtime/gpu/cl/operators/ClCopy.h"
-
-#include "src/core/gpu/cl/ClCompileContext.h"
-#include "src/core/gpu/cl/kernels/ClCopyKernel.h"
-
-namespace arm_compute
-{
-namespace opencl
-{
-void ClCopy::configure(const ClCompileContext &compile_context, ITensorInfo *src, ITensorInfo *dst, Window *dst_window)
-{
-    auto k = std::make_unique<kernels::ClCopyKernel>();
-    k->configure(compile_context, src, dst, dst_window);
-    _kernel = std::move(k);
-}
-
-Status ClCopy::validate(const ITensorInfo *src, const ITensorInfo *dst, Window *dst_window)
-{
-    return kernels::ClCopyKernel::validate(src, dst, dst_window);
-}
-} // namespace opencl
-} // namespace arm_compute
-\ No newline at end of file
diff --git a/src/runtime/gpu/cl/operators/ClCopy.h b/src/runtime/gpu/cl/operators/ClCopy.h
deleted file mode 100644
index 0b99676f65..0000000000
--- a/src/runtime/gpu/cl/operators/ClCopy.h
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CL_COPY_H
-#define ARM_COMPUTE_CL_COPY_H
-
-#include "arm_compute/core/Window.h"
-#include "src/core/gpu/cl/ClCompileContext.h"
-#include "src/runtime/gpu/cl/IClOperator.h"
-
-namespace arm_compute
-{
-namespace opencl
-{
-/** Basic function to run @ref kernels::ClCopyKernel */
-class ClCopy : public IClOperator
-{
-public:
-    /** Constructor */
-    ClCopy() = default;
-    /** Initialise the function's source and destination.
-     *
-     * @param[in]  compile_context The compile context to be used.
-     * @param[in]  src             Source tensor info. Data types supported: All.
-     * @param[out] dst             Output tensor info. Data types supported: Same as @p src.
-     * @param[in]  dst_window      (Optional) Window to be used in case only copying into part of a tensor. Default is nullptr.
-     *
-     */
-    void configure(const CLCompileContext &compile_context, ITensorInfo *src, ITensorInfo *dst, Window *dst_window = nullptr);
-    /** Static function to check if given info will lead to a valid configuration of @ref kernels::ClCopyKernel
-     *
-     * @param[in] src        Source tensor info. Data types supported: All.
-     * @param[in] dst        Output tensor info. Data types supported: Same as @p src.
-     * @param[in] dst_window (Optional) Window to be used in case only copying into part of a tensor. Default is nullptr.
-     *
-     * @return a status
-     */
-    static Status validate(const ITensorInfo *src, const ITensorInfo *dst, Window *dst_window = nullptr);
-};
-} // namespace opencl
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CL_COPY_H */
diff --git a/src/runtime/gpu/cl/operators/ClCrop.cpp b/src/runtime/gpu/cl/operators/ClCrop.cpp
deleted file mode 100644
index 17bb11912f..0000000000
--- a/src/runtime/gpu/cl/operators/ClCrop.cpp
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "src/runtime/gpu/cl/operators/ClCrop.h"
-
-#include "src/core/gpu/cl/ClCompileContext.h"
-#include "src/core/gpu/cl/kernels/ClCropKernel.h"
-
-namespace arm_compute
-{
-namespace opencl
-{
-void ClCrop::configure(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst, Coordinates2D start, Coordinates2D end, uint32_t batch_index, float extrapolation_value,
-                       Window *dst_window)
-{
-    auto k = std::make_unique<kernels::ClCropKernel>();
-    k->configure(compile_context, src, dst, start, end, batch_index, extrapolation_value, dst_window);
-    _kernel = std::move(k);
-}
-
-Status ClCrop::validate(const ITensorInfo *src, const ITensorInfo *dst, Coordinates2D start, Coordinates2D end, uint32_t batch_index, float extrapolation_value, Window *dst_window)
-{
-    return kernels::ClCropKernel::validate(src, dst, start, end, batch_index, extrapolation_value, dst_window);
-}
-} // namespace opencl
-} // namespace arm_compute
-\ No newline at end of file
diff --git a/src/runtime/gpu/cl/operators/ClCrop.h b/src/runtime/gpu/cl/operators/ClCrop.h
deleted file mode 100644
index acfbf14742..0000000000
--- a/src/runtime/gpu/cl/operators/ClCrop.h
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CL_COPY_H
-#define ARM_COMPUTE_CL_COPY_H
-
-#include "arm_compute/core/Window.h"
-#include "src/core/gpu/cl/ClCompileContext.h"
-#include "src/runtime/gpu/cl/IClOperator.h"
-
-namespace arm_compute
-{
-namespace opencl
-{
-/** Basic function to run @ref kernels::ClCropKernel */
-class ClCrop : public IClOperator
-{
-public:
-    /** Constructor */
-    ClCrop() = default;
-    /** Initialise the function's source and destination.
-     *
-     * @note Supported tensor rank: up to 4
-     *
-     * @param[in]  compile_context     The compile context to be used.
-     * @param[in]  src                 Source tensor info. Data type supported: All. Data layouts supported: NHWC.
-     * @param[out] dst                 Destination tensor info. Data type supported: F32
-     * @param[in]  start               Coordinates of where to start cropping the image.
-     * @param[in]  end                 Coordinates of where to end cropping the image.
-     * @param[in]  batch_index         Fourth dimension index of the 3D image to crop in @p src.
-     * @param[in]  extrapolation_value Value to be used for values outside of the image. Default is 0.
-     * @param[in]  dst_window          Output window to be used in case cropped image is being copied into a tensor. Default is nullptr.
-     */
-    void configure(const CLCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst, Coordinates2D start, Coordinates2D end, uint32_t batch_index, float extrapolation_value = 0,
-                   Window *dst_window = nullptr);
-
-    /** Static function to check if given info will lead to a valid configuration of @ref kernels::ClCropKernel
-     *
-     * @note Supported tensor rank: up to 4
-     *
-     * @param[in] src                 Source tensor info. Data type supported: All. Data layouts supported: NHWC.
-     * @param[in] dst                 Destination tensor info. Data type supported: F32
-     * @param[in] start               Coordinates of where to start cropping the image.
-     * @param[in] end                 Coordinates of where to end cropping the image.
-     * @param[in] batch_index         Fourth dimension index of the 3D image to crop in @p src.
-     * @param[in] extrapolation_value Value to be used for values outside of the image. Default is 0.
-     * @param[in] dst_window          Output window to be used in case cropped image is being copied into a tensor. Default is nullptr.
-     */
-    static Status validate(const ITensorInfo *src, const ITensorInfo *dst, Coordinates2D start, Coordinates2D end, uint32_t batch_index, float extrapolation_value = 0,
-                           Window *dst_window = nullptr);
-};
-} // namespace opencl
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CL_COPY_H */
diff --git a/src/runtime/gpu/cl/operators/ClDequantize.cpp b/src/runtime/gpu/cl/operators/ClDequantize.cpp
deleted file mode 100644
index 0c1391bb45..0000000000
--- a/src/runtime/gpu/cl/operators/ClDequantize.cpp
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "src/runtime/gpu/cl/operators/ClDequantize.h"
-
-#include "arm_compute/core/Error.h"
-#include "arm_compute/runtime/CL/CLScheduler.h"
-#include "src/core/gpu/cl/ClCompileContext.h"
-#include "src/core/gpu/cl/kernels/ClDequantizeKernel.h"
-
-namespace arm_compute
-{
-namespace opencl
-{
-void ClDequantize::configure(const CLCompileContext &compile_context, ITensorInfo *src, ITensorInfo *dst)
-{
-    auto k = std::make_unique<kernels::ClDequantizeKernel>();
-    k->configure(compile_context, src, dst);
-    _kernel = std::move(k);
-}
-
-Status ClDequantize::validate(const ITensorInfo *src, const ITensorInfo *dst)
-{
-    return kernels::ClDequantizeKernel::validate(src, dst);
-}
-
-void ClDequantize::run(ITensorPack &tensors)
-{
-    ARM_COMPUTE_ERROR_ON_MSG(tensors.empty(), "No inputs provided");
-    CLScheduler::get().enqueue_op(*_kernel.get(), tensors);
-}
-} // namespace opencl
-} // namespace arm_compute
diff --git a/src/runtime/gpu/cl/operators/ClDequantize.h b/src/runtime/gpu/cl/operators/ClDequantize.h
deleted file mode 100644
index 47fad3eeee..0000000000
--- a/src/runtime/gpu/cl/operators/ClDequantize.h
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CL_DEQUANTIZE_H
-#define ARM_COMPUTE_CL_DEQUANTIZE_H
-
-#include "src/core/gpu/cl/ClCompileContext.h"
-#include "src/runtime/gpu/cl/IClOperator.h"
-
-namespace arm_compute
-{
-namespace opencl
-{
-/** Basic function to run @ref kernels::ClDequantizeKernel that dequantizes an input tensor */
-class ClDequantize : public IClOperator
-{
-public:
-    /** Constructor */
-    ClDequantize() = default;
-    /** Set the input and output tensors.
-     *
-     * @param[in]  compile_context The compile context to be used.
-     * @param[in]  src             Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16.
-     * @param[out] dst             Destination tensor info with the same dimensions of @p src. Data type supported: F16/F32.
-     */
-    void configure(const CLCompileContext &compile_context, ITensorInfo *src, ITensorInfo *dst);
-    /** Static function to check if given info will lead to a valid configuration
-     *
-     * Similar to @ref ClDequantize::configure()
-     *
-     * @return a status
-     */
-    static Status validate(const ITensorInfo *src, const ITensorInfo *dst);
-
-    // Inherited method overridden
-    void run(ITensorPack &tensors) override;
-};
-} // namespace opencl
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CL_DEQUANTIZE_H */
diff --git a/src/runtime/gpu/cl/operators/ClDirectConv2d.cpp b/src/runtime/gpu/cl/operators/ClDirectConv2d.cpp
deleted file mode 100644
index 13ef42a640..0000000000
--- a/src/runtime/gpu/cl/operators/ClDirectConv2d.cpp
+++ /dev/null
@@ -1,106 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "src/runtime/gpu/cl/operators/ClDirectConv2d.h"
-
-#include "arm_compute/runtime/CL/CLScheduler.h"
-#include "src/core/CL/kernels/CLFillBorderKernel.h"
-#include "src/core/gpu/cl/ClCompileContext.h"
-#include "src/core/gpu/cl/kernels/ClActivationKernel.h"
-#include "src/core/gpu/cl/kernels/ClDirectConv2dKernel.h"
-
-namespace arm_compute
-{
-namespace opencl
-{
-namespace
-{
-ITensorPack select_activation_src_dst(ITensorPack &tensors)
-{
-    ITensorPack pack;
-    pack.add_tensor(TensorType::ACL_SRC, tensors.get_tensor(TensorType::ACL_DST));
-    pack.add_tensor(TensorType::ACL_DST, tensors.get_tensor(TensorType::ACL_DST));
-    return pack;
-}
-} // namespace
-
-void ClDirectConv2d::configure(const CLCompileContext &compile_context, ITensorInfo *src, ITensorInfo *weights, ITensorInfo *biases, ITensorInfo *dst,
-                               const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info)
-{
-    ARM_COMPUTE_ERROR_ON_NULLPTR(src);
-
-    // Configure direct convolution kernel
-    const ActivationLayerInfo conv2d_act_info = (src->data_layout() == DataLayout::NHWC && is_data_type_float(src->data_type())) ? act_info : ActivationLayerInfo();
-    auto                      k               = std::make_unique<kernels::ClDirectConv2dKernel>();
-    k->set_target(CLScheduler::get().target());
-    k->configure(compile_context, src, weights, biases, dst, conv_info, conv2d_act_info);
-    _direct_conv_kernel = std::move(k);
-
-    // Configure border handler
-    PixelValue zero_value(0.f);
-    if(is_data_type_quantized_asymmetric(src->data_type()))
-    {
-        zero_value = PixelValue(0, src->data_type(), src->quantization_info());
-    }
-    auto b = std::make_unique<CLFillBorderKernel>();
-    b->configure(compile_context, src, _direct_conv_kernel->border_size(), BorderMode::CONSTANT, zero_value);
-    _src_border_handler = std::move(b);
-
-    // Fused activation is currently supported for NHWC and floating point types
-    if(act_info.enabled() && !conv2d_act_info.enabled())
-    {
-        auto a = std::make_unique<kernels::ClActivationKernel>();
-        a->configure(compile_context, dst, dst, act_info);
-        _activation_kernel = std::move(a);
-    }
-
-    // Tune kernels
-    CLScheduler::get().tune_kernel_static(*_direct_conv_kernel);
-}
-
-Status ClDirectConv2d::validate(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst,
-                                const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info)
-{
-    ARM_COMPUTE_RETURN_ON_ERROR(kernels::ClDirectConv2dKernel::validate(src, weights, biases, dst, conv_info, ActivationLayerInfo(), CLScheduler::get().target()));
-    if(act_info.enabled())
-    {
-        ARM_COMPUTE_RETURN_ON_ERROR(kernels::ClActivationKernel::validate(dst, dst, act_info));
-    }
-    return Status{};
-}
-
-void ClDirectConv2d::run(ITensorPack &tensors)
-{
-    // Run border handler
-    CLScheduler::get().enqueue_op(*_src_border_handler.get(), tensors, false);
-    // Run direct convolution
-    CLScheduler::get().enqueue_op(*_direct_conv_kernel.get(), tensors, false);
-    // Run activation kernel
-    if(_activation_kernel)
-    {
-        auto act_pack = select_activation_src_dst(tensors);
-        CLScheduler::get().enqueue_op(*_activation_kernel.get(), act_pack, false);
-    }
-}
-} // namespace opencl
-} // namespace arm_compute
diff --git a/src/runtime/gpu/cl/operators/ClDirectConv2d.h b/src/runtime/gpu/cl/operators/ClDirectConv2d.h
deleted file mode 100644
index e069733fab..0000000000
--- a/src/runtime/gpu/cl/operators/ClDirectConv2d.h
+++ /dev/null
@@ -1,83 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CL_DIRECT_CONV2D_H
-#define ARM_COMPUTE_CL_DIRECT_CONV2D_H
-
-#include "src/core/gpu/cl/ClCompileContext.h"
-#include "src/core/gpu/cl/IClKernel.h"
-#include "src/runtime/gpu/cl/IClOperator.h"
-
-#include <memory>
-
-namespace arm_compute
-{
-namespace opencl
-{
-/** Basic function to simulate a directly convolution layer. This function calls the following OpenCL kernels:
- *
- * -# @ref CLFillBorderKernel (executed if padding size is different from zero)
- * -# @ref opencl::ClDirectConv2d
- */
-class ClDirectConv2d : public IClOperator
-{
-public:
-    /** Constructor */
-    ClDirectConv2d() = default;
-    /** Set the src and dst tensors.
-     *
-     * @param[in]  compile_context The compile context to be used.
-     * @param[in]  src             Source tensor. 3 lower dimensions represent a single src [width, height, IFM],
-     *                             while every optional dimension from 4 and above represent a batch of srcs.
-     *                             Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32.
-     * @param[in]  weights         Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported:Same as @p src.
-     * @param[in]  biases          Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
-     *                             Data type supported: Should match @p src data type, except for src of QASYMM8 and QASYMM8_SIGNED type where biases should be of S32 type.
-     * @param[out] dst             Destination tensor. 3 lower dimensions represent a single dst [width, height, OFM], while the rest represent batch of dsts.
-     *                             Data types supported: Same as @p src.
-     * @param[in]  conv_info       Contains padding and stride information described in @ref PadStrideInfo.
-     * @param[in]  act_info        (Optional) Activation layer information in case of a fused activation.
-     *
-     */
-    void configure(const CLCompileContext &compile_context, ITensorInfo *src, ITensorInfo *weights, ITensorInfo *biases, ITensorInfo *dst, const PadStrideInfo &conv_info,
-                   const ActivationLayerInfo &act_info = ActivationLayerInfo());
-    /** Static function to check if given info will lead to a valid configuration
-     *
-     * Similar to ClDirectConv2d::configure()
-     *
-     * @return a status
-     */
-    static Status validate(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst, const PadStrideInfo &conv_info,
-                           const ActivationLayerInfo &act_info = ActivationLayerInfo());
-
-    // Inherited method overridden
-    void run(ITensorPack &tensors) override;
-
-private:
-    std::unique_ptr<IClKernel> _direct_conv_kernel{ nullptr };
-    std::unique_ptr<IClKernel> _src_border_handler{ nullptr };
-    std::unique_ptr<IClKernel> _activation_kernel{ nullptr };
-};
-} // namespace opencl
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CL_DIRECT_CONV2D_H */
-\ No newline at end of file
diff --git a/src/runtime/gpu/cl/operators/ClElementwiseOperations.cpp b/src/runtime/gpu/cl/operators/ClElementwiseOperations.cpp
deleted file mode 100644
index e5b836a0d8..0000000000
--- a/src/runtime/gpu/cl/operators/ClElementwiseOperations.cpp
+++ /dev/null
@@ -1,92 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "src/runtime/gpu/cl/operators/ClElementwiseOperations.h"
-
-#include "src/core/gpu/cl/kernels/ClElementwiseKernel.h"
-
-namespace arm_compute
-{
-namespace opencl
-{
-void ClElementwiseDivision::configure(const ClCompileContext &compile_context, ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst, const ActivationLayerInfo &act_info)
-{
-    auto k = std::make_unique<kernels::ClArithmeticKernel>();
-    k->configure(compile_context, ArithmeticOperation::DIV, src1, src2, dst, act_info);
-    _kernel = std::move(k);
-}
-
-Status ClElementwiseDivision::validate(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst, const ActivationLayerInfo &act_info)
-{
-    return kernels::ClArithmeticKernel::validate(ArithmeticOperation::DIV, src1, src2, dst, act_info);
-}
-
-void ClElementwiseMax::configure(const ClCompileContext &compile_context, ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst, const ActivationLayerInfo &act_info)
-{
-    auto k = std::make_unique<kernels::ClArithmeticKernel>();
-    k->configure(compile_context, ArithmeticOperation::MAX, src1, src2, dst, act_info);
-    _kernel = std::move(k);
-}
-
-Status ClElementwiseMax::validate(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst, const ActivationLayerInfo &act_info)
-{
-    return kernels::ClArithmeticKernel::validate(ArithmeticOperation::MAX, src1, src2, dst, act_info);
-}
-
-void ClElementwiseMin::configure(const ClCompileContext &compile_context, ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst, const ActivationLayerInfo &act_info)
-{
-    auto k = std::make_unique<kernels::ClArithmeticKernel>();
-    k->configure(compile_context, ArithmeticOperation::MIN, src1, src2, dst, act_info);
-    _kernel = std::move(k);
-}
-
-Status ClElementwiseMin::validate(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst, const ActivationLayerInfo &act_info)
-{
-    return kernels::ClArithmeticKernel::validate(ArithmeticOperation::MIN, src1, src2, dst, act_info);
-}
-
-void ClElementwiseSquaredDiff::configure(const ClCompileContext &compile_context, ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst, const ActivationLayerInfo &act_info)
-{
-    auto k = std::make_unique<kernels::ClArithmeticKernel>();
-    k->configure(compile_context, ArithmeticOperation::SQUARED_DIFF, src1, src2, dst, act_info);
-    _kernel = std::move(k);
-}
-
-Status ClElementwiseSquaredDiff::validate(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst, const ActivationLayerInfo &act_info)
-{
-    return kernels::ClArithmeticKernel::validate(ArithmeticOperation::SQUARED_DIFF, src1, src2, dst, act_info);
-}
-
-void ClElementwisePower::configure(const ClCompileContext &compile_context, ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst, const ActivationLayerInfo &act_info)
-{
-    auto k = std::make_unique<kernels::ClArithmeticKernel>();
-    k->configure(compile_context, ArithmeticOperation::POWER, src1, src2, dst, act_info);
-    _kernel = std::move(k);
-}
-
-Status ClElementwisePower::validate(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst, const ActivationLayerInfo &act_info)
-{
-    return kernels::ClArithmeticKernel::validate(ArithmeticOperation::POWER, src1, src2, dst, act_info);
-}
-} // namespace opencl
-} // namespace arm_compute
diff --git a/src/runtime/gpu/cl/operators/ClElementwiseOperations.h b/src/runtime/gpu/cl/operators/ClElementwiseOperations.h
deleted file mode 100644
index b9ab1405c8..0000000000
--- a/src/runtime/gpu/cl/operators/ClElementwiseOperations.h
+++ /dev/null
@@ -1,190 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CL_ELEMENTWISE_OPERATIONS_H
-#define ARM_COMPUTE_CL_ELEMENTWISE_OPERATIONS_H
-
-#include "src/core/gpu/cl/ClCompileContext.h"
-#include "src/runtime/gpu/cl/IClOperator.h"
-
-namespace arm_compute
-{
-namespace opencl
-{
-/** Basic function to run @ref opencl::kernels::ClArithmeticKernel for division
- *
- * @note The tensor data type for the inputs must be F16/F32.
- * @note The function performs an arithmetic division between two tensors.
- */
-class ClElementwiseDivision : public IClOperator
-{
-public:
-    /** Default Constructor */
-    ClElementwiseDivision() = default;
-    /** Configure function for a given list of arguments.
-     *
-     * @param[in]  compile_context The compile context to be used.
-     * @param[in]  src1            First source tensor info. Data types supported: F16/F32.
-     * @param[in]  src2            Second source tensor info. same as @p src1.
-     * @param[out] dst             Destination tensor info. Data types supported: same as @p src1.
-     * @param[in]  act_info        (Optional) Activation layer information in case of a fused activation.
-     */
-    void configure(const ClCompileContext &compile_context, ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst, const ActivationLayerInfo &act_info = ActivationLayerInfo());
-    /** Static function to check if given info will lead to a valid configuration of @ref ClElementwiseDivision
-     *
-     * @param[in] src1     First source tensor info. Data types supported: F16/F32.
-     * @param[in] src2     Second source tensor info. Data types supported: same as @p src1.
-     * @param[in] dst      Destination tensor info. Data types supported: same as @p src1.
-     * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
-     *
-     * @return a status
-     */
-    static Status validate(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst, const ActivationLayerInfo &act_info = ActivationLayerInfo());
-};
-
-/** Basic function to run @ref opencl::kernels::ClArithmeticKernel for max
- *
- * @note The tensor data type for the inputs must be U8/QASYMM8/S16/QSYMM16/S32/U32/F16/F32.
- * @note The function performs a max operation between two tensors.
- */
-class ClElementwiseMax : public IClOperator
-{
-public:
-    /** Default Constructor */
-    ClElementwiseMax() = default;
-    /** Configure function for a given list of arguments.
-     *
-     * @param[in]  compile_context The compile context to be used.
-     * @param[in]  src1            First source tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/U32/F16/F32.
-     * @param[in]  src2            Second source tensor info. Data types supported: same as @p src1.
-     * @param[out] dst             Destination tensor info. Data types supported: same as @p src1.
-     * @param[in]  act_info        (Optional) Activation layer information in case of a fused activation.
-     */
-    void configure(const ClCompileContext &compile_context, ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst, const ActivationLayerInfo &act_info = ActivationLayerInfo());
-    /** Static function to check if given info will lead to a valid configuration of @ref opencl::kernels::ClArithmeticKernel for max
-     *
-     * @param[in] src1     First source tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/U32/F16/F32.
-     * @param[in] src2     Second source tensor info. Data types supported: same as @p src1.
-     * @param[in] dst      Destination tensor info. Data types supported: same as @p src1.
-     * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
-     *
-     * @return a status
-     */
-    static Status validate(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst, const ActivationLayerInfo &act_info = ActivationLayerInfo());
-};
-
-/** Basic function to run @ref opencl::kernels::ClArithmeticKernel for min
- *
- * @note The tensor data type for the inputs must be U8/QASYMM8/S16/QSYMM16/S32/U32/F16/F32.
- * @note The function performs a max operation between two tensors.
- */
-class ClElementwiseMin : public IClOperator
-{
-public:
-    /** Default Constructor */
-    ClElementwiseMin() = default;
-    /** Configure function for a given list of arguments.
-     *
-     * @param[in]  compile_context The compile context to be used.
-     * @param[in]  src1            First source tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/U32/F16/F32.
-     * @param[in]  src2            Second source tensor info. Data types supported: same as @p src1.
-     * @param[out] dst             Destination tensor info. Data types supported: same as @p src1.
-     * @param[in]  act_info        (Optional) Activation layer information in case of a fused activation.
-     */
-    void configure(const ClCompileContext &compile_context, ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst, const ActivationLayerInfo &act_info = ActivationLayerInfo());
-    /** Static function to check if given info will lead to a valid configuration of @ref opencl::kernels::ClArithmeticKernel for min
-     *
-     * @param[in] src1     First source tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/U32/F16/F32.
-     * @param[in] src2     Second source tensor info. Data types supported: same as @p src1.
-     * @param[in] dst      Destination tensor info. Data types supported: same as @p src1.
-     * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
-     *
-     * @return a status
-     */
-    static Status validate(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst, const ActivationLayerInfo &act_info = ActivationLayerInfo());
-};
-
-/** Basic function to run @ref opencl::kernels::ClArithmeticKernel for squared difference
- *
- * @note The tensor data type for the inputs must be QASYMM8/U8/S16/QSYMM16/F16/F32.
- * @note The function performs a squared different operation between two tensors (i.e., out[i] = (in1[i] - in2[i])^2
- */
-class ClElementwiseSquaredDiff : public IClOperator
-{
-public:
-    /** Default Constructor */
-    ClElementwiseSquaredDiff() = default;
-    /** Configure function for a given list of arguments.
-     *
-     * @param[in]  compile_context The compile context to be used.
-     * @param[in]  src1            First source tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32.
-     * @param[in]  src2            Second source tensor info. Data types supported: same as @p src1.
-     * @param[out] dst             Destination tensor info. Data types supported: same as @p src1.
-     * @param[in]  act_info        (Optional) Activation layer information in case of a fused activation.
-     */
-    void configure(const ClCompileContext &compile_context, ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst, const ActivationLayerInfo &act_info = ActivationLayerInfo());
-    /** Static function to check if given info will lead to a valid configuration of @ref opencl::kernels::ClArithmeticKernel for squared difference
-     *
-     * @param[in] src1     First source tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32.
-     * @param[in] src2     Second source tensor info. Data types supported: same as @p src1.
-     * @param[in] dst      Destination tensor info. Data types supported: same as @p src1.
-     * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
-     *
-     * @return a status
-     */
-    static Status validate(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst, const ActivationLayerInfo &act_info = ActivationLayerInfo());
-};
-
-/** Basic function to run @ref opencl::kernels::ClArithmeticKernel for power
- *
- * @note The tensor data type for the inputs must be F16/F32.
- * @note The function performs an elementwise power of in1 to in2 (i.e., out[i] = in1[i] ^ in2[i])
- */
-class ClElementwisePower : public IClOperator
-{
-public:
-    /** Default Constructor */
-    ClElementwisePower() = default;
-    /** Configure function for a given list of arguments.
-     *
-     * @param[in]  compile_context The compile context to be used.
-     * @param[in]  src1            First source tensor info. Data types supported: F16/F32.
-     * @param[in]  src2            Second source tensor info. Data types supported: F16/F32.
-     * @param[out] dst             Destination tensor info. Data types supported:F16/F32.
-     * @param[in]  act_info        (Optional) Activation layer information in case of a fused activation.
-     */
-    void configure(const ClCompileContext &compile_context, ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst, const ActivationLayerInfo &act_info = ActivationLayerInfo());
-    /** Static function to check if given info will lead to a valid configuration of @ref opencl::kernels::ClArithmeticKernel for power
-     *
-     * @param[in] src1     First source tensor info. Data types supported: F16/F32.
-     * @param[in] src2     Second source tensor info. Data types supported: F16/F32.
-     * @param[in] dst      Destination tensor info. Data types supported: F16/F32.
-     * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
-     *
-     * @return a status
-     */
-    static Status validate(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst, const ActivationLayerInfo &act_info = ActivationLayerInfo());
-};
-} // namespace opencl
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CL_ELEMENTWISE_OPERATIONS_H */
diff --git a/src/runtime/gpu/cl/operators/ClElementwiseUnary.cpp b/src/runtime/gpu/cl/operators/ClElementwiseUnary.cpp
deleted file mode 100644
index 7b830a077f..0000000000
--- a/src/runtime/gpu/cl/operators/ClElementwiseUnary.cpp
+++ /dev/null
@@ -1,116 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "src/runtime/gpu/cl/operators/ClElementwiseUnary.h"
-
-#include "src/core/gpu/cl/kernels/ClElementwiseUnaryKernel.h"
-
-namespace arm_compute
-{
-namespace opencl
-{
-void ClRsqrt::configure(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst)
-{
-    auto k = std::make_unique<kernels::ClElementWiseUnaryKernel>();
-    k->configure(compile_context, src, dst, ElementWiseUnary::RSQRT);
-    _kernel = std::move(k);
-}
-
-Status ClRsqrt::validate(const ITensorInfo *src, const ITensorInfo *dst)
-{
-    return kernels::ClElementWiseUnaryKernel::validate(src, dst, ElementWiseUnary::RSQRT);
-}
-
-void ClExp::configure(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst)
-{
-    auto k = std::make_unique<kernels::ClElementWiseUnaryKernel>();
-    k->configure(compile_context, src, dst, ElementWiseUnary::EXP);
-    _kernel = std::move(k);
-}
-
-Status ClExp::validate(const ITensorInfo *src, const ITensorInfo *dst)
-{
-    return kernels::ClElementWiseUnaryKernel::validate(src, dst, ElementWiseUnary::EXP);
-}
-
-void ClNeg::configure(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst)
-{
-    auto k = std::make_unique<kernels::ClElementWiseUnaryKernel>();
-    k->configure(compile_context, src, dst, ElementWiseUnary::NEG);
-    _kernel = std::move(k);
-}
-
-Status ClNeg::validate(const ITensorInfo *src, const ITensorInfo *dst)
-{
-    return kernels::ClElementWiseUnaryKernel::validate(src, dst, ElementWiseUnary::NEG);
-}
-
-void ClSin::configure(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst)
-{
-    auto k = std::make_unique<kernels::ClElementWiseUnaryKernel>();
-    k->configure(compile_context, src, dst, ElementWiseUnary::SIN);
-    _kernel = std::move(k);
-}
-
-Status ClSin::validate(const ITensorInfo *src, const ITensorInfo *dst)
-{
-    return kernels::ClElementWiseUnaryKernel::validate(src, dst, ElementWiseUnary::SIN);
-}
-
-void ClAbs::configure(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst)
-{
-    auto k = std::make_unique<kernels::ClElementWiseUnaryKernel>();
-    k->configure(compile_context, src, dst, ElementWiseUnary::ABS);
-    _kernel = std::move(k);
-}
-
-Status ClAbs::validate(const ITensorInfo *src, const ITensorInfo *dst)
-{
-    return kernels::ClElementWiseUnaryKernel::validate(src, dst, ElementWiseUnary::ABS);
-}
-
-void ClLog::configure(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst)
-{
-    auto k = std::make_unique<kernels::ClElementWiseUnaryKernel>();
-    k->configure(compile_context, src, dst, ElementWiseUnary::LOG);
-    _kernel = std::move(k);
-}
-
-Status ClLog::validate(const ITensorInfo *src, const ITensorInfo *dst)
-{
-    return kernels::ClElementWiseUnaryKernel::validate(src, dst, ElementWiseUnary::LOG);
-}
-
-void ClRound::configure(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst)
-{
-    auto k = std::make_unique<kernels::ClElementWiseUnaryKernel>();
-    k->configure(compile_context, src, dst, ElementWiseUnary::ROUND);
-    _kernel = std::move(k);
-}
-
-Status ClRound::validate(const ITensorInfo *src, const ITensorInfo *dst)
-{
-    return kernels::ClElementWiseUnaryKernel::validate(src, dst, ElementWiseUnary::ROUND);
-}
-} // namespace opencl
-} // namespace arm_compute
diff --git a/src/runtime/gpu/cl/operators/ClElementwiseUnary.h b/src/runtime/gpu/cl/operators/ClElementwiseUnary.h
deleted file mode 100644
index b40e3e9a3b..0000000000
--- a/src/runtime/gpu/cl/operators/ClElementwiseUnary.h
+++ /dev/null
@@ -1,192 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CL_ELEMENTWISE_UNARY_H
-#define ARM_COMPUTE_CL_ELEMENTWISE_UNARY_H
-
-#include "src/core/gpu/cl/ClCompileContext.h"
-#include "src/runtime/gpu/cl/IClOperator.h"
-
-namespace arm_compute
-{
-namespace opencl
-{
-/** Basic function to perform inverse square root on an src tensor. */
-class ClRsqrt : public IClOperator
-{
-public:
-    /** Constructor */
-    ClRsqrt() = default;
-    /** Initialize the function
-     *
-     * @param[in]  compile_context The compile context to be used.
-     * @param[in]  src             Source tensor info. Data types supported: F16/F32.
-     * @param[out] dst             Destination tensor info. Data types supported: same as @p src.
-     */
-    void configure(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst);
-    /** Static function to check if given info will lead to a valid configuration of @ref ClRsqrt
-     *
-     * @param[in] src First source tensor info. Data types supported: F16/F32.
-     * @param[in] dst Destination tensor info. Data types supported: same as @p src.
-     *
-     * @return a status
-     */
-    static Status validate(const ITensorInfo *src, const ITensorInfo *dst);
-};
-
-/** Basic function to perform exponential on an src tensor. */
-class ClExp : public IClOperator
-{
-public:
-    /** Constructor */
-    ClExp() = default;
-    /** Initialize the function
-     *
-     * @param[in]  compile_context The compile context to be used.
-     * @param[in]  src             Source tensor info. Data types supported: F16/F32.
-     * @param[out] dst             Destination tensor info. Data types supported: same as @p src.
-     */
-    void configure(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst);
-    /** Static function to check if given info will lead to a valid configuration of @ref ClExp
-     *
-     * @param[in] src First source tensor info. Data types supported: F16/F32.
-     * @param[in] dst Destination tensor info. Data types supported: same as @p src.
-     *
-     * @return a status
-     */
-    static Status validate(const ITensorInfo *src, const ITensorInfo *dst);
-};
-
-/** Basic function to negate an src tensor. */
-class ClNeg : public IClOperator
-{
-public:
-    /** Constructor */
-    ClNeg() = default;
-    /** Initialize the function
-     *
-     * @param[in]  compile_context The compile context to be used.
-     * @param[in]  src             Source tensor info. Data types supported: F16/F32.
-     * @param[out] dst             Destination tensor info. Data types supported: same as @p src.
-     */
-    void configure(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst);
-    /** Static function to check if given info will lead to a valid configuration of @ref ClNeg
-     *
-     * @param[in] src First source tensor info. Data types supported: F16/F32.
-     * @param[in] dst Destination tensor info. Data types supported: same as @p src.
-     *
-     * @return a status
-     */
-    static Status validate(const ITensorInfo *src, const ITensorInfo *dst);
-};
-
-/** Basic function to calculate sine of an src tensor. */
-class ClSin : public IClOperator
-{
-public:
-    /** Constructor */
-    ClSin() = default;
-    /** Initialize the function
-     *
-     * @param[in]  compile_context The compile context to be used.
-     * @param[in]  src             Source tensor info. Data types supported: F16/F32.
-     * @param[out] dst             Destination tensor info. Data types supported: same as @p src.
-     */
-    void configure(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst);
-    /** Static function to check if given info will lead to a valid configuration of @ref ClSin
-     *
-     * @param[in] src First source tensor info. Data types supported: F16/F32.
-     * @param[in] dst Destination tensor info. Data types supported: same as @p src.
-     *
-     * @return a status
-     */
-    static Status validate(const ITensorInfo *src, const ITensorInfo *dst);
-};
-
-/** Basic function to perform elementwise log on an src tensor. */
-class ClLog : public IClOperator
-{
-public:
-    /** Constructor */
-    ClLog() = default;
-    /** Initialize the function
-     *
-     * @param[in]  compile_context The compile context to be used.
-     * @param[in]  src             Source tensor info. Data types supported: F16/F32.
-     * @param[out] dst             Destination tensor info. Data types supported: same as @p src.
-     */
-    void configure(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst);
-    /** Static function to check if given info will lead to a valid configuration of @ref ClLog
-     *
-     * @param[in] src First source tensor info. Data types supported: F16/F32.
-     * @param[in] dst Destination tensor info. Data types supported: same as @p src.
-     *
-     * @return a status
-     */
-    static Status validate(const ITensorInfo *src, const ITensorInfo *dst);
-};
-
-/** Basic function to get the absolute value of an src tensor. */
-class ClAbs : public IClOperator
-{
-public:
-    /** Initialize the function
-     *
-     * @param[in]  compile_context The compile context to be used.
-     * @param[in]  src             Source tensor info. Data types supported: F16/F32.
-     * @param[out] dst             Destination tensor info. Data types supported: same as @p src.
-     */
-    void configure(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst);
-    /** Static function to check if given info will lead to a valid configuration of @ref ClAbs
-     *
-     * @param[in] src First source tensor info. Data types supported: F16/F32.
-     * @param[in] dst Destination tensor info. Data types supported: same as @p src.
-     *
-     * @return a status
-     */
-    static Status validate(const ITensorInfo *src, const ITensorInfo *dst);
-};
-
-/** Basic function to get the round (to the nearest even) value of an src tensor. */
-class ClRound : public IClOperator
-{
-public:
-    /** Initialize the function
-     *
-     * @param[in]  compile_context The compile context to be used.
-     * @param[in]  src             Source tensor info. Data types supported: F16/F32.
-     * @param[out] dst             Destination tensor info. Data types supported: same as @p src.
-     */
-    void configure(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst);
-    /** Static function to check if given info will lead to a valid configuration of @ref ClRound
-     *
-     * @param[in] src First source tensor info. Data types supported: F16/F32.
-     * @param[in] dst Destination tensor info. Data types supported: same as @p src.
-     *
-     * @return a status
-     */
-    static Status validate(const ITensorInfo *src, const ITensorInfo *dst);
-};
-} // namespace opencl
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CL_ELEMENTWISE_UNARY_H */
diff --git a/src/runtime/gpu/cl/operators/ClFill.cpp b/src/runtime/gpu/cl/operators/ClFill.cpp
deleted file mode 100644
index 4d0afaef24..0000000000
--- a/src/runtime/gpu/cl/operators/ClFill.cpp
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "src/runtime/gpu/cl/operators/ClFill.h"
-
-#include "src/core/gpu/cl/ClCompileContext.h"
-#include "src/core/gpu/cl/kernels/ClFillKernel.h"
-
-namespace arm_compute
-{
-namespace opencl
-{
-void ClFill::configure(const ClCompileContext &compile_context, ITensorInfo *tensor, const PixelValue &constant_value, Window *dst_window)
-{
-    auto k = std::make_unique<kernels::ClFillKernel>();
-    k->configure(compile_context, tensor, constant_value, dst_window);
-    _kernel = std::move(k);
-}
-
-Status ClFill::validate(const ITensorInfo *tensor, const PixelValue &constant_value, Window *dst_window)
-{
-    return kernels::ClFillKernel::validate(tensor, constant_value, dst_window);
-}
-} // namespace opencl
-} // namespace arm_compute
-\ No newline at end of file
diff --git a/src/runtime/gpu/cl/operators/ClFill.h b/src/runtime/gpu/cl/operators/ClFill.h
deleted file mode 100644
index e632d88546..0000000000
--- a/src/runtime/gpu/cl/operators/ClFill.h
+++ /dev/null
@@ -1,61 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CL_FILL_H
-#define ARM_COMPUTE_CL_FILL_H
-
-#include "arm_compute/core/Window.h"
-#include "src/core/gpu/cl/ClCompileContext.h"
-#include "src/runtime/gpu/cl/IClOperator.h"
-
-namespace arm_compute
-{
-namespace opencl
-{
-/** Basic function to run @ref kernels::ClFillKernel */
-class ClFill : public IClOperator
-{
-public:
-    /** Constructor */
-    ClFill() = default;
-    /** Initialise the kernel's tensor and filling value
-     *
-     * @param[in]     compile_context The compile context to be used.
-     * @param[in,out] tensor          Source tensor info. Supported data types: All.
-     * @param[in]     constant_value  The value used to fill the planes of the tensor
-     * @param[in]     window          Window to be used in case setting only part of a tensor. Default is nullptr.
-     */
-    void configure(const CLCompileContext &compile_context, ITensorInfo *tensor, const PixelValue &constant_value, Window *window = nullptr);
-    /** Static function to check if given info will lead to a valid configuration of @ref kernels::ClFillKernel
-     *
-     * @param[in] tensor         Source tensor info. Data types supported: All.
-     * @param[in] constant_value The value used to fill the planes of the tensor.
-     * @param[in] window         Window to be used in case setting only part of a tensor. Default is nullptr.
-     *
-     * @return a status
-     */
-    static Status validate(const ITensorInfo *tensor, const PixelValue &constant_value, Window *window = nullptr);
-};
-} // namespace opencl
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CL_FILL_H */
diff --git a/src/runtime/gpu/cl/operators/ClFlatten.cpp b/src/runtime/gpu/cl/operators/ClFlatten.cpp
deleted file mode 100644
index 060b653dee..0000000000
--- a/src/runtime/gpu/cl/operators/ClFlatten.cpp
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "src/runtime/gpu/cl/operators/ClFlatten.h"
-
-#include "src/core/gpu/cl/ClCompileContext.h"
-#include "src/core/gpu/cl/kernels/ClReshapeKernel.h"
-
-namespace arm_compute
-{
-namespace opencl
-{
-void ClFlatten::configure(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst)
-{
-    auto k = std::make_unique<kernels::ClReshapeKernel>();
-    k->configure(compile_context, src, dst);
-    _kernel = std::move(k);
-}
-
-Status ClFlatten::validate(const ITensorInfo *src, const ITensorInfo *dst)
-{
-    return kernels::ClReshapeKernel::validate(src, dst);
-}
-} // namespace opencl
-} // namespace arm_compute
diff --git a/src/runtime/gpu/cl/operators/ClFlatten.h b/src/runtime/gpu/cl/operators/ClFlatten.h
deleted file mode 100644
index 20ad06ee57..0000000000
--- a/src/runtime/gpu/cl/operators/ClFlatten.h
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CL_FLATTEN_H
-#define ARM_COMPUTE_CL_FLATTEN_H
-
-#include "src/core/gpu/cl/ClCompileContext.h"
-#include "src/runtime/gpu/cl/IClOperator.h"
-
-namespace arm_compute
-{
-namespace opencl
-{
-/** Basic function to flatten a given input */
-class ClFlatten : public IClOperator
-{
-public:
-    /** Constructor */
-    ClFlatten() = default;
-    /** Configure operator for a given list of arguments
-     *
-     * Valid data layouts:
-     * - All
-     *
-     * Valid data type configurations:
-     * |src            |dst            |
-     * |:--------------|:--------------|
-     * |All            |All            |
-     *
-     * @param[in] compile_context The compile context to be used.
-     * @param[in] src             Source tensor to flatten with at least 3 dimensions.
-     *                            The dimensions above the third will be interpreted as batches. Data types supported: All
-     * @param[in] dst             Destination tensor with shape [w*h*d, input_batches] where:
-     *                            w = width input tensor, h = height input tensor and d = depth input tensor.
-     *                            Data type supported: same as @p src
-     */
-    void configure(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst);
-    /** Static function to check if given info will lead to a valid configuration
-     *
-     * Similar to @ref ClFlatten::configure()
-     *
-     * @return a status
-     */
-    static Status validate(const ITensorInfo *src, const ITensorInfo *dst);
-};
-} // namespace opencl
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CL_FLATTEN_H */
diff --git a/src/runtime/gpu/cl/operators/ClFloor.cpp b/src/runtime/gpu/cl/operators/ClFloor.cpp
deleted file mode 100644
index 94e77c0c54..0000000000
--- a/src/runtime/gpu/cl/operators/ClFloor.cpp
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * Copyright (c) 2017-2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "src/runtime/gpu/cl/operators/ClFloor.h"
-
-#include "src/core/gpu/cl/ClCompileContext.h"
-#include "src/core/gpu/cl/kernels/ClFloorKernel.h"
-
-namespace arm_compute
-{
-namespace opencl
-{
-void ClFloor::configure(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst)
-{
-    auto k = std::make_unique<kernels::ClFloorKernel>();
-    k->configure(compile_context, src, dst);
-    _kernel = std::move(k);
-}
-
-Status ClFloor::validate(const ITensorInfo *src, const ITensorInfo *dst)
-{
-    return kernels::ClFloorKernel::validate(src, dst);
-}
-} // namespace opencl
-} // namespace arm_compute
diff --git a/src/runtime/gpu/cl/operators/ClFloor.h b/src/runtime/gpu/cl/operators/ClFloor.h
deleted file mode 100644
index f54eef9140..0000000000
--- a/src/runtime/gpu/cl/operators/ClFloor.h
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CL_FLOOR_H
-#define ARM_COMPUTE_CL_FLOOR_H
-
-#include "src/core/gpu/cl/ClCompileContext.h"
-#include "src/runtime/gpu/cl/IClOperator.h"
-
-namespace arm_compute
-{
-namespace opencl
-{
-/** Basic function to run @ref kernels::ClFloorKernel */
-class ClFloor : public IClOperator
-{
-public:
-    /** Constructor */
-    ClFloor() = default;
-    /** Configure operator for a given list of arguments
-     *
-     * @param[in] compile_context The compile context to be used.
-     * @param[in] src             Source tensor info. Data types supported: F16/F32.
-     * @param[in] dst             Destination tensor info. Data type supported: same as @p src
-     */
-    void configure(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst);
-    /** Static function to check if given info will lead to a valid configuration of @ref ClFloor
-     *
-     * @param[in] src Source tensor info. Data types supported: F16/F32.
-     * @param[in] dst Destination tensor info. Data type supported: same as @p src
-     *
-     * @return a status
-     */
-    static Status validate(const ITensorInfo *src, const ITensorInfo *dst);
-};
-} // namespace opencl
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CL_FLOOR_H */
diff --git a/src/runtime/gpu/cl/operators/ClGemm.cpp b/src/runtime/gpu/cl/operators/ClGemm.cpp
deleted file mode 100644
index a80375447d..0000000000
--- a/src/runtime/gpu/cl/operators/ClGemm.cpp
+++ /dev/null
@@ -1,760 +0,0 @@
-/*
- * Copyright (c) 2017-2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "src/runtime/gpu/cl/operators/ClGemm.h"
-
-#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/GPUTarget.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/KernelDescriptors.h"
-#include "arm_compute/core/Log.h"
-#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/core/Utils.h"
-#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/utils/misc/ShapeCalculator.h"
-#include "arm_compute/runtime/CL/CLScheduler.h"
-#include "arm_compute/runtime/ITensorAllocator.h"
-#include "src/core/gpu/cl/IClKernel.h"
-#include "src/core/helpers/AutoConfiguration.h"
-#include "src/core/helpers/MemoryHelpers.h"
-#include "src/core/utils/helpers/float_ops.h"
-#include "src/runtime/CL/gemm/CLGEMMKernelSelection.h"
-#include "src/runtime/CL/gemm_auto_heuristics/CLGEMMAutoHeuristics.h"
-#include "src/runtime/gpu/cl/utils/ClAuxTensorHandler.h"
-
-#include "support/Cast.h"
-#include "utils/TypePrinter.h"
-
-namespace arm_compute
-{
-namespace opencl
-{
-using namespace arm_compute::misc::shape_calculator;
-using namespace arm_compute::cl_gemm;
-using namespace arm_compute::experimental;
-using namespace arm_compute::utils::cast;
-using namespace arm_compute::opencl::kernels;
-
-namespace
-{
-inline bool validate_gemm_kernel(CLGEMMKernelType kernel_type)
-{
-    switch(kernel_type)
-    {
-        case CLGEMMKernelType::NATIVE_V1:
-        case CLGEMMKernelType::RESHAPED_ONLY_RHS:
-        case CLGEMMKernelType::RESHAPED_V1:
-        case CLGEMMKernelType::RESHAPED:
-        {
-            return true;
-        }
-        default:
-        {
-            return false;
-        }
-    }
-}
-//Automatically select between mlgo (prioritized) and default heuristics for gemm kernel type
-inline CLGEMMKernelType auto_select_gemm_kernel(auto_heuristics::CommonQuery query, bool reshape_b_only_on_first_run, bool constant_weights)
-{
-    if(!constant_weights)
-    {
-        return CLGEMMKernelType::NATIVE_V1;
-    }
-
-    auto gemm_kernel = auto_heuristics::select_mlgo_gemm_kernel(query, reshape_b_only_on_first_run);
-    if(bool(gemm_kernel))
-    {
-        if(validate_gemm_kernel(gemm_kernel.gemm_type))
-        {
-            ARM_COMPUTE_LOG_INFO_MSG_WITH_FORMAT_CORE("Use gemm kernel from mlgo heuristics: %s.", to_string(gemm_kernel.gemm_type).c_str());
-            return gemm_kernel.gemm_type;
-        }
-    }
-    gemm_kernel = auto_heuristics::select_default_gemm_kernel(query, reshape_b_only_on_first_run);
-    ARM_COMPUTE_LOG_INFO_MSG_WITH_FORMAT_CORE("Use gemm kernel from default heuristics: %s.", to_string(gemm_kernel.gemm_type).c_str());
-    return gemm_kernel.gemm_type;
-}
-// Validate lhs_info and rhs_info for reshaped only rhs kernel
-inline bool validate_lhs_rhs_info_reshaped_only_rhs(const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info, const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c,
-                                                    const ITensorInfo *output, GEMMKernelInfo gemm_kernel_info)
-{
-    // Validate GEMMLHSMatrixInfo and GEMMRHSMatrixInfo for reshaped only rhs kernel
-    TensorInfo tmp_b_info{};
-    // Validate reshape RHS kernel
-    auto_init_if_empty(tmp_b_info, b->clone()->set_tensor_shape(compute_rhs_reshaped_shape(*b, rhs_info)));
-    if(!bool(ClGemmReshapeRhsMatrixKernel::validate(b, &tmp_b_info, rhs_info)))
-    {
-        return false;
-    }
-    // Validate mm kernel
-    gemm_kernel_info.lhs_info  = lhs_info;
-    gemm_kernel_info.rhs_info  = rhs_info;
-    gemm_kernel_info.has_pad_y = false;
-    if(!bool(ClGemmMatrixMultiplyReshapedOnlyRhsKernel::validate(a, &tmp_b_info, c, output, 1.f, 0.f, lhs_info, rhs_info, gemm_kernel_info)))
-    {
-        return false;
-    }
-    gemm_kernel_info.has_pad_y = true;
-    if(!bool(ClGemmMatrixMultiplyReshapedOnlyRhsKernel::validate(a, &tmp_b_info, c, output, 1.f, 0.f, lhs_info, rhs_info, gemm_kernel_info)))
-    {
-        return false;
-    }
-    return true;
-}
-
-//Automatically select between mlgo (prioritized) and default heuristics for reshaped only rhs kernel configs
-inline std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> auto_select_gemm_config_reshaped_only_rhs(auto_heuristics::CommonQuery query, GEMMKernelInfo kernel_info, const ITensorInfo *a,
-                                                                                                 const ITensorInfo *b,
-                                                                                                 const ITensorInfo *c, const ITensorInfo *output)
-{
-    auto config = auto_heuristics::select_mlgo_gemm_config_reshaped_only_rhs(query);
-    if(config)
-    {
-        if(validate_lhs_rhs_info_reshaped_only_rhs(config.lhs_info, config.rhs_info, a, b, c, output, kernel_info))
-        {
-            ARM_COMPUTE_LOG_INFO_MSG_WITH_FORMAT_CORE("Use reshaped_only_rhs config from mlgo heuristics: LHS info: %s ; RHS info: %s ", to_string(config.lhs_info).c_str(), to_string(config.rhs_info).c_str());
-            return { config.lhs_info, config.rhs_info };
-        }
-    }
-    config = auto_heuristics::select_default_gemm_config_reshaped_only_rhs(query);
-    ARM_COMPUTE_LOG_INFO_MSG_WITH_FORMAT_CORE("Use reshaped_only_rhs config from default heuristics: LHS info: %s ; RHS info: %s ", to_string(config.lhs_info).c_str(), to_string(config.rhs_info).c_str());
-    return { config.lhs_info, config.rhs_info };
-}
-
-// Validate lhs_info and rhs_info for reshaped kernel
-inline bool validate_lhs_rhs_info_reshaped(const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info, const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c,
-                                           const ITensorInfo *output, GEMMKernelInfo gemm_kernel_info, bool reinterpret_input_as_3d)
-{
-    // Validate GEMMLHSMatrixInfo and GEMMRHSMatrixInfo for reshaped kernel
-    TensorInfo tmp_a_info{};
-    TensorInfo tmp_b_info{};
-
-    // Validate reshape LHS kernel
-    auto_init_if_empty(tmp_a_info, a->clone()->set_tensor_shape(compute_lhs_reshaped_shape(*a, lhs_info, reinterpret_input_as_3d)));
-    if(!bool(ClGemmReshapeLhsMatrixKernel::validate(a, &tmp_a_info, lhs_info, reinterpret_input_as_3d)))
-    {
-        return false;
-    }
-
-    // Validate reshape RHS kernel
-    auto_init_if_empty(tmp_b_info, b->clone()->set_tensor_shape(compute_rhs_reshaped_shape(*b, rhs_info)));
-    if(!bool(ClGemmReshapeRhsMatrixKernel::validate(b, &tmp_b_info, rhs_info)))
-    {
-        return false;
-    }
-    // Validate mm kernel
-    gemm_kernel_info.lhs_info = lhs_info;
-    gemm_kernel_info.rhs_info = rhs_info;
-    if(!bool(ClGemmMatrixMultiplyReshapedKernel::validate(&tmp_a_info, &tmp_b_info, c, output, 1.f, 0.f, lhs_info, rhs_info, gemm_kernel_info)))
-    {
-        return false;
-    }
-    return true;
-}
-
-//Automatically select between mlgo (prioritized) and default heuristics for reshaped kernel configs
-inline std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> auto_select_gemm_config_reshaped(auto_heuristics::CommonQuery query, GEMMKernelInfo kernel_info, const ITensorInfo *a, const ITensorInfo *b,
-                                                                                        const ITensorInfo *c, const ITensorInfo *output, bool reinterpret_input_as_3d)
-{
-    auto config = auto_heuristics::select_mlgo_gemm_config_reshaped(query);
-    if(config)
-    {
-        if(validate_lhs_rhs_info_reshaped(config.lhs_info, config.rhs_info, a, b, c, output, kernel_info, reinterpret_input_as_3d))
-        {
-            ARM_COMPUTE_LOG_INFO_MSG_WITH_FORMAT_CORE("Use reshaped config from mlgo heuristics: LHS info: %s ; RHS info: %s ", to_string(config.lhs_info).c_str(), to_string(config.rhs_info).c_str());
-            return { config.lhs_info, config.rhs_info };
-        }
-    }
-    config = auto_heuristics::select_default_gemm_config_reshaped(query);
-    ARM_COMPUTE_LOG_INFO_MSG_WITH_FORMAT_CORE("Use reshaped config from default heuristics: LHS info: %s ; RHS info: %s ", to_string(config.lhs_info).c_str(), to_string(config.rhs_info).c_str());
-    return { config.lhs_info, config.rhs_info };
-}
-} // namespace
-
-ClGemm::ClGemm()
-    : _mm_kernel(std::make_unique<ClGemmMatrixMultiplyKernel>()),
-      _reshape_lhs_kernel(std::make_unique<ClGemmReshapeLhsMatrixKernel>()),
-      _reshape_rhs_kernel(std::make_unique<ClGemmReshapeRhsMatrixKernel>()),
-      _mm_reshaped_kernel(std::make_unique<ClGemmMatrixMultiplyReshapedKernel>()),
-      _mm_reshaped_only_rhs_kernel(std::make_unique<ClGemmMatrixMultiplyReshapedOnlyRhsKernel>()),
-      _mm_reshaped_only_rhs_fallback_kernel(std::make_unique<ClGemmMatrixMultiplyReshapedOnlyRhsKernel>()),
-      _tmp_a(),
-      _tmp_b(),
-      _reshape_b_only_on_first_run(false),
-      _gemm_kernel_type(CLGEMMKernelType::NATIVE_V1),
-      _aux_mem(AuxTensorIdx::Count)
-{
-}
-
-void ClGemm::configure_native_v1(const CLCompileContext &compile_context, ITensorInfo *a, ITensorInfo *b, ITensorInfo *c, ITensorInfo *output, float alpha, float beta,
-                                 const GEMMInfo &gemm_info)
-{
-    const unsigned int m          = gemm_info.reinterpret_input_as_3d() ? (a->dimension(1) * a->dimension(2)) : a->dimension(1);
-    const unsigned int n          = b->dimension(0);
-    const unsigned int k          = a->dimension(0);
-    const GPUTarget    gpu_target = CLScheduler::get().target();
-
-    // Set the target for the kernels
-    _mm_kernel->set_target(gpu_target);
-
-    GEMMReshapeInfo reshape_info(m, n, k, 1, 1, gemm_info.depth_output_gemm3d(), gemm_info.reinterpret_input_as_3d(), gemm_info.broadcast_bias());
-
-    // Configure and tune matrix multiply kernel
-    _mm_kernel->configure(compile_context, a, b, c, output, alpha, beta, false, reshape_info, gemm_info.fp_mixed_precision(), gemm_info.activation_info());
-
-    // Tune kernel statically
-    CLScheduler::get().tune_kernel_static(*_mm_kernel);
-}
-
-void ClGemm::configure_reshaped_v1(const CLCompileContext &compile_context, ITensorInfo *a, ITensorInfo *b, ITensorInfo *c, ITensorInfo *output, float alpha, float beta,
-                                   const GEMMInfo &gemm_info)
-{
-    bool               reinterpret_input_as_3d   = gemm_info.reinterpret_input_as_3d();
-    const unsigned int m                         = reinterpret_input_as_3d ? (a->dimension(1) * a->dimension(2)) : a->dimension(1);
-    const unsigned int n                         = b->dimension(0);
-    const unsigned int k                         = a->dimension(0);
-    const int          depth_output_gemm3d       = gemm_info.depth_output_gemm3d();
-    const GPUTarget    gpu_target                = CLScheduler::get().target();
-    int                mult_transpose1xW_width   = 1;
-    int                mult_interleave4x4_height = 1;
-
-    // Set the target for the kernels
-    _reshape_lhs_kernel->set_target(gpu_target);
-    _mm_kernel->set_target(gpu_target);
-
-    if(get_arch_from_target(gpu_target) == GPUTarget::BIFROST)
-    {
-        mult_transpose1xW_width   = 4;
-        mult_interleave4x4_height = 2;
-    }
-
-    GEMMRHSMatrixInfo rhs_info;
-    rhs_info.n0         = 16 / b->element_size();
-    rhs_info.k0         = 1;
-    rhs_info.h0         = mult_transpose1xW_width;
-    rhs_info.interleave = false;
-    rhs_info.transpose  = false;
-
-    GEMMLHSMatrixInfo lhs_info;
-    lhs_info.m0         = 4;
-    lhs_info.k0         = 4;
-    lhs_info.v0         = mult_interleave4x4_height;
-    lhs_info.interleave = true;
-    lhs_info.transpose  = true;
-
-    GEMMReshapeInfo reshape_info(m, n, k, mult_transpose1xW_width, mult_interleave4x4_height, depth_output_gemm3d, false, gemm_info.broadcast_bias());
-
-    // Configure interleave kernel
-    _reshape_lhs_kernel->configure(compile_context, a, &_tmp_a, lhs_info, reinterpret_input_as_3d);
-
-    // Configure transpose kernel
-    _reshape_rhs_kernel->configure(compile_context, b, &_tmp_b, rhs_info);
-
-    // Configure and tune matrix multiply kernel
-    _mm_kernel->configure(compile_context, &_tmp_a, &_tmp_b, c, output, alpha, beta, true, reshape_info, gemm_info.fp_mixed_precision(), gemm_info.activation_info());
-
-    CLScheduler::get().tune_kernel_static(*_mm_kernel);
-
-    // Request memory for LHS and RHS reshape matrix
-    _aux_mem[LhsReshape] = MemoryInfo(offset_int_vec(LhsReshape), MemoryLifetime::Temporary, _tmp_a.total_size());
-    _aux_mem[RhsReshape] = MemoryInfo(offset_int_vec(RhsReshape), _reshape_b_only_on_first_run ? MemoryLifetime::Persistent : MemoryLifetime::Temporary, _tmp_b.total_size());
-}
-
-void ClGemm::configure_reshaped_v2(const CLCompileContext &compile_context, ITensorInfo *a, ITensorInfo *b, ITensorInfo *c, ITensorInfo *output, float alpha, float beta,
-                                   const GEMMInfo &gemm_info)
-{
-    DataType           data_type               = a->data_type();
-    bool               reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d();
-    const unsigned int m                       = reinterpret_input_as_3d ? (a->dimension(1) * a->dimension(2)) : a->dimension(1);
-    const unsigned int n                       = b->dimension(0);
-    const unsigned int k                       = a->dimension(0);
-    const unsigned int batch_size              = reinterpret_input_as_3d ? a->dimension(3) : a->dimension(2);
-    const int          depth_output_gemm3d     = gemm_info.depth_output_gemm3d();
-    const GPUTarget    gpu_target              = CLScheduler::get().target();
-    bool               broadcast_bias          = gemm_info.broadcast_bias();
-
-    GEMMKernelInfo kernel_info;
-    kernel_info.m                       = m;
-    kernel_info.n                       = n;
-    kernel_info.k                       = k;
-    kernel_info.depth_output_gemm3d     = depth_output_gemm3d;
-    kernel_info.reinterpret_input_as_3d = false;
-    kernel_info.broadcast_bias          = broadcast_bias;
-    kernel_info.activation_info         = gemm_info.activation_info();
-
-    // Set the target for the kernels
-    _reshape_lhs_kernel->set_target(gpu_target);
-    _mm_kernel->set_target(gpu_target);
-
-    GEMMLHSMatrixInfo lhs_info{};
-    GEMMRHSMatrixInfo rhs_info{};
-
-    // Pick up the GEMM configuration
-    std::tie(lhs_info, rhs_info) = auto_select_gemm_config_reshaped(auto_heuristics::CommonQuery{ gpu_target, data_type, m, n, k, batch_size }, kernel_info, a, b,
-                                                                    c, output, gemm_info.reinterpret_input_as_3d());
-
-    _reshape_lhs_kernel->configure(compile_context, a, &_tmp_a, lhs_info, gemm_info.reinterpret_input_as_3d());
-    _reshape_rhs_kernel->configure(compile_context, b, &_tmp_b, rhs_info);
-
-    // Configure and tune matrix multiply kernel
-    _mm_reshaped_kernel->configure(compile_context, &_tmp_a, &_tmp_b, c, output, alpha, beta, lhs_info, rhs_info, kernel_info);
-
-    // Request memory for LHS and RHS reshape matrix
-    _aux_mem[LhsReshape] = MemoryInfo(offset_int_vec(LhsReshape), MemoryLifetime::Temporary, _tmp_a.total_size());
-    _aux_mem[RhsReshape] = MemoryInfo(offset_int_vec(RhsReshape), _reshape_b_only_on_first_run ? MemoryLifetime::Persistent : MemoryLifetime::Temporary, _tmp_b.total_size());
-}
-
-void ClGemm::configure_reshaped_only_rhs(const CLCompileContext &compile_context, ITensorInfo *a, ITensorInfo *b, ITensorInfo *c, ITensorInfo *output, float alpha, float beta,
-                                         const GEMMInfo &gemm_info)
-{
-    DataType           data_type               = a->data_type();
-    bool               reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d();
-    const unsigned int m                       = reinterpret_input_as_3d ? (a->dimension(1) * a->dimension(2)) : a->dimension(1);
-    const unsigned int n                       = b->dimension(0);
-    const unsigned int k                       = a->dimension(0);
-    const unsigned int batch_size              = reinterpret_input_as_3d ? a->dimension(3) : a->dimension(2);
-    const int          depth_output_gemm3d     = gemm_info.depth_output_gemm3d();
-    const GPUTarget    gpu_target              = CLScheduler::get().target();
-    bool               broadcast_bias          = gemm_info.broadcast_bias();
-
-    GEMMKernelInfo kernel_info;
-    kernel_info.m                       = m;
-    kernel_info.n                       = n;
-    kernel_info.k                       = k;
-    kernel_info.depth_output_gemm3d     = depth_output_gemm3d;
-    kernel_info.reinterpret_input_as_3d = reinterpret_input_as_3d;
-    kernel_info.broadcast_bias          = broadcast_bias;
-    kernel_info.activation_info         = gemm_info.activation_info();
-
-    // Set the target for the kernels
-    _mm_kernel->set_target(gpu_target);
-
-    GEMMLHSMatrixInfo lhs_info{};
-    GEMMRHSMatrixInfo rhs_info{};
-
-    // Pick up the GEMM configuration
-    std::tie(lhs_info, rhs_info) = auto_select_gemm_config_reshaped_only_rhs(auto_heuristics::CommonQuery{ gpu_target, data_type, m, n, k, batch_size }, kernel_info, a, b, c, output);
-
-    // Transpose matrix
-    _reshape_rhs_kernel->configure(compile_context, b, &_tmp_b, rhs_info);
-
-    // Configure two variants of CLGEMMMatrixMultiplyReshapedOnlyRHSKernel (has_pad_y = false/true)
-    // During the prepare stage we check the padding requirement for the lhs and dst tensors. If they do not have
-    // pad y, we dispatch CLGEMMMatrixMultiplyReshapedOnlyRHSKernel with has_pad_y = false
-
-    // Configure matrix multiply kernel with no y padding support
-    kernel_info.has_pad_y = false;
-    _mm_reshaped_only_rhs_kernel->configure(compile_context, a, &_tmp_b, c, output, alpha, beta, lhs_info, rhs_info, kernel_info);
-
-    // Configure matrix multiply kernel with y padding support
-    kernel_info.has_pad_y = true;
-    _mm_reshaped_only_rhs_fallback_kernel->configure(compile_context, a, &_tmp_b, c, output, alpha, beta, lhs_info, rhs_info, kernel_info);
-
-    // Request memory for RHS reshape matrix
-    _aux_mem[RhsReshape] = MemoryInfo(offset_int_vec(RhsReshape), _reshape_b_only_on_first_run ? MemoryLifetime::Persistent : MemoryLifetime::Temporary, _tmp_b.total_size());
-}
-
-Status ClGemm::validate_native_v1(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info)
-{
-    ARM_COMPUTE_UNUSED(alpha);
-    ARM_COMPUTE_UNUSED(output);
-
-    // Get the GPU target
-    const GPUTarget    gpu_target              = CLScheduler::get().target();
-    bool               reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d();
-    const unsigned int m                       = reinterpret_input_as_3d ? (a->dimension(1) * a->dimension(2)) : a->dimension(1);
-    const unsigned int n                       = b->dimension(0);
-    const unsigned int k                       = a->dimension(0);
-    const int          depth_output_gemm3d     = gemm_info.depth_output_gemm3d();
-
-    const GEMMReshapeInfo reshape_info = GEMMReshapeInfo(m, n, k, 1, 1, depth_output_gemm3d, reinterpret_input_as_3d, gemm_info.broadcast_bias());
-
-    // Validate matrix multiply
-    ARM_COMPUTE_RETURN_ON_ERROR(ClGemmMatrixMultiplyKernel::validate(a, b, c, output, alpha, beta,
-                                                                     false, reshape_info, gpu_target, gemm_info.fp_mixed_precision(), gemm_info.activation_info()));
-
-    return Status{};
-}
-
-Status ClGemm::validate_reshaped_v1(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info)
-{
-    ARM_COMPUTE_UNUSED(alpha);
-    ARM_COMPUTE_UNUSED(output);
-
-    TensorInfo tmp_a_info{};
-    TensorInfo tmp_b_info{};
-
-    // Get the GPU target
-    const GPUTarget    gpu_target                = CLScheduler::get().target();
-    const unsigned int m                         = gemm_info.reinterpret_input_as_3d() ? (a->dimension(1) * a->dimension(2)) : a->dimension(1);
-    const unsigned int n                         = b->dimension(0);
-    const unsigned int k                         = a->dimension(0);
-    int                mult_transpose1xW_width   = 1;
-    int                mult_interleave4x4_height = 1;
-    const int          depth_output_gemm3d       = gemm_info.depth_output_gemm3d();
-
-    if(get_arch_from_target(gpu_target) == GPUTarget::BIFROST)
-    {
-        mult_transpose1xW_width   = 4;
-        mult_interleave4x4_height = 2;
-    }
-
-    GEMMRHSMatrixInfo rhs_info;
-    rhs_info.n0         = 16 / b->element_size();
-    rhs_info.k0         = 1;
-    rhs_info.h0         = mult_transpose1xW_width;
-    rhs_info.interleave = false;
-    rhs_info.transpose  = false;
-
-    GEMMLHSMatrixInfo lhs_info;
-    lhs_info.m0         = 4;
-    lhs_info.k0         = 4;
-    lhs_info.v0         = mult_interleave4x4_height;
-    lhs_info.interleave = true;
-    lhs_info.transpose  = true;
-
-    const GEMMReshapeInfo reshape_info = GEMMReshapeInfo(m, n, k, mult_transpose1xW_width, mult_interleave4x4_height, depth_output_gemm3d, false, gemm_info.broadcast_bias());
-
-    // Validate interleave kernel
-    auto_init_if_empty(tmp_a_info, a->clone()->set_tensor_shape(compute_lhs_reshaped_shape(*a, lhs_info, gemm_info.reinterpret_input_as_3d())));
-    ARM_COMPUTE_RETURN_ON_ERROR(ClGemmReshapeLhsMatrixKernel::validate(a, &tmp_a_info, lhs_info, gemm_info.reinterpret_input_as_3d()));
-
-    // Validate transpose kernel
-    auto_init_if_empty(tmp_b_info, b->clone()->set_tensor_shape(compute_rhs_reshaped_shape(*b, rhs_info)));
-    ARM_COMPUTE_RETURN_ON_ERROR(ClGemmReshapeRhsMatrixKernel::validate(b, &tmp_b_info, rhs_info));
-
-    // Validate matrix multiply
-    ARM_COMPUTE_RETURN_ON_ERROR(ClGemmMatrixMultiplyKernel::validate(&tmp_a_info, &tmp_b_info, c, output, alpha, beta,
-                                                                     true, reshape_info, gpu_target, gemm_info.fp_mixed_precision(), gemm_info.activation_info()));
-
-    return Status{};
-}
-
-Status ClGemm::validate_reshaped(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info)
-{
-    ARM_COMPUTE_UNUSED(alpha);
-    ARM_COMPUTE_UNUSED(output);
-
-    TensorInfo tmp_a_info{};
-    TensorInfo tmp_b_info{};
-
-    // Get the GPU target
-    const GPUTarget    gpu_target              = CLScheduler::get().target();
-    DataType           data_type               = a->data_type();
-    bool               reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d();
-    const unsigned int m                       = reinterpret_input_as_3d ? (a->dimension(1) * a->dimension(2)) : a->dimension(1);
-    const unsigned int n                       = b->dimension(0);
-    const unsigned int k                       = a->dimension(0);
-    const unsigned int batch_size              = reinterpret_input_as_3d ? a->dimension(3) : a->dimension(2);
-    const int          depth_output_gemm3d     = gemm_info.depth_output_gemm3d();
-    const bool         broadcast_bias          = gemm_info.broadcast_bias();
-
-    GEMMKernelInfo kernel_info;
-    kernel_info.m                       = m;
-    kernel_info.n                       = n;
-    kernel_info.k                       = k;
-    kernel_info.depth_output_gemm3d     = depth_output_gemm3d;
-    kernel_info.reinterpret_input_as_3d = false;
-    kernel_info.broadcast_bias          = broadcast_bias;
-    kernel_info.activation_info         = gemm_info.activation_info();
-
-    GEMMLHSMatrixInfo lhs_info;
-    GEMMRHSMatrixInfo rhs_info;
-
-    // Pick up the GEMM configuration
-    // NOTE: No need to validate mlgo configurations as they automatically fall back to default heuristics if validation fails
-    const auto gemm_config = select_default_gemm_config_reshaped(auto_heuristics::CommonQuery{ gpu_target, data_type, m, n, k, batch_size });
-    lhs_info               = gemm_config.lhs_info;
-    rhs_info               = gemm_config.rhs_info;
-
-    auto_init_if_empty(tmp_a_info, a->clone()->set_tensor_shape(compute_lhs_reshaped_shape(*a, lhs_info, gemm_info.reinterpret_input_as_3d())));
-    ARM_COMPUTE_RETURN_ON_ERROR(ClGemmReshapeLhsMatrixKernel::validate(a, &tmp_a_info, lhs_info, gemm_info.reinterpret_input_as_3d()));
-
-    auto_init_if_empty(tmp_b_info, b->clone()->set_tensor_shape(compute_rhs_reshaped_shape(*b, rhs_info)));
-    ARM_COMPUTE_RETURN_ON_ERROR(ClGemmReshapeRhsMatrixKernel::validate(b, &tmp_b_info, rhs_info));
-
-    // Validate matrix multiply
-    ARM_COMPUTE_RETURN_ON_ERROR(ClGemmMatrixMultiplyReshapedKernel::validate(&tmp_a_info, &tmp_b_info, c, output, alpha, beta, lhs_info, rhs_info, kernel_info));
-
-    return Status{};
-}
-
-Status ClGemm::validate_reshaped_only_rhs(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info)
-{
-    ARM_COMPUTE_UNUSED(alpha);
-    ARM_COMPUTE_UNUSED(output);
-
-    TensorInfo tmp_b_info{};
-
-    // Get the GPU target
-    const GPUTarget    gpu_target              = CLScheduler::get().target();
-    const DataType     data_type               = a->data_type();
-    bool               reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d();
-    const unsigned int m                       = reinterpret_input_as_3d ? (a->dimension(1) * a->dimension(2)) : a->dimension(1);
-    const unsigned int n                       = b->dimension(0);
-    const unsigned int k                       = a->dimension(0);
-    const unsigned int batch_size              = reinterpret_input_as_3d ? a->dimension(3) : a->dimension(2);
-    const int          depth_output_gemm3d     = gemm_info.depth_output_gemm3d();
-    const bool         broadcast_bias          = gemm_info.broadcast_bias();
-
-    GEMMKernelInfo kernel_info;
-    kernel_info.m                       = m;
-    kernel_info.n                       = n;
-    kernel_info.k                       = k;
-    kernel_info.depth_output_gemm3d     = depth_output_gemm3d;
-    kernel_info.reinterpret_input_as_3d = reinterpret_input_as_3d;
-    kernel_info.broadcast_bias          = broadcast_bias;
-    kernel_info.activation_info         = gemm_info.activation_info();
-
-    GEMMLHSMatrixInfo lhs_info;
-    GEMMRHSMatrixInfo rhs_info;
-
-    // Pick up the GEMM configuration
-    // NOTE: No need to validate mlgo configurations as they automatically fall back to default heuristics if validation fails
-    const auto gemm_config = select_default_gemm_config_reshaped_only_rhs(auto_heuristics::CommonQuery{ gpu_target, data_type, m, n, k, batch_size });
-    lhs_info               = gemm_config.lhs_info;
-    rhs_info               = gemm_config.rhs_info;
-
-    auto_init_if_empty(tmp_b_info, b->clone()->set_tensor_shape(compute_rhs_reshaped_shape(*b, rhs_info)));
-    ARM_COMPUTE_RETURN_ON_ERROR(ClGemmReshapeRhsMatrixKernel::validate(b, &tmp_b_info, rhs_info));
-
-    // Validate matrix multiply
-    kernel_info.has_pad_y = false;
-    ARM_COMPUTE_RETURN_ON_ERROR(ClGemmMatrixMultiplyReshapedOnlyRhsKernel::validate(a, &tmp_b_info, c, output, alpha, beta, lhs_info, rhs_info, kernel_info));
-
-    kernel_info.has_pad_y = true;
-    ARM_COMPUTE_RETURN_ON_ERROR(ClGemmMatrixMultiplyReshapedOnlyRhsKernel::validate(a, &tmp_b_info, c, output, alpha, beta, lhs_info, rhs_info, kernel_info));
-
-    return Status{};
-}
-
-void ClGemm::configure(const CLCompileContext &compile_context, ITensorInfo *a, ITensorInfo *b, ITensorInfo *c, ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info)
-{
-    ARM_COMPUTE_ERROR_ON_NULLPTR(a, b, output);
-
-    // Perform validation step
-    ARM_COMPUTE_ERROR_THROW_ON(validate(a, b, c, output, alpha, beta, gemm_info));
-
-    // Check if we need to reshape the matrix B only on the first run
-    _reshape_b_only_on_first_run = gemm_info.reshape_b_only_on_first_run();
-
-    bool               reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d();
-    const unsigned int m                       = reinterpret_input_as_3d ? (a->dimension(1) * a->dimension(2)) : a->dimension(1);
-    const unsigned int n                       = b->dimension(0);
-    const unsigned int k                       = a->dimension(0);
-    const unsigned int batch_size              = reinterpret_input_as_3d ? a->dimension(3) : a->dimension(2);
-
-    // Select GEMMType
-    _gemm_kernel_type = auto_select_gemm_kernel(auto_heuristics::CommonQuery{ CLScheduler::get().target(), a->data_type(), m, n, k, batch_size }, _reshape_b_only_on_first_run,
-                                                gemm_info.constant_weights());
-
-    const bool fuse_add_c = (!(helpers::float_ops::is_zero(beta)) && c != nullptr);
-
-    ITensorInfo *c_to_use = fuse_add_c ? c : nullptr;
-
-    switch(_gemm_kernel_type)
-    {
-        case CLGEMMKernelType::NATIVE_V1:
-        {
-            configure_native_v1(compile_context, a, b, c_to_use, output, alpha, beta, gemm_info);
-            break;
-        }
-        case CLGEMMKernelType::RESHAPED_V1:
-        {
-            configure_reshaped_v1(compile_context, a, b, c_to_use, output, alpha, beta, gemm_info);
-            break;
-        }
-        case CLGEMMKernelType::RESHAPED:
-        {
-            configure_reshaped_v2(compile_context, a, b, c_to_use, output, alpha, beta, gemm_info);
-            break;
-        }
-        case CLGEMMKernelType::RESHAPED_ONLY_RHS:
-        {
-            configure_reshaped_only_rhs(compile_context, a, b, c_to_use, output, alpha, beta, gemm_info);
-            break;
-        }
-        default:
-        {
-            ARM_COMPUTE_ERROR("GEMMType not supported");
-        }
-    }
-}
-
-Status ClGemm::validate(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info)
-{
-    // Get the GPU target
-    bool               reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d();
-    const unsigned int m                       = reinterpret_input_as_3d ? (a->dimension(1) * a->dimension(2)) : a->dimension(1);
-    const unsigned int n                       = b->dimension(0);
-    const unsigned int k                       = a->dimension(0);
-    const unsigned int batch_size              = reinterpret_input_as_3d ? a->dimension(3) : a->dimension(2);
-
-    // Select GEMMType
-    CLGEMMKernelType gemm_kernel_type = auto_select_gemm_kernel(auto_heuristics::CommonQuery
-    {
-        CLScheduler::get().target(), a->data_type(), m, n, k, batch_size,
-    },
-    gemm_info.reshape_b_only_on_first_run(), gemm_info.constant_weights());
-
-    const bool fuse_add_c = (!(helpers::float_ops::is_zero(beta)) && c != nullptr);
-
-    const ITensorInfo *c_to_use = fuse_add_c ? c : nullptr;
-
-    switch(gemm_kernel_type)
-    {
-        case CLGEMMKernelType::NATIVE_V1:
-        {
-            ARM_COMPUTE_RETURN_ON_ERROR(validate_native_v1(a, b, c_to_use, output, alpha, beta, gemm_info));
-            break;
-        }
-        case CLGEMMKernelType::RESHAPED_V1:
-        {
-            ARM_COMPUTE_RETURN_ON_ERROR(validate_reshaped_v1(a, b, c_to_use, output, alpha, beta, gemm_info));
-            break;
-        }
-        case CLGEMMKernelType::RESHAPED:
-        {
-            ARM_COMPUTE_RETURN_ON_ERROR(validate_reshaped(a, b, c_to_use, output, alpha, beta, gemm_info));
-            break;
-        }
-        case CLGEMMKernelType::RESHAPED_ONLY_RHS:
-        {
-            ARM_COMPUTE_RETURN_ON_ERROR(validate_reshaped_only_rhs(a, b, c_to_use, output, alpha, beta, gemm_info));
-            break;
-        }
-        default:
-        {
-            ARM_COMPUTE_RETURN_ERROR_MSG("GEMMType not supported");
-        }
-    }
-
-    return Status{};
-}
-
-void ClGemm::run(ITensorPack &tensors)
-{
-    const ITensor *lhs  = tensors.get_const_tensor(ACL_SRC_0);
-    const ITensor *rhs  = tensors.get_const_tensor(ACL_SRC_1);
-    const ITensor *src2 = tensors.get_const_tensor(ACL_SRC_2);
-    ITensor       *dst  = tensors.get_tensor(ACL_DST);
-
-    ARM_COMPUTE_ERROR_ON_NULLPTR(lhs, dst);
-
-    CLAuxTensorHandler lhs_reshaped(offset_int_vec(LhsReshape), _tmp_a, tensors, true);
-    CLAuxTensorHandler rhs_reshaped(offset_int_vec(RhsReshape), _tmp_b, tensors, true);
-
-    // Prepare the consts if needed
-    prepare(tensors);
-
-    // Run matrix multiply kernel
-    switch(_gemm_kernel_type)
-    {
-        case CLGEMMKernelType::NATIVE_V1:
-        {
-            CLScheduler::get().enqueue_op(*_mm_kernel, tensors, true);
-            break;
-        }
-        case CLGEMMKernelType::RESHAPED_V1:
-        case CLGEMMKernelType::RESHAPED:
-        {
-            // Run interleave kernel
-            ITensorPack reshape_lhs_pack{ { ACL_SRC, lhs }, { ACL_DST, lhs_reshaped.get() } };
-            CLScheduler::get().enqueue_op(*_reshape_lhs_kernel, reshape_lhs_pack, false);
-
-            if(!_reshape_b_only_on_first_run)
-            {
-                // Run transpose kernel
-                ITensorPack reshape_rhs_pack{ { ACL_SRC, rhs }, { ACL_DST, rhs_reshaped.get() } };
-                CLScheduler::get().enqueue_op(*_reshape_rhs_kernel, reshape_rhs_pack, false);
-            }
-
-            ITensorPack gemm_reshaped_pack{ { ACL_SRC_0, lhs_reshaped.get() }, { ACL_SRC_1, rhs_reshaped.get() }, { ACL_SRC_2, src2 }, { ACL_DST, dst } };
-            if(_gemm_kernel_type == CLGEMMKernelType::RESHAPED)
-            {
-                CLScheduler::get().enqueue_op(*_mm_reshaped_kernel, gemm_reshaped_pack, true);
-            }
-            else
-            {
-                CLScheduler::get().enqueue_op(*_mm_kernel, gemm_reshaped_pack, true);
-            }
-            break;
-        }
-        case CLGEMMKernelType::RESHAPED_ONLY_RHS:
-        {
-            if(!_reshape_b_only_on_first_run)
-            {
-                // Run transpose kernel
-                ITensorPack reshape_rhs_pack{ { ACL_SRC, rhs }, { ACL_DST, rhs_reshaped.get() } };
-                CLScheduler::get().enqueue_op(*_reshape_rhs_kernel, reshape_rhs_pack, false);
-            }
-            // In case of RESHAPED_ONLY_RHS, we need to check the padding requirement
-            // Check if the lhs or dst tensors have padding
-            const unsigned int cross_plane_pad_lhs = lhs->info()->padding().top + lhs->info()->padding().bottom;
-            const unsigned int cross_plane_pad_dst = dst->info()->padding().top + dst->info()->padding().bottom;
-            bool               has_pad_y           = (cross_plane_pad_lhs != 0) || (cross_plane_pad_dst != 0);
-
-            ITensorPack gemm_reshaped_onlyrhs_pack{ { ACL_SRC_0, lhs }, { ACL_SRC_1, rhs_reshaped.get() }, { ACL_SRC_2, src2 }, { ACL_DST, dst } };
-            if(has_pad_y)
-            {
-                CLScheduler::get().enqueue_op(*_mm_reshaped_only_rhs_fallback_kernel, gemm_reshaped_onlyrhs_pack, true);
-            }
-            else
-            {
-                CLScheduler::get().enqueue_op(*_mm_reshaped_only_rhs_kernel, gemm_reshaped_onlyrhs_pack, true);
-            }
-            break;
-        }
-        default:
-        {
-            ARM_COMPUTE_ERROR("GEMMType not supported");
-        }
-    }
-}
-
-void ClGemm::prepare(ITensorPack &constants)
-{
-    const ITensor *src1    = constants.get_const_tensor(ACL_SRC_1);
-    ICLTensor     *rhs_aux = utils::cast::polymorphic_downcast<ICLTensor *>(constants.get_tensor(offset_int_vec(RhsReshape)));
-
-    // If memory for RHS is persistent and src1 is provided re-transform else assume that RHS is transformed
-    if((_aux_mem[AuxTensorIdx::RhsReshape].lifetime == MemoryLifetime::Persistent) && (src1 != nullptr && rhs_aux != nullptr) && rhs_aux)
-    {
-        CLAuxTensorHandler rhs_reshaped(_tmp_b, *rhs_aux);
-        ARM_COMPUTE_ERROR_ON(rhs_reshaped.get()->cl_buffer().get() == nullptr);
-
-        ITensorPack reshape_rhs_pack{ { ACL_SRC, src1 }, { ACL_DST, rhs_reshaped.get() } };
-        CLScheduler::get().enqueue_op(*_reshape_rhs_kernel, reshape_rhs_pack, true);
-    }
-}
-
-experimental::MemoryRequirements ClGemm::workspace() const
-{
-    return _aux_mem;
-}
-} // namespace opencl
-} // namespace arm_compute
diff --git a/src/runtime/gpu/cl/operators/ClGemm.h b/src/runtime/gpu/cl/operators/ClGemm.h
deleted file mode 100644
index bd9ca17edf..0000000000
--- a/src/runtime/gpu/cl/operators/ClGemm.h
+++ /dev/null
@@ -1,136 +0,0 @@
-/*
- * Copyright (c) 2016-2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CL_GEMM_H
-#define ARM_COMPUTE_CL_GEMM_H
-
-#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/runtime/CL/CLTensor.h"
-#include "arm_compute/runtime/CL/CLTypes.h"
-#include "src/core/gpu/cl/ClCompileContext.h"
-#include "src/core/gpu/cl/IClKernel.h"
-#include "src/core/gpu/cl/kernels/ClGemmMatrixMultiplyKernel.h"
-#include "src/core/gpu/cl/kernels/ClGemmMatrixMultiplyNativeKernel.h"
-#include "src/core/gpu/cl/kernels/ClGemmMatrixMultiplyReshapedKernel.h"
-#include "src/core/gpu/cl/kernels/ClGemmMatrixMultiplyReshapedOnlyRhsKernel.h"
-#include "src/core/gpu/cl/kernels/ClGemmReshapeLhsMatrixKernel.h"
-#include "src/core/gpu/cl/kernels/ClGemmReshapeRhsMatrixKernel.h"
-#include "src/runtime/gpu/cl/IClOperator.h"
-
-#include <memory>
-
-namespace arm_compute
-{
-namespace opencl
-{
-/** Basic function to execute GEMM on OpenCL. This function calls the following OpenCL kernels:
- *
- *  -# @ref kernels::ClGemmReshapeLhsMatrixKernel (only if the RESHAPED_V1 is selected by the heuristic model)
- *  -# @ref kernels::ClGemmReshapeRhsMatrixKernel (only if either the RESHAPED_V1 or RESHAPED_ONLY_RHS is selected by the select_gemm_kernel method())
- *  -# @ref kernels::ClGemmMatrixMultiplyKernel (only if either the NATIVE or RESHAPED_V1 is selected by the select_gemm_kernel method())
- *  -# @ref kernels::ClGemmMatrixMultiplyReshapedKernel (only if RESHAPED_V1 is selected by the select_gemm_kernel method())
- *  -# @ref kernels::ClGemmMatrixMultiplyReshapedOnlyRhsKernel (only if RESHAPED_ONLY_RHS is selected by the select_gemm_kernel method())
- */
-class ClGemm : public IClOperator
-{
-public:
-    /** Constructor */
-    ClGemm();
-    /** Initialise the kernel's inputs and output
-     *
-     * Valid data layouts:
-     * - All
-     *
-     * Valid data type configurations:
-     * |src0         |src1        |src2      |dst            |
-     * |:------------|:-----------|:---------|:--------------|
-     * |F32          |F32         |F32       |F32            |
-     * |F16          |F16         |F16       |F16            |
-     *
-     * @note GEMM: General Matrix Multiply - [alpha * A * B + beta * C].
-     *
-     * @note All tensors must have the same data type.
-     *
-     * @note Whilst the first input tensor can be a vector, the second input tensor must be at least a matrix
-     *
-     * @param[in]  compile_context The compile context to be used.
-     * @param[in]  a               First input tensor  (Matrix or Vector A). Data types supported: F16/F32
-     * @param[in]  b               Second input tensor (Matrix B). Data type supported: same as @p a.
-     * @param[in]  c               Third input tensor  (Matrix C). It can be a nullptr if just the multiplication between @p a and @p b is needed. Data type supported: same as @p a.
-     * @param[out] output          Output tensor. Data type supported: same as @p a
-     * @param[in]  alpha           Weight of the matrix product
-     * @param[in]  beta            Weight of matrix C
-     * @param[in]  gemm_info       (Optional) Specifies if the matrix A and/or matrix B have been reshaped and
-     *                       if the reshape of matrix B should happen only for the first run. GEMMInfo also contains information about the reshaping
-     *                       in case matrix A and matrix B have been already transformed.
-     */
-    void configure(const CLCompileContext &compile_context, ITensorInfo *a, ITensorInfo *b, ITensorInfo *c, ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info);
-    /** Static function to check if given info will lead to a valid configuration
-     *
-     * Similar to ClGemm::configure()
-     *
-     * @return a status
-     */
-    static Status validate(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info);
-
-    // Inherited methods overridden:
-    void run(ITensorPack &tensors) override;
-    void prepare(ITensorPack &constants) override;
-    experimental::MemoryRequirements workspace() const override;
-
-private:
-    void configure_native_v1(const CLCompileContext &compile_context, ITensorInfo *a, ITensorInfo *b, ITensorInfo *c, ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info);
-    void configure_reshaped_v1(const CLCompileContext &compile_context, ITensorInfo *a, ITensorInfo *b, ITensorInfo *c, ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info);
-    void configure_reshaped_v2(const CLCompileContext &compile_context, ITensorInfo *a, ITensorInfo *b, ITensorInfo *c, ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info);
-    void configure_reshaped_only_rhs(const CLCompileContext &compile_context, ITensorInfo *a, ITensorInfo *b, ITensorInfo *c, ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info);
-
-    static Status validate_native_v1(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info);
-    static Status validate_reshaped_v1(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info);
-    static Status validate_reshaped(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info);
-    static Status validate_reshaped_only_rhs(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info);
-
-private:
-    enum AuxTensorIdx
-    {
-        LhsReshape = 0,
-        RhsReshape,
-        Count
-    };
-
-private:
-    std::unique_ptr<kernels::ClGemmMatrixMultiplyKernel>                _mm_kernel;
-    std::unique_ptr<kernels::ClGemmReshapeLhsMatrixKernel>              _reshape_lhs_kernel;
-    std::unique_ptr<kernels::ClGemmReshapeRhsMatrixKernel>              _reshape_rhs_kernel;
-    std::unique_ptr<kernels::ClGemmMatrixMultiplyReshapedKernel>        _mm_reshaped_kernel;
-    std::unique_ptr<kernels::ClGemmMatrixMultiplyReshapedOnlyRhsKernel> _mm_reshaped_only_rhs_kernel;
-    std::unique_ptr<kernels::ClGemmMatrixMultiplyReshapedOnlyRhsKernel> _mm_reshaped_only_rhs_fallback_kernel;
-    TensorInfo                                                          _tmp_a;
-    TensorInfo                                                          _tmp_b;
-    bool                                                                _reshape_b_only_on_first_run;
-    CLGEMMKernelType                                                    _gemm_kernel_type;
-
-    experimental::MemoryRequirements _aux_mem{};
-};
-} // namespace opencl
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CLGEMM_H */
diff --git a/src/runtime/gpu/cl/operators/ClLogicalNot.cpp b/src/runtime/gpu/cl/operators/ClLogicalNot.cpp
deleted file mode 100644
index 400efe450d..0000000000
--- a/src/runtime/gpu/cl/operators/ClLogicalNot.cpp
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * Copyright (c) 2017-2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "src/runtime/gpu/cl/operators/ClLogicalNot.h"
-
-#include "src/core/gpu/cl/ClCompileContext.h"
-#include "src/core/gpu/cl/kernels/ClElementwiseUnaryKernel.h"
-
-namespace arm_compute
-{
-namespace opencl
-{
-void ClLogicalNot::configure(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst)
-{
-    auto k = std::make_unique<kernels::ClElementWiseUnaryKernel>();
-    k->configure(compile_context, src, dst, ElementWiseUnary::LOGICAL_NOT);
-    _kernel = std::move(k);
-}
-
-Status ClLogicalNot::validate(const ITensorInfo *src, const ITensorInfo *dst)
-{
-    return kernels::ClElementWiseUnaryKernel::validate(src, dst, ElementWiseUnary::LOGICAL_NOT);
-}
-} // namespace opencl
-} // namespace arm_compute
diff --git a/src/runtime/gpu/cl/operators/ClLogicalNot.h b/src/runtime/gpu/cl/operators/ClLogicalNot.h
deleted file mode 100644
index 25ddf564b5..0000000000
--- a/src/runtime/gpu/cl/operators/ClLogicalNot.h
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CL_LOGICAL_NOT_H
-#define ARM_COMPUTE_CL_LOGICAL_NOT_H
-
-#include "src/core/gpu/cl/ClCompileContext.h"
-#include "src/runtime/gpu/cl/IClOperator.h"
-
-namespace arm_compute
-{
-namespace opencl
-{
-/** Basic function to run @ref kernels::ClElementWiseUnaryKernel for NOT operation */
-class ClLogicalNot : public IClOperator
-{
-public:
-    /** Constructor */
-    ClLogicalNot() = default;
-    /** Configure operator for a given list of arguments
-     *
-     * @param[in]  compile_context The compile context to be used.
-     * @param[in]  src             Source tensor info. Data types supported: U8.
-     * @param[out] dst             Destination tensor info. Data types supported: same as @p src.
-     */
-    void configure(const CLCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst);
-    /** Static function to check if given info will lead to a valid configuration
-     *
-     * @param[in] src Soure tensor info. Data types supported: U8.
-     * @param[in] dst Destination tensor info. Data types supported: same as @p src.
-     *
-     * @return a status
-     */
-    static Status validate(const ITensorInfo *src, const ITensorInfo *dst);
-};
-} // namespace opencl
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CL_LOGICAL_NOT_H */
diff --git a/src/runtime/gpu/cl/operators/ClMul.cpp b/src/runtime/gpu/cl/operators/ClMul.cpp
deleted file mode 100644
index d1e2bc806f..0000000000
--- a/src/runtime/gpu/cl/operators/ClMul.cpp
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "src/runtime/gpu/cl/operators/ClMul.h"
-
-#include "arm_compute/runtime/CL/CLScheduler.h"
-#include "src/core/gpu/cl/ClCompileContext.h"
-#include "src/core/gpu/cl/kernels/ClMulKernel.h"
-
-namespace arm_compute
-{
-namespace opencl
-{
-void ClMul::configure(const CLCompileContext &compile_context, ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst, float scale,
-                      ConvertPolicy overflow_policy, RoundingPolicy rounding_policy, const ActivationLayerInfo &act_info)
-{
-    auto k = std::make_unique<kernels::ClMulKernel>();
-    k->configure(compile_context, src1, src2, dst, scale, overflow_policy, rounding_policy, act_info);
-    _kernel = std::move(k);
-}
-
-Status ClMul::validate(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst, float scale,
-                       ConvertPolicy overflow_policy, RoundingPolicy rounding_policy, const ActivationLayerInfo &act_info)
-{
-    return kernels::ClMulKernel::validate(src1, src2, dst, scale, overflow_policy, rounding_policy, act_info);
-}
-
-void ClComplexMul::configure(const CLCompileContext &compile_context, ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst, const ActivationLayerInfo &act_info)
-{
-    auto k = std::make_unique<kernels::ClComplexMulKernel>();
-    k->configure(compile_context, src1, src2, dst, act_info);
-    _kernel = std::move(k);
-}
-
-Status ClComplexMul::validate(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst, const ActivationLayerInfo &act_info)
-{
-    return kernels::ClComplexMulKernel::validate(src1, src2, dst, act_info);
-}
-} // namespace opencl
-} // namespace arm_compute
-\ No newline at end of file
diff --git a/src/runtime/gpu/cl/operators/ClMul.h b/src/runtime/gpu/cl/operators/ClMul.h
deleted file mode 100644
index 4a662b3276..0000000000
--- a/src/runtime/gpu/cl/operators/ClMul.h
+++ /dev/null
@@ -1,107 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CL_MUL_H
-#define ARM_COMPUTE_CL_MUL_H
-
-#include "src/core/gpu/cl/ClCompileContext.h"
-#include "src/runtime/gpu/cl/IClOperator.h"
-
-namespace arm_compute
-{
-namespace opencl
-{
-/** Basic function to run @ref opencl::kernels::ClMulKernel */
-class ClMul : public IClOperator
-{
-public:
-    /** Default Constructor */
-    ClMul() = default;
-    /** Initialise the kernel's sources, dst and convertion policy.
-     *
-     * Valid configurations (src1,src2) -> Output :
-     *
-     *   - (U8,U8)                         -> U8
-     *   - (U8,U8)                         -> S16
-     *   - (U8,S16)                        -> S16
-     *   - (S16,U8)                        -> S16
-     *   - (S16,S16)                       -> S16
-     *   - (F16,F16)                       -> F16
-     *   - (F32,F32)                       -> F32
-     *   - (QASYMM8,QASYMM8)               -> QASYMM8
-     *   - (QASYMM8_SIGNED,QASYMM8_SIGNED) -> QASYMM8_SIGNED
-     *   - (QSYMM16,QSYMM16)               -> QSYMM16
-     *   - (QSYMM16,QSYMM16)               -> S32
-     *
-     * @param[in]      compile_context The compile context to be used.
-     * @param[in, out] src1            An src tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32.
-     *                                 The src tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
-     * @param[in, out] src2            An src tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32.
-     *                                 The src tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
-     * @param[out]     dst             The dst tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32.
-     * @param[in]      scale           Scale to apply after multiplication.
-     *                                 Scale must be positive and its value must be either 1/255 or 1/2^n where n is between 0 and 15.
-     * @param[in]      overflow_policy Overflow policy. Supported overflow policies: Wrap, Saturate
-     * @param[in]      rounding_policy Rounding policy. Supported rounding modes: to zero, to nearest even.
-     * @param[in]      act_info        (Optional) Activation layer information in case of a fused activation.
-     */
-    void configure(const CLCompileContext &compile_context, ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst, float scale,
-                   ConvertPolicy overflow_policy, RoundingPolicy rounding_policy, const ActivationLayerInfo &act_info = ActivationLayerInfo());
-    /** Static function to check if given info will lead to a valid configuration
-     *
-     * Similar to @ref ClMul::configure()
-     *
-     * @return a status
-     */
-    static Status validate(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst, float scale,
-                           ConvertPolicy overflow_policy, RoundingPolicy rounding_policy, const ActivationLayerInfo &act_info = ActivationLayerInfo());
-};
-
-/** Basic function to run @ref opencl::kernels::ClComplexMulKernel */
-class ClComplexMul : public IClOperator
-{
-public:
-    /** Default Constructor */
-    ClComplexMul() = default;
-    /** Initialise the kernel's sources, dst.
-     *
-     * @param[in]      compile_context The compile context to be used.
-     * @param[in, out] src1            An src tensor info. Data types supported: F16/F32. Number of channels supported: 2.
-     *                                 The src tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
-     * @param[in, out] src2            An src tensor info. Data types supported: same as @p src1. Number of channels supported: same as @p src1.
-     *                                 The src tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
-     * @param[out]     dst             The dst tensor info, Data types supported: same as @p src1. Number of channels supported: same as @p src1.
-     * @param[in]      act_info        (Optional) Activation layer information in case of a fused activation.
-     */
-    void configure(const CLCompileContext &compile_context, ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst, const ActivationLayerInfo &act_info = ActivationLayerInfo());
-    /** Static function to check if given info will lead to a valid configuration
-     *
-     * Similar to @ref ClComplexMul::configure()
-     *
-     * @return a status
-     */
-    static Status validate(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst, const ActivationLayerInfo &act_info = ActivationLayerInfo());
-};
-} // namespace opencl
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CL_MUL_H */
-\ No newline at end of file
diff --git a/src/runtime/gpu/cl/operators/ClPRelu.cpp b/src/runtime/gpu/cl/operators/ClPRelu.cpp
deleted file mode 100644
index d1ce14cc87..0000000000
--- a/src/runtime/gpu/cl/operators/ClPRelu.cpp
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "src/runtime/gpu/cl/operators/ClPRelu.h"
-#include "src/core/gpu/cl/kernels/ClElementwiseKernel.h"
-
-namespace arm_compute
-{
-namespace opencl
-{
-using KernelType = kernels::ClArithmeticKernel;
-void ClPRelu::configure(const CLCompileContext &compile_context, ITensorInfo *input, ITensorInfo *alpha, ITensorInfo *output)
-{
-    auto k = std::make_unique<KernelType>();
-    k->configure(compile_context, ArithmeticOperation::PRELU, input, alpha, (output == nullptr ? input : output));
-    _kernel = std::move(k);
-}
-
-Status ClPRelu::validate(const ITensorInfo *input, const ITensorInfo *alpha, const ITensorInfo *output)
-{
-    return KernelType::validate(ArithmeticOperation::PRELU, input, alpha, (output == nullptr ? input : output));
-}
-
-void ClPRelu::run(ITensorPack &tensors)
-{
-    // Output tensor can be given as nullptr for in-place computation.
-    // In this case, get the input tensor and use it as the output tensor.
-    if(tensors.get_tensor(TensorType::ACL_DST) == nullptr)
-    {
-        auto src_tensor = const_cast<ITensor *>(tensors.get_const_tensor(TensorType::ACL_SRC_0));
-        ARM_COMPUTE_ERROR_ON_MSG(src_tensor == nullptr, "invalid source tensor is given for in-place computation");
-        tensors.add_tensor(TensorType::ACL_DST, src_tensor);
-    }
-    IClOperator::run(tensors);
-}
-} // namespace opencl
-} // namespace arm_compute
-\ No newline at end of file
diff --git a/src/runtime/gpu/cl/operators/ClPRelu.h b/src/runtime/gpu/cl/operators/ClPRelu.h
deleted file mode 100644
index 70202aeb81..0000000000
--- a/src/runtime/gpu/cl/operators/ClPRelu.h
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CL_PRELU_H
-#define ARM_COMPUTE_CL_PRELU_H
-
-#include "src/core/gpu/cl/ClCompileContext.h"
-#include "src/runtime/gpu/cl/IClOperator.h"
-
-namespace arm_compute
-{
-namespace opencl
-{
-/** Basic operator to run @ref arm_compute::opencl::kernels::ClArithmeticKernel for PRELU
- *
- * @note The operator implements an activation layer with the PRELU activation function.
- */
-class ClPRelu : public IClOperator
-{
-public:
-    /** Default constructor */
-    ClPRelu() = default;
-    /** Set the input and output tensor.
-     *
-     * @note If the output tensor is a nullptr or is equal to the input, the activation function will be performed in-place
-     *
-     * @param[in]  compile_context The compile context to be used.
-     * @param[in]  input           Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
-     * @param[in]  alpha           PRelu layer parameters. Data types supported: same of @p input.
-     * @param[out] output          Destination tensor. Data type supported: same as @p input
-     */
-    void configure(const CLCompileContext &compile_context, ITensorInfo *input, ITensorInfo *alpha, ITensorInfo *output);
-    /** Static function to check if given info will lead to a valid configuration of @ref arm_compute::opencl::kernels::ClArithmeticKernel for PRELU
-     *
-     * @param[in] input  Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
-     * @param[in] alpha  PRelu layer parameters. Data types supported: same of @p input.
-     * @param[in] output Destination tensor info. Data type supported: same as @p input
-     *
-     * @return a status
-     */
-    static Status validate(const ITensorInfo *input, const ITensorInfo *alpha, const ITensorInfo *output);
-
-    // Inherited methods overridden:
-    void run(ITensorPack &tensors) override;
-};
-} // namespace opencl
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CL_PRELU_H */
diff --git a/src/runtime/gpu/cl/operators/ClPermute.cpp b/src/runtime/gpu/cl/operators/ClPermute.cpp
deleted file mode 100644
index 719bb6dac6..0000000000
--- a/src/runtime/gpu/cl/operators/ClPermute.cpp
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "src/runtime/gpu/cl/operators/ClPermute.h"
-
-#include "src/core/gpu/cl/ClCompileContext.h"
-#include "src/core/gpu/cl/kernels/ClPermuteKernel.h"
-
-namespace arm_compute
-{
-namespace opencl
-{
-void ClPermute::configure(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst, const PermutationVector &perm)
-{
-    auto k = std::make_unique<kernels::ClPermuteKernel>();
-    k->configure(compile_context, src, dst, perm);
-    _kernel = std::move(k);
-}
-
-Status ClPermute::validate(const ITensorInfo *src, const ITensorInfo *dst, const PermutationVector &perm)
-{
-    return kernels::ClPermuteKernel::validate(src, dst, perm);
-}
-} // namespace opencl
-} // namespace arm_compute
-\ No newline at end of file
diff --git a/src/runtime/gpu/cl/operators/ClPermute.h b/src/runtime/gpu/cl/operators/ClPermute.h
deleted file mode 100644
index 20e7a32428..0000000000
--- a/src/runtime/gpu/cl/operators/ClPermute.h
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CL_PERMUTE_H
-#define ARM_COMPUTE_CL_PERMUTE_H
-
-#include "src/core/gpu/cl/ClCompileContext.h"
-#include "src/runtime/gpu/cl/IClOperator.h"
-
-namespace arm_compute
-{
-namespace opencl
-{
-/** Basic function to run @ref kernels::ClPermuteKernel */
-class ClPermute : public IClOperator
-{
-public:
-    /** Constructor */
-    ClPermute() = default;
-    /** Initialise the kernel's inputs and outputs and permute vector
-     *
-     * @note Arbitrary permutation vectors are supported with rank not greater than 4
-     *
-     * @param[in] compile_context The compile context to be used.
-     * @param[in] src             The src tensor info. Data types supported: All.
-     * @param[in] dst             The dst tensor info. Data types supported: Same as @p src
-     * @param[in] perm            Permutation vector
-     */
-    void configure(const CLCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst, const PermutationVector &perm);
-    /**  Static function to check if given info will lead to a valid configuration of @ref kernels::ClPermuteKernel.
-     *
-     * @note Arbitrary permutation vectors are supported with rank not greater than 4
-     *
-     * @param[in] src  First tensor src info. Data types supported: All.
-     * @param[in] dst  Output tensor info. Data types supported: same as @p src.
-     * @param[in] perm Permutation vector
-     *
-     * @return a status
-     */
-    static Status validate(const ITensorInfo *src, const ITensorInfo *dst, const PermutationVector &perm);
-};
-} // namespace opencl
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CL_PERMUTE_H */
-\ No newline at end of file
diff --git a/src/runtime/gpu/cl/operators/ClPool2d.cpp b/src/runtime/gpu/cl/operators/ClPool2d.cpp
deleted file mode 100644
index 40c2b0a8ba..0000000000
--- a/src/runtime/gpu/cl/operators/ClPool2d.cpp
+++ /dev/null
@@ -1,101 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "src/runtime/gpu/cl/operators/ClPool2d.h"
-
-#include "arm_compute/runtime/CL/CLScheduler.h"
-
-#include "src/core/CL/kernels/CLFillBorderKernel.h"
-#include "src/core/gpu/cl/ClCompileContext.h"
-#include "src/core/gpu/cl/kernels/ClPool2dKernel.h"
-
-namespace arm_compute
-{
-namespace opencl
-{
-void ClPool2d::configure(const ClCompileContext &compile_context, ITensorInfo *src, ITensorInfo *dst, const PoolingLayerInfo &info, ITensorInfo *indices)
-{
-    ARM_COMPUTE_ERROR_ON_NULLPTR(src);
-    // Configure pooling kernel
-    auto k = std::make_unique<kernels::ClPool2dKernel>();
-    k->set_target(CLScheduler::get().target());
-    k->configure(compile_context, src, dst, info, indices);
-    _pooling = std::move(k);
-
-    const DataType data_type = src->data_type();
-
-    // Configure border depending on operation required (quantize border in case of asymmetric data_type)
-    BorderMode border_mode{};
-    PixelValue pixel_value(0.f);
-    if(is_data_type_quantized_asymmetric(data_type) && !info.exclude_padding)
-    {
-        pixel_value = PixelValue(0, data_type, src->quantization_info());
-    }
-
-    // Data layout
-    const auto data_layout = info.data_layout == DataLayout::UNKNOWN ? src->data_layout() : info.data_layout;
-
-    switch(data_layout)
-    {
-        case DataLayout::NCHW:
-            border_mode = (PoolingType::MAX == info.pool_type) ? BorderMode::REPLICATE : BorderMode::CONSTANT;
-            break;
-        case DataLayout::NHWC:
-            border_mode = BorderMode::CONSTANT;
-            if(PoolingType::MAX == info.pool_type)
-            {
-                if(is_data_type_quantized(data_type))
-                {
-                    std::tie(pixel_value, std::ignore) = get_min_max(data_type);
-                }
-                else
-                {
-                    pixel_value = PixelValue(std::numeric_limits<float>::lowest());
-                }
-            }
-            break;
-        default:
-            ARM_COMPUTE_ERROR("Data layout not supported");
-    }
-    auto b = std::make_unique<CLFillBorderKernel>();
-    b->configure(compile_context, src, _pooling->border_size(), border_mode, pixel_value);
-    _border_handler = std::move(b);
-
-    // Tune kernels
-    CLScheduler::get().tune_kernel_static(*_pooling);
-}
-
-Status ClPool2d::validate(const ITensorInfo *src, const ITensorInfo *dst, const PoolingLayerInfo &info, const ITensorInfo *indices)
-{
-    return kernels::ClPool2dKernel::validate(src, dst, info, indices);
-}
-
-void ClPool2d::run(ITensorPack &tensors)
-{
-    ARM_COMPUTE_ERROR_ON_MSG(tensors.empty(), "No inputs provided");
-
-    CLScheduler::get().enqueue_op(*_border_handler.get(), tensors, false);
-    CLScheduler::get().enqueue_op(*_pooling.get(), tensors, false);
-}
-} // namespace opencl
-} // namespace arm_compute
diff --git a/src/runtime/gpu/cl/operators/ClPool2d.h b/src/runtime/gpu/cl/operators/ClPool2d.h
deleted file mode 100644
index 8ac386a64b..0000000000
--- a/src/runtime/gpu/cl/operators/ClPool2d.h
+++ /dev/null
@@ -1,72 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CL_POOL2D_H
-#define ARM_COMPUTE_CL_POOL2D_H
-
-#include "src/core/gpu/cl/ClCompileContext.h"
-#include "src/runtime/gpu/cl/IClOperator.h"
-
-#include <memory>
-
-namespace arm_compute
-{
-namespace opencl
-{
-/** Basic function to simulate a pooling layer with the specified pooling operation. This function calls the following OpenCL kernels:
- *
- * -# @ref CLFillBorderKernel (executed if padding size is different from zero)
- * -# @ref opencl::ClPool2d
- */
-class ClPool2d : public IClOperator
-{
-public:
-    /** Constructor */
-    ClPool2d() = default;
-    /** Configure operator for a given list of arguments
-     *
-     * @param[in]  compile_context The compile context to be used.
-     * @param[in]  src             Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
-     * @param[out] dst             Destination tensor info. Data type supported: same as @p src
-     * @param[in]  info            Pooling layer parameters.
-     * @param[out] indices         (optional) The indices info of the maximal values. Data type supported: U32.
-     */
-    void configure(const ClCompileContext &compile_context, ITensorInfo *src, ITensorInfo *dst, const PoolingLayerInfo &info, ITensorInfo *indices = nullptr);
-    /** Static function to check if given info will lead to a valid configuration
-     *
-     * Similar to ClPool2d::configure()
-     *
-     * @return a status
-     */
-    static Status validate(const ITensorInfo *src, const ITensorInfo *dst, const PoolingLayerInfo &info, const ITensorInfo *indices = nullptr);
-
-    // Inherited method overridden
-    void run(ITensorPack &tensors) override;
-
-private:
-    std::unique_ptr<ICLKernel> _pooling{ nullptr };
-    std::unique_ptr<ICLKernel> _border_handler{ nullptr };
-};
-} // namespace opencl
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CL_POOL2D_H */
diff --git a/src/runtime/gpu/cl/operators/ClQuantize.cpp b/src/runtime/gpu/cl/operators/ClQuantize.cpp
deleted file mode 100644
index 92bbb62ba5..0000000000
--- a/src/runtime/gpu/cl/operators/ClQuantize.cpp
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "src/runtime/gpu/cl/operators/ClQuantize.h"
-
-#include "arm_compute/core/Error.h"
-#include "arm_compute/runtime/CL/CLScheduler.h"
-#include "src/core/gpu/cl/ClCompileContext.h"
-#include "src/core/gpu/cl/kernels/ClQuantizeKernel.h"
-
-namespace arm_compute
-{
-namespace opencl
-{
-void ClQuantize::configure(const CLCompileContext &compile_context, ITensorInfo *src, ITensorInfo *dst)
-{
-    auto k = std::make_unique<kernels::ClQuantizeKernel>();
-    k->configure(compile_context, src, dst);
-    _kernel = std::move(k);
-}
-
-Status ClQuantize::validate(const ITensorInfo *src, const ITensorInfo *dst)
-{
-    return kernels::ClQuantizeKernel::validate(src, dst);
-}
-
-void ClQuantize::run(ITensorPack &tensors)
-{
-    ARM_COMPUTE_ERROR_ON_MSG(tensors.empty(), "No inputs provided");
-    CLScheduler::get().enqueue_op(*_kernel.get(), tensors);
-}
-} // namespace opencl
-} // namespace arm_compute
diff --git a/src/runtime/gpu/cl/operators/ClQuantize.h b/src/runtime/gpu/cl/operators/ClQuantize.h
deleted file mode 100644
index 0b6d2c8cbe..0000000000
--- a/src/runtime/gpu/cl/operators/ClQuantize.h
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CL_QUANTIZE_H
-#define ARM_COMPUTE_CL_QUANTIZE_H
-
-#include "src/core/gpu/cl/ClCompileContext.h"
-#include "src/runtime/gpu/cl/IClOperator.h"
-
-namespace arm_compute
-{
-namespace opencl
-{
-/** Basic function to run @ref kernels::ClQuantizeKernel that dequantizes an input tensor */
-class ClQuantize : public IClOperator
-{
-public:
-    /** Constructor */
-    ClQuantize() = default;
-    /** Set the input and output tensors.
-     *
-     * @param[in]  compile_context The compile context to be used.
-     * @param[in]  src             Source tensor. The dimensions over the third will be interpreted as batches. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/32.
-     * @param[out] dst             Destination tensor with the same dimensions of input. Data types supported: QASYMM8/QASYMM8_SIGNED/QASYMM16.
-     *
-     * @note Output auto initialization is not supported by this function
-     */
-    void configure(const CLCompileContext &compile_context, ITensorInfo *src, ITensorInfo *dst);
-    /** Static function to check if given info will lead to a valid configuration
-     *
-     * Similar to @ref ClQuantize::configure()
-     *
-     * @return a status
-     */
-    static Status validate(const ITensorInfo *src, const ITensorInfo *dst);
-
-    // Inherited method overridden
-    void run(ITensorPack &tensors) override;
-};
-} // namespace opencl
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CL_QUANTIZE_H */
diff --git a/src/runtime/gpu/cl/operators/ClReshape.cpp b/src/runtime/gpu/cl/operators/ClReshape.cpp
deleted file mode 100644
index d3fa9f10ab..0000000000
--- a/src/runtime/gpu/cl/operators/ClReshape.cpp
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "src/runtime/gpu/cl/operators/ClReshape.h"
-
-#include "src/core/gpu/cl/ClCompileContext.h"
-#include "src/core/gpu/cl/kernels/ClReshapeKernel.h"
-
-namespace arm_compute
-{
-namespace opencl
-{
-void ClReshape::configure(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst)
-{
-    auto k = std::make_unique<kernels::ClReshapeKernel>();
-    k->configure(compile_context, src, dst);
-    _kernel = std::move(k);
-}
-
-Status ClReshape::validate(const ITensorInfo *src, const ITensorInfo *dst)
-{
-    return kernels::ClReshapeKernel::validate(src, dst);
-}
-} // namespace opencl
-} // namespace arm_compute
-\ No newline at end of file
diff --git a/src/runtime/gpu/cl/operators/ClReshape.h b/src/runtime/gpu/cl/operators/ClReshape.h
deleted file mode 100644
index 8cccc5776c..0000000000
--- a/src/runtime/gpu/cl/operators/ClReshape.h
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CL_RESHAPE_H
-#define ARM_COMPUTE_CL_RESHAPE_H
-
-#include "src/core/gpu/cl/ClCompileContext.h"
-#include "src/runtime/gpu/cl/IClOperator.h"
-
-namespace arm_compute
-{
-namespace opencl
-{
-/** Basic function to run @ref kernels::ClReshapeKernel */
-class ClReshape : public IClOperator
-{
-public:
-    /** Constructor */
-    ClReshape() = default;
-    /** Initialise the kernel's inputs and outputs
-     *
-     * @param[in]  compile_context The compile context to be used.
-     * @param[in]  input           Input tensor info. Data type supported: All
-     * @param[out] output          Output info. Data type supported: Same as @p input
-     */
-    void configure(const CLCompileContext &compile_context, const ITensorInfo *input, ITensorInfo *output);
-
-    /** Static function to check if given info will lead to a valid configuration of @ref kernels::ClReshapeKernel
-     *
-     * @param[in] input  Input tensor info. Data type supported: All
-     * @param[in] output Output tensor info. Data type supported: Same as @p input
-     *
-     * @return a status
-     */
-    static Status validate(const ITensorInfo *input, const ITensorInfo *output);
-};
-} // namespace opencl
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CL_RESHAPE_H */
-\ No newline at end of file
diff --git a/src/runtime/gpu/cl/operators/ClScale.cpp b/src/runtime/gpu/cl/operators/ClScale.cpp
deleted file mode 100644
index 4730c8a16e..0000000000
--- a/src/runtime/gpu/cl/operators/ClScale.cpp
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "src/runtime/gpu/cl/operators/ClScale.h"
-
-#include "arm_compute/core/Error.h"
-#include "arm_compute/runtime/CL/CLScheduler.h"
-#include "src/core/CL/kernels/CLFillBorderKernel.h"
-#include "src/core/gpu/cl/ClCompileContext.h"
-#include "src/core/gpu/cl/kernels/ClScaleKernel.h"
-
-namespace arm_compute
-{
-namespace opencl
-{
-void ClScale::configure(const CLCompileContext &compile_context, ITensorInfo *src, ITensorInfo *dst, const ScaleKernelInfo &info)
-{
-    ARM_COMPUTE_ERROR_ON_NULLPTR(src);
-    // Configure Scale kernel
-    auto k = std::make_unique<kernels::ClScaleKernel>();
-    k->set_target(CLScheduler::get().target());
-    k->configure(compile_context, src, dst, info);
-    _kernel = std::move(k);
-    if(!_kernel->border_size().empty())
-    {
-        auto b = std::make_unique<CLFillBorderKernel>();
-        b->configure(compile_context, src, _kernel->border_size(), info.border_mode, info.constant_border_value);
-        _border_handler = std::move(b);
-    }
-    // Tune kernel
-    CLScheduler::get().tune_kernel_static(*_kernel);
-}
-
-Status ClScale::validate(const ITensorInfo *src, const ITensorInfo *dst, const ScaleKernelInfo &info)
-{
-    return kernels::ClScaleKernel::validate(src, dst, info);
-}
-
-void ClScale::run(ITensorPack &tensors)
-{
-    ARM_COMPUTE_ERROR_ON_MSG(tensors.empty(), "No inputs provided");
-    if(!_kernel->border_size().empty())
-    {
-        CLScheduler::get().enqueue_op(*_border_handler.get(), tensors, false);
-    }
-    CLScheduler::get().enqueue_op(*_kernel.get(), tensors);
-}
-} // namespace opencl
-} // namespace arm_compute
-\ No newline at end of file
diff --git a/src/runtime/gpu/cl/operators/ClScale.h b/src/runtime/gpu/cl/operators/ClScale.h
deleted file mode 100644
index 6eccb59be8..0000000000
--- a/src/runtime/gpu/cl/operators/ClScale.h
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CL_SCALE_H
-#define ARM_COMPUTE_CL_SCALE_H
-
-#include "arm_compute/core/KernelDescriptors.h"
-#include "src/core/gpu/cl/ClCompileContext.h"
-#include "src/runtime/gpu/cl/IClOperator.h"
-
-namespace arm_compute
-{
-namespace opencl
-{
-/** Basic function to simulate a scale layer. This function calls the following OpenCL kernels:
- *
- * -# @ref CLFillBorderKernel (executed if padding size is different from zero)
- * -# @ref kernels::ClScaleKernel
- */
-class ClScale : public IClOperator
-{
-public:
-    /** Constructor */
-    ClScale() = default;
-    /** Initialize the function's source, destination, interpolation type and border_mode.
-     *
-     * @param[in]     compile_context The compile context to be used.
-     * @param[in,out] src             Source tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/F16/F32. (Written to only for @p border_mode != UNDEFINED)
-     * @param[out]    dst             Destination tensor info. Data types supported: Same as @p src
-     *                                All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane.
-     * @param[in]     info            @ref ScaleKernelInfo descriptor to be used to configure
-     */
-    void configure(const CLCompileContext &compile_context, ITensorInfo *src, ITensorInfo *dst, const ScaleKernelInfo &info);
-
-    /** Static function to check if given info will lead to a valid configuration of @ref ClScale
-     *
-     * @param[in] src  Source tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/F16/F32.
-     * @param[in] dst  Output tensor info. Data type supported: Same as @p src
-     *                 All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane.
-     * @param[in] info @ref ScaleKernelInfo descriptor to be used to validate
-     *
-     * @return a status
-     */
-    static Status validate(const ITensorInfo *src, const ITensorInfo *dst, const ScaleKernelInfo &info);
-
-    // Inherited method overridden
-    void run(ITensorPack &tensors) override;
-
-protected:
-    std::unique_ptr<ICLKernel> _border_handler{ nullptr };
-};
-} // namespace opencl
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLSCALE_H */
diff --git a/src/runtime/gpu/cl/operators/ClSoftmax.cpp b/src/runtime/gpu/cl/operators/ClSoftmax.cpp
deleted file mode 100644
index 975bb0b932..0000000000
--- a/src/runtime/gpu/cl/operators/ClSoftmax.cpp
+++ /dev/null
@@ -1,186 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "src/runtime/gpu/cl/operators/ClSoftmax.h"
-#include "arm_compute/core/utils/misc/ShapeCalculator.h"
-#include "src/core/gpu/cl/kernels/ClSoftmaxKernel.h"
-#include "src/core/helpers/MemoryHelpers.h"
-#include "src/core/helpers/SoftmaxHelpers.h"
-#include "src/runtime/gpu/cl/operators/ClPermute.h"
-#include "src/runtime/gpu/cl/utils/ClAuxTensorHandler.h"
-#include "support/Cast.h"
-
-using namespace arm_compute::experimental;
-
-namespace arm_compute
-{
-namespace opencl
-{
-ClSoftmax::ClSoftmax()
-    : _permute_input(std::make_unique<ClPermute>()),
-      _permute_output(std::make_unique<ClPermute>()),
-      _max_shift_exp_sum_kernel(std::make_unique<kernels::ClLogits1DMaxShiftExpSumKernel>()),
-      _norm_kernel(std::make_unique<kernels::ClLogits1DNormKernel>()),
-      _max_info(),
-      _sum_info(),
-      _tmp_info(),
-      _permuted_src_info(),
-      _permuted_dst_info(),
-      _aux_mem(InternalTensorIdx::COUNT)
-{
-}
-
-void ClSoftmax::configure(const CLCompileContext &compile_context, const ITensorInfo &src, ITensorInfo &dst, const SoftmaxKernelInfo &info)
-{
-    ARM_COMPUTE_ERROR_THROW_ON(validate(src, dst, info));
-
-    const size_t actual_axis = static_cast<size_t>(wrap_around(info.axis, static_cast<int32_t>(src.num_dimensions())));
-
-    _needs_permute = actual_axis != 0;
-
-    const ITensorInfo &tmp_input_info  = _needs_permute ? _permuted_src_info : src;
-    ITensorInfo       &tmp_output_info = _needs_permute ? _permuted_dst_info : dst;
-
-    if(_needs_permute)
-    {
-        const auto perm_info = softmax_helpers::get_permutation_vector_from_softmax_axis(actual_axis);
-        _permute_input->configure(compile_context, &src, &_permuted_src_info, perm_info);
-    }
-
-    DataType tmp_data_type = is_data_type_quantized_asymmetric(tmp_input_info.data_type()) ? DataType::S32 : tmp_input_info.data_type();
-    _tmp_info              = tmp_input_info.clone()->set_data_type(tmp_data_type);
-
-    TensorShape max_sum_shape = tmp_input_info.tensor_shape();
-    _max_info                 = tmp_input_info.clone()->set_tensor_shape(max_sum_shape);
-    _sum_info                 = tmp_input_info.clone()->set_tensor_shape(max_sum_shape).set_data_type(tmp_data_type);
-
-    // Set GPU target to kernels
-    _max_shift_exp_sum_kernel->set_target(CLScheduler::get().target());
-
-    _max_shift_exp_sum_kernel->configure(compile_context, tmp_input_info, _max_info, _tmp_info, _sum_info, info);
-    _norm_kernel->configure(compile_context, _tmp_info, _sum_info, tmp_output_info, info);
-
-    if(_needs_permute)
-    {
-        const auto perm_info = softmax_helpers::get_permutation_vector_from_softmax_axis(actual_axis);
-        _permute_output->configure(compile_context, &_permuted_dst_info, &dst, perm_info);
-    }
-
-    _aux_mem[InternalTensorIdx::SUM] = MemoryInfo(offset_int_vec(InternalTensorIdx::SUM), MemoryLifetime::Temporary, _sum_info.total_size());
-    _aux_mem[InternalTensorIdx::TMP] = MemoryInfo(offset_int_vec(InternalTensorIdx::TMP), MemoryLifetime::Temporary, _tmp_info.total_size());
-    _aux_mem[InternalTensorIdx::MAX] = MemoryInfo(offset_int_vec(InternalTensorIdx::MAX), MemoryLifetime::Temporary, _max_info.total_size());
-
-    _aux_mem[InternalTensorIdx::PERMUTED_SRC] = MemoryInfo(offset_int_vec(InternalTensorIdx::PERMUTED_SRC), MemoryLifetime::Temporary, _permuted_src_info.total_size());
-    _aux_mem[InternalTensorIdx::PERMUTED_DST] = MemoryInfo(offset_int_vec(InternalTensorIdx::PERMUTED_DST), MemoryLifetime::Temporary, _permuted_dst_info.total_size());
-}
-
-Status ClSoftmax::validate(const ITensorInfo &src, const ITensorInfo &dst, const SoftmaxKernelInfo &info)
-{
-    ARM_COMPUTE_RETURN_ERROR_ON_MSG(src.num_dimensions() > 4, "Only up to 4 dimensions are supported");
-    ARM_COMPUTE_UNUSED(info.beta);
-    ARM_COMPUTE_RETURN_ERROR_ON(info.axis < static_cast<int32_t>(-src.num_dimensions()) || static_cast<int32_t>(src.num_dimensions()) <= info.axis);
-
-    const size_t actual_axis   = static_cast<size_t>(wrap_around(info.axis, static_cast<int32_t>(src.num_dimensions())));
-    const bool   needs_permute = actual_axis != 0;
-    if(needs_permute)
-    {
-        const PermutationVector permutation_vector = softmax_helpers::get_permutation_vector_from_softmax_axis(actual_axis);
-        const TensorShape       permuted_shape     = misc::shape_calculator::compute_permutation_output_shape(src, permutation_vector);
-        TensorInfo              input_permuted(src.clone()->set_tensor_shape(permuted_shape));
-        ARM_COMPUTE_RETURN_ON_ERROR(ClPermute::validate(&src, &input_permuted, permutation_vector));
-        TensorInfo output_permuted(dst.clone()->set_tensor_shape(permuted_shape));
-        ARM_COMPUTE_RETURN_ON_ERROR(ClPermute::validate(&output_permuted, &dst, permutation_vector));
-    }
-
-    // Create intermediate tensor info
-    DataType   tmp_data_type = is_data_type_quantized_asymmetric(src.data_type()) ? DataType::S32 : src.data_type();
-    TensorInfo tensor_info_tmp(src.clone()->set_data_type(tmp_data_type).set_is_resizable(true));
-
-    TensorShape max_sum_shape = src.tensor_shape();
-    max_sum_shape.set(0, 1);
-    TensorInfo tensor_info_max(src.clone()->set_tensor_shape(max_sum_shape).set_is_resizable(true));
-    TensorInfo tensor_info_sum(src.clone()->set_tensor_shape(max_sum_shape).set_data_type(tmp_data_type).set_quantization_info(QuantizationInfo()).set_is_resizable(true));
-
-    ARM_COMPUTE_RETURN_ON_ERROR(kernels::ClLogits1DMaxShiftExpSumKernel::validate(src, tensor_info_max, tensor_info_tmp, tensor_info_sum));
-    ARM_COMPUTE_RETURN_ON_ERROR(kernels::ClLogits1DNormKernel::validate(tensor_info_tmp, tensor_info_sum, dst, info));
-
-    return Status{};
-}
-
-void ClSoftmax::run(ITensorPack &tensors)
-{
-    auto src = tensors.get_const_tensor(TensorType::ACL_SRC);
-    auto dst = tensors.get_tensor(TensorType::ACL_DST);
-
-    CLAuxTensorHandler sum(offset_int_vec(InternalTensorIdx::SUM), _sum_info, tensors, false);
-    CLAuxTensorHandler tmp(offset_int_vec(InternalTensorIdx::TMP), _tmp_info, tensors, false);
-    CLAuxTensorHandler max(offset_int_vec(InternalTensorIdx::MAX), _max_info, tensors, false);
-
-    CLAuxTensorHandler permuted_src(offset_int_vec(InternalTensorIdx::PERMUTED_SRC), _permuted_src_info, tensors, false);
-    CLAuxTensorHandler permuted_dst(offset_int_vec(InternalTensorIdx::PERMUTED_DST), _permuted_dst_info, tensors, false);
-
-    if(_needs_permute)
-    {
-        ITensorPack pack;
-        pack.add_const_tensor(TensorType::ACL_SRC, src);
-        pack.add_tensor(TensorType::ACL_DST, permuted_src.get());
-        _permute_input.get()->run(pack);
-    }
-
-    ITensorPack sum_pack;
-    ITensorPack norm_pack;
-    if(_needs_permute)
-    {
-        sum_pack.add_const_tensor(TensorType::ACL_SRC, permuted_src.get());
-        norm_pack.add_tensor(TensorType::ACL_DST, permuted_dst.get());
-    }
-    else
-    {
-        sum_pack.add_const_tensor(TensorType::ACL_SRC, src);
-        norm_pack.add_tensor(TensorType::ACL_DST, dst);
-    }
-    sum_pack.add_tensor(TensorType::ACL_DST, tmp.get());
-    sum_pack.add_tensor(TensorType::ACL_INT_0, max.get());
-    sum_pack.add_tensor(TensorType::ACL_INT_1, sum.get());
-
-    norm_pack.add_const_tensor(TensorType::ACL_SRC, tmp.get());
-    norm_pack.add_tensor(TensorType::ACL_INT_0, sum.get());
-
-    CLScheduler::get().enqueue_op(*_max_shift_exp_sum_kernel.get(), sum_pack, false);
-    CLScheduler::get().enqueue_op(*_norm_kernel.get(), norm_pack, false);
-
-    if(_needs_permute)
-    {
-        ITensorPack pack;
-        pack.add_const_tensor(TensorType::ACL_SRC, permuted_dst.get());
-        pack.add_tensor(TensorType::ACL_DST, dst);
-        _permute_output.get()->run(pack);
-    }
-}
-
-experimental::MemoryRequirements ClSoftmax::workspace() const
-{
-    return _aux_mem;
-}
-} // namespace opencl
-} // namespace arm_compute
-\ No newline at end of file
diff --git a/src/runtime/gpu/cl/operators/ClSoftmax.h b/src/runtime/gpu/cl/operators/ClSoftmax.h
deleted file mode 100644
index f19a51fc5e..0000000000
--- a/src/runtime/gpu/cl/operators/ClSoftmax.h
+++ /dev/null
@@ -1,97 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CL_SOFTMAX_H
-#define ARM_COMPUTE_CL_SOFTMAX_H
-
-#include "arm_compute/runtime/CL/CLTensor.h"
-#include "src/core/gpu/cl/ClCompileContext.h"
-#include "src/runtime/gpu/cl/IClOperator.h"
-
-namespace arm_compute
-{
-struct SoftmaxKernelInfo;
-
-namespace opencl
-{
-class ClPermute;
-namespace kernels
-{
-class ClLogits1DMaxShiftExpSumKernel;
-class ClLogits1DNormKernel;
-} // namespace kernels
-class ClSoftmax : public IClOperator
-{
-public:
-    /** Constructor */
-    ClSoftmax();
-    /** Configure the operator
-     *
-     * @param[in]  compile_context The compile context to be used.
-     * @param[in]  src             Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32 for Softmax and F16/F32 for Log Softmax
-     * @param[out] dst             Destination tensor info. Data types supported: same as @p src
-     * @param[in]  info            Contains information consumed by kernels for softmax described in @ref SoftmaxKernelInfo.
-     *
-     */
-    void configure(const CLCompileContext &compile_context, const ITensorInfo &src, ITensorInfo &dst, const SoftmaxKernelInfo &info);
-    /** Static function to check if the given info will lead to a valid configuration
-     *
-     * @param[in]  src  Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32 for Softmax and F16/F32 for Log Softmax
-     * @param[out] dst  Destination tensor info. Data types supported: same as @p src
-     * @param[in]  info Contains information consumed by kernels for softmax described in @ref SoftmaxKernelInfo.
-     *
-     */
-    static Status validate(const ITensorInfo &src, const ITensorInfo &dst, const SoftmaxKernelInfo &info);
-    // Inherited methods overridden:
-    void run(ITensorPack &tensors) override;
-    experimental::MemoryRequirements workspace() const override;
-
-private:
-    enum InternalTensorIdx
-    {
-        MAX = 0,
-        SUM,
-        TMP,
-        PERMUTED_SRC,
-        PERMUTED_DST,
-        COUNT
-    };
-
-    std::unique_ptr<ClPermute>                               _permute_input;
-    std::unique_ptr<ClPermute>                               _permute_output;
-    std::unique_ptr<kernels::ClLogits1DMaxShiftExpSumKernel> _max_shift_exp_sum_kernel;
-    std::unique_ptr<kernels::ClLogits1DNormKernel>           _norm_kernel;
-    bool                                                     _needs_permute{ false };
-
-    TensorInfo _max_info;
-    TensorInfo _sum_info;
-    TensorInfo _tmp_info;
-    TensorInfo _permuted_src_info;
-    TensorInfo _permuted_dst_info;
-
-    experimental::MemoryRequirements _aux_mem{};
-};
-
-} // opencl
-} // arm_compute
-#endif /* ARM_COMPUTE_CL_SOFTMAX_H */
-\ No newline at end of file
diff --git a/src/runtime/gpu/cl/operators/ClSub.cpp b/src/runtime/gpu/cl/operators/ClSub.cpp
deleted file mode 100644
index 429f23a837..0000000000
--- a/src/runtime/gpu/cl/operators/ClSub.cpp
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "src/runtime/gpu/cl/operators/ClSub.h"
-
-#include "src/core/gpu/cl/ClCompileContext.h"
-#include "src/core/gpu/cl/kernels/ClElementwiseKernel.h"
-
-namespace arm_compute
-{
-namespace opencl
-{
-void ClSub::configure(const ClCompileContext &compile_context, ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst,
-                      ConvertPolicy policy, const ActivationLayerInfo &act_info)
-{
-    auto k = std::make_unique<kernels::ClSaturatedArithmeticKernel>();
-    k->configure(compile_context, ArithmeticOperation::SUB, src1, src2, dst, policy, act_info);
-    _kernel = std::move(k);
-}
-
-Status ClSub::validate(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst,
-                       ConvertPolicy policy, const ActivationLayerInfo &act_info)
-{
-    return kernels::ClSaturatedArithmeticKernel::validate(ArithmeticOperation::SUB, src1, src2, dst, policy, act_info);
-}
-} // namespace opencl
-} // namespace arm_compute
diff --git a/src/runtime/gpu/cl/operators/ClSub.h b/src/runtime/gpu/cl/operators/ClSub.h
deleted file mode 100644
index bcad84d583..0000000000
--- a/src/runtime/gpu/cl/operators/ClSub.h
+++ /dev/null
@@ -1,100 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CL_SUB_H
-#define ARM_COMPUTE_CL_SUB_H
-
-#include "src/core/gpu/cl/ClCompileContext.h"
-#include "src/runtime/gpu/cl/IClOperator.h"
-
-namespace arm_compute
-{
-namespace opencl
-{
-/** Basic function to run arithmetic subtraction
- *
- * @note The tensor data type for the inputs must be U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/F16/F32.
- * @note The function performs an arithmetic subtraction between two tensors.
- */
-class ClSub : public IClOperator
-{
-public:
-    /** Default Constructor */
-    ClSub() = default;
-    /** Configure function for a given list of arguments.
-     *
-     * Valid configurations (src1,src2) -> dst :
-     *
-     *   - (U8,U8)           -> U8
-     *   - (U8,U8)           -> S16
-     *   - (S16,U8)          -> S16
-     *   - (U8,S16)          -> S16
-     *   - (S16,S16)         -> S16
-     *   - (S32,S32)         -> S32
-     *   - (F16,F16)         -> F16
-     *   - (F32,F32)         -> F32
-     *   - (QASYMM8,QASYMM8) -> QASYMM8
-     *   - (QASYMM8_SIGNED,QASYMM8_SIGNED) -> QASYMM8_SIGNED
-     *   - (QSYMM16,QSYMM16) -> QSYMM16
-     *
-     * @param[in]      compile_context The compile context to be used.
-     * @param[in, out] src1            First source tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/F16/F32.
-     *                                 The source tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
-     * @param[in, out] src2            Second source tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/F16/F32.
-     *                                 The source tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
-     * @param[out]     dst             Destination tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/F16/F32.
-     * @param[in]      policy          Policy to use to handle overflow.
-     * @param[in]      act_info        (Optional) Activation layer information in case of a fused activation.
-     */
-    void configure(const ClCompileContext &compile_context, ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst, ConvertPolicy policy,
-                   const ActivationLayerInfo &act_info = ActivationLayerInfo());
-    /** Static function to check if given info will lead to a valid configuration of @ref ClSub
-     *
-     * Valid configurations (src1,src2) -> dst :
-     *
-     *   - (U8,U8)           -> U8
-     *   - (U8,U8)           -> S16
-     *   - (S16,U8)          -> S16
-     *   - (U8,S16)          -> S16
-     *   - (S16,S16)         -> S16
-     *   - (S32,S32)         -> S32
-     *   - (F16,F16)         -> F16
-     *   - (F32,F32)         -> F32
-     *   - (QASYMM8,QASYMM8) -> QASYMM8
-     *   - (QASYMM8_SIGNED,QASYMM8_SIGNED) -> QASYMM8_SIGNED
-     *   - (QSYMM16,QSYMM16) -> QSYMM16
-     *
-     * @param[in] src1     First source tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/F16/F32.
-     * @param[in] src2     Second source tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/F16/F32.
-     * @param[in] dst      Destination tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/F16/F32.
-     * @param[in] policy   Policy to use to handle overflow.
-     * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
-     *
-     * @return a status
-     */
-    static Status validate(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst, ConvertPolicy policy,
-                           const ActivationLayerInfo &act_info = ActivationLayerInfo());
-};
-} // namespace opencl
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CL_SUB_H */
diff --git a/src/runtime/gpu/cl/operators/ClTranspose.cpp b/src/runtime/gpu/cl/operators/ClTranspose.cpp
deleted file mode 100644
index 48f44282e8..0000000000
--- a/src/runtime/gpu/cl/operators/ClTranspose.cpp
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "src/runtime/gpu/cl/operators/ClTranspose.h"
-
-#include "src/core/gpu/cl/ClCompileContext.h"
-#include "src/core/gpu/cl/kernels/ClTransposeKernel.h"
-
-namespace arm_compute
-{
-namespace opencl
-{
-void ClTranspose::configure(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst)
-{
-    auto k = std::make_unique<kernels::ClTransposeKernel>();
-    k->configure(compile_context, src, dst);
-    _kernel = std::move(k);
-}
-
-Status ClTranspose::validate(const ITensorInfo *src, const ITensorInfo *dst)
-{
-    return kernels::ClTransposeKernel::validate(src, dst);
-}
-} // namespace opencl
-} // namespace arm_compute
-\ No newline at end of file
diff --git a/src/runtime/gpu/cl/operators/ClTranspose.h b/src/runtime/gpu/cl/operators/ClTranspose.h
deleted file mode 100644
index d898f677ca..0000000000
--- a/src/runtime/gpu/cl/operators/ClTranspose.h
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CL_TRANSPOSE_H
-#define ARM_COMPUTE_CL_TRANSPOSE_H
-
-#include "src/core/gpu/cl/ClCompileContext.h"
-#include "src/runtime/gpu/cl/IClOperator.h"
-
-namespace arm_compute
-{
-namespace opencl
-{
-/** Basic function to run @ref kernels::ClTransposeKernel */
-class ClTranspose : public IClOperator
-{
-public:
-    /** Constructor */
-    ClTranspose() = default;
-    /** Initialise the kernel's inputs and outputs
-     *
-     * @param[in] compile_context The compile context to be used.
-     * @param[in] src             The src tensor info. Data types supported: All.
-     * @param[in] dst             The dst tensor info. Data types supported: Same as @p src
-     */
-    void configure(const CLCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst);
-    /**  Static function to check if given info will lead to a valid configuration of @ref kernels::ClTransposeKernel.
-     *
-     * @param[in] src First tensor src info. Data types supported: All.
-     * @param[in] dst Output tensor info. Data types supported: same as @p src.
-     *
-     * @return a status
-     */
-    static Status validate(const ITensorInfo *src, const ITensorInfo *dst);
-};
-} // namespace opencl
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CL_TRANSPOSE_H */
diff --git a/src/runtime/gpu/cl/operators/ClWinogradConv2d.cpp b/src/runtime/gpu/cl/operators/ClWinogradConv2d.cpp
deleted file mode 100644
index c8db697778..0000000000
--- a/src/runtime/gpu/cl/operators/ClWinogradConv2d.cpp
+++ /dev/null
@@ -1,299 +0,0 @@
-/*
- * Copyright (c) 2018-2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "src/runtime/gpu/cl/operators/ClWinogradConv2d.h"
-
-#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/Utils.h"
-#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/experimental/Types.h"
-#include "arm_compute/core/utils/misc/ShapeCalculator.h"
-#include "arm_compute/runtime/CL/CLScheduler.h"
-#include "src/core/CL/kernels/CLFillBorderKernel.h"
-#include "src/core/CL/kernels/CLFillBorderKernel.h"
-#include "src/core/gpu/cl/kernels/ClWinogradFilterTransformKernel.h"
-#include "src/core/gpu/cl/kernels/ClWinogradInputTransformKernel.h"
-#include "src/core/gpu/cl/kernels/ClWinogradOutputTransformKernel.h"
-#include "src/core/helpers/MemoryHelpers.h"
-#include "src/runtime/gpu/cl/utils/ClAuxTensorHandler.h"
-#include "support/Cast.h"
-
-using namespace arm_compute::experimental;
-
-namespace arm_compute
-{
-namespace opencl
-{
-namespace
-{
-Size2D winograd_output_tile(const Size2D &input_dims, const Size2D &kernel_dims, DataLayout data_layout)
-{
-    Size2D output_tile = Size2D{};
-
-    const unsigned int kernel_max_dim = std::max(kernel_dims.width, kernel_dims.height);
-
-    // Check if the input spatial dimensions are smaller than 4
-    const bool is_input_lt4_nchw = (input_dims.width <= 4 && input_dims.height <= 4) && (data_layout == DataLayout::NCHW);
-
-    if(kernel_max_dim == 3U)
-    {
-        if(kernel_dims == Size2D(3U, 3U))
-        {
-            output_tile = is_input_lt4_nchw ? Size2D(2U, 2U) : Size2D(4U, 4U);
-        }
-        else if(kernel_dims == Size2D(3U, 1U))
-        {
-            output_tile = is_input_lt4_nchw ? Size2D(2U, 1U) : Size2D(4U, 1U);
-        }
-        else
-        {
-            output_tile = is_input_lt4_nchw ? Size2D(1U, 2U) : Size2D(1U, 4U);
-        }
-    }
-    else if(kernel_max_dim == 5U)
-    {
-        output_tile = Size2D(kernel_dims.width == 1 ? 1U : 4U,
-                             kernel_dims.height == 1 ? 1U : 4U);
-    }
-    else if(kernel_max_dim == 7U)
-    {
-        output_tile = Size2D(kernel_dims.width == 1 ? 1U : 2U,
-                             kernel_dims.height == 1 ? 1U : 2U);
-    }
-
-    return output_tile;
-}
-
-bool check_support_fast_math(const Size2D &output_tile, const Size2D &kernel_size)
-{
-    // Check if we want to configure a Winograd configuration which requires fast math
-    using WinogradConfiguration = std::pair<std::pair<int, int>, std::pair<int, int>>;
-
-    std::vector<WinogradConfiguration> fast_math_winograd =
-    {
-        WinogradConfiguration(std::pair<int, int>(4, 4), std::pair<int, int>(5, 5)),
-        WinogradConfiguration(std::pair<int, int>(2, 2), std::pair<int, int>(7, 7))
-    };
-
-    auto p = std::make_pair(std::pair<int, int>(output_tile.width, output_tile.height),
-                            std::pair<int, int>(kernel_size.width, kernel_size.height));
-
-    return std::find(fast_math_winograd.begin(), fast_math_winograd.end(), p) != fast_math_winograd.end();
-}
-
-Status validate_arguments(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst, const PadStrideInfo &conv_info,
-                          const ActivationLayerInfo &act_info, bool enable_fast_math)
-{
-    // Get indeces for the width and height
-    const size_t idx_width  = get_data_layout_dimension_index(src->data_layout(), DataLayoutDimension::WIDTH);
-    const size_t idx_height = get_data_layout_dimension_index(src->data_layout(), DataLayoutDimension::HEIGHT);
-
-    // Input shape, kernel size and output tile
-    const Size2D input_dims  = Size2D(src->tensor_shape()[idx_width], src->tensor_shape()[idx_height]);
-    const Size2D kernel_size = Size2D(weights->tensor_shape()[idx_width], weights->tensor_shape()[idx_height]);
-    const Size2D output_tile = winograd_output_tile(input_dims, kernel_size, src->data_layout());
-
-    ARM_COMPUTE_RETURN_ERROR_ON_MSG(((conv_info.pad_left() > (kernel_size.x() / 2u)) || (conv_info.pad_right() > (kernel_size.x() / 2u))), "Winograd only supports padding up to half kernel size");
-    ARM_COMPUTE_RETURN_ERROR_ON_MSG(((conv_info.pad_top() > (kernel_size.y() / 2u)) || (conv_info.pad_bottom() > (kernel_size.y() / 2u))), "Winograd only supports padding up to half kernel size");
-
-    // Check if the Winograd configuration requires fast math
-    if(!enable_fast_math)
-    {
-        ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src, 1, DataType::F32); //disable winograd for fp16 if fast math is false.
-        ARM_COMPUTE_RETURN_ERROR_ON_MSG(check_support_fast_math(output_tile, kernel_size), "This Winograd configuration requires enable_fast_math=true");
-    }
-
-    const WinogradInfo winograd_info = WinogradInfo(output_tile,
-                                                    kernel_size,
-                                                    input_dims,
-                                                    conv_info,
-                                                    src->data_layout());
-
-    // Validate input transform
-    const TensorShape input0_shape = misc::shape_calculator::compute_winograd_input_transform_shape(*src, winograd_info);
-    const TensorInfo  input0       = src->clone()->set_tensor_shape(input0_shape);
-    ARM_COMPUTE_RETURN_ON_ERROR(kernels::ClWinogradInputTransformKernel::validate(src, &input0, winograd_info));
-
-    // Validate filter transform
-    const TensorShape input1_shape = misc::shape_calculator::compute_winograd_filter_transform_shape(*weights, winograd_info);
-    const TensorInfo  input1       = weights->clone()->set_tensor_shape(input1_shape);
-    ARM_COMPUTE_RETURN_ON_ERROR(kernels::ClWinogradFilterTransformKernel::validate(weights, &input1, winograd_info));
-
-    // Validate batched matrix multiply
-    TensorShape batched_mm_output_shape = input0.tensor_shape();
-    batched_mm_output_shape[0]          = input1.tensor_shape()[0];
-    const TensorInfo batched_mm_output  = input0.clone()->set_tensor_shape(batched_mm_output_shape);
-    ARM_COMPUTE_RETURN_ON_ERROR(ClGemm::validate(&input0, &input1, nullptr, &batched_mm_output, 1.0f, 0.0f, GEMMInfo(false, false, true /* Reshape weights only for the first run*/, 0, false, false,
-                                                                                                                     GEMMLowpOutputStageInfo(), (src->data_type() == DataType::F16))));
-
-    // Configure output transform
-    ARM_COMPUTE_RETURN_ON_ERROR(kernels::ClWinogradOutputTransformKernel::validate(&batched_mm_output, biases, dst, winograd_info, act_info));
-    return Status{};
-}
-
-} // namespace
-
-ClWinogradConv2d::ClWinogradConv2d()
-    : _batched_mm(),
-      _input_transform(std::make_unique<kernels::ClWinogradInputTransformKernel>()),
-      _filter_transform(std::make_unique<kernels::ClWinogradFilterTransformKernel>()),
-      _output_transform(std::make_unique<kernels::ClWinogradOutputTransformKernel>()),
-      _border_handler(),
-      _input0(),
-      _input1(),
-      _batched_mm_output(),
-      _is_prepared(false),
-      _aux_mem()
-{
-}
-
-ClWinogradConv2d::~ClWinogradConv2d() = default;
-
-void ClWinogradConv2d::configure(const ClCompileContext &compile_context, ITensorInfo *src, ITensorInfo *weights, ITensorInfo *biases, ITensorInfo *dst,
-                                 const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info, bool enable_fast_math)
-{
-    ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(src, weights, biases, dst, conv_info, act_info, enable_fast_math));
-    // Get indices for the width and height
-    const size_t idx_width  = get_data_layout_dimension_index(src->data_layout(), DataLayoutDimension::WIDTH);
-    const size_t idx_height = get_data_layout_dimension_index(src->data_layout(), DataLayoutDimension::HEIGHT);
-
-    // Input shape, kernel size and output tile
-    const Size2D input_dims  = Size2D(src->tensor_shape()[idx_width], src->tensor_shape()[idx_height]);
-    const Size2D kernel_size = Size2D(weights->tensor_shape()[idx_width], weights->tensor_shape()[idx_height]);
-    const Size2D output_tile = winograd_output_tile(input_dims, kernel_size, src->data_layout());
-
-    // Check if the Winograd configuration requires fast math
-    if(!enable_fast_math)
-    {
-        ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src, 1, DataType::F32); //disable winograd for fp16 if fast math is false.
-        ARM_COMPUTE_ERROR_ON_MSG(check_support_fast_math(output_tile, kernel_size), "This Winograd configuration requires enable_fast_math=true");
-    }
-    const WinogradInfo winograd_info = WinogradInfo(output_tile,
-                                                    kernel_size,
-                                                    input_dims,
-                                                    conv_info,
-                                                    src->data_layout());
-
-    _is_prepared = false;
-
-    // Configure input transform
-    _input_transform->configure(compile_context, src, &_input0, winograd_info);
-    _border_handler.configure(compile_context, src, _input_transform->border_size(), BorderMode::CONSTANT, PixelValue());
-
-    // Configure filter transform
-    _filter_transform->configure(compile_context, weights, &_input1, winograd_info);
-
-    // Configure batched matrix multiply
-    _batched_mm.configure(compile_context, &_input0, &_input1, nullptr, &_batched_mm_output, 1.0f, 0.0f, GEMMInfo(false, false, true /* Reshape weights only for the first run*/, 0,
-                                                                                                                  false, false,
-                                                                                                                  GEMMLowpOutputStageInfo(),
-                                                                                                                  (src->data_type() == DataType::F16)));
-
-    // Configure output transform
-    _output_transform->configure(compile_context, &_batched_mm_output, biases, dst, winograd_info, act_info);
-
-    _aux_mem = _batched_mm.workspace();
-    _aux_mem.push_back(MemoryInfo(offset_int_vec(2), MemoryLifetime::Temporary, _input0.total_size()));
-    _aux_mem.push_back(MemoryInfo(offset_int_vec(3), MemoryLifetime::Persistent, _input1.total_size()));
-    _aux_mem.push_back(MemoryInfo(offset_int_vec(4), MemoryLifetime::Temporary, _batched_mm_output.total_size()));
-}
-
-Status ClWinogradConv2d::validate(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst, const PadStrideInfo &conv_info,
-                                  const ActivationLayerInfo &act_info, bool enable_fast_math)
-{
-    ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(src, weights, biases, dst, conv_info, act_info, enable_fast_math));
-    return Status{};
-}
-
-void ClWinogradConv2d::run(ITensorPack &tensors)
-{
-    prepare(tensors);
-
-    // Run input transform
-    auto src    = utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(TensorType::ACL_SRC_0));
-    auto biases = utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(TensorType::ACL_SRC_2));
-    auto dst    = utils::cast::polymorphic_downcast<ICLTensor *>(tensors.get_tensor(TensorType::ACL_DST));
-
-    CLAuxTensorHandler input0(offset_int_vec(2), _input0, tensors, true);
-    CLAuxTensorHandler input1(offset_int_vec(3), _input1, tensors, true);
-    CLAuxTensorHandler batched_mm_output(offset_int_vec(4), _batched_mm_output, tensors, true);
-
-    ITensorPack pack_it
-    {
-        { TensorType::ACL_SRC, src },
-        { TensorType::ACL_DST, input0.get() },
-    };
-    CLScheduler::get().enqueue_op(_border_handler, pack_it);
-    CLScheduler::get().enqueue_op(*_input_transform, pack_it);
-
-    // Run batched matrix multiplication
-    ITensorPack pack_mm
-    {
-        { TensorType::ACL_SRC_0, input0.get() },
-        { TensorType::ACL_SRC_1, input1.get() },
-        { TensorType::ACL_DST, batched_mm_output.get() },
-    };
-    _batched_mm.run(pack_mm);
-
-    // Run output transform
-    ITensorPack pack_ot
-    {
-        { TensorType::ACL_SRC_0, batched_mm_output.get() },
-        { TensorType::ACL_SRC_1, biases },
-        { TensorType::ACL_DST, dst },
-    };
-    CLScheduler::get().enqueue_op(*_output_transform, pack_ot);
-}
-
-void ClWinogradConv2d::prepare(ITensorPack &tensors)
-{
-    if(!_is_prepared)
-    {
-        auto       weights = utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(TensorType::ACL_SRC_1));
-        ICLTensor *in1_aux = utils::cast::polymorphic_downcast<ICLTensor *>(tensors.get_tensor(offset_int_vec(3)));
-
-        CLAuxTensorHandler input1(_input1, *in1_aux);
-        ITensorPack        pack_ft
-        {
-            { TensorType::ACL_SRC, weights },
-            { TensorType::ACL_DST, input1.get() },
-        };
-        // Run filter transform and mark original weights as unused
-        CLScheduler::get().enqueue_op(*_filter_transform, pack_ft, false);
-        weights->mark_as_unused();
-
-        tensors.add_tensor(ACL_SRC_1, input1.get());
-        // Prepare GEMM and release reshaped weights if marked unused by ClGemm
-        _batched_mm.prepare(tensors);
-
-        CLScheduler::get().queue().finish();
-        _is_prepared = true;
-    }
-}
-
-experimental::MemoryRequirements ClWinogradConv2d::workspace() const
-{
-    return _aux_mem;
-}
-} // namespace opencl
-} // namespace arm_compute
-\ No newline at end of file
diff --git a/src/runtime/gpu/cl/operators/ClWinogradConv2d.h b/src/runtime/gpu/cl/operators/ClWinogradConv2d.h
deleted file mode 100644
index 83b31f1c99..0000000000
--- a/src/runtime/gpu/cl/operators/ClWinogradConv2d.h
+++ /dev/null
@@ -1,126 +0,0 @@
-/*
- * Copyright (c) 2018-2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CL_WINOGRADCONV2D_H
-#define ARM_COMPUTE_CL_WINOGRADCONV2D_H
-
-#include "arm_compute/runtime/CL/CLTensor.h"
-#include "src/core/CL/kernels/CLFillBorderKernel.h"
-#include "src/core/gpu/cl/ClCompileContext.h"
-#include "src/runtime/gpu/cl/IClOperator.h"
-#include "src/runtime/gpu/cl/operators/ClGemm.h"
-
-namespace arm_compute
-{
-class CLCompileContext;
-class ITensorInfo;
-namespace opencl
-{
-namespace kernels
-{
-class ClWinogradInputTransformKernel;
-class ClWinogradFilterTransformKernel;
-class ClWinogradOutputTransformKernel;
-} // kernels
-/** Basic function to execute Winograd-based convolution on OpenCL. This function calls the following OpenCL functions/kernels:
- *
- *  -# @ref kernels::ClWinogradInputTransformKernel
- *  -# @ref kernels::ClWinogradFilterTransformKernel (only once)
- *  -# @ref ClGemm
- *  -# @ref kernels::ClWinogradOutputTransformKernel
- *
- */
-class ClWinogradConv2d : public IClOperator
-{
-public:
-    /** Default constructor */
-    ClWinogradConv2d();
-    /** Default destructor */
-    ~ClWinogradConv2d();
-    /** Prevent instances of this class from being copied (As this class contains pointers) */
-    ClWinogradConv2d(const ClWinogradConv2d &) = delete;
-    /** Default move constructor */
-    ClWinogradConv2d(ClWinogradConv2d &&) = default;
-    /** Prevent instances of this class from being copied (As this class contains pointers) */
-    ClWinogradConv2d &operator=(const ClWinogradConv2d &) = delete;
-    /** Default move assignment operator */
-    ClWinogradConv2d &operator=(ClWinogradConv2d &&) = default;
-    /** Set the input and output tensors.
-     *
-     * Valid data layouts:
-     * - NHWC
-     * - NCHW
-     *
-     * Valid data type configurations:
-     * |src0           |src1           |src2   |dst            |
-     * |:--------------|:--------------|:------|:--------------|
-     * |F16            |F16            |F16    |F16            |
-     * |F32            |F32            |F32    |F32            |
-     *
-     * @note: This function only works with 3x3,3x1,1x3,5x5,5x1,1x5,7x1 and 1x7 kernels along with unit strides for both NCHW and NHWC data layout
-     * @note  Some Winograd configurations (i.e. F(4x4, 5x5)) are supported only with enable_fast_math = true
-     *
-     * @param[in]  compile_context  The compile context to be used.
-     * @param[in]  src              Source tensor info. 3 lower dimensions represent a single input [width, height, IFM],
-     *                              while every optional dimension from 4 and above represent a batch of inputs.
-     *                              Data types supported: F16/F32.
-     * @param[in]  weights          Weights tensor info. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported:Same as @p src.
-     * @param[in]  biases           Biases tensor info. Shared biases supported. Biases are 1D tensor with dimensions [OFM].Data type supported: Same as @p src
-     * @param[out] dst              Destination tensor info. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
-     *                              Data types supported: Same as @p src.
-     * @param[in]  conv_info        Contains padding and stride information described in @ref PadStrideInfo.
-     * @param[in]  act_info         (Optional) Activation layer information in case of a fused activation.
-     * @param[in]  enable_fast_math (Optional) Enable fast math computation. In case this flag were set, the function could dispatch the fastest implementation
-     *                              available which may introduce a drop of accuracy as well. Default is false
-     */
-    void configure(const ClCompileContext &compile_context, ITensorInfo *src, ITensorInfo *weights, ITensorInfo *biases, ITensorInfo *dst, const PadStrideInfo &conv_info,
-                   const ActivationLayerInfo &act_info = ActivationLayerInfo(), bool enable_fast_math = false);
-    /** Static function to check if given info will lead to a valid configuration
-     *
-     * Similar to ClWinogradConv2d::configure()
-     *
-     * @return a status
-     */
-    static Status validate(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst, const PadStrideInfo &conv_info,
-                           const ActivationLayerInfo &act_info = ActivationLayerInfo(), bool enable_fast_math = false);
-
-    // Inherited method overridden
-    void run(ITensorPack &tensors) override;
-    void prepare(ITensorPack &tensors) override;
-    experimental::MemoryRequirements workspace() const override;
-
-private:
-    ClGemm                                                    _batched_mm;
-    std::unique_ptr<kernels::ClWinogradInputTransformKernel>  _input_transform;
-    std::unique_ptr<kernels::ClWinogradFilterTransformKernel> _filter_transform;
-    std::unique_ptr<kernels::ClWinogradOutputTransformKernel> _output_transform;
-    CLFillBorderKernel                                        _border_handler;
-    TensorInfo                                                _input0;
-    TensorInfo                                                _input1;
-    TensorInfo                                                _batched_mm_output;
-    bool                                                      _is_prepared;
-    experimental::MemoryRequirements                          _aux_mem{};
-};
-} // namespace opencl
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CL_WINOGRADCONV2D_H */
diff --git a/src/runtime/gpu/cl/utils/ClAuxTensorHandler.h b/src/runtime/gpu/cl/utils/ClAuxTensorHandler.h
deleted file mode 100644
index 152e3c6c04..0000000000
--- a/src/runtime/gpu/cl/utils/ClAuxTensorHandler.h
+++ /dev/null
@@ -1,101 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CL_UTILS_CL_AUX_TENSOR_HANDLER_H
-#define ARM_COMPUTE_CL_UTILS_CL_AUX_TENSOR_HANDLER_H
-
-#include "arm_compute/core/ITensorPack.h"
-#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/runtime/CL/CLTensor.h"
-
-#include "support/Cast.h"
-
-namespace arm_compute
-{
-namespace opencl
-{
-/* Tensor handler to wrap and handle tensor allocations on workspace buffers */
-class CLAuxTensorHandler
-{
-public:
-    CLAuxTensorHandler(int slot_id, TensorInfo &info, ITensorPack &pack, bool pack_inject = false)
-        : _tensor()
-    {
-        _tensor.allocator()->soft_init(info);
-
-        ICLTensor *packed_tensor = utils::cast::polymorphic_downcast<ICLTensor *>(pack.get_tensor(slot_id));
-        if((packed_tensor == nullptr) || (info.total_size() > packed_tensor->info()->total_size()))
-        {
-            _tensor.allocator()->allocate();
-            if(pack_inject)
-            {
-                pack.add_tensor(slot_id, &_tensor);
-                _injected_tensor_pack = &pack;
-                _injected_slot_id     = slot_id;
-            }
-        }
-        else
-        {
-            _tensor.allocator()->import_memory(packed_tensor->cl_buffer());
-        }
-    }
-
-    CLAuxTensorHandler(TensorInfo &info, ICLTensor &tensor)
-        : _tensor()
-    {
-        _tensor.allocator()->soft_init(info);
-        if(info.total_size() <= tensor.info()->total_size())
-        {
-            _tensor.allocator()->import_memory(tensor.cl_buffer());
-        }
-    }
-
-    CLAuxTensorHandler(const CLAuxTensorHandler &) = delete;
-    CLAuxTensorHandler &operator=(const CLAuxTensorHandler) = delete;
-
-    ~CLAuxTensorHandler()
-    {
-        if(_injected_tensor_pack)
-        {
-            _injected_tensor_pack->remove_tensor(_injected_slot_id);
-        }
-    }
-
-    ICLTensor *get()
-    {
-        return &_tensor;
-    }
-
-    ICLTensor *operator()()
-    {
-        return &_tensor;
-    }
-
-private:
-    CLTensor     _tensor{};
-    ITensorPack *_injected_tensor_pack{ nullptr };
-    int          _injected_slot_id{ TensorType::ACL_UNKNOWN };
-};
-} // namespace opencl
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CL_UTILS_CL_AUX_TENSOR_HANDLER_H */
-\ No newline at end of file