25 files changed, 0 insertions, 4587 deletions
diff --git a/src/core/GLES_COMPUTE/kernels/GCAbsoluteDifferenceKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCAbsoluteDifferenceKernel.cpp
deleted file mode 100644
index e7ff13692e..0000000000
--- a/src/core/GLES_COMPUTE/kernels/GCAbsoluteDifferenceKernel.cpp
+++ /dev/null
@@ -1,107 +0,0 @@
-/*
- * Copyright (c) 2017-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/core/GLES_COMPUTE/kernels/GCAbsoluteDifferenceKernel.h"
-
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h"
-#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h"
-#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h"
-#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/Window.h"
-#include "support/StringSupport.h"
-
-#include <set>
-#include <string>
-
-using namespace arm_compute;
-
-GCAbsoluteDifferenceKernel::GCAbsoluteDifferenceKernel()
-    : _input1(nullptr), _input2(nullptr), _output(nullptr)
-{
-}
-
-void GCAbsoluteDifferenceKernel::configure(const IGCTensor *input1, const IGCTensor *input2, IGCTensor *output)
-{
-    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input1, 1, DataType::U8);
-    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input2, 1, DataType::U8);
-    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8);
-    ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input1, input2, output);
-
-    _input1 = input1;
-    _input2 = input2;
-    _output = output;
-
-    constexpr unsigned int num_elems_processed_per_iteration = 4;
-
-    // Set kernel build options
-    std::set<std::string> build_opts;
-    build_opts.emplace("#define LOCAL_SIZE_X " + support::cpp11::to_string(1));
-    build_opts.emplace("#define LOCAL_SIZE_Y " + support::cpp11::to_string(1));
-    build_opts.emplace("#define LOCAL_SIZE_Z " + support::cpp11::to_string(1));
-
-    // Create kernel
-    _kernel = static_cast<GCKernel>(GCKernelLibrary::get().create_kernel("absdiff", build_opts));
-
-    // Configure kernel window
-    Window win = calculate_max_window(*input1->info(), Steps(num_elems_processed_per_iteration));
-
-    AccessWindowRectangle input1_access(input1->info(), 0, 0, 4, 1);
-    AccessWindowRectangle input2_access(input2->info(), 0, 0, 4, 1);
-    AccessWindowRectangle output_access(output->info(), 0, 0, 4, 1);
-
-    update_window_and_padding(win, input1_access, input2_access, output_access);
-
-    ValidRegion valid_region = intersect_valid_regions(input1->info()->valid_region(),
-                                                       input2->info()->valid_region());
-
-    output_access.set_valid_region(win, valid_region);
-
-    IGCKernel::configure(win);
-}
-
-void GCAbsoluteDifferenceKernel::run(const Window &window)
-{
-    ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
-    ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IGCKernel::window(), window);
-
-    _kernel.use();
-
-    Window slice = window.first_slice_window_2D();
-    do
-    {
-        unsigned int idx     = 0;
-        unsigned int binding = 1; // SSBO binding starts from 1.
-        add_2D_tensor_argument(idx, _input1, binding++, slice);
-        add_2D_tensor_argument(idx, _input2, binding++, slice);
-        add_2D_tensor_argument(idx, _output, binding++, slice);
-
-        _kernel.update_shader_params();
-
-        enqueue(*this, slice);
-    }
-    while(window.slide_window_slice_2D(slice));
-}
diff --git a/src/core/GLES_COMPUTE/kernels/GCActivationLayerKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCActivationLayerKernel.cpp
deleted file mode 100644
index 5aad8070fc..0000000000
--- a/src/core/GLES_COMPUTE/kernels/GCActivationLayerKernel.cpp
+++ /dev/null
@@ -1,134 +0,0 @@
-/*
- * Copyright (c) 2017-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/core/GLES_COMPUTE/kernels/GCActivationLayerKernel.h"
-
-#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h"
-#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h"
-#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/IAccessWindow.h"
-#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Utils.h"
-#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/Window.h"
-#include "support/StringSupport.h"
-
-#include <set>
-#include <string>
-
-using namespace arm_compute;
-
-GCActivationLayerKernel::GCActivationLayerKernel(GCCoreRuntimeContext *ctx)
-    : _input(nullptr), _output(nullptr), _ctx(ctx)
-{
-}
-
-void GCActivationLayerKernel::configure(IGCTensor *input, IGCTensor *output, ActivationLayerInfo act_info)
-{
-    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32);
-
-    // Make sure _kernel is initialized before calling the parent's configure
-    _input  = input;
-    _output = input;
-
-    if(output != nullptr)
-    {
-        // Output auto inizialitation if not yet initialized
-        auto_init_if_empty(*output->info(), input->info()->tensor_shape(), 1, input->info()->data_type());
-
-        ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(input, output);
-        ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
-
-        _output = output;
-    }
-
-    unsigned int num_elems_processed_per_iteration = 4 / input->info()->element_size();
-
-    // Set build options
-    std::set<std::string> build_opts;
-    std::string           dt_name = (input->info()->data_type() == DataType::F32) ? "DATA_TYPE_FP32" : "DATA_TYPE_FP16";
-    build_opts.emplace(("#define " + string_from_activation_func(act_info.activation())));
-    build_opts.emplace(("#define " + dt_name));
-    build_opts.emplace(("#define A_VAL " + float_to_string_with_full_precision(act_info.a())));
-    build_opts.emplace(("#define B_VAL " + float_to_string_with_full_precision(act_info.b())));
-    build_opts.emplace(("#define LOCAL_SIZE_X " + support::cpp11::to_string(1)));
-    build_opts.emplace(("#define LOCAL_SIZE_Y " + support::cpp11::to_string(1)));
-    build_opts.emplace(("#define LOCAL_SIZE_Z " + support::cpp11::to_string(1)));
-
-    // Create kernel
-    _kernel = create_opengl_kernel(_ctx, "activation_layer", build_opts);
-
-    // Configure kernel window
-    Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration));
-
-    if(output != nullptr)
-    {
-        AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration);
-
-        update_window_and_padding(win,
-                                  AccessWindowHorizontal(input->info(), 0, num_elems_processed_per_iteration),
-                                  output_access);
-
-        output->info()->set_valid_region(ValidRegion(Coordinates(), output->info()->tensor_shape()));
-    }
-    else
-    {
-        update_window_and_padding(win,
-                                  AccessWindowHorizontal(input->info(), 0, num_elems_processed_per_iteration));
-    }
-
-    IGCKernel::configure(win);
-}
-
-void GCActivationLayerKernel::run(const Window &window)
-{
-    ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
-    ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IGCKernel::window(), window);
-
-    _kernel.use();
-
-    _output->set_needs_shifting(true);
-
-    Window collapsed = window.collapse_if_possible(IGCKernel::window(), Window::DimZ);
-    Window slice     = collapsed.first_slice_window_3D();
-    Window slice_in  = collapsed.first_slice_window_3D();
-
-    slice.shift(Window::DimX, -(_output->info()->padding()).left);
-
-    if(_input == _output)
-    {
-        slice_in.shift(Window::DimX, -(_input->info()->padding()).left);
-    }
-
-    do
-    {
-        unsigned int idx     = 0;
-        unsigned int binding = 1;
-        add_3D_tensor_argument(idx, _input, binding++, slice);
-        add_3D_tensor_argument(idx, _output, binding++, slice_in);
-        _kernel.update_shader_params();
-        enqueue(*this, slice);
-    }
-    while(collapsed.slide_window_slice_3D(slice) && collapsed.slide_window_slice_3D(slice_in));
-}
diff --git a/src/core/GLES_COMPUTE/kernels/GCArithmeticAdditionKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCArithmeticAdditionKernel.cpp
deleted file mode 100644
index 0a5fe11347..0000000000
--- a/src/core/GLES_COMPUTE/kernels/GCArithmeticAdditionKernel.cpp
+++ /dev/null
@@ -1,159 +0,0 @@
-/*
- * Copyright (c) 2016-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/core/GLES_COMPUTE/kernels/GCArithmeticAdditionKernel.h"
-
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h"
-#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h"
-#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h"
-#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/IAccessWindow.h"
-#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Utils.h"
-#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/Window.h"
-#include "support/StringSupport.h"
-
-#include <cstddef>
-#include <set>
-#include <string>
-
-using namespace arm_compute;
-
-namespace
-{
-Status validate_arguments(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ConvertPolicy policy)
-{
-    ARM_COMPUTE_UNUSED(policy);
-    ARM_COMPUTE_ERROR_ON_NULLPTR(input1, input2, output);
-    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input1, 1, DataType::F16);
-    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input2, 1, DataType::F16);
-    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input1, input2);
-
-    // Validate in case of configured output
-    if((output != nullptr) && (output->total_size() != 0))
-    {
-        ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::F16);
-        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input1, output);
-    }
-
-    return Status{};
-}
-
-std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output)
-{
-    constexpr unsigned int num_elems_processed_per_iteration = 8;
-
-    Window win = calculate_max_window(*input1, Steps(num_elems_processed_per_iteration));
-
-    AccessWindowHorizontal input1_access(input1, 0, num_elems_processed_per_iteration);
-    AccessWindowHorizontal input2_access(input2, 0, num_elems_processed_per_iteration);
-    AccessWindowHorizontal output_access(output, 0, num_elems_processed_per_iteration);
-
-    bool window_changed = update_window_and_padding(win, input1_access, input2_access, output_access);
-
-    ValidRegion valid_region = intersect_valid_regions(input1->valid_region(),
-                                                       input2->valid_region());
-
-    output_access.set_valid_region(win, valid_region);
-
-    Status err = (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{};
-    return std::make_pair(err, win);
-}
-} // namespace
-
-GCArithmeticAdditionKernel::GCArithmeticAdditionKernel()
-    : _input1(nullptr), _input2(nullptr), _output(nullptr)
-{
-}
-
-void GCArithmeticAdditionKernel::configure(const IGCTensor *input1, const IGCTensor *input2, IGCTensor *output, ConvertPolicy policy)
-{
-    ARM_COMPUTE_UNUSED(policy);
-    ARM_COMPUTE_ERROR_ON_NULLPTR(input1, input2, output);
-
-    // Auto initialize output if not initialized
-    {
-        set_shape_if_empty(*output->info(), input1->info()->tensor_shape());
-        set_format_if_unknown(*output->info(), Format::F16);
-    }
-
-    ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input1->info(), input2->info(), output->info(), policy));
-
-    _input1 = input1;
-    _input2 = input2;
-    _output = output;
-
-    // Set kernel build options
-    std::set<std::string> build_opts;
-    build_opts.emplace("#define LOCAL_SIZE_X " + support::cpp11::to_string(1));
-    build_opts.emplace("#define LOCAL_SIZE_Y " + support::cpp11::to_string(1));
-    build_opts.emplace("#define LOCAL_SIZE_Z " + support::cpp11::to_string(1));
-
-    // Create kernel
-    _kernel = static_cast<GCKernel>(GCKernelLibrary::get().create_kernel("arithmetic_add", build_opts));
-
-    // Configure kernel window
-    auto win_config = validate_and_configure_window(input1->info(), input2->info(), output->info());
-    ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
-    IGCKernel::configure(win_config.second);
-}
-
-Status GCArithmeticAdditionKernel::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ConvertPolicy policy)
-{
-    ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input1, input2, output, policy));
-    ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input1->clone().get(), input2->clone().get(), output->clone().get()).first);
-
-    return Status{};
-}
-
-void GCArithmeticAdditionKernel::run(const Window &window)
-{
-    ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
-    ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IGCKernel::window(), window);
-
-    _kernel.use();
-
-    _output->set_needs_shifting(true);
-
-    Window slice    = window.first_slice_window_3D();
-    Window slice_in = window.first_slice_window_3D();
-
-    slice.shift(Window::DimX, -(_output->info()->padding()).left);
-
-    do
-    {
-        unsigned int idx     = 0;
-        unsigned int binding = 1; // SSBO binding starts from 1.
-        add_3D_tensor_argument(idx, _input1, binding++, slice_in);
-        add_3D_tensor_argument(idx, _input2, binding++, slice_in);
-        add_3D_tensor_argument(idx, _output, binding++, slice);
-
-        _kernel.update_shader_params();
-
-        enqueue(*this, slice);
-    }
-    while(window.slide_window_slice_3D(slice) && window.slide_window_slice_3D(slice_in));
-}
diff --git a/src/core/GLES_COMPUTE/kernels/GCBatchNormalizationLayerKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCBatchNormalizationLayerKernel.cpp
deleted file mode 100644
index a1f7cd7eca..0000000000
--- a/src/core/GLES_COMPUTE/kernels/GCBatchNormalizationLayerKernel.cpp
+++ /dev/null
@@ -1,245 +0,0 @@
-/*
- * Copyright (c) 2017-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/core/GLES_COMPUTE/kernels/GCBatchNormalizationLayerKernel.h"
-
-#include "arm_compute/core/AccessWindowStatic.h"
-#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h"
-#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h"
-#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/Utils.h"
-#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/Window.h"
-
-#include "support/StringSupport.h"
-
-using namespace arm_compute;
-
-namespace
-{
-Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output,
-                          const ITensorInfo *mean, const ITensorInfo *var,
-                          const ITensorInfo *beta, const ITensorInfo *gamma,
-                          float epsilon, ActivationLayerInfo act_info)
-{
-    ARM_COMPUTE_UNUSED(epsilon);
-    ARM_COMPUTE_UNUSED(var);
-    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32);
-
-    ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, mean, var);
-    ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(mean, var);
-
-    if(output->total_size() != 0)
-    {
-        ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(input, output);
-        ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
-    }
-
-    if(beta != nullptr)
-    {
-        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(mean, beta);
-        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, beta);
-    }
-    if(gamma != nullptr)
-    {
-        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(mean, gamma);
-        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, gamma);
-    }
-    if(act_info.enabled())
-    {
-        ARM_COMPUTE_ERROR_ON(input->data_type() != DataType::F32 && input->data_type() != DataType::F16);
-        ARM_COMPUTE_ERROR_ON(act_info.activation() != ActivationLayerInfo::ActivationLayerInfo::ActivationFunction::RELU
-                             && act_info.activation() != ActivationLayerInfo::ActivationLayerInfo::ActivationFunction::BOUNDED_RELU
-                             && act_info.activation() != ActivationLayerInfo::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU);
-        ARM_COMPUTE_ERROR_ON(act_info.b() > act_info.a());
-    }
-    return Status{};
-}
-
-std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *output,
-                                                        ITensorInfo *mean, ITensorInfo *var,
-                                                        ITensorInfo *beta, ITensorInfo *gamma)
-{
-    // Output tensor auto initialization if not yet initialized
-    auto_init_if_empty(*output, input->tensor_shape(), 1, input->data_type());
-
-    unsigned int num_elems_processed_per_iteration = 1;
-    if(input->data_type() == DataType::F16)
-    {
-        num_elems_processed_per_iteration = 4;
-    }
-
-    // Configure kernel window
-    Window win = calculate_max_window(*input, Steps(num_elems_processed_per_iteration));
-
-    AccessWindowHorizontal input_access(input, 0, num_elems_processed_per_iteration);
-    AccessWindowHorizontal output_access(output, 0, num_elems_processed_per_iteration);
-    AccessWindowStatic     mean_access(mean, 0, 0, mean->dimension(0) + 3, mean->dimension(1));
-    AccessWindowStatic     var_access(var, 0, 0, var->dimension(0) + 3, var->dimension(1));
-
-    bool window_changed = false;
-    if(beta != nullptr)
-    {
-        AccessWindowStatic beta_access(beta, 0, 0, beta->dimension(0) + 3, beta->dimension(1));
-        if(gamma != nullptr)
-        {
-            AccessWindowStatic gamma_access(gamma, 0, 0, gamma->dimension(0) + 3, gamma->dimension(1));
-            window_changed = update_window_and_padding(win, input_access, output_access, mean_access, var_access, beta_access, gamma_access);
-        }
-        else
-        {
-            window_changed = update_window_and_padding(win, input_access, output_access, mean_access, var_access, beta_access);
-        }
-    }
-    else
-    {
-        if(gamma != nullptr)
-        {
-            AccessWindowStatic gamma_access(gamma, 0, 0, gamma->dimension(0) + 3, gamma->dimension(1));
-            window_changed = update_window_and_padding(win, input_access, output_access, mean_access, var_access, gamma_access);
-        }
-        else
-        {
-            window_changed = update_window_and_padding(win, input_access, output_access, mean_access, var_access);
-        }
-    }
-    output_access.set_valid_region(win, input->valid_region());
-
-    Status err = (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{};
-    return std::make_pair(err, win);
-}
-} // namespace
-
-GCBatchNormalizationLayerKernel::GCBatchNormalizationLayerKernel()
-    : _input(nullptr), _output(nullptr), _mean(nullptr), _var(nullptr), _beta(nullptr), _gamma(nullptr), _epsilon(0.0f)
-{
-}
-
-void GCBatchNormalizationLayerKernel::configure(const IGCTensor *input, IGCTensor *output, const IGCTensor *mean, const IGCTensor *var, const IGCTensor *beta, const IGCTensor *gamma,
-                                                float epsilon, ActivationLayerInfo act_info)
-{
-    ARM_COMPUTE_ERROR_ON_NULLPTR(input, output, mean, var);
-
-    ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), mean->info(), var->info(),
-                                                  (beta != nullptr) ? beta->info() : nullptr, (gamma != nullptr) ? gamma->info() : nullptr,
-                                                  epsilon, act_info));
-
-    _input   = input;
-    _output  = output;
-    _mean    = mean;
-    _var     = var;
-    _beta    = beta;
-    _gamma   = gamma;
-    _epsilon = epsilon;
-
-    // Set build options
-    std::set<std::string> build_opts;
-    std::string           dt_name = (input->info()->data_type() == DataType::F32) ? "DATA_TYPE_FP32" : "DATA_TYPE_FP16";
-    build_opts.emplace(("#define " + dt_name));
-    build_opts.emplace(("#define ESPILON " + float_to_string_with_full_precision(_epsilon)));
-    build_opts.emplace(("#define LOCAL_SIZE_X " + support::cpp11::to_string(1)));
-    build_opts.emplace(("#define LOCAL_SIZE_Y " + support::cpp11::to_string(1)));
-    build_opts.emplace(("#define LOCAL_SIZE_Z " + support::cpp11::to_string(1)));
-    if(beta == nullptr)
-    {
-        build_opts.emplace("#define USE_DEFAULT_BETA");
-    }
-    if(gamma == nullptr)
-    {
-        build_opts.emplace("#define USE_DEFAULT_GAMMA");
-    }
-
-    if(act_info.enabled())
-    {
-        build_opts.emplace("#define " + string_from_activation_func(act_info.activation()));
-        build_opts.emplace("#define A_VAL " + float_to_string_with_full_precision(act_info.a()));
-        build_opts.emplace("#define B_VAL " + float_to_string_with_full_precision(act_info.b()));
-    }
-
-    // Create kernel
-    _kernel = static_cast<GCKernel>(GCKernelLibrary::get().create_kernel("batchnormalization_layer", build_opts));
-
-    // Configure kernel window
-    auto win_config = validate_and_configure_window(input->info(), output->info(), mean->info(), var->info(),
-                                                    (beta != nullptr) ? beta->info() : nullptr, (gamma != nullptr) ? gamma->info() : nullptr);
-    ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
-
-    IGCKernel::configure(win_config.second);
-}
-
-Status GCBatchNormalizationLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *output,
-                                                 const ITensorInfo *mean, const ITensorInfo *var,
-                                                 const ITensorInfo *beta, const ITensorInfo *gamma,
-                                                 float epsilon, ActivationLayerInfo act_info)
-{
-    ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, mean, var, beta, gamma, epsilon, act_info));
-    ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input->clone().get(), output->clone().get(),
-                                                              mean->clone().get(), var->clone().get(),
-                                                              beta->clone().get(), gamma->clone().get())
-                                .first);
-
-    return Status{};
-}
-
-void GCBatchNormalizationLayerKernel::run(const Window &window)
-{
-    ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
-    ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window);
-
-    _kernel.use();
-
-    _output->set_needs_shifting(true);
-
-    Window slice    = window.first_slice_window_3D();
-    Window slice_in = window.first_slice_window_3D();
-
-    Window vector_slice = window.first_slice_window_1D();
-    vector_slice.set(Window::DimX, Window::Dimension(0, 0, 0));
-
-    unsigned int idx           = 2 * num_arguments_per_3D_tensor();
-    unsigned int binding_point = 3;
-    add_1D_tensor_argument(idx, _mean, binding_point, vector_slice);
-    add_1D_tensor_argument(idx, _var, ++binding_point, vector_slice);
-    if(_beta != nullptr)
-    {
-        add_1D_tensor_argument(idx, _beta, ++binding_point, vector_slice);
-    }
-    if(_gamma != nullptr)
-    {
-        add_1D_tensor_argument(idx, _gamma, ++binding_point, vector_slice);
-    }
-
-    slice.shift(Window::DimX, -(_output->info()->padding()).left);
-
-    do
-    {
-        idx = 0;
-        add_3D_tensor_argument(idx, _input, 1, slice_in);
-        add_3D_tensor_argument(idx, _output, 2, slice);
-
-        _kernel.update_shader_params();
-        enqueue(*this, slice);
-    }
-    while(window.slide_window_slice_3D(slice) && window.slide_window_slice_3D(slice_in));
-}
diff --git a/src/core/GLES_COMPUTE/kernels/GCCol2ImKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCCol2ImKernel.cpp
deleted file mode 100644
index 1e48dc8e88..0000000000
--- a/src/core/GLES_COMPUTE/kernels/GCCol2ImKernel.cpp
+++ /dev/null
@@ -1,124 +0,0 @@
-/*
- * Copyright (c) 2017-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "arm_compute/core/GLES_COMPUTE/kernels/GCCol2ImKernel.h"
-
-#include "arm_compute/core/AccessWindowStatic.h"
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h"
-#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h"
-#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h"
-#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/core/Validate.h"
-#include "support/StringSupport.h"
-
-using namespace arm_compute;
-
-GCCol2ImKernel::GCCol2ImKernel()
-    : _input(nullptr), _output(nullptr), _convolved_dims()
-{
-}
-
-void GCCol2ImKernel::configure(const IGCTensor *input, IGCTensor *output,
-                               std::pair<unsigned int, unsigned int> convolved_dims)
-{
-    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32);
-    ARM_COMPUTE_ERROR_ON_NULLPTR(output);
-
-    TensorShape output_shape = input->info()->tensor_shape();
-    output_shape.set(0, convolved_dims.first);
-    output_shape.set(1, convolved_dims.second);
-    output_shape.set(2, input->info()->tensor_shape()[0]);
-
-    // Output auto inizialitation if not yet initialized
-    auto_init_if_empty(*output->info(), input->info()->clone()->set_tensor_shape(output_shape));
-
-    ARM_COMPUTE_ERROR_ON_MISMATCHING_DIMENSIONS(output->info()->tensor_shape(), output_shape);
-    ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
-
-    _input          = input;
-    _output         = output;
-    _convolved_dims = convolved_dims;
-
-    const DataType     dt         = input->info()->data_type();
-    const unsigned int local_size = 1;
-
-    // Create kernel
-    std::set<std::string> build_opts;
-    build_opts.emplace("#define COL2IM ");
-    build_opts.emplace("#define WIDTH_OUTPUT " + support::cpp11::to_string(_convolved_dims.first));
-    const std::string dt_name = (dt == DataType::F32) ? "DATA_TYPE_FP32" : "DATA_TYPE_FP16";
-    build_opts.emplace(("#define " + dt_name));
-    build_opts.emplace("#define LOCAL_SIZE_X " + support::cpp11::to_string(local_size));
-    build_opts.emplace("#define LOCAL_SIZE_Y " + support::cpp11::to_string(local_size));
-    build_opts.emplace("#define LOCAL_SIZE_Z " + support::cpp11::to_string(local_size));
-
-    _kernel = static_cast<GCKernel>(GCKernelLibrary::get().create_kernel("col2im", build_opts));
-
-    // Configure window
-    const unsigned int num_elems_processed_per_iteration = (dt == DataType::F32) ? 1 : 2;
-
-    Window win = calculate_max_window(*output->info(), Steps(num_elems_processed_per_iteration));
-
-    AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration);
-    const int              input_padding = ceil_to_multiple(input->info()->dimension(0), 2) - input->info()->dimension(0);
-
-    AccessWindowStatic input_access(input->info(), 0, 0, input->info()->dimension(0) + input_padding, input->info()->dimension(1) + 1);
-
-    update_window_and_padding(win, input_access, output_access);
-
-    output_access.set_valid_region(win, output->info()->valid_region());
-
-    IGCKernel::configure(win);
-}
-
-void GCCol2ImKernel::run(const Window &window)
-{
-    ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
-    ARM_COMPUTE_ERROR_ON_MISMATCHING_WINDOWS(IGCKernel::window(), window);
-
-    _kernel.use();
-
-    Window collapsed_window = window.collapse_if_possible(IGCKernel::window(), Window::DimZ);
-    Window slice            = collapsed_window.first_slice_window_3D();
-
-    // Set static kernel arguments
-    unsigned int idx = 2 * num_arguments_per_3D_tensor();
-    //_kernel.set_argument(idx++, _output->info()->strides_in_bytes()[3]);
-    _kernel.set_argument(idx++, uint(_output->info()->dimension(2)));
-    _kernel.set_argument(idx++, _input->info()->strides_in_bytes()[2]);
-
-    do
-    {
-        // Set inputs
-        unsigned int idx = 0;
-        add_2D_tensor_argument(idx, _input, 1, slice);
-        add_3D_tensor_argument(idx, _output, 2, slice);
-        _kernel.update_shader_params();
-        enqueue(*this, slice);
-    }
-    while(collapsed_window.slide_window_slice_3D(slice));
-}
diff --git a/src/core/GLES_COMPUTE/kernels/GCDepthConcatenateLayerKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCDepthConcatenateLayerKernel.cpp
deleted file mode 100644
index c6345ba679..0000000000
--- a/src/core/GLES_COMPUTE/kernels/GCDepthConcatenateLayerKernel.cpp
+++ /dev/null
@@ -1,112 +0,0 @@
-/*
- * Copyright (c) 2017-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/core/GLES_COMPUTE/kernels/GCDepthConcatenateLayerKernel.h"
-
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h"
-#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h"
-#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h"
-#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/Utils.h"
-#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/Window.h"
-
-#include "support/StringSupport.h"
-
-using namespace arm_compute;
-
-GCDepthConcatenateLayerKernel::GCDepthConcatenateLayerKernel()
-    : _input(nullptr), _output(nullptr), _depth_offset(0)
-{
-}
-void GCDepthConcatenateLayerKernel::configure(const IGCTensor *input, unsigned int depth_offset, IGCTensor *output)
-{
-    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32);
-    ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
-
-    ARM_COMPUTE_ERROR_ON(input->info()->dimension(Window::DimX) != output->info()->dimension(Window::DimX));
-    ARM_COMPUTE_ERROR_ON(input->info()->dimension(Window::DimY) != output->info()->dimension(Window::DimY));
-    ARM_COMPUTE_ERROR_ON(input->info()->dimension(2) + depth_offset > output->info()->dimension(2));
-    ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(3, input, output);
-
-    _input        = input;
-    _output       = output;
-    _depth_offset = depth_offset;
-
-    // Add build options
-    std::set<std::string> build_opts;
-    std::string           dt_name = (input->info()->data_type() == DataType::F32) ? "DATA_TYPE_FP32" : "DATA_TYPE_FP16";
-    build_opts.emplace(("#define " + dt_name));
-    build_opts.emplace("#define LOCAL_SIZE_X " + support::cpp11::to_string(1));
-    build_opts.emplace("#define LOCAL_SIZE_Y " + support::cpp11::to_string(1));
-    build_opts.emplace("#define LOCAL_SIZE_Z " + support::cpp11::to_string(1));
-
-    // Create kernel
-    _kernel = static_cast<GCKernel>(GCKernelLibrary::get().create_kernel("concatenate_depth", build_opts));
-
-    unsigned int num_elems_processed_per_iteration = 1;
-    if(input->info()->data_type() == DataType::F16)
-    {
-        num_elems_processed_per_iteration = 4;
-    }
-
-    // The window needs to be based on input as we copy all the depths of input
-    Window win = calculate_max_window(*output->info(), Steps(num_elems_processed_per_iteration));
-    win.set(Window::DimZ, Window::Dimension(0, input->info()->tensor_shape().z(), 1));
-
-    AccessWindowHorizontal input_access(input->info(), 0, num_elems_processed_per_iteration);
-    AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration);
-    update_window_and_padding(win, input_access, output_access);
-    output_access.set_valid_region(win, ValidRegion(Coordinates(), output->info()->tensor_shape()));
-
-    IGCKernel::configure(win);
-}
-
-void GCDepthConcatenateLayerKernel::run(const Window &window)
-{
-    ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
-    ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IGCKernel::window(), window);
-
-    _kernel.use();
-
-    _output->set_needs_shifting(true);
-
-    Window slice_in  = window.first_slice_window_3D();
-    Window slice_out = window.first_slice_window_3D();
-
-    slice_out.set(Window::DimZ, Window::Dimension(_depth_offset));
-
-    do
-    {
-        unsigned int idx = 0;
-        add_3D_tensor_argument(idx, _input, 1, slice_in);
-        add_3D_tensor_argument(idx, _output, 2, slice_out);
-
-        _kernel.update_shader_params();
-
-        enqueue(*this, slice_in);
-    }
-    while(window.slide_window_slice_3D(slice_in));
-}
diff --git a/src/core/GLES_COMPUTE/kernels/GCDepthwiseConvolutionLayer3x3Kernel.cpp b/src/core/GLES_COMPUTE/kernels/GCDepthwiseConvolutionLayer3x3Kernel.cpp
deleted file mode 100644
index c60f4688a6..0000000000
--- a/src/core/GLES_COMPUTE/kernels/GCDepthwiseConvolutionLayer3x3Kernel.cpp
+++ /dev/null
@@ -1,250 +0,0 @@
-/*
- * Copyright (c) 2017-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/core/GLES_COMPUTE/kernels/GCDepthwiseConvolutionLayer3x3Kernel.h"
-
-#include "arm_compute/core/AccessWindowStatic.h"
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h"
-#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h"
-#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h"
-#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/core/Utils.h"
-#include "arm_compute/core/utils/misc/ShapeCalculator.h"
-#include "support/StringSupport.h"
-
-using namespace arm_compute;
-using namespace arm_compute::misc::shape_calculator;
-
-GCDepthwiseConvolutionLayer3x3Kernel::GCDepthwiseConvolutionLayer3x3Kernel()
-    : _border_size(0), _input(), _output(), _weights(), _biases(), _conv_stride_x(0), _conv_stride_y(0), _conv_pad_left(0), _conv_pad_top(0), _lws(gles::NDRange(1U, 1U, 1U))
-{
-}
-
-BorderSize GCDepthwiseConvolutionLayer3x3Kernel::border_size() const
-{
-    return _border_size;
-}
-
-void GCDepthwiseConvolutionLayer3x3Kernel::configure(const IGCTensor *input, const IGCTensor *weights, const IGCTensor *biases, IGCTensor *output, const PadStrideInfo &conv_info,
-                                                     unsigned int depth_multiplier)
-{
-    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16);
-    ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights);
-    ARM_COMPUTE_ERROR_ON(weights->info()->dimension(0) != 3 || weights->info()->dimension(1) != 3);
-
-    if(biases != nullptr)
-    {
-        ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(weights, biases);
-        ARM_COMPUTE_ERROR_ON(biases->info()->dimension(0) != weights->info()->dimension(2));
-        ARM_COMPUTE_ERROR_ON(biases->info()->num_dimensions() > 1);
-    }
-
-    // Get convolved dimensions
-    const TensorShape output_shape = compute_depthwise_convolution_shape(*input->info(), *weights->info(), conv_info, depth_multiplier);
-
-    // Output auto inizialitation if not yet initialized
-    auto_init_if_empty(*output->info(),
-                       output_shape,
-                       1,
-                       input->info()->data_type());
-
-    ARM_COMPUTE_ERROR_ON_MISMATCHING_DIMENSIONS(output->info()->tensor_shape(), output_shape);
-    ARM_COMPUTE_ERROR_ON(output->info()->dimension(2) != weights->info()->dimension(2));
-
-    _input         = input;
-    _output        = output;
-    _weights       = weights;
-    _biases        = biases;
-    _conv_stride_x = conv_info.stride().first;
-    _conv_stride_y = conv_info.stride().second;
-    _conv_pad_left = conv_info.pad_left();
-    _conv_pad_top  = conv_info.pad_top();
-    _border_size   = BorderSize(_conv_pad_top, conv_info.pad_right(), conv_info.pad_bottom(), _conv_pad_left);
-
-    // Set build options
-    ARM_COMPUTE_ERROR_ON(_conv_stride_x < 1 || _conv_stride_x > 3);
-    std::set<std::string> options;
-
-    options.emplace("#define DEPTH_MULTIPLIER " + support::cpp11::to_string(depth_multiplier));
-    options.emplace("#define LOCAL_SIZE_X " + support::cpp11::to_string(_lws[0]));
-    options.emplace("#define LOCAL_SIZE_Y " + support::cpp11::to_string(_lws[1]));
-    options.emplace("#define LOCAL_SIZE_Z " + support::cpp11::to_string(_lws[2]));
-    options.emplace("#define STRIDE_X " + support::cpp11::to_string(_conv_stride_x));
-    options.emplace("#define STRIDE_Y " + support::cpp11::to_string(_conv_stride_y));
-
-    std::string dt_name = (input->info()->data_type() == DataType::F32) ? "DATA_TYPE_FP32" : "DATA_TYPE_FP16";
-    options.emplace(("#define " + dt_name));
-
-    unsigned int num_elems_read_per_iteration_x    = 8;
-    unsigned int num_elems_read_per_iteration_y    = 1;
-    unsigned int num_elems_written_per_iteration_x = 4;
-    unsigned int num_elems_written_per_iteration_y = 1;
-    unsigned int num_elems_written_per_iteration_z = 1;
-
-    if((_conv_stride_x == 1) && (_conv_stride_y == 1))
-    {
-        switch(input->info()->data_type())
-        {
-#define PROCESS_4X_3Y_1Z
-
-            case DataType::F16:
-#if defined(PROCESS_4X_3Y_1Z)
-                options.emplace("#define PROCESS_4X_3Y_1Z");
-                num_elems_read_per_iteration_y    = 5;
-                num_elems_written_per_iteration_y = 3;
-#endif /* PROCESS_4X_3Y_1Z */
-#undef PROCESS_4X_3Y_1Z
-                break;
-
-            default:
-                ARM_COMPUTE_ERROR("Current data type is not supported");
-                break;
-        }
-    }
-    else
-    {
-        switch(input->info()->data_type())
-        {
-            case DataType::F16:
-                options.emplace("#define PROCESS_4X_1Y_1Z");
-                break;
-
-            default:
-                ARM_COMPUTE_ERROR("Current data type is not supported");
-                break;
-        }
-    }
-
-    if(_biases != nullptr)
-    {
-        options.emplace("#define BIAS");
-    }
-
-    // Create kernel
-    std::string kernel_name = "depthwise_convolution_3x3";
-    _kernel                 = static_cast<GCKernel>(GCKernelLibrary::get().create_kernel(kernel_name, options));
-
-    // Calculate output right and bottom border
-    const int output_width          = output->info()->dimension(0);
-    const int output_height         = output->info()->dimension(1);
-    const int output_padding_right  = ceil_to_multiple(output_width, num_elems_written_per_iteration_x * _lws[0]) - output_width;
-    const int output_padding_bottom = ceil_to_multiple(output_height, num_elems_written_per_iteration_y * _lws[1]) - output_height;
-
-    // Calculate input right and bottom border
-    const int input_width  = input->info()->dimension(0);
-    const int input_height = input->info()->dimension(1);
-
-    const int input_total_width  = std::max(int(input->info()->padding().left), int(_conv_pad_left)) + input_width + std::max(int(input->info()->padding().right), int(_conv_pad_left));
-    const int input_total_height = std::max(int(input->info()->padding().top), int(_conv_pad_top)) + input_height + std::max(int(input->info()->padding().bottom), int(_conv_pad_top));
-
-    const int input_padding_right  = ceil_to_multiple(input_total_width, num_elems_read_per_iteration_x * _lws[0]) - input_width - _conv_pad_left;
-    const int input_padding_bottom = ceil_to_multiple(input_total_height, num_elems_read_per_iteration_y * _lws[1]) - input_height - _conv_pad_top;
-
-    BorderSize border = BorderSize(0, output_padding_right, output_padding_bottom, 0);
-
-    Window win = calculate_max_enlarged_window(*output->info(), Steps(num_elems_written_per_iteration_x, num_elems_written_per_iteration_y, num_elems_written_per_iteration_z), border);
-
-    AccessWindowStatic input_access(input->info(), -_conv_pad_left, -_conv_pad_top, input_width + input_padding_right, input_height + input_padding_bottom);
-    AccessWindowStatic weights_access = AccessWindowStatic(nullptr, 0, 0, 0, 0);
-    AccessWindowStatic bias_access    = AccessWindowStatic(nullptr, 0, 0, 0, 1);
-
-    switch(weights->info()->data_type())
-    {
-        case DataType::F16:
-            weights_access = AccessWindowStatic(weights->info(), 0, 0, 4, 3);
-            if(_biases != nullptr)
-            {
-                bias_access = AccessWindowStatic(_biases->info(), 0, 0, _biases->info()->dimension(0) + 1, 1);
-            }
-            break;
-
-        default:
-            ARM_COMPUTE_ERROR("Current data type is not supported");
-            break;
-    }
-
-    AccessWindowStatic output_access(output->info(), 0, 0, output_width + output_padding_right, output_height + output_padding_bottom);
-
-    if(_biases != nullptr)
-    {
-        update_window_and_padding(win, input_access, weights_access, bias_access, output_access);
-    }
-    else
-    {
-        update_window_and_padding(win, input_access, weights_access, output_access);
-    }
-
-    output_access.set_valid_region(win, ValidRegion(Coordinates(), output->info()->tensor_shape()));
-
-    IGCKernel::configure(win);
-}
-
-void GCDepthwiseConvolutionLayer3x3Kernel::run(const Window &window)
-{
-    ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
-    ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window);
-
-    _kernel.use();
-
-    _output->set_needs_shifting(true);
-
-    // Create input window and adjust
-    Window win_in = window;
-    win_in.adjust(Window::DimX, -_conv_pad_left, true);
-    win_in.adjust(Window::DimY, -_conv_pad_top, true);
-    win_in.set_dimension_step(Window::DimX, window.x().step() * _conv_stride_x);
-    win_in.set_dimension_step(Window::DimY, window.y().step() * _conv_stride_y);
-
-    Window slice_in      = win_in.first_slice_window_3D();
-    Window slice_out     = window.first_slice_window_3D();
-    Window slice_weights = window.first_slice_window_3D();
-    slice_weights.set_dimension_step(Window::DimX, 0);
-    slice_weights.set_dimension_step(Window::DimY, 0);
-
-    // Set biases
-    if(_biases != nullptr)
-    {
-        unsigned int idx = 3 * num_arguments_per_3D_tensor();
-        Window       slice_biases;
-        slice_biases.use_tensor_dimensions(_biases->info()->tensor_shape());
-        add_1D_tensor_argument(idx, _biases, 4, slice_biases);
-    }
-
-    slice_out.shift(Window::DimX, -(_output->info()->padding()).left);
-
-    do
-    {
-        unsigned int idx = 0;
-        add_3D_tensor_argument(idx, _input, 1, slice_in);
-        add_3D_tensor_argument(idx, _output, 2, slice_out);
-        add_3D_tensor_argument(idx, _weights, 3, slice_weights);
-
-        _kernel.update_shader_params();
-        enqueue(*this, slice_out, _lws);
-    }
-    while(window.slide_window_slice_3D(slice_out) && win_in.slide_window_slice_3D(slice_in));
-}
diff --git a/src/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.cpp
deleted file mode 100644
index f3e47d9ae9..0000000000
--- a/src/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.cpp
+++ /dev/null
@@ -1,448 +0,0 @@
-/*
- * Copyright (c) 2017-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.h"
-
-#include "arm_compute/core/AccessWindowStatic.h"
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h"
-#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h"
-#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/IAccessWindow.h"
-#include "arm_compute/core/ITensor.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/core/Validate.h"
-#include "support/StringSupport.h"
-
-using namespace arm_compute;
-
-template <unsigned int kernel_size>
-GCDirectConvolutionLayerKernel<kernel_size>::GCDirectConvolutionLayerKernel()
-    : _input(nullptr), _bias(nullptr), _weights(nullptr), _output(nullptr), _border_size(0), _conv_stride_x(0), _conv_stride_y(0), _conv_pad_x(0), _conv_pad_y(0), _lws(gles::NDRange(1U, 1U, 1U))
-{
-}
-
-template <unsigned int kernel_size>
-BorderSize GCDirectConvolutionLayerKernel<kernel_size>::border_size() const
-{
-    return _border_size;
-}
-
-template <unsigned int kernel_size>
-void GCDirectConvolutionLayerKernel<kernel_size>::configure(const IGCTensor *input, const IGCTensor *weights, const IGCTensor *bias, IGCTensor *output,
-                                                            const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info)
-{
-    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32);
-    ARM_COMPUTE_ERROR_ON(weights->info()->dimension(2) != input->info()->dimension(2));
-    ARM_COMPUTE_ERROR_ON(weights->info()->dimension(0) != weights->info()->dimension(1));
-    ARM_COMPUTE_ERROR_ON(weights->info()->num_dimensions() > 4);
-    ARM_COMPUTE_ERROR_ON_MSG((kernel_size == 3 && std::get<0>(conv_info.stride()) > 2), "Strides larger than 2 not supported in 3x3 direct convolution!");
-    ARM_COMPUTE_ERROR_ON(kernel_size != weights->info()->dimension(0));
-    ARM_COMPUTE_ERROR_ON(act_info.enabled() && act_info.activation() != ActivationLayerInfo::ActivationFunction::RELU && act_info.activation() != ActivationLayerInfo::ActivationFunction::LOGISTIC);
-
-    if(bias != nullptr)
-    {
-        ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(weights, bias);
-        // FIXME: Bug in framework, workaround it in tests currently.
-        //ARM_COMPUTE_ERROR_ON(bias->info()->dimension(0) != weights->info()->dimension(3));
-        ARM_COMPUTE_ERROR_ON(bias->info()->num_dimensions() > 1);
-    }
-
-    // Get convolved dimensions
-    unsigned int owidth       = 0;
-    unsigned int oheight      = 0;
-    std::tie(owidth, oheight) = scaled_dimensions(input->info()->dimension(0), input->info()->dimension(1), kernel_size, kernel_size, conv_info);
-
-    TensorShape output_shape = input->info()->tensor_shape();
-    output_shape.set(0, owidth);
-    output_shape.set(1, oheight);
-    output_shape.set(2, weights->info()->dimension(3));
-
-    // Output auto inizialitation if not yet initialized
-    auto_init_if_empty(*output->info(), output_shape, 1, input->info()->data_type());
-
-    ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights, output);
-    ARM_COMPUTE_ERROR_ON_MISMATCHING_DIMENSIONS(output->info()->tensor_shape(), output_shape);
-    ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
-    ARM_COMPUTE_ERROR_ON(!conv_info.padding_is_symmetric());
-
-    _conv_stride_x = std::get<0>(conv_info.stride());
-    _conv_stride_y = std::get<1>(conv_info.stride());
-    _conv_pad_x    = std::get<0>(conv_info.pad());
-    _conv_pad_y    = std::get<1>(conv_info.pad());
-
-    _input       = input;
-    _weights     = weights;
-    _output      = output;
-    _bias        = bias;
-    _border_size = BorderSize(_conv_pad_y, _conv_pad_x);
-
-    std::set<std::string> options;
-
-    options.emplace("#define LOCAL_SIZE_X " + support::cpp11::to_string(_lws[0]));
-    options.emplace("#define LOCAL_SIZE_Y " + support::cpp11::to_string(_lws[1]));
-    options.emplace("#define LOCAL_SIZE_Z " + support::cpp11::to_string(_lws[2]));
-    options.emplace("#define STRIDE_X " + support::cpp11::to_string(_conv_stride_x));
-    options.emplace("#define STRIDE_Y " + support::cpp11::to_string(_conv_stride_y));
-
-    std::string dt_name = (input->info()->data_type() == DataType::F32) ? "DATA_TYPE_FP32" : "DATA_TYPE_FP16";
-    options.emplace(("#define " + dt_name));
-
-    // Activation information in case of a fused activation
-    if(act_info.enabled())
-    {
-        options.emplace("#define FUSED_ACTIVATION");
-        options.emplace(("#define " + string_from_activation_func(act_info.activation())));
-        options.emplace(("#define ACT_OP  " + lower_string(string_from_activation_func(act_info.activation())) + "_op"));
-        options.emplace(("#define A_VAL " + float_to_string_with_full_precision(act_info.a())));
-        options.emplace(("#define B_VAL " + float_to_string_with_full_precision(act_info.b())));
-    }
-
-    unsigned int num_elems_read_per_iteration_x    = kernel_size * _conv_stride_x;
-    unsigned int num_elems_read_per_iteration_y    = 1;
-    unsigned int num_elems_written_per_iteration_x = 1;
-    unsigned int num_elems_written_per_iteration_y = 1;
-    unsigned int num_elems_written_per_iteration_z = 1;
-
-    if(kernel_size == 3)
-    {
-        if((_conv_stride_x == 1) && (_conv_stride_y == 1))
-        {
-            switch(input->info()->data_type())
-            {
-                case DataType::F16:
-                    // TODO(APPBROWSER-299): Choose the most optimal path and remove others.
-#define PROCESS_4X_3Y_1Z
-
-#if defined(PROCESS_8X_3Y_1Z)
-                    options.emplace("#define PROCESS_8X_3Y_1Z");
-                    num_elems_read_per_iteration_x    = 16;
-                    num_elems_read_per_iteration_y    = 5;
-                    num_elems_written_per_iteration_x = 8;
-                    num_elems_written_per_iteration_y = 3;
-#elif defined(PROCESS_4X_3Y_1Z)
-                    options.emplace("#define PROCESS_4X_3Y_1Z");
-                    num_elems_read_per_iteration_x    = 8;
-                    num_elems_read_per_iteration_y    = 5;
-                    num_elems_written_per_iteration_x = 4;
-                    num_elems_written_per_iteration_y = 3;
-#elif defined(PROCESS_4X_4Y_1Z)
-                    options.emplace("#define PROCESS_4X_4Y_1Z");
-                    num_elems_read_per_iteration_x    = 8;
-                    num_elems_read_per_iteration_y    = 6;
-                    num_elems_written_per_iteration_x = 4;
-                    num_elems_written_per_iteration_y = 4;
-#elif defined(PROCESS_4X_3Y_2Z)
-                    options.emplace("#define PROCESS_4X_3Y_2Z");
-                    num_elems_read_per_iteration_x    = 8;
-                    num_elems_read_per_iteration_y    = 5;
-                    num_elems_written_per_iteration_x = 4;
-                    num_elems_written_per_iteration_y = 3;
-                    num_elems_written_per_iteration_z = 2;
-#endif /* PROCESS_nX_nY_nZ */
-#undef PROCESS_8X_3Y_1Z
-#undef PROCESS_4X_3Y_1Z
-#undef PROCESS_4X_4Y_1Z
-#undef PROCESS_4X_3Y_2Z
-                    break;
-
-                case DataType::F32:
-                    options.emplace("#define PROCESS_4X_3Y_1Z");
-                    num_elems_read_per_iteration_x    = 8;
-                    num_elems_read_per_iteration_y    = 5;
-                    num_elems_written_per_iteration_x = 4;
-                    num_elems_written_per_iteration_y = 3;
-                    break;
-
-                default:
-                    ARM_COMPUTE_ERROR("Current data type is not supported");
-                    break;
-            }
-        }
-        // FIXME: Just keep one in release
-        else
-        {
-            switch(input->info()->data_type())
-            {
-                case DataType::F16:
-                    options.emplace("#define PROCESS_4X_1Y_1Z");
-                    num_elems_read_per_iteration_x    = 8;
-                    num_elems_written_per_iteration_x = 4;
-                    break;
-
-                case DataType::F32:
-                    // TODO(APPBROWSER-299): Choose the most optimal path and remove others.
-#define PROCESS_4X_1Y_1Z
-
-#if defined(PROCESS_1X_1Y_1Z)
-                    options.emplace("#define PROCESS_1X_1Y_1Z");
-                    num_elems_read_per_iteration_x    = 3;
-                    num_elems_written_per_iteration_x = 1;
-#elif defined(PROCESS_4X_1Y_1Z)
-                    options.emplace("#define PROCESS_4X_1Y_1Z");
-                    num_elems_read_per_iteration_x    = 8;
-                    num_elems_written_per_iteration_x = 4;
-#elif defined(PROCESS_8X_1Y_1Z)
-                    options.emplace("#define PROCESS_8X_1Y_1Z");
-                    num_elems_read_per_iteration_x    = 12;
-                    num_elems_written_per_iteration_x = 8;
-#else /* PROCESS_nX_nY_nZ */
-#error Have to declare how many elements to process in one thread.
-#endif /* PROCESS_nX_nY_nZ */
-#undef PROCESS_1X_1Y_1Z
-#undef PROCESS_4X_1Y_1Z
-#undef PROCESS_8X_1Y_1Z
-                    break;
-
-                default:
-                    ARM_COMPUTE_ERROR("Current data type is not supported");
-                    break;
-            }
-        }
-    }
-    else if(kernel_size == 1)
-    {
-        if(weights->info()->dimension(2) % 2 == 0)
-        {
-            options.emplace("#define WEIGHTS_OPTIMIZATION");
-        }
-        switch(input->info()->data_type())
-        {
-            case DataType::F16:
-#define PROCESS_8X_2Y_1Z
-
-#if defined(PROCESS_4X_1Y_1Z)
-                options.emplace("#define PROCESS_4X_1Y_1Z");
-                num_elems_read_per_iteration_x    = 4;
-                num_elems_written_per_iteration_x = 4;
-#elif defined(PROCESS_4X_2Y_1Z)
-                options.emplace("#define PROCESS_4X_2Y_1Z");
-                num_elems_read_per_iteration_x = 4;
-                num_elems_read_per_iteration_y = 2;
-                num_elems_written_per_iteration_x = 4;
-                num_elems_written_per_iteration_y = 2;
-#elif defined(PROCESS_4X_3Y_1Z)
-                options.emplace("#define PROCESS_4X_3Y_1Z");
-                num_elems_read_per_iteration_x = 4;
-                num_elems_read_per_iteration_y = 3;
-                num_elems_written_per_iteration_x = 4;
-                num_elems_written_per_iteration_y = 3;
-#elif defined(PROCESS_4X_4Y_1Z)
-                options.emplace("#define PROCESS_4X_4Y_1Z");
-                num_elems_read_per_iteration_x = 4;
-                num_elems_read_per_iteration_y = 4;
-                num_elems_written_per_iteration_x = 4;
-                num_elems_written_per_iteration_y = 4;
-#elif defined(PROCESS_4X_2Y_2Z)
-                ARM_COMPUTE_ERROR_ON_MSG((weights->info()->dimension(4) % 2) == 1, "Current 'weights->info()->dimension(4) % 2) == 1' is not supported");
-                options.emplace("#define PROCESS_4X_2Y_2Z");
-                num_elems_read_per_iteration_x    = 4;
-                num_elems_read_per_iteration_y    = 2;
-                num_elems_written_per_iteration_x = 4;
-                num_elems_written_per_iteration_y = 2;
-                num_elems_written_per_iteration_z = 2;
-#elif defined(PROCESS_8X_1Y_1Z)
-                options.emplace("#define PROCESS_8X_1Y_1Z");
-                num_elems_read_per_iteration_x    = 8;
-                num_elems_written_per_iteration_x = 8;
-#elif defined(PROCESS_8X_2Y_1Z)
-                options.emplace("#define PROCESS_8X_2Y_1Z");
-                num_elems_read_per_iteration_x    = 8;
-                num_elems_read_per_iteration_y    = 2;
-                num_elems_written_per_iteration_x = 8;
-                num_elems_written_per_iteration_y = 2;
-#else /* PROCESS_4X_1Y_1Z */
-#error Have to declare how many elements to process in one thread.
-#endif /* PROCESS_4X_1Y_1Z */
-#undef PROCESS_4X_1Y_1Z
-#undef PROCESS_4X_2Y_1Z
-#undef PROCESS_4X_3Y_1Z
-#undef PROCESS_4X_4Y_1Z
-#undef PROCESS_4X_2Y_2Z
-#undef PROCESS_8X_1Y_1Z
-#undef PROCESS_8X_2Y_1Z
-                break;
-
-            case DataType::F32:
-                num_elems_read_per_iteration_x    = 1;
-                num_elems_written_per_iteration_x = 1;
-                break;
-
-            default:
-                break;
-        }
-    }
-    else if(kernel_size == 5)
-    {
-        switch(input->info()->data_type())
-        {
-            case DataType::F16:
-                options.emplace("#define PROCESS_4X_1Y_1Z");
-                num_elems_read_per_iteration_x    = 8;
-                num_elems_written_per_iteration_x = 4;
-
-            default:
-                break;
-        }
-    }
-    else
-    {
-    }
-
-    if(_bias != nullptr)
-    {
-        options.emplace("#define BIAS");
-    }
-
-    std::stringstream kernel_name;
-    kernel_name << "direct_convolution" << kernel_size << "x" << kernel_size;
-
-    _kernel = static_cast<GCKernel>(GCKernelLibrary::get().create_kernel(kernel_name.str(), options));
-
-    unsigned int idx = (_bias == nullptr) ? 3 * num_arguments_per_3D_tensor() : (num_arguments_per_1D_tensor() + 3 * num_arguments_per_3D_tensor());
-
-    // Calculate output right and bottom border
-    const int output_width          = output->info()->dimension(0);
-    const int output_height         = output->info()->dimension(1);
-    const int output_padding_right  = ceil_to_multiple(output_width, num_elems_written_per_iteration_x * _lws[0]) - output_width;
-    const int output_padding_bottom = ceil_to_multiple(output_height, num_elems_written_per_iteration_y * _lws[1]) - output_height;
-
-    // Calculate input right and bottom border
-    const int input_width        = input->info()->dimension(0);
-    const int input_height       = input->info()->dimension(1);
-    const int input_total_width  = std::max(int(input->info()->padding().left), int(_conv_pad_x)) + input_width + std::max(int(input->info()->padding().right), int(_conv_pad_x));
-    const int input_total_height = std::max(int(input->info()->padding().top), int(_conv_pad_y)) + input_height + std::max(int(input->info()->padding().bottom), int(_conv_pad_y));
-    const int padding_right1     = ceil_to_multiple(input_total_width, num_elems_read_per_iteration_x * _lws[0]) - input_width - _conv_pad_x;
-    const int padding_bottom1    = ceil_to_multiple(input_total_height, num_elems_read_per_iteration_y * _lws[1]) - input_height - _conv_pad_y;
-
-    const int upper_bound_w   = ceil_to_multiple(((output_width + output_padding_right) * _conv_stride_x + (kernel_size - 1)), num_elems_read_per_iteration_x * _lws[0]) - _conv_pad_x - input_width;
-    const int upper_bound_h   = ceil_to_multiple(((output_height + output_padding_bottom) * _conv_stride_y + (kernel_size - 1)), num_elems_read_per_iteration_y * _lws[1]) - _conv_pad_y - input_height;
-    const int padding_right2  = std::max(upper_bound_w, _conv_pad_x);
-    const int padding_bottom2 = std::max(upper_bound_h, _conv_pad_y);
-
-    const int padding_right  = std::max(padding_right1, padding_right2);
-    const int padding_bottom = std::max(padding_bottom1, padding_bottom2);
-
-    BorderSize border = BorderSize(0, output_padding_right, output_padding_bottom, 0);
-
-    Window win = calculate_max_enlarged_window(*output->info(), Steps(num_elems_written_per_iteration_x, num_elems_written_per_iteration_y, num_elems_written_per_iteration_z), border);
-
-    AccessWindowStatic input_access(input->info(), -_conv_pad_x, -_conv_pad_y, input_width + padding_right, input_height + padding_bottom);
-    AccessWindowStatic weights_access = AccessWindowStatic(nullptr, 0, 0, 0, 0);
-    AccessWindowStatic bias_access    = AccessWindowStatic(nullptr, 0, 0, 0, 1);
-
-    switch(weights->info()->data_type())
-    {
-        case DataType::F16:
-            if((weights->info()->dimension(2) % 2 != 0) || (kernel_size != 1))
-            {
-                weights_access = AccessWindowStatic(weights->info(), 0, 0, kernel_size + 1, kernel_size);
-            }
-            if(_bias != nullptr)
-            {
-                bias_access = AccessWindowStatic(_bias->info(), 0, 0, _bias->info()->dimension(0) + 1, 1);
-            }
-            break;
-
-        case DataType::F32:
-            weights_access = AccessWindowStatic(weights->info(), 0, 0, kernel_size, kernel_size);
-            if(_bias != nullptr)
-            {
-                bias_access = AccessWindowStatic(_bias->info(), 0, 0, _bias->info()->dimension(0), 1);
-            }
-            break;
-
-        default:
-            ARM_COMPUTE_ERROR("Current data type is not supported");
-            break;
-    }
-
-    AccessWindowStatic output_access(output->info(), 0, 0, output_width + output_padding_right, output_height + output_padding_bottom);
-
-    if(_bias != nullptr)
-    {
-        update_window_and_padding(win, input_access, weights_access, bias_access, output_access);
-    }
-    else
-    {
-        update_window_and_padding(win, input_access, weights_access, output_access);
-    }
-
-    output_access.set_valid_region(win, ValidRegion(Coordinates(), output->info()->tensor_shape()));
-
-    _kernel.set_argument(idx++, _weights->info()->strides_in_bytes()[3]); // weights_stride_w
-    _kernel.set_argument(idx++, _weights->info()->dimension(2));          // weights_depth
-
-    IGCKernel::configure(win);
-}
-
-template <unsigned int kernel_size>
-void GCDirectConvolutionLayerKernel<kernel_size>::run(const Window &window)
-{
-    ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
-    ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window);
-
-    _kernel.use();
-
-    _output->set_needs_shifting(true);
-
-    // Get initial windows
-    Window slice  = window.first_slice_window_3D();
-    Window win_in = window;
-
-    win_in.adjust(Window::DimX, -_conv_pad_x, true);
-    win_in.adjust(Window::DimY, -_conv_pad_y, true);
-    win_in.set_dimension_step(Window::DimX, window.x().step() * _conv_stride_x);
-    win_in.set_dimension_step(Window::DimY, window.y().step() * _conv_stride_y);
-
-    Window slice_in = win_in.first_slice_window_3D();
-
-    unsigned int idx1 = 2 * num_arguments_per_3D_tensor();
-    add_3D_tensor_argument(idx1, _weights, 3, slice);
-
-    if(_bias != nullptr)
-    {
-        Window slice_bias;
-        slice_bias.use_tensor_dimensions(_bias->info()->tensor_shape());
-        add_1D_tensor_argument(idx1, _bias, 4, slice_bias);
-    }
-
-    slice.shift(Window::DimX, -(_output->info()->padding()).left);
-
-    do
-    {
-        unsigned int idx = 0;
-
-        add_3D_tensor_argument(idx, _input, 1, slice_in);
-        add_3D_tensor_argument(idx, _output, 2, slice);
-
-        _kernel.update_shader_params();
-        enqueue(*this, slice, _lws);
-    }
-    while(window.slide_window_slice_3D(slice) && win_in.slide_window_slice_3D(slice_in));
-}
-
-template class arm_compute::GCDirectConvolutionLayerKernel<1>;
-template class arm_compute::GCDirectConvolutionLayerKernel<3>;
-template class arm_compute::GCDirectConvolutionLayerKernel<5>;
diff --git a/src/core/GLES_COMPUTE/kernels/GCDropoutLayerKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCDropoutLayerKernel.cpp
deleted file mode 100644
index 9368770e22..0000000000
--- a/src/core/GLES_COMPUTE/kernels/GCDropoutLayerKernel.cpp
+++ /dev/null
@@ -1,107 +0,0 @@
-/*
- * Copyright (c) 2017-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "arm_compute/core/GLES_COMPUTE/kernels/GCDropoutLayerKernel.h"
-
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h"
-#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h"
-#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h"
-#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/core/Validate.h"
-#include "support/StringSupport.h"
-
-#include <cmath>
-#include <random>
-#include <tuple>
-
-using namespace arm_compute;
-
-GCDropoutLayerKernel::GCDropoutLayerKernel()
-    : _input(nullptr), _mask(nullptr), _output(nullptr), _num_elems_processed_per_iteration(0)
-{
-}
-
-void GCDropoutLayerKernel::configure(const IGCTensor *input, IGCTensor *mask, IGCTensor *output, float ratio, bool forward)
-{
-    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32);
-    ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, mask, output);
-
-    _input  = input;
-    _mask   = mask;
-    _output = output;
-
-    std::set<std::string>                 build_opts;
-    std::string                           dt_name = (input->info()->data_type() == DataType::F32) ? "DATA_TYPE_FP32" : "DATA_TYPE_FP16";
-    std::string                           fporbp  = forward ? "FORWARD" : "BACKWARD";
-    std::random_device                    rd;
-    std::mt19937                          mt(rd());
-    std::uniform_real_distribution<float> dist(0.f, 1.f);
-
-    build_opts.emplace("#define LOCAL_SIZE_X " + support::cpp11::to_string(1));
-    build_opts.emplace("#define LOCAL_SIZE_Y " + support::cpp11::to_string(1));
-    build_opts.emplace("#define LOCAL_SIZE_Z " + support::cpp11::to_string(1));
-    build_opts.emplace("#define RATIO " + support::cpp11::to_string(ratio));
-    build_opts.emplace("#define SCALE " + support::cpp11::to_string(1. / (1. - ratio)));
-    build_opts.emplace("#define SEED " + support::cpp11::to_string(dist(mt)));
-    build_opts.emplace("#define " + dt_name);
-    build_opts.emplace("#define " + fporbp);
-
-    _num_elems_processed_per_iteration = 4 / input->info()->element_size();
-
-    // Create kernel
-    _kernel = static_cast<GCKernel>(GCKernelLibrary::get().create_kernel("dropout", build_opts));
-
-    // Configure kernel window
-    Window win = calculate_max_window(*input->info(), Steps(_num_elems_processed_per_iteration));
-
-    output->info()->set_valid_region(ValidRegion(Coordinates(), output->info()->tensor_shape()));
-
-    IGCKernel::configure(win);
-}
-
-void GCDropoutLayerKernel::run(const Window &window)
-{
-    ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
-    ARM_COMPUTE_ERROR_ON_MISMATCHING_WINDOWS(IGCKernel::window(), window);
-
-    _kernel.use();
-
-    Window slice = window.first_slice_window_3D();
-
-    do
-    {
-        unsigned int idx = 0;
-
-        add_3D_tensor_argument(idx, _input, 1, slice);
-        add_3D_tensor_argument(idx, _mask, 2, slice);
-        add_3D_tensor_argument(idx, _output, 3, slice);
-
-        _kernel.update_shader_params();
-        enqueue(*this, slice);
-    }
-    while(window.slide_window_slice_3D(slice));
-}
diff --git a/src/core/GLES_COMPUTE/kernels/GCFillBorderKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCFillBorderKernel.cpp
deleted file mode 100644
index d424f0dc79..0000000000
--- a/src/core/GLES_COMPUTE/kernels/GCFillBorderKernel.cpp
+++ /dev/null
@@ -1,166 +0,0 @@
-/*
- * Copyright (c) 2017-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/core/GLES_COMPUTE/kernels/GCFillBorderKernel.h"
-
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h"
-#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h"
-#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h"
-#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h"
-#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Utils.h"
-#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/Window.h"
-#include "support/StringSupport.h"
-
-#include <cstdint>
-#include <set>
-#include <string>
-
-using namespace arm_compute;
-
-GCFillBorderKernel::GCFillBorderKernel()
-    : IGCKernel(), _tensor(nullptr)
-{
-}
-
-bool GCFillBorderKernel::is_parallelisable() const
-{
-    return false;
-}
-
-template <class T>
-void GCFillBorderKernel::set_constant_border(unsigned int idx, const PixelValue &constant_border_value)
-{
-    T value;
-    constant_border_value.get(value);
-    _kernel.set_argument(idx, static_cast<T>(value));
-}
-
-void GCFillBorderKernel::configure(const IGCTensor *tensor, BorderSize border_size, BorderMode border_mode, const PixelValue &constant_border_value)
-{
-    ARM_COMPUTE_ERROR_ON(tensor == nullptr);
-    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(tensor, 1, DataType::F32, DataType::F16);
-    ARM_COMPUTE_ERROR_ON(tensor->info()->num_channels() != 1);
-
-    border_size.limit(tensor->info()->padding());
-
-    // If there is no border: early exit
-    if(border_size.empty() || border_mode == BorderMode::UNDEFINED)
-    {
-        return;
-    }
-
-    // Select appropriate kernel
-    std::string kernel_name = "fill_image_borders_" + lower_string(string_from_border_mode(border_mode));
-
-    // Define build options
-    std::set<std::string> build_opts;
-    build_opts.emplace("#define LOCAL_SIZE_X " + support::cpp11::to_string(1));
-    build_opts.emplace("#define LOCAL_SIZE_Y " + support::cpp11::to_string(1));
-    build_opts.emplace("#define LOCAL_SIZE_Z " + support::cpp11::to_string(1));
-    build_opts.emplace("#define BORDER_SIZE_TOP " + support::cpp11::to_string(border_size.top));
-    build_opts.emplace("#define BORDER_SIZE_BOTTOM " + support::cpp11::to_string(border_size.bottom));
-    build_opts.emplace("#define BORDER_SIZE_LEFT " + support::cpp11::to_string(border_size.left));
-    build_opts.emplace("#define BORDER_SIZE_RIGHT " + support::cpp11::to_string(border_size.right));
-
-    if(border_mode == BorderMode::REPLICATE)
-    {
-        build_opts.emplace("#define FILL_IMAGE_BORDERS_REPLICATE\n");
-    }
-    else
-    {
-        build_opts.emplace("#define FILL_IMAGE_BORDERS_CONSTANT\n");
-    }
-
-    switch(tensor->info()->data_type())
-    {
-        case DataType::F16:
-            build_opts.emplace("#define DATA_TYPE_FP16");
-            break;
-
-        case DataType::F32:
-            build_opts.emplace("#define DATA_TYPE_FP32");
-            break;
-
-        default:
-            ARM_COMPUTE_ERROR("Current data type is not supported");
-            break;
-    }
-
-    // Create kernel
-    _kernel = static_cast<GCKernel>(GCKernelLibrary::get().create_kernel(kernel_name, build_opts));
-    _tensor = tensor;
-
-    // Create static kernel arguments
-    const unsigned int valid_width       = tensor->info()->valid_region().shape[0];
-    const unsigned int valid_height      = tensor->info()->valid_region().shape[1];
-    const unsigned int total_valid_width = border_size.left + valid_width + border_size.right;
-
-    // Set static kernel arguments
-    unsigned int idx = num_arguments_per_3D_tensor(); //Skip the tensor parameters
-    _kernel.set_argument(idx++, valid_width);
-    _kernel.set_argument(idx++, valid_height);
-    _kernel.set_argument(idx++, tensor->info()->valid_region().anchor[0]);
-    _kernel.set_argument(idx++, tensor->info()->valid_region().anchor[1]);
-
-    if(BorderMode::CONSTANT == border_mode)
-    {
-        set_constant_border<float>(idx++, constant_border_value);
-    }
-
-    // Configure kernel window
-    Window win;
-    win.set(Window::DimX, Window::Dimension(0, total_valid_width + valid_height));
-    win.set(Window::DimY, Window::Dimension(0, 1, 1));
-    win.use_tensor_dimensions(tensor->info()->tensor_shape(), Window::DimZ);
-
-    IGCKernel::configure(win);
-}
-
-void GCFillBorderKernel::run(const Window &window)
-{
-    // Border mode undefined or border width == 0
-    if(_kernel.get_program() == 0)
-    {
-        return;
-    }
-
-    ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
-    ARM_COMPUTE_ERROR_ON_MISMATCHING_WINDOWS(IGCKernel::window(), window);
-
-    _kernel.use();
-    Window slice = window.first_slice_window_3D();
-
-    do
-    {
-        unsigned int idx = 0;
-        add_3D_tensor_argument(idx, _tensor, 1, slice);
-
-        _kernel.update_shader_params();
-
-        enqueue(*this, slice);
-    }
-    while(window.slide_window_slice_3D(slice));
-}
diff --git a/src/core/GLES_COMPUTE/kernels/GCGEMMInterleave4x4Kernel.cpp b/src/core/GLES_COMPUTE/kernels/GCGEMMInterleave4x4Kernel.cpp
deleted file mode 100644
index 28be710384..0000000000
--- a/src/core/GLES_COMPUTE/kernels/GCGEMMInterleave4x4Kernel.cpp
+++ /dev/null
@@ -1,125 +0,0 @@
-/*
- * Copyright (c) 2017-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/core/GLES_COMPUTE/kernels/GCGEMMInterleave4x4Kernel.h"
-
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h"
-#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h"
-#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h"
-#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/core/Utils.h"
-#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/Window.h"
-#include "support/StringSupport.h"
-
-using namespace arm_compute;
-
-GCGEMMInterleave4x4Kernel::GCGEMMInterleave4x4Kernel()
-    : _input(nullptr), _output(nullptr)
-{
-}
-
-void GCGEMMInterleave4x4Kernel::configure(const IGCTensor *input, IGCTensor *output)
-{
-    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32);
-    ARM_COMPUTE_ERROR_ON_NULLPTR(output);
-
-    TensorShape output_shape = input->info()->tensor_shape();
-    output_shape.set(0, input->info()->dimension(0) * 4);
-    output_shape.set(1, std::ceil(input->info()->dimension(1) / 4.0f));
-
-    // Output auto inizialitation if not yet initialized
-    auto_init_if_empty(*output->info(), output_shape, 1, input->info()->data_type());
-
-    ARM_COMPUTE_ERROR_ON_MISMATCHING_DIMENSIONS(output->info()->tensor_shape(), output_shape);
-    ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
-
-    _input  = input;
-    _output = output;
-
-    std::set<std::string> build_opts;
-    std::string           dt_name = (input->info()->data_type() == DataType::F32) ? "DATA_TYPE_FP32" : "DATA_TYPE_FP16";
-    build_opts.emplace(("#define " + dt_name));
-    build_opts.emplace("#define LOCAL_SIZE_X " + support::cpp11::to_string(1));
-    build_opts.emplace("#define LOCAL_SIZE_Y " + support::cpp11::to_string(1));
-    build_opts.emplace("#define LOCAL_SIZE_Z " + support::cpp11::to_string(1));
-
-    // Create kernel
-    build_opts.emplace("#define GEMM_INTERLEAVE4x4");
-    _kernel = static_cast<GCKernel>(GCKernelLibrary::get().create_kernel("gemm_interleave4x4", build_opts));
-
-    // Configure kernel window
-    const unsigned int     num_elems_processed_per_iteration_x = max_gc_vector_width / data_size_from_type(input->info()->data_type());
-    constexpr unsigned int num_elems_processed_per_iteration_y = 4;
-    const unsigned int     num_elems_written_per_iteration     = num_elems_processed_per_iteration_x * num_elems_processed_per_iteration_y;
-
-    Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration_x, num_elems_processed_per_iteration_y));
-
-    AccessWindowRectangle input_access(input->info(), 0, 0, num_elems_processed_per_iteration_x, num_elems_processed_per_iteration_y);
-    AccessWindowRectangle output_access(output->info(), 0, 0, num_elems_written_per_iteration, 1, 4.f, 0.25f);
-
-    update_window_and_padding(win, input_access, output_access);
-
-    output->info()->set_valid_region(ValidRegion(Coordinates(), output->info()->tensor_shape()));
-
-    IGCKernel::configure(win);
-}
-
-void GCGEMMInterleave4x4Kernel::run(const Window &window)
-{
-    ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
-    ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IGCKernel::window(), window);
-
-    _kernel.use();
-
-    /*
-     * This kernel puts the values in a 4x4 block of Matrix A on the same row (Interleaved values)
-     *         |a00 a01 a02 a03|
-     *         |a10 a11 a12 a13|
-     *         |a20 a21 a22 a23| = | a00 a10 a20 a30 || a01 a11 a21 a31 || a02 a12 a22 a32 || a03 a13 a23 a33 |
-     *         |a30 a31 a32 a33|
-     *
-     * After this operation, the output matrix will have the following shape: [ height * 4, width / 4 ]
-     */
-    Window in_slice  = window.first_slice_window_2D();
-    Window out_slice = window.first_slice_window_2D();
-
-    // Change x and y steps for the slide of output tensor
-    out_slice.scale(Window::DimX, 4.f);
-    out_slice.scale(Window::DimY, 0.25f);
-
-    do
-    {
-        unsigned int idx = 0;
-        add_2D_tensor_argument(idx, _input, 1, in_slice);
-        add_2D_tensor_argument(idx, _output, 2, out_slice);
-
-        _kernel.update_shader_params();
-
-        enqueue(*this, in_slice);
-    }
-    while(window.slide_window_slice_2D(in_slice) && window.slide_window_slice_2D(out_slice));
-}
diff --git a/src/core/GLES_COMPUTE/kernels/GCGEMMMatrixAccumulateBiasesKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCGEMMMatrixAccumulateBiasesKernel.cpp
deleted file mode 100644
index f4c84f3d66..0000000000
--- a/src/core/GLES_COMPUTE/kernels/GCGEMMMatrixAccumulateBiasesKernel.cpp
+++ /dev/null
@@ -1,129 +0,0 @@
-/*
- * Copyright (c) 2017-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixAccumulateBiasesKernel.h"
-
-#include "arm_compute/core/AccessWindowStatic.h"
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h"
-#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h"
-#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h"
-#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/core/Utils.h"
-#include "arm_compute/core/Validate.h"
-#include "support/StringSupport.h"
-
-using namespace arm_compute;
-
-GCGEMMMatrixAccumulateBiasesKernel::GCGEMMMatrixAccumulateBiasesKernel()
-    : _accum(nullptr), _biases(nullptr), _lws(gles::NDRange(1U, 1U, 1U))
-{
-}
-
-void GCGEMMMatrixAccumulateBiasesKernel::configure(IGCTensor *accum, const IGCTensor *biases)
-{
-    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(accum, 1, DataType::F16, DataType::F32);
-    ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(biases, accum);
-    ARM_COMPUTE_ERROR_ON(biases->info()->num_dimensions() != 1);
-
-    _biases = biases;
-    _accum  = accum;
-
-    std::set<std::string> build_opts;
-    build_opts.emplace("#define LOCAL_SIZE_X " + support::cpp11::to_string(_lws[0]));
-    build_opts.emplace("#define LOCAL_SIZE_Y " + support::cpp11::to_string(_lws[1]));
-    build_opts.emplace("#define LOCAL_SIZE_Z " + support::cpp11::to_string(_lws[2]));
-
-    // Create kernel
-    build_opts.emplace("#define GEMM_ACCUMULATE_BIASES");
-
-#define ACCUM_PROCESS_4X
-
-#if defined(ACCUM_PROCESS_4X)
-    build_opts.emplace("#define ACCUM_PROCESS_4X");
-#elif defined(ACCUM_PROCESS_8X) /* ACCUM_PROCESS_4X */
-    build_opts.emplace("#define ACCUM_PROCESS_8X");
-#endif                          /* ACCUM_PROCESS_4X */
-    std::string dt_name = (accum->info()->data_type() == DataType::F32) ? "DATA_TYPE_FP32" : "DATA_TYPE_FP16";
-    build_opts.emplace(("#define " + dt_name));
-
-    _kernel = GCKernelLibrary::get().create_kernel("gemm_accumulate_biases", build_opts);
-
-    // Configure kernel window
-    unsigned int num_elems_processed_per_iteration = 1;
-
-    if(_accum->info()->data_type() == DataType::F32)
-    {
-        num_elems_processed_per_iteration = 16;
-    }
-    else if(_accum->info()->data_type() == DataType::F16)
-    {
-#if defined(ACCUM_PROCESS_4X)
-        num_elems_processed_per_iteration = 4;
-#elif defined(ACCUM_PROCESS_8X) /* ACCUM_PROCESS_4X */
-        num_elems_processed_per_iteration = 8;
-#endif                          /* ACCUM_PROCESS_4X */
-    }
-
-    const int  accum_width         = accum->info()->dimension(0);
-    const int  accum_padding_right = ceil_to_multiple(accum_width, num_elems_processed_per_iteration * _lws[0]) - accum_width;
-    BorderSize border              = BorderSize(0, accum_padding_right, 0, 0);
-
-    Window win = calculate_max_enlarged_window(*_accum->info(), Steps(num_elems_processed_per_iteration), border);
-
-    AccessWindowStatic biases_access(biases->info(), 0, 0, ceil_to_multiple(biases->info()->dimension(0), num_elems_processed_per_iteration * _lws[0]), biases->info()->dimension(1));
-    AccessWindowStatic accum_access(_accum->info(), 0, 0, accum_width + accum_padding_right, _accum->info()->dimension(1));
-
-    update_window_and_padding(win, biases_access, accum_access);
-
-    IGCKernel::configure(win);
-}
-
-void GCGEMMMatrixAccumulateBiasesKernel::run(const Window &window)
-{
-    ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
-    ARM_COMPUTE_ERROR_ON_MISMATCHING_WINDOWS(IGCKernel::window(), window);
-
-    _kernel.use();
-
-    Window accum_slice = window.first_slice_window_2D();
-
-    Window biases_slice(accum_slice);
-    biases_slice.set(Window::DimY, Window::Dimension(0, 1, 1));
-
-    // Run kernel
-    do
-    {
-        // Set arguments
-        unsigned int idx = 0;
-
-        add_2D_tensor_argument(idx, _accum, 1, accum_slice);
-        add_1D_tensor_argument(idx, _biases, 2, biases_slice);
-        _kernel.update_shader_params();
-
-        enqueue(*this, accum_slice, _lws);
-    }
-    while(window.slide_window_slice_2D(accum_slice));
-}
diff --git a/src/core/GLES_COMPUTE/kernels/GCGEMMMatrixAdditionKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCGEMMMatrixAdditionKernel.cpp
deleted file mode 100644
index 0429824b04..0000000000
--- a/src/core/GLES_COMPUTE/kernels/GCGEMMMatrixAdditionKernel.cpp
+++ /dev/null
@@ -1,101 +0,0 @@
-/*
- * Copyright (c) 2017-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixAdditionKernel.h"
-
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h"
-#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h"
-#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h"
-#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/Window.h"
-#include "support/StringSupport.h"
-
-using namespace arm_compute;
-
-GCGEMMMatrixAdditionKernel::GCGEMMMatrixAdditionKernel()
-    : _input(nullptr), _output(nullptr)
-{
-}
-
-void GCGEMMMatrixAdditionKernel::configure(const IGCTensor *input, IGCTensor *output, float beta)
-{
-    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32);
-    ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
-    ARM_COMPUTE_ERROR_ON(input->info()->dimension(0) != output->info()->dimension(0));
-    ARM_COMPUTE_ERROR_ON(input->info()->dimension(1) != output->info()->dimension(1));
-
-    _input                                               = input;
-    _output                                              = output;
-    const unsigned int num_elems_processed_per_iteration = max_gc_vector_width / data_size_from_type(input->info()->data_type());
-
-    std::set<std::string> build_opts;
-    std::string           dt_name = (input->info()->data_type() == DataType::F32) ? "DATA_TYPE_FP32" : "DATA_TYPE_FP16";
-    build_opts.emplace(("#define " + dt_name));
-    build_opts.emplace("#define LOCAL_SIZE_X " + support::cpp11::to_string(1));
-    build_opts.emplace("#define LOCAL_SIZE_Y " + support::cpp11::to_string(1));
-    build_opts.emplace("#define LOCAL_SIZE_Z " + support::cpp11::to_string(1));
-    build_opts.emplace("#define BETA " + float_to_string_with_full_precision(beta));
-
-    // Create kernel
-    build_opts.emplace("#define GEMM_MATRIXADDITION");
-    std::string data_type_name = lower_string(string_from_data_type(input->info()->data_type()));
-    _kernel                    = GCKernelLibrary::get().create_kernel(("gemm_ma"), build_opts);
-
-    // Configure kernel window
-    Window win = calculate_max_window(*_input->info(), Steps(num_elems_processed_per_iteration));
-
-    AccessWindowHorizontal input_access(input->info(), 0, num_elems_processed_per_iteration);
-    AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration);
-
-    update_window_and_padding(win, input_access, output_access);
-
-    output_access.set_valid_region(win, input->info()->valid_region());
-
-    IGCKernel::configure(win);
-}
-
-void GCGEMMMatrixAdditionKernel::run(const Window &window)
-{
-    ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
-    ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IGCKernel::window(), window);
-
-    _kernel.use();
-
-    Window slice = window.first_slice_window_2D();
-
-    do
-    {
-        unsigned int idx = 0;
-        add_2D_tensor_argument(idx, _input, 1, slice);
-        add_2D_tensor_argument(idx, _output, 2, slice);
-
-        _kernel.update_shader_params();
-
-        enqueue(*this, slice);
-    }
-    while(window.slide_window_slice_2D(slice));
-}
diff --git a/src/core/GLES_COMPUTE/kernels/GCGEMMMatrixMultiplyKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCGEMMMatrixMultiplyKernel.cpp
deleted file mode 100644
index 2a85e0d77d..0000000000
--- a/src/core/GLES_COMPUTE/kernels/GCGEMMMatrixMultiplyKernel.cpp
+++ /dev/null
@@ -1,336 +0,0 @@
-/*
- * Copyright (c) 2017-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixMultiplyKernel.h"
-
-#include "arm_compute/core/AccessWindowStatic.h"
-#include "arm_compute/core/AccessWindowTranspose.h"
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h"
-#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h"
-#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h"
-#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/core/Utils.h"
-#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/Window.h"
-#include "arm_compute/core/utils/misc/ShapeCalculator.h"
-#include "support/StringSupport.h"
-
-#include <set>
-#include <string>
-
-using namespace arm_compute;
-using namespace arm_compute::misc::shape_calculator;
-
-namespace
-{
-using ElementsProcessed = Steps;
-
-inline Status validate_arguments(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *output, bool is_interleaved_transposed, const GEMMReshapeInfo &reshape_info)
-{
-    ARM_COMPUTE_UNUSED(reshape_info);
-    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input0, input1, output);
-    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input0, 1, DataType::F16, DataType::F32);
-    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input0, input1);
-    ARM_COMPUTE_RETURN_ERROR_ON_MSG(input1->num_dimensions() > 3, "The number of dimensions for the matrix B must be <= 3");
-
-    if(!is_interleaved_transposed)
-    {
-        ARM_COMPUTE_RETURN_ERROR_ON(input0->dimension(0) != input1->dimension(1));
-
-        if(output->total_size() != 0)
-        {
-            ARM_COMPUTE_RETURN_ERROR_ON(input1->dimension(0) != output->dimension(0));
-            ARM_COMPUTE_RETURN_ERROR_ON(input0->dimension(1) != output->dimension(1));
-            ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input0, output);
-        }
-    }
-    else
-    {
-        const int m                         = reshape_info.m();
-        const int n                         = reshape_info.n();
-        const int k                         = reshape_info.k();
-        const int mult_transpose1xW_width   = reshape_info.mult_transpose1xW_width();
-        const int mult_interleave4x4_height = reshape_info.mult_interleave4x4_height();
-
-        TensorShape tensor_shape0{ input0->tensor_shape() };
-        tensor_shape0.set(0, k);
-        tensor_shape0.set(1, m);
-
-        TensorShape tensor_shape1{ input1->tensor_shape() };
-        tensor_shape1.set(0, n);
-        tensor_shape1.set(1, k);
-
-        const TensorInfo tensor_info0 = input0->clone()->set_tensor_shape(tensor_shape0);
-        const TensorInfo tensor_info1 = input1->clone()->set_tensor_shape(tensor_shape1);
-
-        const TensorInfo tensor_info_reshaped0 = input0->clone()->set_tensor_shape(compute_interleaved_shape(tensor_info0, mult_interleave4x4_height));
-        const TensorInfo tensor_info_reshaped1 = input1->clone()->set_tensor_shape(compute_transpose1xW_with_element_size_shape(tensor_info1, mult_transpose1xW_width));
-
-        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input0, &tensor_info_reshaped0);
-        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input1, &tensor_info_reshaped1);
-
-        if(output->total_size() != 0)
-        {
-            ARM_COMPUTE_RETURN_ERROR_ON(output->dimension(0) != static_cast<size_t>(n));
-            ARM_COMPUTE_RETURN_ERROR_ON(output->dimension(1) != static_cast<size_t>(m));
-            ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input0, output);
-        }
-    }
-
-    return Status{};
-}
-
-inline std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input0, ITensorInfo *input1, ITensorInfo *output,
-                                                               bool is_interleaved_transposed, const GEMMReshapeInfo &reshape_info,
-                                                               GPUTarget gpu_target, ElementsProcessed &num_elements_processed)
-{
-    ARM_COMPUTE_UNUSED(gpu_target);
-
-    // Output tensor auto inizialitation if not yet initialized
-    TensorShape tensor_shape{ input0->tensor_shape() };
-    tensor_shape.set(0, is_interleaved_transposed ? reshape_info.n() : input1->dimension(0));
-    tensor_shape.set(1, is_interleaved_transposed ? reshape_info.m() : input0->dimension(1));
-
-    auto_init_if_empty(*output, input0->clone()->set_tensor_shape(tensor_shape));
-
-    bool   window_changed = false;
-    Window win{};
-
-    const DataType data_type                           = input0->data_type();
-    unsigned int &num_elems_processed_per_iteration_x = num_elements_processed[0];
-    unsigned int &num_elems_processed_per_iteration_y = num_elements_processed[1];
-
-    if(is_interleaved_transposed)
-    {
-        // Configure window kernel
-        num_elems_processed_per_iteration_x = max_gc_vector_width / data_size_from_type(data_type);
-        num_elems_processed_per_iteration_y = 4;
-
-        win = calculate_max_window(*output, Steps(num_elems_processed_per_iteration_x, num_elems_processed_per_iteration_y));
-
-        AccessWindowRectangle input0_access(input0, 0, 0, num_elems_processed_per_iteration_y, 1, 1.f, 0.25f);
-        AccessWindowTranspose input1_access(input1, 0, 0, num_elems_processed_per_iteration_x, 1, 0.f, 0.25f);
-        AccessWindowRectangle output_access(output, 0, 0, num_elems_processed_per_iteration_x, num_elems_processed_per_iteration_y);
-
-        update_window_and_padding(win, input0_access, input1_access, output_access);
-
-        output_access.set_valid_region(win, ValidRegion(Coordinates(), output->tensor_shape()));
-    }
-    else // The input tensors have not been reshaped
-    {
-        // Special case for 1xN, 2xN, 3xN and 4xN input0 tensor.
-        num_elems_processed_per_iteration_y = std::min(static_cast<int>(output->dimension(1)), 4);
-
-        switch(data_type)
-        {
-            case DataType::F16:
-                num_elems_processed_per_iteration_x = 4;
-                break;
-
-            case DataType::F32:
-                num_elems_processed_per_iteration_x = max_gc_vector_width / data_size_from_type(data_type);
-                break;
-
-            default:
-                ARM_COMPUTE_ERROR("Current data type is not supported");
-                break;
-        }
-
-        win = calculate_max_window(*output, Steps(num_elems_processed_per_iteration_x, num_elems_processed_per_iteration_y));
-
-        AccessWindowStatic    input0_access(input0, 0, 0, ceil_to_multiple(input0->dimension(0), 8), ceil_to_multiple(input0->dimension(1), num_elems_processed_per_iteration_y));
-        AccessWindowStatic    input1_access(input1, 0, 0, ceil_to_multiple(input1->dimension(0), num_elems_processed_per_iteration_x), input1->dimension(1));
-        AccessWindowRectangle output_access(output, 0, 0, num_elems_processed_per_iteration_x, num_elems_processed_per_iteration_y);
-
-        update_window_and_padding(win, input0_access, input1_access, output_access);
-
-        Coordinates coord;
-        coord.set_num_dimensions(output->num_dimensions());
-        output_access.set_valid_region(win, ValidRegion(coord, output->tensor_shape()));
-    }
-
-    Status err = (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{};
-    return std::make_pair(err, win);
-}
-} // namespace
-
-GCGEMMMatrixMultiplyKernel::GCGEMMMatrixMultiplyKernel()
-    : _input0(nullptr), _input1(nullptr), _output(nullptr)
-{
-}
-
-void GCGEMMMatrixMultiplyKernel::configure(const IGCTensor *input0, const IGCTensor *input1, IGCTensor *output, float alpha, bool is_interleaved_transposed, const GEMMReshapeInfo &reshape_info)
-{
-    ARM_COMPUTE_ERROR_ON_NULLPTR(input0, input1, output);
-
-    // Perform validate step
-    ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input0->info(), input1->info(), output->info(), is_interleaved_transposed, reshape_info));
-
-    _input0 = input0;
-    _input1 = input1;
-    _output = output;
-
-    // Get target architecture
-    GPUTarget gpu_target = get_target();
-
-    ElementsProcessed num_elements_processed{};
-
-    // Configure kernel window
-    auto win_config = validate_and_configure_window(input0->info(), input1->info(), output->info(), is_interleaved_transposed, reshape_info, gpu_target, num_elements_processed);
-    ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
-    IGCKernel::configure(win_config.second);
-
-    // Create build options
-    std::set<std::string> build_opts;
-    std::string           kernel_name;
-
-    build_opts.emplace("#define LOCAL_SIZE_X " + support::cpp11::to_string(1));
-    build_opts.emplace("#define LOCAL_SIZE_Y " + support::cpp11::to_string(1));
-    build_opts.emplace("#define LOCAL_SIZE_Z " + support::cpp11::to_string(1));
-    build_opts.emplace("#define COLS_A " + support::cpp11::to_string(input0->info()->dimension(0)));
-    build_opts.emplace("#define COLS_B " + support::cpp11::to_string(input1->info()->dimension(0)));
-    build_opts.emplace("#define ALPHA " + float_to_string_with_full_precision(alpha));
-
-    // Check if the output tensor is a vector. If so,the kernel runs the vector-matrix multiplication
-    if(is_interleaved_transposed)
-    {
-        const int mult_transpose1xW_width   = reshape_info.mult_transpose1xW_width();
-        const int mult_interleave4x4_height = reshape_info.mult_interleave4x4_height();
-
-        build_opts.emplace("#define MULT_TRANSPOSE1XW_WIDTH " + support::cpp11::to_string(mult_transpose1xW_width));
-        build_opts.emplace("#define MULT_INTERLEAVE4X4_HEIGHT " + support::cpp11::to_string(mult_interleave4x4_height));
-
-        switch(input0->info()->data_type())
-        {
-            case DataType::F16:
-                build_opts.emplace("#define DATA_TYPE_FP16");
-                break;
-
-            case DataType::F32:
-                build_opts.emplace("#define DATA_TYPE_FP32");
-                break;
-
-            default:
-                ARM_COMPUTE_ERROR("Current data type is not supported");
-                break;
-        }
-
-        build_opts.emplace("#define GEMM_MM_INTERLEAVED_TRANSPOSED");
-
-        kernel_name = "gemm_mm_interleaved_transposed";
-    }
-    else
-    {
-        // Special case for 1xN, 2xN, 3xN and 4xN input0 tensor
-
-        GPUTarget arch_target = get_arch_from_target(gpu_target);
-        switch(input0->info()->data_type())
-        {
-            case DataType::F16:
-                build_opts.emplace("#define DATA_TYPE_FP16");
-                build_opts.emplace("#define MM_PROCESS_4X_OPTIMIZED");
-                build_opts.emplace("#define GEMM_MM_FLOATING_POINT");
-                break;
-
-            case DataType::F32:
-                build_opts.emplace("#define DATA_TYPE_FP32");
-
-                if(arch_target == GPUTarget::BIFROST && input0->info()->num_dimensions() != 1)
-                {
-                    build_opts.emplace("#define GEMM_MM_FLOATING_POINT_BIFROST");
-                }
-                else
-                {
-                    build_opts.emplace("#define GEMM_MM_FLOATING_POINT");
-                }
-                break;
-
-            default:
-                ARM_COMPUTE_ERROR("Current data type is not supported");
-                break;
-        }
-
-        build_opts.emplace("#define NUM_ELEMS_PROCESSED_PER_THREAD_X " + support::cpp11::to_string(num_elements_processed.x()));
-        build_opts.emplace("#define NUM_ELEMS_PROCESSED_PER_THREAD_Y " + support::cpp11::to_string(num_elements_processed.y()));
-
-        kernel_name = "gemm_mm_floating_point";
-    }
-
-    // Create kernel
-    _kernel = GCKernelLibrary::get().create_kernel(kernel_name, build_opts);
-}
-
-Status GCGEMMMatrixMultiplyKernel::validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *output, float alpha, bool is_interleaved_transposed,
-                                            const GEMMReshapeInfo &reshape_info, GPUTarget gpu_target)
-{
-    ARM_COMPUTE_UNUSED(alpha);
-    ElementsProcessed num_elements_processed{};
-    ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input0, input1, output, is_interleaved_transposed, reshape_info));
-    ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input0->clone().get(),
-                                                              input1->clone().get(),
-                                                              output->clone().get(),
-                                                              is_interleaved_transposed,
-                                                              reshape_info,
-                                                              gpu_target,
-                                                              num_elements_processed)
-                                .first);
-    return Status{};
-}
-
-void GCGEMMMatrixMultiplyKernel::run(const Window &window)
-{
-    ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
-    ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IGCKernel::window(), window);
-
-    _kernel.use();
-
-    Window slice          = window.first_slice_window_2D();
-    Window slice_matrix_b = slice;
-
-    slice_matrix_b.set(Window::DimX, Window::Dimension(0, 1, 1));
-    slice_matrix_b.set(Window::DimY, Window::Dimension(0, 1, 1));
-
-    do
-    {
-        Window slice_b = slice;
-        // Don't slice matrix B along the z dimension if matrix B has just 2 dimensions and matrix A more than 2
-        // This scenario can happen when the the matrix multiplication is used to perform a convolution operation
-        if(_input1->info()->num_dimensions() < 3)
-        {
-            slice_b = slice_matrix_b;
-        }
-
-        unsigned int idx = 0;
-
-        add_2D_tensor_argument(idx, _input0, 1, slice);
-        add_2D_tensor_argument(idx, _input1, 2, slice_b);
-        add_2D_tensor_argument(idx, _output, 3, slice);
-        _kernel.update_shader_params();
-        enqueue(*this, slice);
-    }
-    while(window.slide_window_slice_2D(slice));
-}
diff --git a/src/core/GLES_COMPUTE/kernels/GCGEMMTranspose1xWKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCGEMMTranspose1xWKernel.cpp
deleted file mode 100644
index 6ebd8dd6e4..0000000000
--- a/src/core/GLES_COMPUTE/kernels/GCGEMMTranspose1xWKernel.cpp
+++ /dev/null
@@ -1,125 +0,0 @@
-/*
- * Copyright (c) 2017-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/core/GLES_COMPUTE/kernels/GCGEMMTranspose1xWKernel.h"
-
-#include "arm_compute/core/AccessWindowTranspose.h"
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h"
-#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h"
-#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h"
-#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/Window.h"
-#include "support/StringSupport.h"
-
-#include <cmath>
-
-using namespace arm_compute;
-
-void GCGEMMTranspose1xWKernel::configure(const IGCTensor *input, IGCTensor *output)
-{
-    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32);
-    ARM_COMPUTE_ERROR_ON_NULLPTR(output);
-
-    TensorShape  output_shape{ input->info()->tensor_shape() };
-    const size_t transpose_w = 16 / input->info()->element_size();
-    output_shape.set(0, input->info()->dimension(1) * transpose_w);
-    output_shape.set(1, static_cast<size_t>(std::ceil((input->info()->dimension(0) / static_cast<float>(transpose_w)))));
-
-    // Output tensor auto inizialitation if not yet initialized
-    auto_init_if_empty(*output->info(), output_shape, 1, input->info()->data_type());
-
-    ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
-    ARM_COMPUTE_ERROR_ON_MISMATCHING_DIMENSIONS(output->info()->tensor_shape(), output_shape);
-
-    const unsigned int num_elems_processed_per_iteration = 16 / input->info()->element_size();
-    const int          scale_x                           = num_elems_processed_per_iteration;
-
-    _input  = input;
-    _output = output;
-
-    std::set<std::string> build_opts;
-    std::string           dt_name = (input->info()->data_type() == DataType::F32) ? "DATA_TYPE_FP32" : "DATA_TYPE_FP16";
-    build_opts.emplace(("#define " + dt_name));
-    build_opts.emplace("#define LOCAL_SIZE_X " + support::cpp11::to_string(1));
-    build_opts.emplace("#define LOCAL_SIZE_Y " + support::cpp11::to_string(1));
-    build_opts.emplace("#define LOCAL_SIZE_Z " + support::cpp11::to_string(1));
-    /*
-     * Following an example of how the transposition1xW works when the input data type is F32
-     *
-     *         |a00 a01 a02 a03|
-     *         |a10 a11 a12 a13|
-     *         |a20 a21 a22 a23| = | a00 a01 a02 a03 || a10 a11 a12 a13 || a20 a21 a22 a23 || a30 a31 a32 a33 |
-     *         |a30 a31 a32 a33|
-     *
-     * The output matrix will have the following shape: [ height * W, ceil(width / W) ], where W = (16 / element size of the tensor)
-     */
-    // Create kernel
-    build_opts.emplace("#define GEMM_TRANSPOSE1xW");
-    _kernel = GCKernelLibrary::get().create_kernel("gemm_transpose1x4", build_opts);
-
-    // Configure window
-    Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration));
-
-    ARM_COMPUTE_ERROR_ON_MSG((win.x().end() / scale_x) == 0, "Transposed shape would be 0 in the second dimension");
-
-    AccessWindowHorizontal input_access(input->info(), 0, num_elems_processed_per_iteration);
-    AccessWindowTranspose  output_access(output->info(), 0, 0, num_elems_processed_per_iteration, 1, scale_x, 1.f / scale_x);
-
-    update_window_and_padding(win, input_access, output_access);
-
-    output->info()->set_valid_region(ValidRegion(Coordinates(), output->info()->tensor_shape()));
-
-    IGCKernel::configure(win);
-}
-
-void GCGEMMTranspose1xWKernel::run(const Window &window)
-{
-    ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
-    ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IGCKernel::window(), window);
-
-    _kernel.use();
-
-    // Output is transposed
-    Window out_window(window);
-    out_window.set(Window::DimX, window.y());
-    out_window.set(Window::DimY, window.x());
-
-    Window in_slice  = window.first_slice_window_2D();
-    Window out_slice = out_window.first_slice_window_2D();
-
-    do
-    {
-        unsigned int idx = 0;
-        add_2D_tensor_argument(idx, _input, 1, in_slice);
-        add_2D_tensor_argument(idx, _output, 2, out_slice);
-
-        _kernel.update_shader_params();
-
-        enqueue(*this, in_slice);
-    }
-    while(window.slide_window_slice_2D(in_slice) && out_window.slide_window_slice_2D(out_slice));
-}
diff --git a/src/core/GLES_COMPUTE/kernels/GCIm2ColKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCIm2ColKernel.cpp
deleted file mode 100644
index 1890cf7e04..0000000000
--- a/src/core/GLES_COMPUTE/kernels/GCIm2ColKernel.cpp
+++ /dev/null
@@ -1,302 +0,0 @@
-/*
- * Copyright (c) 2017-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "arm_compute/core/GLES_COMPUTE/kernels/GCIm2ColKernel.h"
-
-#include "arm_compute/core/AccessWindowStatic.h"
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h"
-#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h"
-#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h"
-#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/Size2D.h"
-#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/core/Validate.h"
-#include "support/StringSupport.h"
-
-#include <cmath>
-#include <tuple>
-
-using namespace arm_compute;
-
-namespace
-{
-Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output)
-{
-    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32);
-    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(output);
-
-    // Checks performed when output is configured
-    if(output->total_size() != 0)
-    {
-        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
-    }
-
-    return Status{};
-}
-} // namespace
-
-GCIm2ColKernel::GCIm2ColKernel()
-    : _input(nullptr), _output(nullptr), _convolved_dims(), _kernel_dims(), _num_elems_processed_per_iteration(1), _run_func(nullptr)
-{
-}
-
-void GCIm2ColKernel::configure(const IGCTensor *input, IGCTensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, const Size2D &dilation)
-{
-    ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
-
-    // Perform validation step
-    ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info()));
-
-    _input  = input;
-    _output = output;
-
-    // Create kernel
-    std::set<std::string> build_opts;
-    std::string           dt_name = (input->info()->data_type() == DataType::F32) ? "DATA_TYPE_FP32" : "DATA_TYPE_FP16";
-    build_opts.emplace("#define LOCAL_SIZE_X " + support::cpp11::to_string(1));
-    build_opts.emplace("#define LOCAL_SIZE_Y " + support::cpp11::to_string(1));
-    build_opts.emplace("#define LOCAL_SIZE_Z " + support::cpp11::to_string(1));
-    build_opts.insert("#define " + dt_name);
-
-    if(has_bias)
-    {
-        build_opts.emplace("#define HAS_BIAS");
-    }
-
-    int stride_x = 0;
-    int stride_y = 0;
-
-    std::tie(stride_x, stride_y) = conv_info.stride();
-    _kernel_dims                 = std::make_pair(kernel_dims.width, kernel_dims.height);
-
-    const bool run_img2col_reduced = (output->info()->dimension(0) == (input->info()->dimension(0) * input->info()->dimension(1) * input->info()->dimension(2))) && (TensorShape::num_max_dimensions >= 4)
-                                     && (std::equal(input->info()->tensor_shape().cbegin() + 3,
-                                                    input->info()->tensor_shape().cend(),
-                                                    output->info()->tensor_shape().cbegin() + 1))
-                                     && ((stride_x == 1) && (stride_y == 1) && !conv_info.has_padding())
-                                     && (dilation == Size2D(1U, 1U));
-
-    std::string kernel_name = "im2col_generic";
-    if(!run_img2col_reduced)
-    {
-        if(input->info()->data_type() == DataType::F16 && _kernel_dims == std::pair<unsigned int, unsigned int>(1, 1))
-        {
-            build_opts.emplace("#define KERNEL_1x1");
-        }
-
-        build_opts.emplace("#define IM2COL_GENERIC");
-        _convolved_dims                    = scaled_dimensions(input->info()->dimension(0), input->info()->dimension(1),
-                                                               kernel_dims.width, kernel_dims.height,
-                                                               conv_info, dilation);
-        _num_elems_processed_per_iteration = (input->info()->data_type() == DataType::F32) ? 1 : 2;
-
-        build_opts.emplace("#define KERNEL_WIDTH " + support::cpp11::to_string(kernel_dims.width));
-        build_opts.emplace("#define KERNEL_HEIGHT " + support::cpp11::to_string(kernel_dims.height));
-        build_opts.emplace("#define KERNEL_DEPTH " + support::cpp11::to_string(input->info()->dimension(2)));
-        build_opts.emplace("#define CONVOLVED_WIDTH " + support::cpp11::to_string(_convolved_dims.first));
-        build_opts.emplace("#define CONVOLVED_HEIGHT " + support::cpp11::to_string(_convolved_dims.second));
-        build_opts.emplace("#define STRIDE_X " + support::cpp11::to_string(conv_info.stride().first));
-        build_opts.emplace("#define STRIDE_Y " + support::cpp11::to_string(conv_info.stride().second));
-        build_opts.emplace("#define PAD_LEFT " + support::cpp11::to_string(conv_info.pad_left()));
-        build_opts.emplace("#define PAD_TOP " + support::cpp11::to_string(conv_info.pad_top()));
-        build_opts.emplace("#define PAD_RIGHT " + support::cpp11::to_string(conv_info.pad_right()));
-        build_opts.emplace("#define PAD_BOTTOM " + support::cpp11::to_string(conv_info.pad_bottom()));
-        build_opts.emplace("#define SRC_WIDTH " + support::cpp11::to_string(input->info()->dimension(0)));
-        build_opts.emplace("#define SRC_HEIGHT " + support::cpp11::to_string(input->info()->dimension(1)));
-        build_opts.emplace("#define DILATION_X " + support::cpp11::to_string(dilation.x()));
-        build_opts.emplace("#define DILATION_Y " + support::cpp11::to_string(dilation.y()));
-
-        _run_func = &GCIm2ColKernel::run_generic;
-    }
-    else
-    {
-        build_opts.emplace("#define IM2COL_REDUCED");
-        kernel_name = "im2col_reduced";
-
-        if(input->info()->data_type() == DataType::F32)
-        {
-            _num_elems_processed_per_iteration = 4 / input->info()->element_size();
-        }
-        else if(input->info()->data_type() == DataType::F16)
-        {
-            int input_width  = input->info()->dimension(0);
-            int input_height = input->info()->dimension(1);
-
-            build_opts.emplace("#define IMAGE_SIZE " + support::cpp11::to_string(input_width * input_height));
-            if(input_width % 8 == 0)
-            {
-                _num_elems_processed_per_iteration = 8;
-                build_opts.emplace("#define IM2COL_REDUCED_8X");
-            }
-            else if(input_width % 4 == 0)
-            {
-                _num_elems_processed_per_iteration = 4;
-                build_opts.emplace("#define IM2COL_REDUCED_4X");
-            }
-            else if(input_width % 2 == 0)
-            {
-                _num_elems_processed_per_iteration = 2;
-                build_opts.emplace("#define IM2COL_REDUCED_2X");
-            }
-            else
-            {
-                _num_elems_processed_per_iteration = 2;
-                build_opts.emplace("#define IM2COL_REDUCED_GENERIC");
-            }
-        }
-
-        _run_func = &GCIm2ColKernel::run_reduced;
-    }
-
-    // Create kernel
-    _kernel = static_cast<GCKernel>(GCKernelLibrary::get().create_kernel(kernel_name, build_opts));
-
-    // Configure kernel window
-    Window win = calculate_max_window(*input->info(), Steps(_num_elems_processed_per_iteration));
-
-    if(input->info()->data_type() == DataType::F16)
-    {
-        // Calculate input right and bottom border
-        const int input_width         = input->info()->dimension(0);
-        const int input_height        = input->info()->dimension(1);
-        int       input_total_width   = input->info()->padding().left + input_width + input->info()->padding().right;
-        int       input_padding_right = ceil_to_multiple(input_total_width, _num_elems_processed_per_iteration) - input_total_width;
-        input_total_width             = input_width + input_padding_right + input->info()->padding().right;
-        AccessWindowStatic input_access(input->info(), 0, 0, input_total_width, input_height);
-
-        // Calculate output right and bottom border
-        const int          output_width         = output->info()->dimension(0);
-        const int          output_height        = output->info()->dimension(1);
-        const int          output_padding_right = ceil_to_multiple(output_width, _num_elems_processed_per_iteration) - output_width;
-        AccessWindowStatic output_access(output->info(), 0, 0, output_width + output_padding_right, output_height);
-
-        update_window_and_padding(win, input_access, output_access);
-    }
-
-    output->info()->set_valid_region(ValidRegion(Coordinates(), output->info()->tensor_shape()));
-
-    if(!run_img2col_reduced)
-    {
-        // set the Z dimension's step same size as the whole dimension so that one can't split across the Z dimension
-        win.set_dimension_step(Window::DimZ, win[Window::DimZ].end() - win[Window::DimZ].start());
-    }
-
-    IGCKernel::configure(win);
-}
-
-Status GCIm2ColKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, const Size2D &dilation)
-{
-    ARM_COMPUTE_UNUSED(kernel_dims);
-    ARM_COMPUTE_UNUSED(conv_info);
-    ARM_COMPUTE_UNUSED(has_bias);
-    ARM_COMPUTE_UNUSED(dilation);
-    ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output));
-    return Status{};
-}
-
-void GCIm2ColKernel::run(const Window &window)
-{
-    ARM_COMPUTE_ERROR_ON(_run_func == nullptr);
-    (this->*_run_func)(window);
-}
-
-void GCIm2ColKernel::run_generic(const Window &window)
-{
-    ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
-    ARM_COMPUTE_ERROR_ON_MISMATCHING_WINDOWS(IGCKernel::window(), window);
-
-    // Get initial windows
-    Window window_collapsed = window.collapse_if_possible(IGCKernel::window(), Window::DimZ);
-
-    // Change the Z dimension's step back to 1
-    window_collapsed.set_dimension_step(Window::DimZ, 1);
-
-    Window slice     = window_collapsed.first_slice_window_3D();
-    Window slice_in  = window_collapsed.first_slice_window_3D();
-    Window slice_out = window_collapsed.first_slice_window_3D();
-
-    // Setup slice
-    slice.set(Window::DimX, Window::Dimension(0, static_cast<int>(_convolved_dims.first), 1));
-    slice.set(Window::DimY, Window::Dimension(0, static_cast<int>(_convolved_dims.second), 1));
-
-    // Setup output slice
-    slice_out.set(Window::DimX, Window::Dimension(0, _output->info()->dimension(0), _num_elems_processed_per_iteration));
-    slice_out.set(Window::DimY, Window::Dimension(0, _output->info()->dimension(1), 1));
-    slice_out.set(Window::DimZ, Window::Dimension(0, 1, 1));
-
-    // we need top/left pad to be included in valid region
-    if(_input->info()->data_type() == DataType::F16)
-    {
-        (dynamic_cast<TensorInfo *>(_input->info()))->init(_input->info()->tensor_shape(), _input->info()->num_channels(), _input->info()->data_type(), _input->info()->strides_in_bytes(), 0,
-                                                           _input->info()->total_size());
-    }
-
-    _kernel.use();
-
-    do
-    {
-        unsigned int idx = 0;
-        add_3D_tensor_argument(idx, _input, 1, slice_in);
-        add_2D_tensor_argument(idx, _output, 2, slice_out);
-        _kernel.set_argument(idx++, static_cast<unsigned int>(_input->info()->strides_in_bytes()[3]));
-        _kernel.set_argument(idx++, static_cast<unsigned int>(_output->info()->strides_in_bytes()[3]));
-        _kernel.update_shader_params();
-
-        enqueue(*this, slice);
-    }
-    while(window_collapsed.slide_window_slice_3D(slice) && window_collapsed.slide_window_slice_3D(slice_out) && window_collapsed.slide_window_slice_3D(slice_in));
-}
-
-void GCIm2ColKernel::run_reduced(const Window &window)
-{
-    ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
-    ARM_COMPUTE_ERROR_ON_MISMATCHING_WINDOWS(IGCKernel::window(), window);
-
-    Window out_window;
-    out_window.use_tensor_dimensions(_output->info()->tensor_shape());
-
-    Window out_slice = out_window.first_slice_window_1D();
-    Window in_slice  = window.first_slice_window_3D();
-
-    _kernel.use();
-
-    // Run kernel
-    do
-    {
-        // Set arguments
-        unsigned int idx = 0;
-
-        add_3D_tensor_argument(idx, _input, 1, in_slice);
-        add_1D_tensor_argument(idx, _output, 2, out_slice);
-        _kernel.set_argument(idx++, _input->info()->dimension(0));
-        _kernel.set_argument(idx++, _input->info()->dimension(1));
-        _kernel.update_shader_params();
-
-        enqueue(*this, in_slice);
-    }
-    while(window.slide_window_slice_3D(in_slice) && out_window.slide_window_slice_1D(out_slice));
-}
diff --git a/src/core/GLES_COMPUTE/kernels/GCNormalizationLayerKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCNormalizationLayerKernel.cpp
deleted file mode 100644
index 094d895442..0000000000
--- a/src/core/GLES_COMPUTE/kernels/GCNormalizationLayerKernel.cpp
+++ /dev/null
@@ -1,121 +0,0 @@
-/*
- * Copyright (c) 2017-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/core/GLES_COMPUTE/kernels/GCNormalizationLayerKernel.h"
-
-#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h"
-#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h"
-#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Utils.h"
-#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/Window.h"
-#include "support/StringSupport.h"
-
-#include <string>
-
-using namespace arm_compute;
-
-GCNormalizationLayerKernel::GCNormalizationLayerKernel()
-    : _input(nullptr), _squared_input(nullptr), _output(nullptr), _border_size(0)
-{
-}
-
-BorderSize GCNormalizationLayerKernel::border_size() const
-{
-    return _border_size;
-}
-
-void GCNormalizationLayerKernel::configure(const IGCTensor *input, const IGCTensor *squared_input, IGCTensor *output, NormalizationLayerInfo norm_info)
-{
-    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32);
-    ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
-    ARM_COMPUTE_ERROR_ON_MSG(!(norm_info.norm_size() % 2), "Normalization size should be odd");
-    ARM_COMPUTE_ERROR_ON_MSG(norm_info.type() == NormType::IN_MAP_2D, "2D In-Map Normalization not implemented");
-
-    // Set build options
-    std::set<std::string> build_opts;
-
-    _input         = input;
-    _squared_input = squared_input;
-    _output        = output;
-
-    const bool         is_in_map    = norm_info.is_in_map();
-    const unsigned int border_width = is_in_map ? std::min(norm_info.norm_size() / 2, 3U) : 0;
-    _border_size                    = BorderSize(0, border_width);
-
-    // Set kernel static arguments
-    std::string func_name = ((norm_info.type() == NormType::IN_MAP_1D) ? "IN_MAP_1D" : "CROSS_MAP");
-    build_opts.emplace(("#define " + func_name));
-    build_opts.emplace(("#define COEFF " + float_to_string_with_full_precision(norm_info.scale_coeff())));
-    build_opts.emplace(("#define BETA " + float_to_string_with_full_precision(norm_info.beta())));
-    build_opts.emplace(("#define KAPPA " + float_to_string_with_full_precision(norm_info.kappa())));
-    build_opts.emplace(("#define RADIUS " + support::cpp11::to_string(norm_info.norm_size() / 2)));
-    build_opts.emplace(("#define LOCAL_SIZE_X " + support::cpp11::to_string(1)));
-    build_opts.emplace(("#define LOCAL_SIZE_Y " + support::cpp11::to_string(1)));
-    build_opts.emplace(("#define LOCAL_SIZE_Z " + support::cpp11::to_string(1)));
-
-    // Create kernel
-    _kernel = static_cast<GCKernel>(GCKernelLibrary::get().create_kernel("normalization_layer", build_opts));
-
-    // Configure kernel window
-    const unsigned int num_elems_processed_per_iteration = 1;
-    const unsigned int num_elems_read_per_iteration      = num_elems_processed_per_iteration + 2 * (norm_info.norm_size() / 2);
-
-    Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration));
-
-    AccessWindowHorizontal input_access(input->info(), -_border_size.left, num_elems_read_per_iteration);
-    AccessWindowHorizontal squared_input_access(squared_input->info(), -_border_size.left, num_elems_read_per_iteration);
-    AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration);
-
-    update_window_and_padding(win, input_access, squared_input_access, output_access);
-
-    output_access.set_valid_region(win, input->info()->valid_region());
-
-    IGCKernel::configure(win);
-}
-
-void GCNormalizationLayerKernel::run(const Window &window)
-{
-    ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
-    ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window);
-
-    _kernel.use();
-
-    Window slice = window.first_slice_window_3D();
-
-    do
-    {
-        unsigned int idx     = 0;
-        unsigned int binding = 1;
-        add_3D_tensor_argument(idx, _input, binding++, slice);
-        add_3D_tensor_argument(idx, _squared_input, binding++, slice);
-        add_3D_tensor_argument(idx, _output, binding++, slice);
-
-        _kernel.update_shader_params();
-
-        enqueue(*this, slice);
-    }
-    while(window.slide_window_slice_3D(slice));
-}
diff --git a/src/core/GLES_COMPUTE/kernels/GCNormalizePlanarYUVLayerKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCNormalizePlanarYUVLayerKernel.cpp
deleted file mode 100644
index ff885da879..0000000000
--- a/src/core/GLES_COMPUTE/kernels/GCNormalizePlanarYUVLayerKernel.cpp
+++ /dev/null
@@ -1,167 +0,0 @@
-/*
- * Copyright (c) 2017-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/core/GLES_COMPUTE/kernels/GCNormalizePlanarYUVLayerKernel.h"
-
-#include "arm_compute/core/AccessWindowStatic.h"
-#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h"
-#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h"
-#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/Utils.h"
-#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/Window.h"
-
-#include "support/StringSupport.h"
-
-using namespace arm_compute;
-
-namespace
-{
-Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *mean, const ITensorInfo *std)
-{
-    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16);
-    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(output);
-    ARM_COMPUTE_RETURN_ERROR_ON(input->data_layout() != DataLayout::NCHW);
-
-    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, mean, std);
-    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(mean, std);
-    ARM_COMPUTE_RETURN_ERROR_ON_MSG(mean->num_dimensions() > 1, "mean and std must be vectors");
-
-    const unsigned int channel_idx = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::CHANNEL);
-    ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(channel_idx) != mean->dimension(0));
-
-    // Checks performed when output is configured
-    if(output->total_size() != 0)
-    {
-        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
-        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output);
-    }
-
-    return Status{};
-}
-
-std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *output, ITensorInfo *mean, ITensorInfo *std)
-{
-    // Output tensor auto initialization if not yet initialized
-    auto_init_if_empty(*output, *input->clone());
-
-    const unsigned int num_elems_processed_per_iteration = 4;
-
-    // Configure kernel window
-    Window win = calculate_max_window(*input, Steps(num_elems_processed_per_iteration));
-
-    AccessWindowHorizontal input_access(input, 0, num_elems_processed_per_iteration);
-    AccessWindowHorizontal output_access(output, 0, num_elems_processed_per_iteration);
-    const int              mean_padding = ceil_to_multiple(mean->dimension(0), num_elems_processed_per_iteration) - mean->dimension(0);
-    const int              std_padding  = ceil_to_multiple(std->dimension(0), num_elems_processed_per_iteration) - std->dimension(0);
-    AccessWindowStatic     mean_access(mean, 0, 0, mean->dimension(0) + mean_padding, mean->dimension(1));
-    AccessWindowStatic     std_access(std, 0, 0, std->dimension(0) + std_padding, std->dimension(1));
-
-    const bool window_changed = update_window_and_padding(win, input_access, output_access, mean_access, std_access);
-    output_access.set_valid_region(win, input->valid_region());
-
-    Status err = (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{};
-    return std::make_pair(err, win);
-}
-} // namespace
-
-GCNormalizePlanarYUVLayerKernel::GCNormalizePlanarYUVLayerKernel()
-    : _input(nullptr), _output(nullptr), _mean(nullptr), _std(nullptr)
-{
-}
-
-void GCNormalizePlanarYUVLayerKernel::configure(const IGCTensor *input, IGCTensor *output, const IGCTensor *mean, const IGCTensor *std)
-{
-    ARM_COMPUTE_ERROR_ON_NULLPTR(input, output, mean, std);
-
-    // Output tensor auto initialization if not yet initialized
-    auto_init_if_empty(*output->info(), *input->info()->clone());
-
-    // Perform validation step
-    ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), mean->info(), std->info()));
-
-    _input  = input;
-    _output = output;
-    _mean   = mean;
-    _std    = std;
-
-    // Set build options
-    std::set<std::string> build_opts;
-    build_opts.emplace(("#define LOCAL_SIZE_X " + support::cpp11::to_string(1)));
-    build_opts.emplace(("#define LOCAL_SIZE_Y " + support::cpp11::to_string(1)));
-    build_opts.emplace(("#define LOCAL_SIZE_Z " + support::cpp11::to_string(1)));
-
-    // Create kernel
-    _kernel = static_cast<GCKernel>(GCKernelLibrary::get().create_kernel("normalize_planar_yuv_layer", build_opts));
-
-    // Configure kernel window
-    auto win_config = validate_and_configure_window(input->info(), output->info(), mean->info(), std->info());
-    ARM_COMPUTE_ERROR_THROW_ON(std::get<0>(win_config));
-
-    IGCKernel::configure(std::get<1>(win_config));
-}
-
-Status GCNormalizePlanarYUVLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *mean, const ITensorInfo *std)
-{
-    ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, mean, std));
-    ARM_COMPUTE_RETURN_ON_ERROR(std::get<0>(validate_and_configure_window(input->clone().get(), output->clone().get(), mean->clone().get(), std->clone().get())));
-    return Status{};
-}
-
-void GCNormalizePlanarYUVLayerKernel::run(const Window &window)
-{
-    ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
-    ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window);
-
-    _kernel.use();
-
-    _output->set_needs_shifting(true);
-
-    Window slice = window.first_slice_window_3D();
-
-    Window slice_in;
-    //slice_in.use_tensor_dimensions(_mean->info()->tensor_shape());
-    slice_in = window.first_slice_window_1D();
-    slice_in.set(Window::DimX, Window::Dimension(0, 0, 0));
-
-    unsigned int idx = 2 * num_arguments_per_3D_tensor();
-    add_1D_tensor_argument(idx, _mean, 3, slice_in);
-    add_1D_tensor_argument(idx, _std, 4, slice_in);
-
-    slice_in = window.first_slice_window_3D();
-
-    slice.shift(Window::DimX, -(_output->info()->padding()).left);
-
-    do
-    {
-        idx = 0;
-        add_3D_tensor_argument(idx, _input, 1, slice_in);
-        add_3D_tensor_argument(idx, _output, 2, slice);
-
-        _kernel.update_shader_params();
-
-        enqueue(*this, slice);
-    }
-    while(window.slide_window_slice_3D(slice) && window.slide_window_slice_3D(slice_in));
-}
diff --git a/src/core/GLES_COMPUTE/kernels/GCPixelWiseMultiplicationKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCPixelWiseMultiplicationKernel.cpp
deleted file mode 100644
index 69c97a846a..0000000000
--- a/src/core/GLES_COMPUTE/kernels/GCPixelWiseMultiplicationKernel.cpp
+++ /dev/null
@@ -1,123 +0,0 @@
-/*
- * Copyright (c) 2017-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/core/GLES_COMPUTE/kernels/GCPixelWiseMultiplicationKernel.h"
-
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h"
-#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h"
-#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h"
-#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/Window.h"
-#include "support/StringSupport.h"
-
-#include <cmath>
-#include <cstdlib>
-#include <set>
-#include <string>
-using namespace arm_compute;
-
-GCPixelWiseMultiplicationKernel::GCPixelWiseMultiplicationKernel()
-    : _input1(nullptr), _input2(nullptr), _output(nullptr)
-{
-}
-
-void GCPixelWiseMultiplicationKernel::configure(const IGCTensor *input1, const IGCTensor *input2, IGCTensor *output, float scale)
-{
-    ARM_COMPUTE_ERROR_ON_NULLPTR(input1, input2, output);
-    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input1, 1, DataType::F32);
-    ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input1, input2);
-    ARM_COMPUTE_ERROR_ON_NULLPTR(output);
-    ARM_COMPUTE_ERROR_ON_MSG(scale < 0, "Scale cannot be negative. ");
-
-    // Auto initialize output if not initialized
-    {
-        set_shape_if_empty(*output->info(), input1->info()->tensor_shape());
-        set_format_if_unknown(*output->info(), Format::F32);
-    }
-
-    ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(input1, input2, output);
-    ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input1, input2, output);
-    ARM_COMPUTE_ERROR_ON_MSG(scale < 0, "Scale cannot be negative. ");
-
-    _input1 = input1;
-    _input2 = input2;
-    _output = output;
-
-    std::string data_type;
-    std::string compute_type;
-
-    // Set kernel build options
-    std::set<std::string> build_opts;
-    build_opts.emplace("#define LOCAL_SIZE_X " + support::cpp11::to_string(1));
-    build_opts.emplace("#define LOCAL_SIZE_Y " + support::cpp11::to_string(1));
-    build_opts.emplace("#define LOCAL_SIZE_Z " + support::cpp11::to_string(1));
-
-    build_opts.emplace("#define SCALE " + support::cpp11::to_string(scale));
-
-    // Create kernel
-    _kernel = static_cast<GCKernel>(GCKernelLibrary::get().create_kernel("pixelwise_mul_float", build_opts));
-
-    // Configure kernel window
-    constexpr unsigned int num_elems_processed_per_iteration = 1;
-
-    Window win = calculate_max_window(*input1->info(), Steps(num_elems_processed_per_iteration));
-
-    AccessWindowHorizontal input1_access(input1->info(), 0, num_elems_processed_per_iteration);
-    AccessWindowHorizontal input2_access(input2->info(), 0, num_elems_processed_per_iteration);
-    AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration);
-
-    update_window_and_padding(win, input1_access, input2_access, output_access);
-
-    ValidRegion valid_region = intersect_valid_regions(input1->info()->valid_region(),
-                                                       input2->info()->valid_region());
-    output_access.set_valid_region(win, valid_region);
-
-    IGCKernel::configure(win);
-}
-
-void GCPixelWiseMultiplicationKernel::run(const Window &window)
-{
-    ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
-    ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IGCKernel::window(), window);
-
-    _kernel.use();
-
-    Window slice = window.first_slice_window_3D();
-
-    do
-    {
-        unsigned int idx     = 0;
-        unsigned int binding = 1;
-        add_3D_tensor_argument(idx, _input1, binding++, slice);
-        add_3D_tensor_argument(idx, _input2, binding++, slice);
-        add_3D_tensor_argument(idx, _output, binding++, slice);
-
-        _kernel.update_shader_params();
-        enqueue(*this, slice);
-    }
-    while(window.slide_window_slice_3D(slice));
-}
diff --git a/src/core/GLES_COMPUTE/kernels/GCPoolingLayerKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCPoolingLayerKernel.cpp
deleted file mode 100644
index 36499eb4fd..0000000000
--- a/src/core/GLES_COMPUTE/kernels/GCPoolingLayerKernel.cpp
+++ /dev/null
@@ -1,372 +0,0 @@
-/*
- * Copyright (c) 2017-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/core/GLES_COMPUTE/kernels/GCPoolingLayerKernel.h"
-
-#include "arm_compute/core/AccessWindowStatic.h"
-#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h"
-#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h"
-#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h"
-#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Utils.h"
-#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/Window.h"
-#include "support/StringSupport.h"
-
-#include <set>
-#include <string>
-#include <tuple>
-
-using namespace arm_compute;
-
-namespace
-{
-// Internal window config info
-using GCPoolingConfig = std::pair<unsigned int, BorderSize>; //num_elems_processed_per_iteration, border_size
-
-void auto_init(const ITensorInfo *input, ITensorInfo *output, unsigned int pooled_w, unsigned int pooled_h)
-{
-    TensorShape output_shape{ input->tensor_shape() };
-    output_shape.set(0, pooled_w);
-    output_shape.set(1, pooled_h);
-
-    auto_init_if_empty(*output, input->clone()->set_tensor_shape(output_shape));
-}
-
-Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const PoolingLayerInfo &pool_info, const ITensorInfo *indices)
-{
-    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
-    ARM_COMPUTE_RETURN_ERROR_ON_MSG(indices, "Indices not supported in GLES backend");
-    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32);
-    ARM_COMPUTE_RETURN_ERROR_ON_MSG((is_data_type_quantized_asymmetric(input->data_type()) && pool_info.pool_type == PoolingType::L2),
-                                    "Unsupported combination of parameters!");
-    ARM_COMPUTE_RETURN_ERROR_ON(!pool_info.pad_stride_info.padding_is_symmetric());
-
-    const bool         is_global_pooling = pool_info.is_global_pooling;
-    const unsigned int pool_size         = is_global_pooling ? input->tensor_shape().x() : pool_info.pool_size.width;
-
-    ARM_COMPUTE_RETURN_ERROR_ON_MSG(is_global_pooling && (input->tensor_shape().x() != input->tensor_shape().y()),
-                                    "Global pooling is supported only with rectangular inputs!");
-    ARM_COMPUTE_RETURN_ERROR_ON_MSG(!is_global_pooling && ((pool_info.pad_stride_info.pad().first >= pool_size) || (pool_info.pad_stride_info.pad().second >= pool_size)),
-                                    "Invalid pool size and pool pad combination!");
-    ARM_COMPUTE_RETURN_ERROR_ON_MSG(pool_info.pool_size.width != pool_info.pool_size.height, "Invalid Pool size, width not equal to height!");
-
-    // Checks performed when output is configured
-    if(output->total_size() != 0)
-    {
-        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
-
-        unsigned int pooled_w = 0;
-        unsigned int pooled_h = 0;
-        std::tie(pooled_w, pooled_h) = scaled_dimensions(input->dimension(0),
-                                                         input->dimension(1),
-                                                         pool_size,
-                                                         pool_size,
-                                                         pool_info.pad_stride_info);
-        ARM_COMPUTE_RETURN_ERROR_ON_MSG((output->dimension(0) != pooled_w) || (output->dimension(1) != pooled_h),
-                                        "Invalid output pooling dimensions!");
-    }
-
-    return Status{};
-}
-
-std::tuple<Status, Window, GCPoolingConfig> validate_and_configure_window(ITensorInfo *input, ITensorInfo *output, const PoolingLayerInfo &pool_info)
-{
-    int                 pool_pad_x      = 0;
-    int                 pool_pad_y      = 0;
-    int                 pool_stride_x   = 0;
-    int                 pool_stride_y   = 0;
-    unsigned int        pooled_w        = 0;
-    unsigned int        pooled_h        = 0;
-    int                 pool_size       = pool_info.pool_size.width;
-    const PadStrideInfo pad_stride_info = pool_info.pad_stride_info;
-    std::tie(pool_pad_x, pool_pad_y)       = pad_stride_info.pad();
-    std::tie(pool_stride_x, pool_stride_y) = pad_stride_info.stride();
-
-    ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
-
-    // Update pool size in case of global pooling
-    pool_size = pool_info.is_global_pooling ? input->dimension(0) : pool_size;
-
-    // Check output dimensions
-    std::tie(pooled_w, pooled_h) = scaled_dimensions(input->dimension(0),
-                                                     input->dimension(1),
-                                                     pool_size,
-                                                     pool_size,
-                                                     pad_stride_info);
-
-    auto_init(input, output, pooled_w, pooled_h);
-
-    BorderSize border_size = BorderSize(pool_pad_y, pool_pad_x);
-
-    const int input_width  = input->dimension(0);
-    const int input_height = input->dimension(1);
-
-    unsigned int num_elems_processed_per_iteration = 1;
-
-    // Create kernel
-    if(pool_size == 3)
-    {
-        // Check if we have pool3x3 with stride_x less equal than 3. In these cases, run an optimized OpenGLES kernel where
-        // each thread computes 4 output elements
-        const bool is_pool3x3_stride_le3 = (pool_size == 3) && (pool_stride_x <= 3);
-
-        int num_elems_read_per_iteration = pool_size;
-
-        if(input->data_type() == DataType::F32)
-        {
-            if(is_pool3x3_stride_le3)
-            {
-                // Change the number of elements processed and number of elements read per iteration for pooling 3x3 with stride less equal than 3
-                num_elems_processed_per_iteration = 4;
-                num_elems_read_per_iteration      = pool_size * (pool_stride_x + 1);
-            }
-        }
-        else
-        {
-            if(is_pool3x3_stride_le3)
-            {
-                num_elems_processed_per_iteration = 4;
-            }
-            else
-            {
-                num_elems_processed_per_iteration = 2;
-            }
-        }
-
-        const int upper_bound_w = ((pooled_w - 1) * pool_stride_x - pool_pad_x + num_elems_read_per_iteration) - input_width;
-        const int upper_bound_h = ((pooled_h - 1) * pool_stride_y - pool_pad_y + pool_size) - input_height;
-
-        border_size.right  = std::max(upper_bound_w, pool_pad_x);
-        border_size.bottom = std::max(upper_bound_h, pool_pad_y);
-    }
-    else // Run general case
-    {
-        if(input->data_type() == DataType::F32)
-        {
-            num_elems_processed_per_iteration = 1;
-        }
-        else
-        {
-            num_elems_processed_per_iteration = 2;
-        }
-
-        const int upper_bound_w = ((pooled_w - 1) * pool_stride_x - pool_pad_x + pool_size) - input_width;
-        const int upper_bound_h = ((pooled_h - 1) * pool_stride_y - pool_pad_y + pool_size) - input_height;
-
-        border_size.right  = std::max(upper_bound_w, pool_pad_x);
-        border_size.bottom = std::max(upper_bound_h, pool_pad_y);
-    }
-    // Configure kernel window
-    Window win = calculate_max_window(*output, Steps(num_elems_processed_per_iteration));
-
-    if(input->data_type() == DataType::F32)
-    {
-        AccessWindowStatic     input_access(input, -pool_pad_x, -pool_pad_y, input_width + border_size.right, input_height + border_size.bottom);
-        AccessWindowHorizontal output_access(output, 0, num_elems_processed_per_iteration);
-        bool                   window_changed = update_window_and_padding(win, input_access, output_access);
-        output_access.set_valid_region(win, ValidRegion(Coordinates(), output->tensor_shape()));
-        Status err = (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{};
-        return std::make_tuple(err, win, GCPoolingConfig(num_elems_processed_per_iteration, border_size));
-    }
-    else
-    {
-        // Calculate output right and bottom border
-        const int output_width          = output->dimension(0);
-        const int output_height         = output->dimension(1);
-        const int output_padding_right  = ceil_to_multiple(output_width, num_elems_processed_per_iteration) - output_width;
-        const int output_padding_bottom = ceil_to_multiple(output_height, 1) - output_height;
-
-        const int input_total_width    = std::max(int(input->padding().left), int(pool_pad_x)) + input_width + std::max(int(input->padding().right), int(pool_pad_x));
-        const int input_padding_right  = ceil_to_multiple(input_total_width, num_elems_processed_per_iteration) - input_width - pool_pad_x;
-        const int input_total_height   = std::max(int(input->padding().top), int(pool_pad_y)) + input_height + std::max(int(input->padding().bottom), int(pool_pad_y));
-        const int input_padding_bottom = input_total_height - input_height - pool_pad_y;
-
-        // Configure kernel window
-        AccessWindowStatic input_access(input, -pool_pad_x, -pool_pad_y, input_width + input_padding_right, input_height + input_padding_bottom);
-        AccessWindowStatic output_access(output, 0, 0, output_width + output_padding_right, output_height + output_padding_bottom);
-        bool               window_changed = update_window_and_padding(win, input_access, output_access);
-        output_access.set_valid_region(win, ValidRegion(Coordinates(), output->tensor_shape()));
-        Status err = (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{};
-        return std::make_tuple(err, win, GCPoolingConfig(num_elems_processed_per_iteration, border_size));
-    }
-}
-} // namespace
-
-GCPoolingLayerKernel::GCPoolingLayerKernel()
-    : _input(nullptr), _output(nullptr), _indices(nullptr), _pool_info(), _border_size(0), _num_elems_processed_per_iteration(1)
-{
-}
-
-BorderSize GCPoolingLayerKernel::border_size() const
-{
-    return _border_size;
-}
-
-void GCPoolingLayerKernel::configure(const IGCTensor *input, IGCTensor *output, const PoolingLayerInfo &pool_info, IGCTensor *indices)
-{
-    int                 pool_pad_x      = 0;
-    int                 pool_pad_y      = 0;
-    int                 pool_stride_x   = 0;
-    int                 pool_stride_y   = 0;
-    unsigned int        pooled_w        = 0;
-    unsigned int        pooled_h        = 0;
-    const PoolingType   pool_type       = pool_info.pool_type;
-    int                 pool_size       = pool_info.pool_size.width;
-    const PadStrideInfo pad_stride_info = pool_info.pad_stride_info;
-    const bool          exclude_padding = pool_info.exclude_padding;
-    std::tie(pool_pad_x, pool_pad_y)       = pad_stride_info.pad();
-    std::tie(pool_stride_x, pool_stride_y) = pad_stride_info.stride();
-
-    ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
-
-    // Update pool size in case of global pooling
-    pool_size = pool_info.is_global_pooling ? input->info()->dimension(0) : pool_size;
-
-    // Check output dimensions
-    std::tie(pooled_w, pooled_h) = scaled_dimensions(input->info()->dimension(0),
-                                                     input->info()->dimension(1),
-                                                     pool_size,
-                                                     pool_size,
-                                                     pad_stride_info);
-
-    auto_init(input->info(), output->info(), pooled_w, pooled_h);
-
-    ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), pool_info, (indices) ? indices->info() : nullptr));
-
-    // Set instance variables
-    _input     = input;
-    _output    = output;
-    _pool_info = pool_info;
-    _indices   = indices;
-    // Set build options
-    std::set<std::string> build_opts;
-    build_opts.emplace("#define LOCAL_SIZE_X " + support::cpp11::to_string(1));
-    build_opts.emplace("#define LOCAL_SIZE_Y " + support::cpp11::to_string(1));
-    build_opts.emplace("#define LOCAL_SIZE_Z " + support::cpp11::to_string(1));
-    if(input->info()->data_type() == DataType::F32)
-    {
-        build_opts.insert("#define DATA_TYPE_FP32");
-    }
-    else
-    {
-        build_opts.insert("#define DATA_TYPE_FP16");
-    }
-    if(exclude_padding)
-    {
-        build_opts.emplace("#define EXCLUDE_PADDING");
-    }
-    build_opts.emplace(("#define POOL_" + string_from_pooling_type(pool_type)));
-    build_opts.emplace(("#define STRIDE_X " + support::cpp11::to_string(pool_stride_x)));
-    build_opts.emplace(("#define MAX_WIDTH " + support::cpp11::to_string(input->info()->dimension(0) + (exclude_padding ? 0 : pool_pad_x))));
-    build_opts.emplace(("#define MAX_HEIGHT " + support::cpp11::to_string(input->info()->dimension(1) + (exclude_padding ? 0 : pool_pad_y))));
-    build_opts.emplace(("#define STRIDE_Y " + support::cpp11::to_string(pool_stride_y)));
-    build_opts.emplace(("#define PAD_X " + support::cpp11::to_string(pool_pad_x)));
-    build_opts.emplace(("#define PAD_Y " + support::cpp11::to_string(pool_pad_y)));
-
-    // Create kernel
-    if((pool_size == 2) || (pool_size == 3) || (pool_size == 7))
-    {
-        // Check if we have pool3x3 with stride_x less equal than 3. In these cases, run an optimized OpenGLES kernel where
-        // each thread computes 4 output elements
-        const bool is_pool3x3_stride_le3 = (pool_size == 3) && (pool_stride_x <= 3);
-
-        std::string kernel_name = "pooling_layer_" + support::cpp11::to_string(pool_size);
-        if(is_pool3x3_stride_le3)
-        {
-            build_opts.insert("#define POOLING_LAYER_3_OPTIMIZED");
-            _kernel = static_cast<GCKernel>(GCKernelLibrary::get().create_kernel(kernel_name + "_optimized", build_opts));
-        }
-        else
-        {
-            build_opts.insert("#define POOLING_LAYER_" + support::cpp11::to_string(pool_size));
-            _kernel = static_cast<GCKernel>(GCKernelLibrary::get().create_kernel(kernel_name, build_opts));
-        }
-    }
-    else // Run general case
-    {
-        build_opts.emplace(("#define POOL_SIZE " + support::cpp11::to_string(pool_size)));
-
-        build_opts.insert("#define POOLING_LAYER_N");
-        _kernel = static_cast<GCKernel>(GCKernelLibrary::get().create_kernel("pooling_layer_n", build_opts));
-    }
-    // Configure kernel window
-    auto win_config = validate_and_configure_window(input->info(), output->info(), pool_info);
-    ARM_COMPUTE_ERROR_THROW_ON(std::get<0>(win_config));
-
-    IGCKernel::configure(std::get<1>(win_config));
-    GCPoolingConfig pooling_config     = std::get<2>(win_config);
-    _num_elems_processed_per_iteration = pooling_config.first;
-    _border_size                       = pooling_config.second;
-}
-
-Status GCPoolingLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const PoolingLayerInfo &pool_info, const ITensorInfo *indices)
-{
-    ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, pool_info, indices));
-    ARM_COMPUTE_RETURN_ON_ERROR(std::get<0>(validate_and_configure_window(input->clone().get(), output->clone().get(), pool_info)));
-
-    return Status{};
-}
-
-void GCPoolingLayerKernel::run(const Window &window)
-{
-    ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
-    ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window);
-
-    unsigned int pool_pad_x;
-    unsigned int pool_pad_y;
-    unsigned int pool_stride_x;
-    unsigned int pool_stride_y;
-    std::tie(pool_pad_x, pool_pad_y)       = _pool_info.pad_stride_info.pad();
-    std::tie(pool_stride_x, pool_stride_y) = _pool_info.pad_stride_info.stride();
-
-    _kernel.use();
-
-    _output->set_needs_shifting(true);
-
-    Window window_collapsed = window.collapse_if_possible(IGCKernel::window(), Window::DimZ);
-
-    Window slice         = window_collapsed.first_slice_window_3D();
-    Window slice_in_orig = window_collapsed.first_slice_window_3D();
-
-    slice.shift(Window::DimX, -(_output->info()->padding()).left);
-
-    do
-    {
-        // Upsample input by pool size
-        Window in_slice(slice_in_orig); // NOLINT
-        in_slice.set(Window::DimX, Window::Dimension(in_slice.x().start() - pool_pad_x, in_slice.x().end() * pool_stride_x, pool_stride_x * _num_elems_processed_per_iteration));
-        in_slice.set(Window::DimY, Window::Dimension(in_slice.y().start() - pool_pad_y, in_slice.y().end() * pool_stride_y, pool_stride_y));
-
-        // Set inputs
-        unsigned int idx = 0;
-        add_3D_tensor_argument(idx, _input, 1, in_slice);
-        add_3D_tensor_argument(idx, _output, 2, slice);
-
-        _kernel.update_shader_params();
-        enqueue(*this, slice);
-    }
-    while(window_collapsed.slide_window_slice_3D(slice) && window_collapsed.slide_window_slice_3D(slice_in_orig));
-}
diff --git a/src/core/GLES_COMPUTE/kernels/GCScaleKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCScaleKernel.cpp
deleted file mode 100644
index a85a0e7e98..0000000000
--- a/src/core/GLES_COMPUTE/kernels/GCScaleKernel.cpp
+++ /dev/null
@@ -1,169 +0,0 @@
-/*
- * Copyright (c) 2016-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/core/GLES_COMPUTE/kernels/GCScaleKernel.h"
-
-#include "arm_compute/core/AccessWindowStatic.h"
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h"
-#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h"
-#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h"
-#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h"
-#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Validate.h"
-#include "support/StringSupport.h"
-
-#include <set>
-#include <string>
-
-using namespace arm_compute;
-
-BorderSize GCScaleKernel::border_size() const
-{
-    return BorderSize(1);
-}
-
-void GCScaleKernel::configure(const IGCTensor *input, IGCTensor *output, InterpolationPolicy policy, bool border_undefined, SamplingPolicy sampling_policy)
-{
-    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16);
-    ARM_COMPUTE_ERROR_ON_NULLPTR(output);
-    ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
-    ARM_COMPUTE_ERROR_ON(output == input);
-    ARM_COMPUTE_ERROR_ON(policy != InterpolationPolicy::NEAREST_NEIGHBOR);
-
-    _input  = input;
-    _output = output;
-
-    // Compute the ratio between source width/height and destination width/height
-    const auto wr = static_cast<float>(input->info()->dimension(0)) / static_cast<float>(output->info()->dimension(0));
-    const auto hr = static_cast<float>(input->info()->dimension(1)) / static_cast<float>(output->info()->dimension(1));
-
-    // Compute actual border size
-    BorderSize border = border_undefined ? BorderSize(0) : border_size();
-
-    // Area interpolation behaves as Nearest Neighbour in case of up-sampling
-    if(policy == InterpolationPolicy::AREA && wr <= 1.f && hr <= 1.f)
-    {
-        policy = InterpolationPolicy::NEAREST_NEIGHBOR;
-    }
-    else
-    {
-        ARM_COMPUTE_ERROR_ON(policy == InterpolationPolicy::AREA);
-    }
-
-    // Create kernel
-    std::set<std::string> build_opts;
-    build_opts.emplace("#define LOCAL_SIZE_X " + support::cpp11::to_string(1));
-    build_opts.emplace("#define LOCAL_SIZE_Y " + support::cpp11::to_string(1));
-    build_opts.emplace("#define LOCAL_SIZE_Z " + support::cpp11::to_string(1));
-
-    build_opts.emplace("#define DATA_TYPE_FP16");
-    build_opts.emplace("#define BORDER_SIZE " + support::cpp11::to_string(border.right));
-    if(sampling_policy == SamplingPolicy::TOP_LEFT)
-    {
-        build_opts.emplace("#define SAMPLING_POLICY_TOP_LEFT");
-    }
-    else
-    {
-        build_opts.emplace("#define SAMPLING_POLICY_CENTER");
-    }
-
-    // Configure kernel window
-    unsigned int num_elems_processed_per_iteration = 4;
-    unsigned int input_width_alignment             = 2;
-
-    // performance optimization for 2x upscaling with no border
-    if((fabs(wr - 0.5) < 1e-6) && (fabs(hr - 0.5) < 1e-6) && border_undefined)
-    {
-        num_elems_processed_per_iteration = 8;
-        input_width_alignment             = 4;
-        build_opts.emplace("#define SCALE_NEAREST_8X");
-    }
-    else
-    {
-        build_opts.emplace("#define SCALE_NEAREST_GENERIC");
-    }
-
-    std::string interpolation_name = string_from_interpolation_policy(policy); // NOLINT
-    std::transform(interpolation_name.begin(), interpolation_name.end(), interpolation_name.begin(), ::tolower);
-    std::string kernel_name = "scale_" + interpolation_name;
-    _kernel                 = GCKernelLibrary::get().create_kernel(kernel_name, build_opts);
-
-    Window win = calculate_max_window(*output->info(), Steps(num_elems_processed_per_iteration));
-
-    const ValidRegion &input_valid_region = input->info()->valid_region();
-
-    const int total_width   = border.left + input_valid_region.anchor[0] + input_valid_region.shape[0] + border.right;
-    const int padding_right = ceil_to_multiple(total_width, input_width_alignment) - border.left - input_valid_region.anchor[0] - input_valid_region.shape[0];
-
-    // Reads can occur within the valid region of the input
-    AccessWindowStatic input_access(input->info(),
-                                    input_valid_region.anchor[0] - border.left, input_valid_region.anchor[1] - border.top,
-                                    input_valid_region.anchor[0] + input_valid_region.shape[0] + padding_right,
-                                    input_valid_region.anchor[1] + input_valid_region.shape[1] + border.bottom);
-
-    AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration);
-
-    update_window_and_padding(win, input_access, output_access);
-
-    output_access.set_valid_region(win, calculate_valid_region_scale(*(input->info()),
-                                                                     output->info()->tensor_shape(),
-                                                                     policy,
-                                                                     sampling_policy,
-                                                                     border_undefined));
-
-    IGCKernel::configure(win);
-
-    unsigned int idx = 2 * num_arguments_per_3D_tensor(); //Skip the tensor parameters
-    _kernel.set_argument<float>(idx++, static_cast<float>(input->info()->dimension(0)));
-    _kernel.set_argument<float>(idx++, static_cast<float>(input->info()->dimension(1)));
-    _kernel.set_argument<float>(idx++, wr);
-    _kernel.set_argument<float>(idx++, hr);
-}
-
-void GCScaleKernel::run(const Window &window)
-{
-    ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
-    ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window);
-
-    _kernel.use();
-
-    _output->set_needs_shifting(true);
-
-    Window slice    = window.first_slice_window_3D();
-    Window slice_in = window.first_slice_window_3D();
-
-    slice.shift(Window::DimX, -(_output->info()->padding()).left);
-
-    do
-    {
-        unsigned int idx = 0;
-        add_3D_tensor_argument(idx, _input, 1, slice_in);
-        add_3D_tensor_argument(idx, _output, 2, slice);
-        _kernel.update_shader_params();
-        enqueue(*this, slice);
-    }
-    while(window.slide_window_slice_3D(slice) && window.slide_window_slice_3D(slice_in));
-}
diff --git a/src/core/GLES_COMPUTE/kernels/GCSoftmaxLayerKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCSoftmaxLayerKernel.cpp
deleted file mode 100644
index f250801eaf..0000000000
--- a/src/core/GLES_COMPUTE/kernels/GCSoftmaxLayerKernel.cpp
+++ /dev/null
@@ -1,276 +0,0 @@
-/*
- * Copyright (c) 2017-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/core/GLES_COMPUTE/kernels/GCSoftmaxLayerKernel.h"
-
-#include "arm_compute/core/AccessWindowStatic.h"
-#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h"
-#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h"
-#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h"
-#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Utils.h"
-#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/Window.h"
-#include "support/StringSupport.h"
-
-#include <set>
-#include <string>
-
-using namespace arm_compute;
-
-void GCLogits1DMaxKernel::configure(const IGCTensor *input, IGCTensor *output)
-{
-    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32);
-    ARM_COMPUTE_ERROR_ON_NULLPTR(output);
-
-    // Softmax across the x dimension
-    TensorShape output_shape{ input->info()->tensor_shape() };
-    output_shape.set(0, 1);
-
-    // Output auto initialization if not yet initialized
-    auto_init_if_empty(*output->info(), output_shape, 1, input->info()->data_type());
-
-    ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
-    ARM_COMPUTE_ERROR_ON_MISMATCHING_DIMENSIONS(output->info()->tensor_shape(), output_shape);
-
-    _input  = input;
-    _output = output;
-
-    // Set build options
-    std::set<std::string> build_opts;
-    std::string           dt_name = (input->info()->data_type() == DataType::F32) ? "DATA_TYPE_FP32" : "DATA_TYPE_FP16";
-    build_opts.insert("#define " + dt_name);
-    build_opts.emplace("#define LOCAL_SIZE_X " + support::cpp11::to_string(1));
-    build_opts.emplace("#define LOCAL_SIZE_Y " + support::cpp11::to_string(1));
-    build_opts.emplace("#define LOCAL_SIZE_Z " + support::cpp11::to_string(1));
-    build_opts.insert("#define SOFTMAX_LAYER_MAX");
-
-    // Tell the kernel that the width is not a multiple of 8
-    if((input->info()->dimension(0) % 8) != 0)
-    {
-        build_opts.insert("#define NON_MULTIPLE_OF_8");
-    }
-
-    // Create kernel
-    _kernel = static_cast<GCKernel>(GCKernelLibrary::get().create_kernel("softmax_layer_max", build_opts));
-
-    // Set fixed arguments
-    unsigned int idx = 2 * num_arguments_per_3D_tensor(); //Skip the input and output parameters
-    _kernel.set_argument(idx++, input->info()->dimension(0));
-
-    // Configure kernel window
-    // The kernel loops over all elements in steps of 8
-    const unsigned int num_elems_processed_per_iteration = ceil_to_multiple(input->info()->dimension(0), 8);
-    unsigned int       num_elems_written_per_iteration   = 1;
-    if(input->info()->data_type() == DataType::F16)
-    {
-        num_elems_written_per_iteration = 2;
-    }
-
-    Window                 win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration));
-    AccessWindowHorizontal input_access(input->info(), 0, num_elems_processed_per_iteration);
-    AccessWindowHorizontal output_access(output->info(), 0, num_elems_written_per_iteration);
-
-    update_window_and_padding(win, input_access, output_access);
-
-    output_access.set_valid_region(win, ValidRegion(Coordinates(), output->info()->tensor_shape()));
-
-    IGCKernel::configure(win);
-}
-
-GCLogits1DShiftExpSumKernel::GCLogits1DShiftExpSumKernel()
-    : _input(nullptr), _max(nullptr), _output(nullptr), _sum(nullptr)
-{
-}
-
-void GCLogits1DShiftExpSumKernel::configure(const IGCTensor *input, const IGCTensor *max, IGCTensor *output, IGCTensor *sum)
-{
-    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32);
-    ARM_COMPUTE_ERROR_ON_NULLPTR(max, sum, output);
-
-    // Output auto initialization if not yet initialized
-    auto_init_if_empty(*sum->info(), max->info()->tensor_shape(), 1, input->info()->data_type());
-    auto_init_if_empty(*output->info(), input->info()->tensor_shape(), 1, input->info()->data_type());
-
-    ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output, max, sum);
-    ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(input, output);
-    ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(max, sum);
-
-    _input  = input;
-    _max    = max;
-    _output = output;
-    _sum    = sum;
-
-    // Set build options
-    std::set<std::string> build_opts;
-    std::string           dt_name = (input->info()->data_type() == DataType::F32) ? "DATA_TYPE_FP32" : "DATA_TYPE_FP16";
-    build_opts.insert("#define " + dt_name);
-    build_opts.emplace("#define LOCAL_SIZE_X " + support::cpp11::to_string(1));
-    build_opts.emplace("#define LOCAL_SIZE_Y " + support::cpp11::to_string(1));
-    build_opts.emplace("#define LOCAL_SIZE_Z " + support::cpp11::to_string(1));
-    build_opts.insert("#define SOFTMAX_LAYER_SHIFT_EXP_SUM");
-
-    // Tell the kernel that the width is not a multiple of 8
-    if((input->info()->dimension(0) % 8) != 0)
-    {
-        build_opts.insert("#define NON_MULTIPLE_OF_8");
-    }
-
-    // Create kernel
-    _kernel = static_cast<GCKernel>(GCKernelLibrary::get().create_kernel("softmax_layer_shift_exp_sum", build_opts));
-
-    // Set fixed arguments
-    unsigned int idx = 4 * num_arguments_per_3D_tensor(); //Skip the input and output parameters
-    _kernel.set_argument(idx++, input->info()->dimension(0));
-
-    // Configure window
-    // The kernel loops over all elements in steps of 8
-    const unsigned int num_elems_processed_per_iteration = ceil_to_multiple(input->info()->dimension(0), 8);
-    unsigned int       num_elems_written_per_iteration   = 1;
-    if(input->info()->data_type() == DataType::F16)
-    {
-        num_elems_written_per_iteration = 2;
-    }
-
-    Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration));
-
-    AccessWindowHorizontal input_access(input->info(), 0, num_elems_processed_per_iteration);
-    AccessWindowHorizontal max_access(max->info(), 0, num_elems_written_per_iteration);
-    AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration);
-    AccessWindowHorizontal sum_access(sum->info(), 0, num_elems_written_per_iteration);
-
-    update_window_and_padding(win, input_access, max_access, output_access, sum_access);
-
-    output_access.set_valid_region(win, input->info()->valid_region());
-    sum_access.set_valid_region(win, ValidRegion(Coordinates(), sum->info()->tensor_shape()));
-
-    IGCKernel::configure(win);
-}
-
-void GCLogits1DShiftExpSumKernel::run(const Window &window)
-{
-    ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
-    ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window);
-
-    Window window_collapsed = window.collapse_if_possible(IGCKernel::window(), Window::DimZ);
-    Window slice            = window_collapsed.first_slice_window_3D();
-
-    _kernel.use();
-
-    do
-    {
-        unsigned int idx     = 0;
-        unsigned int binding = 1; // SSBO binding starts from 1.
-        // Set inputs
-        add_3D_tensor_argument(idx, _input, binding++, slice);
-        add_3D_tensor_argument(idx, _max, binding++, slice);
-        add_3D_tensor_argument(idx, _output, binding++, slice);
-        add_3D_tensor_argument(idx, _sum, binding++, slice);
-        _kernel.update_shader_params();
-        enqueue(*this, slice);
-    }
-    while(window_collapsed.slide_window_slice_3D(slice));
-}
-
-GCLogits1DNormKernel::GCLogits1DNormKernel()
-    : _input(nullptr), _sum(nullptr), _output(nullptr)
-{
-}
-
-void GCLogits1DNormKernel::configure(const IGCTensor *input, const IGCTensor *sum, IGCTensor *output)
-{
-    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32);
-    ARM_COMPUTE_ERROR_ON_NULLPTR(sum, output);
-
-    // Output auto initialization if not yet initialized
-    auto_init_if_empty(*output->info(), input->info()->tensor_shape(), 1, input->info()->data_type());
-
-    ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, sum, output);
-    ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(input, output);
-
-    _input  = input;
-    _sum    = sum;
-    _output = output;
-
-    // Set build options
-    std::set<std::string> build_opts;
-    std::string           dt_name = (input->info()->data_type() == DataType::F32) ? "DATA_TYPE_FP32" : "DATA_TYPE_FP16";
-    build_opts.insert("#define " + dt_name);
-    build_opts.emplace("#define LOCAL_SIZE_X " + support::cpp11::to_string(1));
-    build_opts.emplace("#define LOCAL_SIZE_Y " + support::cpp11::to_string(1));
-    build_opts.emplace("#define LOCAL_SIZE_Z " + support::cpp11::to_string(1));
-    build_opts.insert("#define SOFTMAX_LAYER_NORM");
-
-    // Create kernel
-    _kernel = static_cast<GCKernel>(GCKernelLibrary::get().create_kernel("softmax_layer_norm", build_opts));
-
-    // Configure window
-    constexpr unsigned int num_elems_processed_per_iteration = 8;
-    unsigned int           num_elems_written_per_iteration   = 1;
-    if(input->info()->data_type() == DataType::F16)
-    {
-        num_elems_written_per_iteration = 2;
-    }
-
-    Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration));
-
-    AccessWindowHorizontal input_access(input->info(), 0, num_elems_processed_per_iteration);
-    AccessWindowStatic     sum_access(sum->info(), 0, 0, num_elems_written_per_iteration, sum->info()->dimension(1));
-    AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration);
-
-    update_window_and_padding(win, input_access, sum_access, output_access);
-
-    output_access.set_valid_region(win, input->info()->valid_region());
-
-    IGCKernel::configure(win);
-}
-
-void GCLogits1DNormKernel::run(const Window &window)
-{
-    ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
-    ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window);
-
-    Window window_collapsed = window.collapse_if_possible(IGCKernel::window(), Window::DimZ);
-    Window slice            = window_collapsed.first_slice_window_3D();
-
-    _kernel.use();
-
-    do
-    {
-        Window sum_slice = slice;
-        sum_slice.set(Window::DimX, Window::Dimension(0, 1, 1));
-
-        unsigned int idx     = 0;
-        unsigned int binding = 1; // SSBO binding starts from 1.
-        // Set inputs
-        add_3D_tensor_argument(idx, _input, binding++, slice);
-        add_3D_tensor_argument(idx, _sum, binding++, slice);
-        add_3D_tensor_argument(idx, _output, binding++, slice);
-
-        _kernel.update_shader_params();
-        enqueue(*this, slice);
-    }
-    while(window_collapsed.slide_window_slice_3D(slice));
-}
diff --git a/src/core/GLES_COMPUTE/kernels/GCTensorShiftKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCTensorShiftKernel.cpp
deleted file mode 100644
index 16dafaf543..0000000000
--- a/src/core/GLES_COMPUTE/kernels/GCTensorShiftKernel.cpp
+++ /dev/null
@@ -1,105 +0,0 @@
-/*
- * Copyright (c) 2017-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/core/GLES_COMPUTE/kernels/GCTensorShiftKernel.h"
-
-#include "arm_compute/core/AccessWindowStatic.h"
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h"
-#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h"
-#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/IAccessWindow.h"
-#include "arm_compute/core/ITensor.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/core/Validate.h"
-#include "support/StringSupport.h"
-
-using namespace arm_compute;
-
-GCTensorShiftKernel::GCTensorShiftKernel()
-    : _input(nullptr), _lws(gles::NDRange(1U, 1U, 1U)), _left_padding(0)
-{
-}
-
-void GCTensorShiftKernel::configure(IGCTensor *input)
-{
-    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32);
-
-    _input = input;
-
-    std::set<std::string> options;
-    options.emplace("#define LOCAL_SIZE_X " + support::cpp11::to_string(_lws[0]));
-    options.emplace("#define LOCAL_SIZE_Y " + support::cpp11::to_string(_lws[1]));
-    options.emplace("#define LOCAL_SIZE_Z " + support::cpp11::to_string(_lws[2]));
-    options.emplace("#define WIDTH " + support::cpp11::to_string(input->info()->dimension(0)));
-
-    std::string dt_name = (input->info()->data_type() == DataType::F32) ? "DATA_TYPE_FP32" : "DATA_TYPE_FP16";
-    options.emplace(("#define " + dt_name));
-
-    unsigned int num_elems_written_per_iteration_x = input->info()->dimension(0) + input->info()->padding().left + input->info()->padding().right;
-
-    std::stringstream kernel_name;
-    kernel_name << "tensorshift";
-
-    _kernel = static_cast<GCKernel>(GCKernelLibrary::get().create_kernel(kernel_name.str(), options));
-
-    Window win;
-    win.set(Window::DimX, Window::Dimension(0, num_elems_written_per_iteration_x, num_elems_written_per_iteration_x));
-    win.use_tensor_dimensions(input->info()->tensor_shape(), Window::DimY);
-    win.use_tensor_dimensions(input->info()->tensor_shape(), Window::DimZ);
-
-    _left_padding = _input->info()->padding().left;
-
-    IGCKernel::configure(win);
-}
-
-void GCTensorShiftKernel::run(const Window &window)
-{
-    ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
-    ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window);
-
-    if(int(_left_padding) == 0 || !_input->needs_shifting())
-    {
-        return;
-    }
-
-    _kernel.use();
-
-    // Get initial windows
-    Window slice = window.first_slice_window_3D();
-    slice.shift(Window::DimX, -(_input->info()->padding()).left);
-
-    do
-    {
-        unsigned int idx = 0;
-
-        add_3D_tensor_argument(idx, _input, 1, slice);
-
-        _kernel.set_argument(idx++, static_cast<unsigned int>(_left_padding));
-
-        _kernel.update_shader_params();
-        enqueue(*this, slice, _lws);
-    }
-    while(window.slide_window_slice_3D(slice));
-}
diff --git a/src/core/GLES_COMPUTE/kernels/GCTransposeKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCTransposeKernel.cpp
deleted file mode 100644
index ead50ce1f5..0000000000
--- a/src/core/GLES_COMPUTE/kernels/GCTransposeKernel.cpp
+++ /dev/null
@@ -1,143 +0,0 @@
-/*
- * Copyright (c) 2017-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/core/GLES_COMPUTE/kernels/GCTransposeKernel.h"
-
-#include "arm_compute/core/AccessWindowStatic.h"
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h"
-#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h"
-#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h"
-#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/Types.h"
-#include "support/StringSupport.h"
-
-#include <set>
-#include <string>
-
-using namespace arm_compute;
-
-void GCTransposeKernel::configure(const IGCTensor *input, IGCTensor *output)
-{
-    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32);
-    ARM_COMPUTE_ERROR_ON_NULLPTR(output);
-
-    TensorShape  output_shape{ input->info()->tensor_shape() };
-    const size_t w_out = input->info()->dimension(1);
-    const size_t h_out = input->info()->dimension(0);
-    output_shape.set(0, w_out);
-    output_shape.set(1, h_out);
-
-    // Output tensor auto inizialitation if not yet initialized
-    auto_init_if_empty(*output->info(), output_shape, 1, input->info()->data_type());
-
-    ARM_COMPUTE_ERROR_ON_MISMATCHING_DIMENSIONS(output->info()->tensor_shape(), output_shape);
-    ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
-
-    _input  = input;
-    _output = output;
-
-    // for better performance
-    if(w_out < 512 && h_out < 512)
-    {
-        _lws_hint = gles::NDRange(8U, 1U, 1U);
-    }
-    else
-    {
-        _lws_hint = gles::NDRange(1U, 8U, 1U);
-    }
-
-    std::set<std::string> build_opts;
-    std::string           dt_name = (input->info()->data_type() == DataType::F32) ? "DATA_TYPE_FP32" : "DATA_TYPE_FP16";
-    build_opts.emplace(("#define " + dt_name));
-    build_opts.emplace("#define LOCAL_SIZE_X " + support::cpp11::to_string(_lws_hint[0]));
-    build_opts.emplace("#define LOCAL_SIZE_Y " + support::cpp11::to_string(_lws_hint[1]));
-    build_opts.emplace("#define LOCAL_SIZE_Z " + support::cpp11::to_string(_lws_hint[2]));
-
-    // Configure kernel window
-    unsigned int num_elems_processed_per_iteration = 4;
-
-    if(input->info()->data_type() == DataType::F16)
-    {
-#define TRANSPOSE_8X8
-
-#if defined(TRANSPOSE_4X4)
-        build_opts.emplace(("#define TRANSPOSE_4X4"));
-        num_elems_processed_per_iteration = 4;
-#elif defined(TRANSPOSE_8X8) /* TRANSPOSE_4X4 */
-        if(w_out != h_out)
-        {
-            build_opts.emplace("#define TRANSPOSE_8X8");
-            num_elems_processed_per_iteration = 8;
-        }
-        else
-        {
-            build_opts.emplace("#define TRANSPOSE_8X8_SQUARE");
-            num_elems_processed_per_iteration = 8;
-        }
-#endif                       /* TRANSPOSE_4X4 */
-    }
-
-    // Create kernel
-    _kernel = static_cast<GCKernel>(GCKernelLibrary::get().create_kernel("transpose", build_opts));
-
-    const unsigned int width_aligned  = num_elems_processed_per_iteration * static_cast<unsigned int>(_lws_hint[0]);
-    const unsigned int height_aligned = num_elems_processed_per_iteration * static_cast<unsigned int>(_lws_hint[1]);
-
-    AccessWindowStatic input_access(input->info(), 0, 0,
-                                    ceil_to_multiple(input->info()->dimension(0), width_aligned),
-                                    ceil_to_multiple(input->info()->dimension(1), height_aligned));
-    AccessWindowStatic output_access(output->info(), 0, 0,
-                                     ceil_to_multiple(output->info()->dimension(0), height_aligned),
-                                     ceil_to_multiple(output->info()->dimension(1), width_aligned));
-
-    Window win = calculate_max_window(*input->info(), Steps(width_aligned, height_aligned));
-    win.set_dimension_step(Window::DimX, num_elems_processed_per_iteration);
-    win.set_dimension_step(Window::DimY, num_elems_processed_per_iteration);
-    update_window_and_padding(win, input_access, output_access);
-    output_access.set_valid_region(win, output->info()->valid_region());
-
-    IGCKernel::configure(win);
-}
-
-void GCTransposeKernel::run(const Window &window)
-{
-    ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
-    ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IGCKernel::window(), window);
-
-    _kernel.use();
-
-    Window slice = window.first_slice_window_2D();
-
-    do
-    {
-        unsigned int idx = 0;
-
-        add_2D_tensor_argument(idx, _input, 1, slice);
-        add_2D_tensor_argument(idx, _output, 2, slice);
-        _kernel.update_shader_params();
-        enqueue(*this, slice, _lws_hint);
-    }
-    while(window.slide_window_slice_2D(slice));
-}
diff --git a/src/core/GLES_COMPUTE/kernels/GCWeightsReshapeKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCWeightsReshapeKernel.cpp
deleted file mode 100644
index 07c09fa4ea..0000000000
--- a/src/core/GLES_COMPUTE/kernels/GCWeightsReshapeKernel.cpp
+++ /dev/null
@@ -1,141 +0,0 @@
-/*
- * Copyright (c) 2017-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/core/GLES_COMPUTE/kernels/GCWeightsReshapeKernel.h"
-
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h"
-#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h"
-#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h"
-#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/utils/misc/ShapeCalculator.h"
-#include "support/StringSupport.h"
-
-#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h"
-
-using namespace arm_compute;
-using namespace arm_compute::misc::shape_calculator;
-
-GCWeightsReshapeKernel::GCWeightsReshapeKernel()
-    : _input(nullptr), _biases(nullptr), _output(nullptr)
-{
-}
-
-void GCWeightsReshapeKernel::configure(const IGCTensor *input, const IGCTensor *biases, IGCTensor *output)
-{
-    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32);
-    ARM_COMPUTE_ERROR_ON_NULLPTR(output);
-
-    // Output tensor auto inizialitation if not yet initialized
-    auto_init_if_empty(*output->info(), input->info()->clone()->set_tensor_shape(compute_weights_reshaped_shape(*input->info(), (biases != nullptr))));
-
-    ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
-
-    if(biases != nullptr)
-    {
-        ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, biases);
-        ARM_COMPUTE_ERROR_ON((input->info()->num_dimensions() == 4) && (biases->info()->num_dimensions() != 1));
-        ARM_COMPUTE_ERROR_ON((input->info()->num_dimensions() == 5) && (biases->info()->num_dimensions() != 2));
-        ARM_COMPUTE_ERROR_ON((input->info()->num_dimensions() == 4) && (biases->info()->dimension(0) != input->info()->tensor_shape()[3]));
-        ARM_COMPUTE_ERROR_ON((input->info()->num_dimensions() == 5) && (biases->info()->dimension(0) != input->info()->tensor_shape()[3] || biases->info()->dimension(1) != input->info()->tensor_shape()[4]));
-    }
-
-    _biases = biases;
-    _output = output;
-    _input  = input;
-
-    // Create build options
-    std::set<std::string> build_opts;
-    std::string           dt_name = (input->info()->data_type() == DataType::F32) ? "DATA_TYPE_FP32" : "DATA_TYPE_FP16";
-    build_opts.emplace("#define " + dt_name);
-    build_opts.emplace("#define LOCAL_SIZE_X " + support::cpp11::to_string(1));
-    build_opts.emplace("#define LOCAL_SIZE_Y " + support::cpp11::to_string(1));
-    build_opts.emplace("#define LOCAL_SIZE_Z " + support::cpp11::to_string(1));
-    build_opts.emplace("#define RESHAPE_TO_COLUMNS");
-    if(biases != nullptr)
-    {
-        build_opts.emplace("#define HAS_BIAS");
-    }
-
-    // Create kernel
-    _kernel = static_cast<GCKernel>(GCKernelLibrary::get().create_kernel("reshape_to_columns", build_opts));
-
-    // Set static arguments
-    unsigned int idx = num_arguments_per_3D_tensor() + num_arguments_per_2D_tensor();
-    idx += (biases != nullptr) ? num_arguments_per_1D_tensor() : 0;
-    _kernel.set_argument(idx++, _input->info()->dimension(0));
-    _kernel.set_argument(idx++, _input->info()->dimension(1));
-    _kernel.set_argument(idx++, _input->info()->dimension(2));
-    _kernel.set_argument(idx++, _input->info()->dimension(3));
-
-    // Configure window
-    Window win = calculate_max_window(*input->info(), Steps());
-
-    // The GCWeightsReshapeKernel doesn't need padding so update_window_and_padding() can be skipped
-    output->info()->set_valid_region(ValidRegion(Coordinates(), output->info()->tensor_shape()));
-    IGCKernel::configure(win);
-}
-
-void GCWeightsReshapeKernel::run(const Window &window)
-{
-    ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
-    ARM_COMPUTE_ERROR_ON_MISMATCHING_WINDOWS(IGCKernel::window(), window);
-
-    Window out_window;
-    out_window.use_tensor_dimensions(_output->info()->tensor_shape());
-
-    Window in_slice  = window.first_slice_window_3D();
-    Window out_slice = out_window.first_slice_window_2D();
-
-    Window biases_window;
-    Window biases_slice;
-
-    if(_biases != nullptr)
-    {
-        biases_window.use_tensor_dimensions(_biases->info()->tensor_shape());
-        biases_slice = biases_window.first_slice_window_1D();
-    }
-
-    _kernel.use();
-
-    do
-    {
-        // Set arguments
-        unsigned idx = 0;
-        add_3D_tensor_argument(idx, _input, 1, in_slice);
-        add_2D_tensor_argument(idx, _output, 2, out_slice);
-        if(_biases != nullptr)
-        {
-            add_1D_tensor_argument(idx, _biases, 3, biases_slice);
-            biases_window.slide_window_slice_1D(biases_slice);
-        }
-
-        _kernel.update_shader_params();
-        // Run kernel
-        enqueue(*this, in_slice);
-    }
-    while(window.slide_window_slice_4D(in_slice) && out_window.slide_window_slice_2D(out_slice));
-}