aboutsummaryrefslogtreecommitdiff
path: root/src/core/GLES_COMPUTE/kernels
diff options
context:
space:
mode:
Diffstat (limited to 'src/core/GLES_COMPUTE/kernels')
-rw-r--r--src/core/GLES_COMPUTE/kernels/GCAbsoluteDifferenceKernel.cpp107
-rw-r--r--src/core/GLES_COMPUTE/kernels/GCActivationLayerKernel.cpp134
-rw-r--r--src/core/GLES_COMPUTE/kernels/GCArithmeticAdditionKernel.cpp159
-rw-r--r--src/core/GLES_COMPUTE/kernels/GCBatchNormalizationLayerKernel.cpp245
-rw-r--r--src/core/GLES_COMPUTE/kernels/GCCol2ImKernel.cpp124
-rw-r--r--src/core/GLES_COMPUTE/kernels/GCDepthConcatenateLayerKernel.cpp112
-rw-r--r--src/core/GLES_COMPUTE/kernels/GCDepthwiseConvolutionLayer3x3Kernel.cpp250
-rw-r--r--src/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.cpp448
-rw-r--r--src/core/GLES_COMPUTE/kernels/GCDropoutLayerKernel.cpp107
-rw-r--r--src/core/GLES_COMPUTE/kernels/GCFillBorderKernel.cpp166
-rw-r--r--src/core/GLES_COMPUTE/kernels/GCGEMMInterleave4x4Kernel.cpp125
-rw-r--r--src/core/GLES_COMPUTE/kernels/GCGEMMMatrixAccumulateBiasesKernel.cpp129
-rw-r--r--src/core/GLES_COMPUTE/kernels/GCGEMMMatrixAdditionKernel.cpp101
-rw-r--r--src/core/GLES_COMPUTE/kernels/GCGEMMMatrixMultiplyKernel.cpp336
-rw-r--r--src/core/GLES_COMPUTE/kernels/GCGEMMTranspose1xWKernel.cpp125
-rw-r--r--src/core/GLES_COMPUTE/kernels/GCIm2ColKernel.cpp302
-rw-r--r--src/core/GLES_COMPUTE/kernels/GCNormalizationLayerKernel.cpp121
-rw-r--r--src/core/GLES_COMPUTE/kernels/GCNormalizePlanarYUVLayerKernel.cpp167
-rw-r--r--src/core/GLES_COMPUTE/kernels/GCPixelWiseMultiplicationKernel.cpp123
-rw-r--r--src/core/GLES_COMPUTE/kernels/GCPoolingLayerKernel.cpp372
-rw-r--r--src/core/GLES_COMPUTE/kernels/GCScaleKernel.cpp169
-rw-r--r--src/core/GLES_COMPUTE/kernels/GCSoftmaxLayerKernel.cpp276
-rw-r--r--src/core/GLES_COMPUTE/kernels/GCTensorShiftKernel.cpp105
-rw-r--r--src/core/GLES_COMPUTE/kernels/GCTransposeKernel.cpp143
-rw-r--r--src/core/GLES_COMPUTE/kernels/GCWeightsReshapeKernel.cpp141
25 files changed, 0 insertions, 4587 deletions
diff --git a/src/core/GLES_COMPUTE/kernels/GCAbsoluteDifferenceKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCAbsoluteDifferenceKernel.cpp
deleted file mode 100644
index e7ff13692e..0000000000
--- a/src/core/GLES_COMPUTE/kernels/GCAbsoluteDifferenceKernel.cpp
+++ /dev/null
@@ -1,107 +0,0 @@
-/*
- * Copyright (c) 2017-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/core/GLES_COMPUTE/kernels/GCAbsoluteDifferenceKernel.h"
-
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h"
-#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h"
-#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h"
-#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/Window.h"
-#include "support/StringSupport.h"
-
-#include <set>
-#include <string>
-
-using namespace arm_compute;
-
-GCAbsoluteDifferenceKernel::GCAbsoluteDifferenceKernel()
- : _input1(nullptr), _input2(nullptr), _output(nullptr)
-{
-}
-
-/** Configure the absolute-difference kernel.
- *
- * Checks that all three tensors are single-channel U8 with matching data types,
- * builds the "absdiff" shader with a 1x1x1 local size, and derives the execution
- * window from @p input1 in steps of four elements. A 4x1 access rectangle is
- * registered for every tensor and the output valid region is set to the
- * intersection of the two input valid regions.
- *
- * @param[in]  input1 First source tensor (U8).
- * @param[in]  input2 Second source tensor (U8).
- * @param[out] output Destination tensor (U8).
- */
-void GCAbsoluteDifferenceKernel::configure(const IGCTensor *input1, const IGCTensor *input2, IGCTensor *output)
-{
-    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input1, 1, DataType::U8);
-    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input2, 1, DataType::U8);
-    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8);
-    ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input1, input2, output);
-
-    _input1 = input1;
-    _input2 = input2;
-    _output = output;
-
-    // Number of elements handled per step along X; also the width of each access rectangle.
-    constexpr unsigned int num_elems_processed_per_iteration = 4;
-
-    // Shader build options: one work item per work group in every dimension.
-    const std::string     unit_local_size = support::cpp11::to_string(1);
-    std::set<std::string> build_opts;
-    build_opts.emplace("#define LOCAL_SIZE_X " + unit_local_size);
-    build_opts.emplace("#define LOCAL_SIZE_Y " + unit_local_size);
-    build_opts.emplace("#define LOCAL_SIZE_Z " + unit_local_size);
-
-    _kernel = static_cast<GCKernel>(GCKernelLibrary::get().create_kernel("absdiff", build_opts));
-
-    // Execution window and the per-tensor access patterns.
-    Window win = calculate_max_window(*input1->info(), Steps(num_elems_processed_per_iteration));
-
-    AccessWindowRectangle input1_access(input1->info(), 0, 0, num_elems_processed_per_iteration, 1);
-    AccessWindowRectangle input2_access(input2->info(), 0, 0, num_elems_processed_per_iteration, 1);
-    AccessWindowRectangle output_access(output->info(), 0, 0, num_elems_processed_per_iteration, 1);
-
-    update_window_and_padding(win, input1_access, input2_access, output_access);
-
-    // The output is only valid where both inputs are valid.
-    output_access.set_valid_region(win, intersect_valid_regions(input1->info()->valid_region(),
-                                                                input2->info()->valid_region()));
-
-    IGCKernel::configure(win);
-}
-
-/** Enqueue the shader once per 2D slice of @p window.
- *
- * @param[in] window Region to execute; checked against the window set in configure().
- */
-void GCAbsoluteDifferenceKernel::run(const Window &window)
-{
-    ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
-    ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IGCKernel::window(), window);
-
-    _kernel.use();
-
-    Window plane = window.first_slice_window_2D();
-    do
-    {
-        // Argument index restarts for every slice; SSBO bindings are numbered from 1.
-        unsigned int arg_idx = 0;
-        add_2D_tensor_argument(arg_idx, _input1, 1, plane);
-        add_2D_tensor_argument(arg_idx, _input2, 2, plane);
-        add_2D_tensor_argument(arg_idx, _output, 3, plane);
-
-        _kernel.update_shader_params();
-
-        enqueue(*this, plane);
-    }
-    while(window.slide_window_slice_2D(plane));
-}
diff --git a/src/core/GLES_COMPUTE/kernels/GCActivationLayerKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCActivationLayerKernel.cpp
deleted file mode 100644
index 5aad8070fc..0000000000
--- a/src/core/GLES_COMPUTE/kernels/GCActivationLayerKernel.cpp
+++ /dev/null
@@ -1,134 +0,0 @@
-/*
- * Copyright (c) 2017-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/core/GLES_COMPUTE/kernels/GCActivationLayerKernel.h"
-
-#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h"
-#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h"
-#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/IAccessWindow.h"
-#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Utils.h"
-#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/Window.h"
-#include "support/StringSupport.h"
-
-#include <set>
-#include <string>
-
-using namespace arm_compute;
-
-GCActivationLayerKernel::GCActivationLayerKernel(GCCoreRuntimeContext *ctx)
- : _input(nullptr), _output(nullptr), _ctx(ctx)
-{
-}
-
-/** Configure the activation kernel, either in place or with a separate output.
- *
- * @param[in,out] input    Source tensor (single-channel F16 or F32). It is also the destination
- *                         when @p output is nullptr.
- * @param[out]    output   Destination tensor, or nullptr to run in place. If its info is empty it
- *                         is initialized from the input's shape and data type.
- * @param[in]     act_info Activation function and its a/b parameters; these are baked into the
- *                         shader as preprocessor defines.
- */
-void GCActivationLayerKernel::configure(IGCTensor *input, IGCTensor *output, ActivationLayerInfo act_info)
-{
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32);
-
- // Default to in-place execution: the output aliases the input unless a separate output is given below
- _input = input;
- _output = input;
-
- if(output != nullptr)
- {
- // Output auto initialization if not yet initialized
- auto_init_if_empty(*output->info(), input->info()->tensor_shape(), 1, input->info()->data_type());
-
- ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(input, output);
- ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
-
- _output = output;
- }
-
- // As many elements per iteration as fit in 4 bytes
- unsigned int num_elems_processed_per_iteration = 4 / input->info()->element_size();
-
- // Set build options
- std::set<std::string> build_opts;
- std::string dt_name = (input->info()->data_type() == DataType::F32) ? "DATA_TYPE_FP32" : "DATA_TYPE_FP16";
- build_opts.emplace(("#define " + string_from_activation_func(act_info.activation())));
- build_opts.emplace(("#define " + dt_name));
- build_opts.emplace(("#define A_VAL " + float_to_string_with_full_precision(act_info.a())));
- build_opts.emplace(("#define B_VAL " + float_to_string_with_full_precision(act_info.b())));
- build_opts.emplace(("#define LOCAL_SIZE_X " + support::cpp11::to_string(1)));
- build_opts.emplace(("#define LOCAL_SIZE_Y " + support::cpp11::to_string(1)));
- build_opts.emplace(("#define LOCAL_SIZE_Z " + support::cpp11::to_string(1)));
-
- // Create kernel
- _kernel = create_opengl_kernel(_ctx, "activation_layer", build_opts);
-
- // Configure kernel window
- Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration));
-
- if(output != nullptr)
- {
- // Separate output: register access windows for both tensors and mark the whole output as valid
- AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration);
-
- update_window_and_padding(win,
- AccessWindowHorizontal(input->info(), 0, num_elems_processed_per_iteration),
- output_access);
-
- output->info()->set_valid_region(ValidRegion(Coordinates(), output->info()->tensor_shape()));
- }
- else
- {
- // In-place: only the input tensor's access window is registered
- update_window_and_padding(win,
- AccessWindowHorizontal(input->info(), 0, num_elems_processed_per_iteration));
- }
-
- IGCKernel::configure(win);
-}
-
-/** Enqueue the activation shader over every 3D slice of @p window.
- *
- * The window is collapsed along Z when possible. Two slices walk the window in lock-step:
- * `slice` describes the output and is shifted left by the output's left padding, while
- * `slice_in` describes the input and is only shifted when the kernel runs in place
- * (input and output are the same tensor).
- *
- * @param[in] window Region to execute; must be a valid sub-window of the configured window.
- */
-void GCActivationLayerKernel::run(const Window &window)
-{
-    ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
-    ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IGCKernel::window(), window);
-
-    _kernel.use();
-
-    _output->set_needs_shifting(true);
-
-    Window collapsed = window.collapse_if_possible(IGCKernel::window(), Window::DimZ);
-    Window slice     = collapsed.first_slice_window_3D();
-    Window slice_in  = collapsed.first_slice_window_3D();
-
-    // The output slice is shifted left by the output's left padding.
-    slice.shift(Window::DimX, -(_output->info()->padding()).left);
-
-    // In place, the input is the same tensor as the output, so its slice gets the same shift.
-    if(_input == _output)
-    {
-        slice_in.shift(Window::DimX, -(_input->info()->padding()).left);
-    }
-
-    do
-    {
-        unsigned int idx     = 0;
-        unsigned int binding = 1; // SSBO binding starts from 1.
-        // Bind each tensor with its own slice: the input with slice_in and the output with the
-        // shifted output slice, matching the other kernels in this directory.
-        add_3D_tensor_argument(idx, _input, binding++, slice_in);
-        add_3D_tensor_argument(idx, _output, binding++, slice);
-        _kernel.update_shader_params();
-        enqueue(*this, slice);
-    }
-    while(collapsed.slide_window_slice_3D(slice) && collapsed.slide_window_slice_3D(slice_in));
-}
diff --git a/src/core/GLES_COMPUTE/kernels/GCArithmeticAdditionKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCArithmeticAdditionKernel.cpp
deleted file mode 100644
index 0a5fe11347..0000000000
--- a/src/core/GLES_COMPUTE/kernels/GCArithmeticAdditionKernel.cpp
+++ /dev/null
@@ -1,159 +0,0 @@
-/*
- * Copyright (c) 2016-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/core/GLES_COMPUTE/kernels/GCArithmeticAdditionKernel.h"
-
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h"
-#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h"
-#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h"
-#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/IAccessWindow.h"
-#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Utils.h"
-#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/Window.h"
-#include "support/StringSupport.h"
-
-#include <cstddef>
-#include <set>
-#include <string>
-
-using namespace arm_compute;
-
-namespace
-{
-/** Validate the tensor infos of an F16 arithmetic addition.
- *
- * Null pointers are reported through the returned Status rather than asserted, so that
- * GCArithmeticAdditionKernel::validate() can report them to the caller.
- *
- * @param[in] input1 First source tensor info (single-channel F16).
- * @param[in] input2 Second source tensor info (single-channel F16, same shape as @p input1).
- * @param[in] output Destination tensor info. Data type and shape are only checked when it has
- *                   already been configured (total_size() != 0).
- * @param[in] policy Overflow policy; unused by this kernel.
- *
- * @return An empty Status on success, otherwise the first error encountered.
- */
-Status validate_arguments(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ConvertPolicy policy)
-{
-    ARM_COMPUTE_UNUSED(policy);
-    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input1, input2, output);
-    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input1, 1, DataType::F16);
-    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input2, 1, DataType::F16);
-    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input1, input2);
-
-    // Validate in case of configured output
-    if(output->total_size() != 0)
-    {
-        ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::F16);
-        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input1, output);
-    }
-
-    return Status{};
-}
-
-/** Compute the execution window for the addition kernel and request the padding it needs.
- *
- * The window is derived from @p input1 in steps of eight elements, and a horizontal access
- * window of eight elements is registered for each tensor. The output valid region is set to
- * the intersection of the two input valid regions.
- *
- * @param[in,out] input1 First source tensor info.
- * @param[in,out] input2 Second source tensor info.
- * @param[in,out] output Destination tensor info.
- *
- * @return The window paired with a Status; the Status is an "Insufficient Padding!" runtime
- *         error when update_window_and_padding() reports a change.
- */
-std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output)
-{
-    constexpr unsigned int elems_per_step = 8;
-
-    Window win = calculate_max_window(*input1, Steps(elems_per_step));
-
-    AccessWindowHorizontal in1_access(input1, 0, elems_per_step);
-    AccessWindowHorizontal in2_access(input2, 0, elems_per_step);
-    AccessWindowHorizontal out_access(output, 0, elems_per_step);
-
-    const bool win_modified = update_window_and_padding(win, in1_access, in2_access, out_access);
-
-    // The output is only valid where both inputs are valid.
-    out_access.set_valid_region(win, intersect_valid_regions(input1->valid_region(), input2->valid_region()));
-
-    Status status{};
-    if(win_modified)
-    {
-        status = ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!");
-    }
-    return std::make_pair(status, win);
-}
-} // namespace
-
-GCArithmeticAdditionKernel::GCArithmeticAdditionKernel()
- : _input1(nullptr), _input2(nullptr), _output(nullptr)
-{
-}
-
-/** Configure the F16 arithmetic-addition kernel.
- *
- * An empty output is given the shape of @p input1 and format F16 before the arguments are
- * validated. The "arithmetic_add" shader is then built with a 1x1x1 local size and the
- * execution window is computed; any validation or window error is thrown.
- *
- * @param[in]  input1 First source tensor.
- * @param[in]  input2 Second source tensor.
- * @param[out] output Destination tensor; auto-initialized when empty.
- * @param[in]  policy Overflow policy; only forwarded to validation, which ignores it.
- */
-void GCArithmeticAdditionKernel::configure(const IGCTensor *input1, const IGCTensor *input2, IGCTensor *output, ConvertPolicy policy)
-{
-    ARM_COMPUTE_UNUSED(policy);
-    ARM_COMPUTE_ERROR_ON_NULLPTR(input1, input2, output);
-
-    // Fill in the output's shape and format if they have not been set yet.
-    set_shape_if_empty(*output->info(), input1->info()->tensor_shape());
-    set_format_if_unknown(*output->info(), Format::F16);
-
-    ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input1->info(), input2->info(), output->info(), policy));
-
-    _input1 = input1;
-    _input2 = input2;
-    _output = output;
-
-    // Shader build options: one work item per work group in every dimension.
-    const std::string     unit_local_size = support::cpp11::to_string(1);
-    std::set<std::string> build_opts;
-    build_opts.emplace("#define LOCAL_SIZE_X " + unit_local_size);
-    build_opts.emplace("#define LOCAL_SIZE_Y " + unit_local_size);
-    build_opts.emplace("#define LOCAL_SIZE_Z " + unit_local_size);
-
-    _kernel = static_cast<GCKernel>(GCKernelLibrary::get().create_kernel("arithmetic_add", build_opts));
-
-    // Execution window; an error here is thrown before the window is installed.
-    auto window_result = validate_and_configure_window(input1->info(), input2->info(), output->info());
-    ARM_COMPUTE_ERROR_THROW_ON(window_result.first);
-    IGCKernel::configure(window_result.second);
-}
-
-/** Check whether the given tensor infos would form a valid kernel configuration.
- *
- * The argument checks run first; the window computation is then tried on clones so that the
- * caller's tensor infos are left untouched.
- *
- * @param[in] input1 First source tensor info.
- * @param[in] input2 Second source tensor info.
- * @param[in] output Destination tensor info.
- * @param[in] policy Overflow policy, forwarded to the argument checks.
- *
- * @return An empty Status on success, otherwise the first error encountered.
- */
-Status GCArithmeticAdditionKernel::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ConvertPolicy policy)
-{
-    ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input1, input2, output, policy));
-
-    // Scratch copies: the window computation may modify the infos it is given.
-    auto input1_copy = input1->clone();
-    auto input2_copy = input2->clone();
-    auto output_copy = output->clone();
-    ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input1_copy.get(), input2_copy.get(), output_copy.get()).first);
-
-    return Status{};
-}
-
-/** Enqueue the addition shader once per 3D slice of @p window.
- *
- * Both inputs are bound with an unshifted slice; the output is bound with a slice shifted
- * left by the output's left padding, and that shifted slice is also the one enqueued.
- *
- * @param[in] window Region to execute; must be a valid sub-window of the configured window.
- */
-void GCArithmeticAdditionKernel::run(const Window &window)
-{
-    ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
-    ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IGCKernel::window(), window);
-
-    _kernel.use();
-
-    _output->set_needs_shifting(true);
-
-    Window out_slice = window.first_slice_window_3D();
-    Window in_slice  = window.first_slice_window_3D();
-
-    out_slice.shift(Window::DimX, -(_output->info()->padding()).left);
-
-    do
-    {
-        // Argument index restarts for every slice; SSBO bindings are numbered from 1.
-        unsigned int arg_idx = 0;
-        add_3D_tensor_argument(arg_idx, _input1, 1, in_slice);
-        add_3D_tensor_argument(arg_idx, _input2, 2, in_slice);
-        add_3D_tensor_argument(arg_idx, _output, 3, out_slice);
-
-        _kernel.update_shader_params();
-
-        enqueue(*this, out_slice);
-    }
-    while(window.slide_window_slice_3D(out_slice) && window.slide_window_slice_3D(in_slice));
-}
diff --git a/src/core/GLES_COMPUTE/kernels/GCBatchNormalizationLayerKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCBatchNormalizationLayerKernel.cpp
deleted file mode 100644
index a1f7cd7eca..0000000000
--- a/src/core/GLES_COMPUTE/kernels/GCBatchNormalizationLayerKernel.cpp
+++ /dev/null
@@ -1,245 +0,0 @@
-/*
- * Copyright (c) 2017-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/core/GLES_COMPUTE/kernels/GCBatchNormalizationLayerKernel.h"
-
-#include "arm_compute/core/AccessWindowStatic.h"
-#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h"
-#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h"
-#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/Utils.h"
-#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/Window.h"
-
-#include "support/StringSupport.h"
-
-using namespace arm_compute;
-
-namespace
-{
-/** Validate the tensor infos and activation settings of a batch-normalization layer.
- *
- * Every failed check is reported through the returned Status, so that
- * GCBatchNormalizationLayerKernel::validate() can report it to the caller instead of asserting.
- *
- * @param[in] input    Source tensor info (single-channel F16 or F32).
- * @param[in] output   Destination tensor info; shape and data type are only checked when it has
- *                     already been configured (total_size() != 0).
- * @param[in] mean     Mean tensor info; same data type as @p input.
- * @param[in] var      Variance tensor info; same data type as @p input and same shape as @p mean.
- * @param[in] beta     Optional beta tensor info (may be nullptr); same shape as @p mean and same
- *                     data type as @p input.
- * @param[in] gamma    Optional gamma tensor info (may be nullptr); same constraints as @p beta.
- * @param[in] epsilon  Unused by the checks.
- * @param[in] act_info Fused activation. When enabled it must be RELU, BOUNDED_RELU or
- *                     LU_BOUNDED_RELU, and b() must not exceed a().
- *
- * @return An empty Status on success, otherwise the first error encountered.
- */
-Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output,
-                          const ITensorInfo *mean, const ITensorInfo *var,
-                          const ITensorInfo *beta, const ITensorInfo *gamma,
-                          float epsilon, ActivationLayerInfo act_info)
-{
-    ARM_COMPUTE_UNUSED(epsilon);
-    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32);
-
-    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, mean, var);
-    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(mean, var);
-
-    // Validate in case of configured output
-    if(output->total_size() != 0)
-    {
-        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output);
-        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
-    }
-
-    if(beta != nullptr)
-    {
-        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(mean, beta);
-        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, beta);
-    }
-    if(gamma != nullptr)
-    {
-        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(mean, gamma);
-        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, gamma);
-    }
-    if(act_info.enabled())
-    {
-        ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() != DataType::F32 && input->data_type() != DataType::F16);
-        ARM_COMPUTE_RETURN_ERROR_ON(act_info.activation() != ActivationLayerInfo::ActivationFunction::RELU
-                                    && act_info.activation() != ActivationLayerInfo::ActivationFunction::BOUNDED_RELU
-                                    && act_info.activation() != ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU);
-        ARM_COMPUTE_RETURN_ERROR_ON(act_info.b() > act_info.a());
-    }
-    return Status{};
-}
-
-/** Compute the execution window for batch normalization and request the padding it needs.
- *
- * @param[in,out] input  Source tensor info.
- * @param[in,out] output Destination tensor info; initialized from @p input when empty.
- * @param[in,out] mean   Mean tensor info.
- * @param[in,out] var    Variance tensor info.
- * @param[in,out] beta   Optional beta tensor info (may be nullptr).
- * @param[in,out] gamma  Optional gamma tensor info (may be nullptr).
- *
- * @return The window paired with a Status; the Status is an "Insufficient Padding!" runtime
- *         error when update_window_and_padding() reports a change.
- */
-std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *output,
- ITensorInfo *mean, ITensorInfo *var,
- ITensorInfo *beta, ITensorInfo *gamma)
-{
- // Output tensor auto initialization if not yet initialized
- auto_init_if_empty(*output, input->tensor_shape(), 1, input->data_type());
-
- // One element per iteration by default, four for F16
- unsigned int num_elems_processed_per_iteration = 1;
- if(input->data_type() == DataType::F16)
- {
- num_elems_processed_per_iteration = 4;
- }
-
- // Configure kernel window
- Window win = calculate_max_window(*input, Steps(num_elems_processed_per_iteration));
-
- AccessWindowHorizontal input_access(input, 0, num_elems_processed_per_iteration);
- AccessWindowHorizontal output_access(output, 0, num_elems_processed_per_iteration);
- // The per-channel vectors get a static access window reaching 3 elements past dimension 0
- // NOTE(review): presumably so the shader can read them four elements at a time - confirm
- AccessWindowStatic mean_access(mean, 0, 0, mean->dimension(0) + 3, mean->dimension(1));
- AccessWindowStatic var_access(var, 0, 0, var->dimension(0) + 3, var->dimension(1));
-
- // beta and gamma are optional, so only the access windows of the tensors that are present
- // are passed to update_window_and_padding()
- bool window_changed = false;
- if(beta != nullptr)
- {
- AccessWindowStatic beta_access(beta, 0, 0, beta->dimension(0) + 3, beta->dimension(1));
- if(gamma != nullptr)
- {
- AccessWindowStatic gamma_access(gamma, 0, 0, gamma->dimension(0) + 3, gamma->dimension(1));
- window_changed = update_window_and_padding(win, input_access, output_access, mean_access, var_access, beta_access, gamma_access);
- }
- else
- {
- window_changed = update_window_and_padding(win, input_access, output_access, mean_access, var_access, beta_access);
- }
- }
- else
- {
- if(gamma != nullptr)
- {
- AccessWindowStatic gamma_access(gamma, 0, 0, gamma->dimension(0) + 3, gamma->dimension(1));
- window_changed = update_window_and_padding(win, input_access, output_access, mean_access, var_access, gamma_access);
- }
- else
- {
- window_changed = update_window_and_padding(win, input_access, output_access, mean_access, var_access);
- }
- }
- // The output inherits the input's valid region
- output_access.set_valid_region(win, input->valid_region());
-
- Status err = (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{};
- return std::make_pair(err, win);
-}
-} // namespace
-
-GCBatchNormalizationLayerKernel::GCBatchNormalizationLayerKernel()
- : _input(nullptr), _output(nullptr), _mean(nullptr), _var(nullptr), _beta(nullptr), _gamma(nullptr), _epsilon(0.0f)
-{
-}
-
-/** Configure the batch-normalization kernel.
- *
- * @param[in]  input    Source tensor.
- * @param[out] output   Destination tensor.
- * @param[in]  mean     Mean tensor.
- * @param[in]  var      Variance tensor.
- * @param[in]  beta     Optional beta tensor; when nullptr the shader is built with USE_DEFAULT_BETA.
- * @param[in]  gamma    Optional gamma tensor; when nullptr the shader is built with USE_DEFAULT_GAMMA.
- * @param[in]  epsilon  Epsilon value, baked into the shader as a preprocessor define.
- * @param[in]  act_info Optional fused activation; when enabled its function name and a/b values
- *                      are baked into the shader as preprocessor defines.
- */
-void GCBatchNormalizationLayerKernel::configure(const IGCTensor *input, IGCTensor *output, const IGCTensor *mean, const IGCTensor *var, const IGCTensor *beta, const IGCTensor *gamma,
- float epsilon, ActivationLayerInfo act_info)
-{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input, output, mean, var);
-
- // beta and gamma are optional, so their infos are forwarded as nullptr when absent
- ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), mean->info(), var->info(),
- (beta != nullptr) ? beta->info() : nullptr, (gamma != nullptr) ? gamma->info() : nullptr,
- epsilon, act_info));
-
- _input = input;
- _output = output;
- _mean = mean;
- _var = var;
- _beta = beta;
- _gamma = gamma;
- _epsilon = epsilon;
-
- // Set build options
- std::set<std::string> build_opts;
- std::string dt_name = (input->info()->data_type() == DataType::F32) ? "DATA_TYPE_FP32" : "DATA_TYPE_FP16";
- build_opts.emplace(("#define " + dt_name));
- // NOTE(review): the define is spelled ESPILON; presumably the shader source uses the same
- // spelling, so confirm before renaming it
- build_opts.emplace(("#define ESPILON " + float_to_string_with_full_precision(_epsilon)));
- build_opts.emplace(("#define LOCAL_SIZE_X " + support::cpp11::to_string(1)));
- build_opts.emplace(("#define LOCAL_SIZE_Y " + support::cpp11::to_string(1)));
- build_opts.emplace(("#define LOCAL_SIZE_Z " + support::cpp11::to_string(1)));
- if(beta == nullptr)
- {
- build_opts.emplace("#define USE_DEFAULT_BETA");
- }
- if(gamma == nullptr)
- {
- build_opts.emplace("#define USE_DEFAULT_GAMMA");
- }
-
- if(act_info.enabled())
- {
- build_opts.emplace("#define " + string_from_activation_func(act_info.activation()));
- build_opts.emplace("#define A_VAL " + float_to_string_with_full_precision(act_info.a()));
- build_opts.emplace("#define B_VAL " + float_to_string_with_full_precision(act_info.b()));
- }
-
- // Create kernel
- _kernel = static_cast<GCKernel>(GCKernelLibrary::get().create_kernel("batchnormalization_layer", build_opts));
-
- // Configure kernel window
- auto win_config = validate_and_configure_window(input->info(), output->info(), mean->info(), var->info(),
- (beta != nullptr) ? beta->info() : nullptr, (gamma != nullptr) ? gamma->info() : nullptr);
- ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
-
- IGCKernel::configure(win_config.second);
-}
-
-/** Check whether the given tensor infos would form a valid kernel configuration.
- *
- * The argument checks run first; the window computation is then tried on clones so that the
- * caller's tensor infos are left untouched.
- *
- * @param[in] input    Source tensor info.
- * @param[in] output   Destination tensor info.
- * @param[in] mean     Mean tensor info.
- * @param[in] var      Variance tensor info.
- * @param[in] beta     Optional beta tensor info (may be nullptr).
- * @param[in] gamma    Optional gamma tensor info (may be nullptr).
- * @param[in] epsilon  Epsilon value, forwarded to the argument checks.
- * @param[in] act_info Optional fused activation, forwarded to the argument checks.
- *
- * @return An empty Status on success, otherwise the first error encountered.
- */
-Status GCBatchNormalizationLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *output,
-                                                 const ITensorInfo *mean, const ITensorInfo *var,
-                                                 const ITensorInfo *beta, const ITensorInfo *gamma,
-                                                 float epsilon, ActivationLayerInfo act_info)
-{
-    ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, mean, var, beta, gamma, epsilon, act_info));
-
-    // beta and gamma are optional: only clone them when present, and pass nullptr through
-    // otherwise, exactly as configure() does.
-    auto beta_clone  = (beta != nullptr) ? beta->clone() : nullptr;
-    auto gamma_clone = (gamma != nullptr) ? gamma->clone() : nullptr;
-
-    ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input->clone().get(), output->clone().get(),
-                                                              mean->clone().get(), var->clone().get(),
-                                                              beta_clone.get(), gamma_clone.get())
-                                .first);
-
-    return Status{};
-}
-
-/** Enqueue the batch-normalization shader once per 3D slice of @p window.
- *
- * The 1D mean/var (and optional beta/gamma) arguments are set once before the loop; the 3D
- * input and output arguments are set again for every slice.
- *
- * @param[in] window Region to execute; must be a valid sub-window of the configured window.
- */
-void GCBatchNormalizationLayerKernel::run(const Window &window)
-{
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window);
-
- _kernel.use();
-
- _output->set_needs_shifting(true);
-
- Window slice = window.first_slice_window_3D();
- Window slice_in = window.first_slice_window_3D();
-
- // Window used for the 1D vectors, with its X dimension reset to (0, 0, 0)
- Window vector_slice = window.first_slice_window_1D();
- vector_slice.set(Window::DimX, Window::Dimension(0, 0, 0));
-
- // The vector arguments start after the slots of the two 3D tensors (input and output),
- // and their binding points start at 3 because 1 and 2 are used by input and output below
- unsigned int idx = 2 * num_arguments_per_3D_tensor();
- unsigned int binding_point = 3;
- add_1D_tensor_argument(idx, _mean, binding_point, vector_slice);
- add_1D_tensor_argument(idx, _var, ++binding_point, vector_slice);
- // Optional tensors take the next consecutive binding point when present
- if(_beta != nullptr)
- {
- add_1D_tensor_argument(idx, _beta, ++binding_point, vector_slice);
- }
- if(_gamma != nullptr)
- {
- add_1D_tensor_argument(idx, _gamma, ++binding_point, vector_slice);
- }
-
- // Only the output slice is shifted left by the output's left padding
- slice.shift(Window::DimX, -(_output->info()->padding()).left);
-
- do
- {
- // Restart at argument 0 so the input/output slots are overwritten for each slice
- idx = 0;
- add_3D_tensor_argument(idx, _input, 1, slice_in);
- add_3D_tensor_argument(idx, _output, 2, slice);
-
- _kernel.update_shader_params();
- enqueue(*this, slice);
- }
- while(window.slide_window_slice_3D(slice) && window.slide_window_slice_3D(slice_in));
-}
diff --git a/src/core/GLES_COMPUTE/kernels/GCCol2ImKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCCol2ImKernel.cpp
deleted file mode 100644
index 1e48dc8e88..0000000000
--- a/src/core/GLES_COMPUTE/kernels/GCCol2ImKernel.cpp
+++ /dev/null
@@ -1,124 +0,0 @@
-/*
- * Copyright (c) 2017-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "arm_compute/core/GLES_COMPUTE/kernels/GCCol2ImKernel.h"
-
-#include "arm_compute/core/AccessWindowStatic.h"
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h"
-#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h"
-#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h"
-#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/core/Validate.h"
-#include "support/StringSupport.h"
-
-using namespace arm_compute;
-
-GCCol2ImKernel::GCCol2ImKernel()
- : _input(nullptr), _output(nullptr), _convolved_dims()
-{
-}
-
-/** Configure the col2im kernel.
- *
- * The expected output shape is the input shape with dimension 0 set to convolved_dims.first,
- * dimension 1 set to convolved_dims.second and dimension 2 set to the input's dimension 0.
- *
- * @param[in]  input          Source tensor (single-channel F16 or F32).
- * @param[out] output         Destination tensor; initialized from the input's info with the
- *                            shape described above when empty.
- * @param[in]  convolved_dims Pair written to output dimensions 0 and 1; its first element is
- *                            also passed to the shader as WIDTH_OUTPUT.
- */
-void GCCol2ImKernel::configure(const IGCTensor *input, IGCTensor *output,
- std::pair<unsigned int, unsigned int> convolved_dims)
-{
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32);
- ARM_COMPUTE_ERROR_ON_NULLPTR(output);
-
- TensorShape output_shape = input->info()->tensor_shape();
- output_shape.set(0, convolved_dims.first);
- output_shape.set(1, convolved_dims.second);
- output_shape.set(2, input->info()->tensor_shape()[0]);
-
- // Output auto initialization if not yet initialized
- auto_init_if_empty(*output->info(), input->info()->clone()->set_tensor_shape(output_shape));
-
- ARM_COMPUTE_ERROR_ON_MISMATCHING_DIMENSIONS(output->info()->tensor_shape(), output_shape);
- ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
-
- _input = input;
- _output = output;
- _convolved_dims = convolved_dims;
-
- const DataType dt = input->info()->data_type();
- const unsigned int local_size = 1;
-
- // Create kernel
- std::set<std::string> build_opts;
- build_opts.emplace("#define COL2IM ");
- build_opts.emplace("#define WIDTH_OUTPUT " + support::cpp11::to_string(_convolved_dims.first));
- const std::string dt_name = (dt == DataType::F32) ? "DATA_TYPE_FP32" : "DATA_TYPE_FP16";
- build_opts.emplace(("#define " + dt_name));
- build_opts.emplace("#define LOCAL_SIZE_X " + support::cpp11::to_string(local_size));
- build_opts.emplace("#define LOCAL_SIZE_Y " + support::cpp11::to_string(local_size));
- build_opts.emplace("#define LOCAL_SIZE_Z " + support::cpp11::to_string(local_size));
-
- _kernel = static_cast<GCKernel>(GCKernelLibrary::get().create_kernel("col2im", build_opts));
-
- // Configure window
- // One element per iteration for F32, two otherwise
- const unsigned int num_elems_processed_per_iteration = (dt == DataType::F32) ? 1 : 2;
-
- Window win = calculate_max_window(*output->info(), Steps(num_elems_processed_per_iteration));
-
- AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration);
- // Round the input's dimension 0 up to a multiple of 2 for its access window
- const int input_padding = ceil_to_multiple(input->info()->dimension(0), 2) - input->info()->dimension(0);
-
- // The input access window also reaches one element past dimension 1
- AccessWindowStatic input_access(input->info(), 0, 0, input->info()->dimension(0) + input_padding, input->info()->dimension(1) + 1);
-
- update_window_and_padding(win, input_access, output_access);
-
- output_access.set_valid_region(win, output->info()->valid_region());
-
- IGCKernel::configure(win);
-}
-
-/** Enqueue the col2im shader once per 3D slice of @p window.
- *
- * The window is collapsed along Z when possible. Two scalar arguments (the output's
- * dimension 2 and the input's stride in bytes along dimension 2) are set once before the
- * loop; the tensor arguments are set again for every slice.
- *
- * @param[in] window Region to execute; must match the window set in configure().
- */
-void GCCol2ImKernel::run(const Window &window)
-{
-    ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
-    ARM_COMPUTE_ERROR_ON_MISMATCHING_WINDOWS(IGCKernel::window(), window);
-
-    _kernel.use();
-
-    Window collapsed_window = window.collapse_if_possible(IGCKernel::window(), Window::DimZ);
-    Window slice            = collapsed_window.first_slice_window_3D();
-
-    // Static kernel arguments, placed after the slots of two 3D tensors.
-    // NOTE(review): the input is bound below as a 2D tensor while this offset counts two 3D
-    // tensors - confirm this matches the shader's argument layout.
-    unsigned int static_idx = 2 * num_arguments_per_3D_tensor();
-    //_kernel.set_argument(static_idx++, _output->info()->strides_in_bytes()[3]);
-    _kernel.set_argument(static_idx++, uint(_output->info()->dimension(2)));
-    _kernel.set_argument(static_idx++, _input->info()->strides_in_bytes()[2]);
-
-    do
-    {
-        // Tensor arguments restart at index 0 for every slice.
-        unsigned int tensor_idx = 0;
-        add_2D_tensor_argument(tensor_idx, _input, 1, slice);
-        add_3D_tensor_argument(tensor_idx, _output, 2, slice);
-        _kernel.update_shader_params();
-        enqueue(*this, slice);
-    }
-    while(collapsed_window.slide_window_slice_3D(slice));
-}
diff --git a/src/core/GLES_COMPUTE/kernels/GCDepthConcatenateLayerKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCDepthConcatenateLayerKernel.cpp
deleted file mode 100644
index c6345ba679..0000000000
--- a/src/core/GLES_COMPUTE/kernels/GCDepthConcatenateLayerKernel.cpp
+++ /dev/null
@@ -1,112 +0,0 @@
-/*
- * Copyright (c) 2017-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/core/GLES_COMPUTE/kernels/GCDepthConcatenateLayerKernel.h"
-
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h"
-#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h"
-#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h"
-#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/Utils.h"
-#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/Window.h"
-
-#include "support/StringSupport.h"
-
-using namespace arm_compute;
-
-/** Default constructor: no tensors attached and a depth offset of zero. */
-GCDepthConcatenateLayerKernel::GCDepthConcatenateLayerKernel()
-    : _input(nullptr),
-      _output(nullptr),
-      _depth_offset(0)
-{
-}
-/** Validate the tensors, build the "concatenate_depth" shader and set up the execution window.
- *
- * @param[in]     input        Source tensor (F16 or F32, single channel).
- * @param[in]     depth_offset Offset along dimension 2 of @p output at which @p input is placed.
- * @param[in,out] output       Destination tensor; same data type and same X/Y extent as @p input.
- */
-void GCDepthConcatenateLayerKernel::configure(const IGCTensor *input, unsigned int depth_offset, IGCTensor *output)
-{
-    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32);
-    ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
-
-    ARM_COMPUTE_ERROR_ON(input->info()->dimension(Window::DimX) != output->info()->dimension(Window::DimX));
-    ARM_COMPUTE_ERROR_ON(input->info()->dimension(Window::DimY) != output->info()->dimension(Window::DimY));
-    ARM_COMPUTE_ERROR_ON(input->info()->dimension(2) + depth_offset > output->info()->dimension(2));
-    ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(3, input, output);
-
-    _input        = input;
-    _output       = output;
-    _depth_offset = depth_offset;
-
-    const bool is_fp32 = (input->info()->data_type() == DataType::F32);
-    const bool is_fp16 = (input->info()->data_type() == DataType::F16);
-
-    // Shader defines: data type plus a 1x1x1 local work-group size
-    std::set<std::string> build_opts;
-    build_opts.insert(std::string("#define ") + (is_fp32 ? "DATA_TYPE_FP32" : "DATA_TYPE_FP16"));
-    for(const char *axis : { "X", "Y", "Z" })
-    {
-        build_opts.insert(std::string("#define LOCAL_SIZE_") + axis + " " + support::cpp11::to_string(1));
-    }
-
-    // Create kernel
-    _kernel = static_cast<GCKernel>(GCKernelLibrary::get().create_kernel("concatenate_depth", build_opts));
-
-    // F16 handles four elements per iteration, every other type one
-    const unsigned int num_elems_processed_per_iteration = is_fp16 ? 4 : 1;
-
-    // X/Y come from the output shape (asserted equal to the input's above);
-    // Z is restricted to the input depth because only the input's planes are copied.
-    Window win = calculate_max_window(*output->info(), Steps(num_elems_processed_per_iteration));
-    win.set(Window::DimZ, Window::Dimension(0, input->info()->tensor_shape().z(), 1));
-
-    AccessWindowHorizontal input_access(input->info(), 0, num_elems_processed_per_iteration);
-    AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration);
-    update_window_and_padding(win, input_access, output_access);
-    output_access.set_valid_region(win, ValidRegion(Coordinates(), output->info()->tensor_shape()));
-
-    IGCKernel::configure(win);
-}
-
-/** Copy the input into the output at the configured depth offset, one 3D slice at a time.
- *
- * @param[in] window Execution window; must be a valid sub-window of the configured window.
- */
-void GCDepthConcatenateLayerKernel::run(const Window &window)
-{
-    ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
-    ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IGCKernel::window(), window);
-
-    _kernel.use();
-
-    _output->set_needs_shifting(true);
-
-    Window slice_in  = window.first_slice_window_3D();
-    Window slice_out = window.first_slice_window_3D();
-
-    // The output slice starts at the requested depth offset
-    slice_out.set(Window::DimZ, Window::Dimension(_depth_offset));
-
-    do
-    {
-        unsigned int idx = 0;
-        add_3D_tensor_argument(idx, _input, 1, slice_in);
-        add_3D_tensor_argument(idx, _output, 2, slice_out);
-
-        _kernel.update_shader_params();
-
-        enqueue(*this, slice_in);
-    }
-    // Advance both slices together so that each higher-dimension slice of the input
-    // is written to the matching slice of the output instead of always the first one.
-    while(window.slide_window_slice_3D(slice_in) && window.slide_window_slice_3D(slice_out));
-}
diff --git a/src/core/GLES_COMPUTE/kernels/GCDepthwiseConvolutionLayer3x3Kernel.cpp b/src/core/GLES_COMPUTE/kernels/GCDepthwiseConvolutionLayer3x3Kernel.cpp
deleted file mode 100644
index c60f4688a6..0000000000
--- a/src/core/GLES_COMPUTE/kernels/GCDepthwiseConvolutionLayer3x3Kernel.cpp
+++ /dev/null
@@ -1,250 +0,0 @@
-/*
- * Copyright (c) 2017-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/core/GLES_COMPUTE/kernels/GCDepthwiseConvolutionLayer3x3Kernel.h"
-
-#include "arm_compute/core/AccessWindowStatic.h"
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h"
-#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h"
-#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h"
-#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/core/Utils.h"
-#include "arm_compute/core/utils/misc/ShapeCalculator.h"
-#include "support/StringSupport.h"
-
-using namespace arm_compute;
-using namespace arm_compute::misc::shape_calculator;
-
-/** Default constructor: zero border, zero strides and pads, and a 1x1x1 local work-group size. */
-GCDepthwiseConvolutionLayer3x3Kernel::GCDepthwiseConvolutionLayer3x3Kernel()
-    : _border_size(0),
-      _input(),
-      _output(),
-      _weights(),
-      _biases(),
-      _conv_stride_x(0),
-      _conv_stride_y(0),
-      _conv_pad_left(0),
-      _conv_pad_top(0),
-      _lws(gles::NDRange(1U, 1U, 1U))
-{
-}
-
-/** Border size stored by configure().
- *
- * @return The border built from the convolution padding (all zero before configure() is called).
- */
-BorderSize GCDepthwiseConvolutionLayer3x3Kernel::border_size() const
-{
-    return _border_size;
-}
-
-/** Validate the tensors, build the "depthwise_convolution_3x3" shader and set up window and padding.
- *
- * @param[in]     input            Source tensor (F16).
- * @param[in]     weights          3x3 weights tensor with the same data type as @p input.
- * @param[in]     biases           Optional 1D biases tensor; may be nullptr.
- * @param[in,out] output           Destination tensor; auto-initialised from the computed shape if empty.
- * @param[in]     conv_info        Padding and stride information; the X stride must be in [1, 3].
- * @param[in]     depth_multiplier Depth multiplier, forwarded to the shape calculation and to the shader.
- */
-void GCDepthwiseConvolutionLayer3x3Kernel::configure(const IGCTensor *input, const IGCTensor *weights, const IGCTensor *biases, IGCTensor *output, const PadStrideInfo &conv_info,
-                                                     unsigned int depth_multiplier)
-{
-    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16);
-    ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights);
-    ARM_COMPUTE_ERROR_ON(weights->info()->dimension(0) != 3 || weights->info()->dimension(1) != 3);
-
-    if(biases != nullptr)
-    {
-        ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(weights, biases);
-        ARM_COMPUTE_ERROR_ON(biases->info()->dimension(0) != weights->info()->dimension(2));
-        ARM_COMPUTE_ERROR_ON(biases->info()->num_dimensions() > 1);
-    }
-
-    // Get convolved dimensions
-    const TensorShape output_shape = compute_depthwise_convolution_shape(*input->info(), *weights->info(), conv_info, depth_multiplier);
-
-    // Output auto initialization if not yet initialized
-    auto_init_if_empty(*output->info(),
-                       output_shape,
-                       1,
-                       input->info()->data_type());
-
-    ARM_COMPUTE_ERROR_ON_MISMATCHING_DIMENSIONS(output->info()->tensor_shape(), output_shape);
-    ARM_COMPUTE_ERROR_ON(output->info()->dimension(2) != weights->info()->dimension(2));
-
-    _input         = input;
-    _output        = output;
-    _weights       = weights;
-    _biases        = biases;
-    _conv_stride_x = conv_info.stride().first;
-    _conv_stride_y = conv_info.stride().second;
-    _conv_pad_left = conv_info.pad_left();
-    _conv_pad_top  = conv_info.pad_top();
-    _border_size   = BorderSize(_conv_pad_top, conv_info.pad_right(), conv_info.pad_bottom(), _conv_pad_left);
-
-    // Set build options
-    ARM_COMPUTE_ERROR_ON(_conv_stride_x < 1 || _conv_stride_x > 3);
-    std::set<std::string> options;
-
-    options.emplace("#define DEPTH_MULTIPLIER " + support::cpp11::to_string(depth_multiplier));
-    options.emplace("#define LOCAL_SIZE_X " + support::cpp11::to_string(_lws[0]));
-    options.emplace("#define LOCAL_SIZE_Y " + support::cpp11::to_string(_lws[1]));
-    options.emplace("#define LOCAL_SIZE_Z " + support::cpp11::to_string(_lws[2]));
-    options.emplace("#define STRIDE_X " + support::cpp11::to_string(_conv_stride_x));
-    options.emplace("#define STRIDE_Y " + support::cpp11::to_string(_conv_stride_y));
-
-    std::string dt_name = (input->info()->data_type() == DataType::F32) ? "DATA_TYPE_FP32" : "DATA_TYPE_FP16";
-    options.emplace(("#define " + dt_name));
-
-    unsigned int num_elems_read_per_iteration_x    = 8;
-    unsigned int num_elems_read_per_iteration_y    = 1;
-    unsigned int num_elems_written_per_iteration_x = 4;
-    unsigned int num_elems_written_per_iteration_y = 1;
-    unsigned int num_elems_written_per_iteration_z = 1;
-
-    if((_conv_stride_x == 1) && (_conv_stride_y == 1))
-    {
-        // Unit stride: each invocation writes a 4x3 tile and therefore reads 5 input rows
-        switch(input->info()->data_type())
-        {
-            case DataType::F16:
-                options.emplace("#define PROCESS_4X_3Y_1Z");
-                num_elems_read_per_iteration_y    = 5;
-                num_elems_written_per_iteration_y = 3;
-                break;
-
-            default:
-                ARM_COMPUTE_ERROR("Current data type is not supported");
-                break;
-        }
-    }
-    else
-    {
-        switch(input->info()->data_type())
-        {
-            case DataType::F16:
-                options.emplace("#define PROCESS_4X_1Y_1Z");
-                break;
-
-            default:
-                ARM_COMPUTE_ERROR("Current data type is not supported");
-                break;
-        }
-    }
-
-    if(_biases != nullptr)
-    {
-        options.emplace("#define BIAS");
-    }
-
-    // Create kernel
-    std::string kernel_name = "depthwise_convolution_3x3";
-    _kernel                 = static_cast<GCKernel>(GCKernelLibrary::get().create_kernel(kernel_name, options));
-
-    // Calculate output right and bottom border
-    const int output_width          = output->info()->dimension(0);
-    const int output_height         = output->info()->dimension(1);
-    const int output_padding_right  = ceil_to_multiple(output_width, num_elems_written_per_iteration_x * _lws[0]) - output_width;
-    const int output_padding_bottom = ceil_to_multiple(output_height, num_elems_written_per_iteration_y * _lws[1]) - output_height;
-
-    // Calculate input right and bottom border
-    const int input_width  = input->info()->dimension(0);
-    const int input_height = input->info()->dimension(1);
-
-    // The far edges must account for the right/bottom convolution padding (the values _border_size is built from),
-    // not the left/top one, otherwise asymmetric padding under-estimates the required extent.
-    const int conv_pad_right  = static_cast<int>(conv_info.pad_right());
-    const int conv_pad_bottom = static_cast<int>(conv_info.pad_bottom());
-
-    const int input_total_width  = std::max(int(input->info()->padding().left), int(_conv_pad_left)) + input_width + std::max(int(input->info()->padding().right), conv_pad_right);
-    const int input_total_height = std::max(int(input->info()->padding().top), int(_conv_pad_top)) + input_height + std::max(int(input->info()->padding().bottom), conv_pad_bottom);
-
-    // The access window starts at -pad_left / -pad_top, hence the left/top pad is subtracted here
-    const int input_padding_right  = ceil_to_multiple(input_total_width, num_elems_read_per_iteration_x * _lws[0]) - input_width - _conv_pad_left;
-    const int input_padding_bottom = ceil_to_multiple(input_total_height, num_elems_read_per_iteration_y * _lws[1]) - input_height - _conv_pad_top;
-
-    BorderSize border = BorderSize(0, output_padding_right, output_padding_bottom, 0);
-
-    Window win = calculate_max_enlarged_window(*output->info(), Steps(num_elems_written_per_iteration_x, num_elems_written_per_iteration_y, num_elems_written_per_iteration_z), border);
-
-    AccessWindowStatic input_access(input->info(), -_conv_pad_left, -_conv_pad_top, input_width + input_padding_right, input_height + input_padding_bottom);
-    AccessWindowStatic weights_access = AccessWindowStatic(nullptr, 0, 0, 0, 0);
-    AccessWindowStatic bias_access    = AccessWindowStatic(nullptr, 0, 0, 0, 1);
-
-    switch(weights->info()->data_type())
-    {
-        case DataType::F16:
-            // One extra element in X on weights and biases compared with their logical extent
-            weights_access = AccessWindowStatic(weights->info(), 0, 0, 4, 3);
-            if(_biases != nullptr)
-            {
-                bias_access = AccessWindowStatic(_biases->info(), 0, 0, _biases->info()->dimension(0) + 1, 1);
-            }
-            break;
-
-        default:
-            ARM_COMPUTE_ERROR("Current data type is not supported");
-            break;
-    }
-
-    AccessWindowStatic output_access(output->info(), 0, 0, output_width + output_padding_right, output_height + output_padding_bottom);
-
-    if(_biases != nullptr)
-    {
-        update_window_and_padding(win, input_access, weights_access, bias_access, output_access);
-    }
-    else
-    {
-        update_window_and_padding(win, input_access, weights_access, output_access);
-    }
-
-    output_access.set_valid_region(win, ValidRegion(Coordinates(), output->info()->tensor_shape()));
-
-    IGCKernel::configure(win);
-}
-
-/** Dispatch the depthwise 3x3 shader once per 3D slice of the execution window.
- *
- * @param[in] window Execution window; must be a valid sub-window of the configured window.
- */
-void GCDepthwiseConvolutionLayer3x3Kernel::run(const Window &window)
-{
-    ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
-    ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window);
-
-    _kernel.use();
-
-    _output->set_needs_shifting(true);
-
-    // Input window: moved back by the left/top pad, with steps scaled by the convolution strides
-    Window input_window = window;
-    input_window.adjust(Window::DimX, -_conv_pad_left, true);
-    input_window.adjust(Window::DimY, -_conv_pad_top, true);
-    input_window.set_dimension_step(Window::DimX, window.x().step() * _conv_stride_x);
-    input_window.set_dimension_step(Window::DimY, window.y().step() * _conv_stride_y);
-
-    Window in_slice  = input_window.first_slice_window_3D();
-    Window out_slice = window.first_slice_window_3D();
-
-    // The weights slice uses a zero step in X and Y
-    Window weights_slice = window.first_slice_window_3D();
-    weights_slice.set_dimension_step(Window::DimX, 0);
-    weights_slice.set_dimension_step(Window::DimY, 0);
-
-    // The optional biases are bound once, after the slots of the three 3D tensors
-    if(_biases != nullptr)
-    {
-        unsigned int bias_idx = 3 * num_arguments_per_3D_tensor();
-        Window       bias_slice;
-        bias_slice.use_tensor_dimensions(_biases->info()->tensor_shape());
-        add_1D_tensor_argument(bias_idx, _biases, 4, bias_slice);
-    }
-
-    out_slice.shift(Window::DimX, -(_output->info()->padding()).left);
-
-    do
-    {
-        unsigned int arg_idx = 0;
-        add_3D_tensor_argument(arg_idx, _input, 1, in_slice);
-        add_3D_tensor_argument(arg_idx, _output, 2, out_slice);
-        add_3D_tensor_argument(arg_idx, _weights, 3, weights_slice);
-
-        _kernel.update_shader_params();
-        enqueue(*this, out_slice, _lws);
-    }
-    while(window.slide_window_slice_3D(out_slice) && input_window.slide_window_slice_3D(in_slice));
-}
diff --git a/src/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.cpp
deleted file mode 100644
index f3e47d9ae9..0000000000
--- a/src/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.cpp
+++ /dev/null
@@ -1,448 +0,0 @@
-/*
- * Copyright (c) 2017-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.h"
-
-#include "arm_compute/core/AccessWindowStatic.h"
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h"
-#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h"
-#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/IAccessWindow.h"
-#include "arm_compute/core/ITensor.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/core/Validate.h"
-#include "support/StringSupport.h"
-
-using namespace arm_compute;
-
-/** Default constructor: no tensors attached, zero border/strides/pads and a 1x1x1 local work-group size. */
-template <unsigned int kernel_size>
-GCDirectConvolutionLayerKernel<kernel_size>::GCDirectConvolutionLayerKernel()
-    : _input(nullptr),
-      _bias(nullptr),
-      _weights(nullptr),
-      _output(nullptr),
-      _border_size(0),
-      _conv_stride_x(0),
-      _conv_stride_y(0),
-      _conv_pad_x(0),
-      _conv_pad_y(0),
-      _lws(gles::NDRange(1U, 1U, 1U))
-{
-}
-
-/** Border size stored by configure().
- *
- * @return The border built from the convolution padding (all zero before configure() is called).
- */
-template <unsigned int kernel_size>
-BorderSize GCDirectConvolutionLayerKernel<kernel_size>::border_size() const
-{
-    return _border_size;
-}
-
-/** Validate the tensors, build the "direct_convolution<K>x<K>" shader and set up window and padding.
- *
- * @param[in]     input     Source tensor (F16 or F32, single channel).
- * @param[in]     weights   Square weights tensor of size kernel_size x kernel_size with at most 4 dimensions;
- *                          dimension 2 must equal dimension 2 of @p input.
- * @param[in]     bias      Optional 1D bias tensor with the data type of @p weights; may be nullptr.
- * @param[in,out] output    Destination tensor; auto-initialised from the computed shape if empty.
- * @param[in]     conv_info Padding and stride information; padding must be symmetric.
- * @param[in]     act_info  Optional fused activation; only RELU and LOGISTIC are accepted when enabled.
- */
-template <unsigned int kernel_size>
-void GCDirectConvolutionLayerKernel<kernel_size>::configure(const IGCTensor *input, const IGCTensor *weights, const IGCTensor *bias, IGCTensor *output,
-                                                            const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info)
-{
-    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32);
-    ARM_COMPUTE_ERROR_ON(weights->info()->dimension(2) != input->info()->dimension(2));
-    ARM_COMPUTE_ERROR_ON(weights->info()->dimension(0) != weights->info()->dimension(1));
-    ARM_COMPUTE_ERROR_ON(weights->info()->num_dimensions() > 4);
-    ARM_COMPUTE_ERROR_ON_MSG((kernel_size == 3 && std::get<0>(conv_info.stride()) > 2), "Strides larger than 2 not supported in 3x3 direct convolution!");
-    ARM_COMPUTE_ERROR_ON(kernel_size != weights->info()->dimension(0));
-    ARM_COMPUTE_ERROR_ON(act_info.enabled() && act_info.activation() != ActivationLayerInfo::ActivationFunction::RELU && act_info.activation() != ActivationLayerInfo::ActivationFunction::LOGISTIC);
-
-    if(bias != nullptr)
-    {
-        ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(weights, bias);
-        // FIXME: Bug in framework, workaround it in tests currently.
-        //ARM_COMPUTE_ERROR_ON(bias->info()->dimension(0) != weights->info()->dimension(3));
-        ARM_COMPUTE_ERROR_ON(bias->info()->num_dimensions() > 1);
-    }
-
-    // Get convolved dimensions
-    unsigned int owidth  = 0;
-    unsigned int oheight = 0;
-    std::tie(owidth, oheight) = scaled_dimensions(input->info()->dimension(0), input->info()->dimension(1), kernel_size, kernel_size, conv_info);
-
-    TensorShape output_shape = input->info()->tensor_shape();
-    output_shape.set(0, owidth);
-    output_shape.set(1, oheight);
-    output_shape.set(2, weights->info()->dimension(3));
-
-    // Output auto initialization if not yet initialized
-    auto_init_if_empty(*output->info(), output_shape, 1, input->info()->data_type());
-
-    // This single check already covers the input/output pair
-    ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights, output);
-    ARM_COMPUTE_ERROR_ON_MISMATCHING_DIMENSIONS(output->info()->tensor_shape(), output_shape);
-    ARM_COMPUTE_ERROR_ON(!conv_info.padding_is_symmetric());
-
-    _conv_stride_x = std::get<0>(conv_info.stride());
-    _conv_stride_y = std::get<1>(conv_info.stride());
-    _conv_pad_x    = std::get<0>(conv_info.pad());
-    _conv_pad_y    = std::get<1>(conv_info.pad());
-
-    _input       = input;
-    _weights     = weights;
-    _output      = output;
-    _bias        = bias;
-    _border_size = BorderSize(_conv_pad_y, _conv_pad_x);
-
-    std::set<std::string> options;
-
-    options.emplace("#define LOCAL_SIZE_X " + support::cpp11::to_string(_lws[0]));
-    options.emplace("#define LOCAL_SIZE_Y " + support::cpp11::to_string(_lws[1]));
-    options.emplace("#define LOCAL_SIZE_Z " + support::cpp11::to_string(_lws[2]));
-    options.emplace("#define STRIDE_X " + support::cpp11::to_string(_conv_stride_x));
-    options.emplace("#define STRIDE_Y " + support::cpp11::to_string(_conv_stride_y));
-
-    std::string dt_name = (input->info()->data_type() == DataType::F32) ? "DATA_TYPE_FP32" : "DATA_TYPE_FP16";
-    options.emplace(("#define " + dt_name));
-
-    // Activation information in case of a fused activation
-    if(act_info.enabled())
-    {
-        options.emplace("#define FUSED_ACTIVATION");
-        options.emplace(("#define " + string_from_activation_func(act_info.activation())));
-        options.emplace(("#define ACT_OP " + lower_string(string_from_activation_func(act_info.activation())) + "_op"));
-        options.emplace(("#define A_VAL " + float_to_string_with_full_precision(act_info.a())));
-        options.emplace(("#define B_VAL " + float_to_string_with_full_precision(act_info.b())));
-    }
-
-    // Defaults; the per-kernel-size sections below override them together with the matching PROCESS_* define
-    unsigned int num_elems_read_per_iteration_x    = kernel_size * _conv_stride_x;
-    unsigned int num_elems_read_per_iteration_y    = 1;
-    unsigned int num_elems_written_per_iteration_x = 1;
-    unsigned int num_elems_written_per_iteration_y = 1;
-    unsigned int num_elems_written_per_iteration_z = 1;
-
-    if(kernel_size == 3)
-    {
-        if((_conv_stride_x == 1) && (_conv_stride_y == 1))
-        {
-            switch(input->info()->data_type())
-            {
-                case DataType::F16:
-                    // TODO(APPBROWSER-299): Choose the most optimal path and remove others.
-#define PROCESS_4X_3Y_1Z
-
-#if defined(PROCESS_8X_3Y_1Z)
-                    options.emplace("#define PROCESS_8X_3Y_1Z");
-                    num_elems_read_per_iteration_x    = 16;
-                    num_elems_read_per_iteration_y    = 5;
-                    num_elems_written_per_iteration_x = 8;
-                    num_elems_written_per_iteration_y = 3;
-#elif defined(PROCESS_4X_3Y_1Z)
-                    options.emplace("#define PROCESS_4X_3Y_1Z");
-                    num_elems_read_per_iteration_x    = 8;
-                    num_elems_read_per_iteration_y    = 5;
-                    num_elems_written_per_iteration_x = 4;
-                    num_elems_written_per_iteration_y = 3;
-#elif defined(PROCESS_4X_4Y_1Z)
-                    options.emplace("#define PROCESS_4X_4Y_1Z");
-                    num_elems_read_per_iteration_x    = 8;
-                    num_elems_read_per_iteration_y    = 6;
-                    num_elems_written_per_iteration_x = 4;
-                    num_elems_written_per_iteration_y = 4;
-#elif defined(PROCESS_4X_3Y_2Z)
-                    options.emplace("#define PROCESS_4X_3Y_2Z");
-                    num_elems_read_per_iteration_x    = 8;
-                    num_elems_read_per_iteration_y    = 5;
-                    num_elems_written_per_iteration_x = 4;
-                    num_elems_written_per_iteration_y = 3;
-                    num_elems_written_per_iteration_z = 2;
-#endif /* PROCESS_nX_nY_nZ */
-#undef PROCESS_8X_3Y_1Z
-#undef PROCESS_4X_3Y_1Z
-#undef PROCESS_4X_4Y_1Z
-#undef PROCESS_4X_3Y_2Z
-                    break;
-
-                case DataType::F32:
-                    options.emplace("#define PROCESS_4X_3Y_1Z");
-                    num_elems_read_per_iteration_x    = 8;
-                    num_elems_read_per_iteration_y    = 5;
-                    num_elems_written_per_iteration_x = 4;
-                    num_elems_written_per_iteration_y = 3;
-                    break;
-
-                default:
-                    ARM_COMPUTE_ERROR("Current data type is not supported");
-                    break;
-            }
-        }
-        // FIXME: Just keep one in release
-        else
-        {
-            switch(input->info()->data_type())
-            {
-                case DataType::F16:
-                    options.emplace("#define PROCESS_4X_1Y_1Z");
-                    num_elems_read_per_iteration_x    = 8;
-                    num_elems_written_per_iteration_x = 4;
-                    break;
-
-                case DataType::F32:
-                    // TODO(APPBROWSER-299): Choose the most optimal path and remove others.
-#define PROCESS_4X_1Y_1Z
-
-#if defined(PROCESS_1X_1Y_1Z)
-                    options.emplace("#define PROCESS_1X_1Y_1Z");
-                    num_elems_read_per_iteration_x    = 3;
-                    num_elems_written_per_iteration_x = 1;
-#elif defined(PROCESS_4X_1Y_1Z)
-                    options.emplace("#define PROCESS_4X_1Y_1Z");
-                    num_elems_read_per_iteration_x    = 8;
-                    num_elems_written_per_iteration_x = 4;
-#elif defined(PROCESS_8X_1Y_1Z)
-                    options.emplace("#define PROCESS_8X_1Y_1Z");
-                    num_elems_read_per_iteration_x    = 12;
-                    num_elems_written_per_iteration_x = 8;
-#else /* PROCESS_nX_nY_nZ */
-#error Have to declare how many elements to process in one thread.
-#endif /* PROCESS_nX_nY_nZ */
-#undef PROCESS_1X_1Y_1Z
-#undef PROCESS_4X_1Y_1Z
-#undef PROCESS_8X_1Y_1Z
-                    break;
-
-                default:
-                    ARM_COMPUTE_ERROR("Current data type is not supported");
-                    break;
-            }
-        }
-    }
-    else if(kernel_size == 1)
-    {
-        if(weights->info()->dimension(2) % 2 == 0)
-        {
-            options.emplace("#define WEIGHTS_OPTIMIZATION");
-        }
-        switch(input->info()->data_type())
-        {
-            case DataType::F16:
-#define PROCESS_8X_2Y_1Z
-
-#if defined(PROCESS_4X_1Y_1Z)
-                options.emplace("#define PROCESS_4X_1Y_1Z");
-                num_elems_read_per_iteration_x    = 4;
-                num_elems_written_per_iteration_x = 4;
-#elif defined(PROCESS_4X_2Y_1Z)
-                options.emplace("#define PROCESS_4X_2Y_1Z");
-                num_elems_read_per_iteration_x    = 4;
-                num_elems_read_per_iteration_y    = 2;
-                num_elems_written_per_iteration_x = 4;
-                num_elems_written_per_iteration_y = 2;
-#elif defined(PROCESS_4X_3Y_1Z)
-                options.emplace("#define PROCESS_4X_3Y_1Z");
-                num_elems_read_per_iteration_x    = 4;
-                num_elems_read_per_iteration_y    = 3;
-                num_elems_written_per_iteration_x = 4;
-                num_elems_written_per_iteration_y = 3;
-#elif defined(PROCESS_4X_4Y_1Z)
-                options.emplace("#define PROCESS_4X_4Y_1Z");
-                num_elems_read_per_iteration_x    = 4;
-                num_elems_read_per_iteration_y    = 4;
-                num_elems_written_per_iteration_x = 4;
-                num_elems_written_per_iteration_y = 4;
-#elif defined(PROCESS_4X_2Y_2Z)
-                // Two output planes are written per invocation, so the number of kernels (weights dimension 3) must be even.
-                // Weights have at most 4 dimensions, so dimension(4) is always 1 and could never pass this check.
-                ARM_COMPUTE_ERROR_ON_MSG((weights->info()->dimension(3) % 2) == 1, "Current 'weights->info()->dimension(3) % 2) == 1' is not supported");
-                options.emplace("#define PROCESS_4X_2Y_2Z");
-                num_elems_read_per_iteration_x    = 4;
-                num_elems_read_per_iteration_y    = 2;
-                num_elems_written_per_iteration_x = 4;
-                num_elems_written_per_iteration_y = 2;
-                num_elems_written_per_iteration_z = 2;
-#elif defined(PROCESS_8X_1Y_1Z)
-                options.emplace("#define PROCESS_8X_1Y_1Z");
-                num_elems_read_per_iteration_x    = 8;
-                num_elems_written_per_iteration_x = 8;
-#elif defined(PROCESS_8X_2Y_1Z)
-                options.emplace("#define PROCESS_8X_2Y_1Z");
-                num_elems_read_per_iteration_x    = 8;
-                num_elems_read_per_iteration_y    = 2;
-                num_elems_written_per_iteration_x = 8;
-                num_elems_written_per_iteration_y = 2;
-#else /* PROCESS_4X_1Y_1Z */
-#error Have to declare how many elements to process in one thread.
-#endif /* PROCESS_4X_1Y_1Z */
-#undef PROCESS_4X_1Y_1Z
-#undef PROCESS_4X_2Y_1Z
-#undef PROCESS_4X_3Y_1Z
-#undef PROCESS_4X_4Y_1Z
-#undef PROCESS_4X_2Y_2Z
-#undef PROCESS_8X_1Y_1Z
-#undef PROCESS_8X_2Y_1Z
-                break;
-
-            case DataType::F32:
-                num_elems_read_per_iteration_x    = 1;
-                num_elems_written_per_iteration_x = 1;
-                break;
-
-            default:
-                break;
-        }
-    }
-    else if(kernel_size == 5)
-    {
-        switch(input->info()->data_type())
-        {
-            case DataType::F16:
-                options.emplace("#define PROCESS_4X_1Y_1Z");
-                num_elems_read_per_iteration_x    = 8;
-                num_elems_written_per_iteration_x = 4;
-                break; // previously fell through into default
-
-            default:
-                break;
-        }
-    }
-
-    if(_bias != nullptr)
-    {
-        options.emplace("#define BIAS");
-    }
-
-    std::stringstream kernel_name;
-    kernel_name << "direct_convolution" << kernel_size << "x" << kernel_size;
-
-    _kernel = static_cast<GCKernel>(GCKernelLibrary::get().create_kernel(kernel_name.str(), options));
-
-    // Calculate output right and bottom border
-    const int output_width          = output->info()->dimension(0);
-    const int output_height         = output->info()->dimension(1);
-    const int output_padding_right  = ceil_to_multiple(output_width, num_elems_written_per_iteration_x * _lws[0]) - output_width;
-    const int output_padding_bottom = ceil_to_multiple(output_height, num_elems_written_per_iteration_y * _lws[1]) - output_height;
-
-    // Calculate input right and bottom border
-    const int input_width        = input->info()->dimension(0);
-    const int input_height       = input->info()->dimension(1);
-    const int input_total_width  = std::max(int(input->info()->padding().left), int(_conv_pad_x)) + input_width + std::max(int(input->info()->padding().right), int(_conv_pad_x));
-    const int input_total_height = std::max(int(input->info()->padding().top), int(_conv_pad_y)) + input_height + std::max(int(input->info()->padding().bottom), int(_conv_pad_y));
-    const int padding_right1     = ceil_to_multiple(input_total_width, num_elems_read_per_iteration_x * _lws[0]) - input_width - _conv_pad_x;
-    const int padding_bottom1    = ceil_to_multiple(input_total_height, num_elems_read_per_iteration_y * _lws[1]) - input_height - _conv_pad_y;
-
-    // Second estimate, derived from the padded output extent scaled by the stride plus the kernel overhang
-    const int upper_bound_w   = ceil_to_multiple(((output_width + output_padding_right) * _conv_stride_x + (kernel_size - 1)), num_elems_read_per_iteration_x * _lws[0]) - _conv_pad_x - input_width;
-    const int upper_bound_h   = ceil_to_multiple(((output_height + output_padding_bottom) * _conv_stride_y + (kernel_size - 1)), num_elems_read_per_iteration_y * _lws[1]) - _conv_pad_y - input_height;
-    const int padding_right2  = std::max(upper_bound_w, _conv_pad_x);
-    const int padding_bottom2 = std::max(upper_bound_h, _conv_pad_y);
-
-    // Use the larger of the two estimates
-    const int padding_right  = std::max(padding_right1, padding_right2);
-    const int padding_bottom = std::max(padding_bottom1, padding_bottom2);
-
-    BorderSize border = BorderSize(0, output_padding_right, output_padding_bottom, 0);
-
-    Window win = calculate_max_enlarged_window(*output->info(), Steps(num_elems_written_per_iteration_x, num_elems_written_per_iteration_y, num_elems_written_per_iteration_z), border);
-
-    AccessWindowStatic input_access(input->info(), -_conv_pad_x, -_conv_pad_y, input_width + padding_right, input_height + padding_bottom);
-    AccessWindowStatic weights_access = AccessWindowStatic(nullptr, 0, 0, 0, 0);
-    AccessWindowStatic bias_access    = AccessWindowStatic(nullptr, 0, 0, 0, 1);
-
-    switch(weights->info()->data_type())
-    {
-        case DataType::F16:
-            if((weights->info()->dimension(2) % 2 != 0) || (kernel_size != 1))
-            {
-                weights_access = AccessWindowStatic(weights->info(), 0, 0, kernel_size + 1, kernel_size);
-            }
-            if(_bias != nullptr)
-            {
-                bias_access = AccessWindowStatic(_bias->info(), 0, 0, _bias->info()->dimension(0) + 1, 1);
-            }
-            break;
-
-        case DataType::F32:
-            weights_access = AccessWindowStatic(weights->info(), 0, 0, kernel_size, kernel_size);
-            if(_bias != nullptr)
-            {
-                bias_access = AccessWindowStatic(_bias->info(), 0, 0, _bias->info()->dimension(0), 1);
-            }
-            break;
-
-        default:
-            ARM_COMPUTE_ERROR("Current data type is not supported");
-            break;
-    }
-
-    AccessWindowStatic output_access(output->info(), 0, 0, output_width + output_padding_right, output_height + output_padding_bottom);
-
-    if(_bias != nullptr)
-    {
-        update_window_and_padding(win, input_access, weights_access, bias_access, output_access);
-    }
-    else
-    {
-        update_window_and_padding(win, input_access, weights_access, output_access);
-    }
-
-    output_access.set_valid_region(win, ValidRegion(Coordinates(), output->info()->tensor_shape()));
-
-    // Static arguments go after the three 3D tensors and, when present, the 1D bias tensor
-    unsigned int idx = (_bias == nullptr) ? 3 * num_arguments_per_3D_tensor() : (num_arguments_per_1D_tensor() + 3 * num_arguments_per_3D_tensor());
-    _kernel.set_argument(idx++, _weights->info()->strides_in_bytes()[3]); // weights_stride_w
-    _kernel.set_argument(idx++, _weights->info()->dimension(2));          // weights_depth
-
-    IGCKernel::configure(win);
-}
-
-/** Dispatch the direct convolution shader once per 3D slice of the execution window.
- *
- * @param[in] window Execution window; must be a valid sub-window of the configured window.
- */
-template <unsigned int kernel_size>
-void GCDirectConvolutionLayerKernel<kernel_size>::run(const Window &window)
-{
-    ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
-    ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window);
-
-    _kernel.use();
-
-    _output->set_needs_shifting(true);
-
-    // Output slice, taken before any shift is applied
-    Window out_slice = window.first_slice_window_3D();
-
-    // Input window: moved back by the pad, with steps scaled by the convolution strides
-    Window input_window = window;
-    input_window.adjust(Window::DimX, -_conv_pad_x, true);
-    input_window.adjust(Window::DimY, -_conv_pad_y, true);
-    input_window.set_dimension_step(Window::DimX, window.x().step() * _conv_stride_x);
-    input_window.set_dimension_step(Window::DimY, window.y().step() * _conv_stride_y);
-
-    Window in_slice = input_window.first_slice_window_3D();
-
-    // Weights, then the optional bias, are bound once after the input/output slots
-    unsigned int static_idx = 2 * num_arguments_per_3D_tensor();
-    add_3D_tensor_argument(static_idx, _weights, 3, out_slice);
-
-    if(_bias != nullptr)
-    {
-        Window bias_slice;
-        bias_slice.use_tensor_dimensions(_bias->info()->tensor_shape());
-        add_1D_tensor_argument(static_idx, _bias, 4, bias_slice);
-    }
-
-    // The shift is applied only after the weights were bound with the unshifted slice
-    out_slice.shift(Window::DimX, -(_output->info()->padding()).left);
-
-    do
-    {
-        unsigned int arg_idx = 0;
-
-        add_3D_tensor_argument(arg_idx, _input, 1, in_slice);
-        add_3D_tensor_argument(arg_idx, _output, 2, out_slice);
-
-        _kernel.update_shader_params();
-        enqueue(*this, out_slice, _lws);
-    }
-    while(window.slide_window_slice_3D(out_slice) && input_window.slide_window_slice_3D(in_slice));
-}
-
-// Explicit instantiations for the kernel sizes handled by configure(): 1x1, 3x3 and 5x5
-template class arm_compute::GCDirectConvolutionLayerKernel<1>;
-template class arm_compute::GCDirectConvolutionLayerKernel<3>;
-template class arm_compute::GCDirectConvolutionLayerKernel<5>;
diff --git a/src/core/GLES_COMPUTE/kernels/GCDropoutLayerKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCDropoutLayerKernel.cpp
deleted file mode 100644
index 9368770e22..0000000000
--- a/src/core/GLES_COMPUTE/kernels/GCDropoutLayerKernel.cpp
+++ /dev/null
@@ -1,107 +0,0 @@
-/*
- * Copyright (c) 2017-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "arm_compute/core/GLES_COMPUTE/kernels/GCDropoutLayerKernel.h"
-
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h"
-#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h"
-#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h"
-#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/core/Validate.h"
-#include "support/StringSupport.h"
-
-#include <cmath>
-#include <random>
-#include <tuple>
-
-using namespace arm_compute;
-
-GCDropoutLayerKernel::GCDropoutLayerKernel()
- : _input(nullptr), _mask(nullptr), _output(nullptr), _num_elems_processed_per_iteration(0)
-{
-}
-
-void GCDropoutLayerKernel::configure(const IGCTensor *input, IGCTensor *mask, IGCTensor *output, float ratio, bool forward)
-{
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32);
- ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, mask, output);
-
- _input = input;
- _mask = mask;
- _output = output;
-
- std::set<std::string> build_opts;
- std::string dt_name = (input->info()->data_type() == DataType::F32) ? "DATA_TYPE_FP32" : "DATA_TYPE_FP16";
- std::string fporbp = forward ? "FORWARD" : "BACKWARD";
- std::random_device rd;
- std::mt19937 mt(rd());
- std::uniform_real_distribution<float> dist(0.f, 1.f);
-
- build_opts.emplace("#define LOCAL_SIZE_X " + support::cpp11::to_string(1));
- build_opts.emplace("#define LOCAL_SIZE_Y " + support::cpp11::to_string(1));
- build_opts.emplace("#define LOCAL_SIZE_Z " + support::cpp11::to_string(1));
- build_opts.emplace("#define RATIO " + support::cpp11::to_string(ratio));
- build_opts.emplace("#define SCALE " + support::cpp11::to_string(1. / (1. - ratio)));
- build_opts.emplace("#define SEED " + support::cpp11::to_string(dist(mt)));
- build_opts.emplace("#define " + dt_name);
- build_opts.emplace("#define " + fporbp);
-
- _num_elems_processed_per_iteration = 4 / input->info()->element_size();
-
- // Create kernel
- _kernel = static_cast<GCKernel>(GCKernelLibrary::get().create_kernel("dropout", build_opts));
-
- // Configure kernel window
- Window win = calculate_max_window(*input->info(), Steps(_num_elems_processed_per_iteration));
-
- output->info()->set_valid_region(ValidRegion(Coordinates(), output->info()->tensor_shape()));
-
- IGCKernel::configure(win);
-}
-
-void GCDropoutLayerKernel::run(const Window &window)
-{
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_MISMATCHING_WINDOWS(IGCKernel::window(), window);
-
- _kernel.use();
-
- Window slice = window.first_slice_window_3D();
-
- do
- {
- unsigned int idx = 0;
-
- add_3D_tensor_argument(idx, _input, 1, slice);
- add_3D_tensor_argument(idx, _mask, 2, slice);
- add_3D_tensor_argument(idx, _output, 3, slice);
-
- _kernel.update_shader_params();
- enqueue(*this, slice);
- }
- while(window.slide_window_slice_3D(slice));
-}
diff --git a/src/core/GLES_COMPUTE/kernels/GCFillBorderKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCFillBorderKernel.cpp
deleted file mode 100644
index d424f0dc79..0000000000
--- a/src/core/GLES_COMPUTE/kernels/GCFillBorderKernel.cpp
+++ /dev/null
@@ -1,166 +0,0 @@
-/*
- * Copyright (c) 2017-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/core/GLES_COMPUTE/kernels/GCFillBorderKernel.h"
-
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h"
-#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h"
-#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h"
-#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h"
-#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Utils.h"
-#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/Window.h"
-#include "support/StringSupport.h"
-
-#include <cstdint>
-#include <set>
-#include <string>
-
-using namespace arm_compute;
-
-GCFillBorderKernel::GCFillBorderKernel()
- : IGCKernel(), _tensor(nullptr)
-{
-}
-
-bool GCFillBorderKernel::is_parallelisable() const
-{
- return false;
-}
-
-template <class T>
-void GCFillBorderKernel::set_constant_border(unsigned int idx, const PixelValue &constant_border_value)
-{
- T value;
- constant_border_value.get(value);
- _kernel.set_argument(idx, static_cast<T>(value));
-}
-
-void GCFillBorderKernel::configure(const IGCTensor *tensor, BorderSize border_size, BorderMode border_mode, const PixelValue &constant_border_value)
-{
- ARM_COMPUTE_ERROR_ON(tensor == nullptr);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(tensor, 1, DataType::F32, DataType::F16);
- ARM_COMPUTE_ERROR_ON(tensor->info()->num_channels() != 1);
-
- border_size.limit(tensor->info()->padding());
-
- // If there is no border: early exit
- if(border_size.empty() || border_mode == BorderMode::UNDEFINED)
- {
- return;
- }
-
- // Select appropriate kernel
- std::string kernel_name = "fill_image_borders_" + lower_string(string_from_border_mode(border_mode));
-
- // Define build options
- std::set<std::string> build_opts;
- build_opts.emplace("#define LOCAL_SIZE_X " + support::cpp11::to_string(1));
- build_opts.emplace("#define LOCAL_SIZE_Y " + support::cpp11::to_string(1));
- build_opts.emplace("#define LOCAL_SIZE_Z " + support::cpp11::to_string(1));
- build_opts.emplace("#define BORDER_SIZE_TOP " + support::cpp11::to_string(border_size.top));
- build_opts.emplace("#define BORDER_SIZE_BOTTOM " + support::cpp11::to_string(border_size.bottom));
- build_opts.emplace("#define BORDER_SIZE_LEFT " + support::cpp11::to_string(border_size.left));
- build_opts.emplace("#define BORDER_SIZE_RIGHT " + support::cpp11::to_string(border_size.right));
-
- if(border_mode == BorderMode::REPLICATE)
- {
- build_opts.emplace("#define FILL_IMAGE_BORDERS_REPLICATE\n");
- }
- else
- {
- build_opts.emplace("#define FILL_IMAGE_BORDERS_CONSTANT\n");
- }
-
- switch(tensor->info()->data_type())
- {
- case DataType::F16:
- build_opts.emplace("#define DATA_TYPE_FP16");
- break;
-
- case DataType::F32:
- build_opts.emplace("#define DATA_TYPE_FP32");
- break;
-
- default:
- ARM_COMPUTE_ERROR("Current data type is not supported");
- break;
- }
-
- // Create kernel
- _kernel = static_cast<GCKernel>(GCKernelLibrary::get().create_kernel(kernel_name, build_opts));
- _tensor = tensor;
-
- // Create static kernel arguments
- const unsigned int valid_width = tensor->info()->valid_region().shape[0];
- const unsigned int valid_height = tensor->info()->valid_region().shape[1];
- const unsigned int total_valid_width = border_size.left + valid_width + border_size.right;
-
- // Set static kernel arguments
- unsigned int idx = num_arguments_per_3D_tensor(); //Skip the tensor parameters
- _kernel.set_argument(idx++, valid_width);
- _kernel.set_argument(idx++, valid_height);
- _kernel.set_argument(idx++, tensor->info()->valid_region().anchor[0]);
- _kernel.set_argument(idx++, tensor->info()->valid_region().anchor[1]);
-
- if(BorderMode::CONSTANT == border_mode)
- {
- set_constant_border<float>(idx++, constant_border_value);
- }
-
- // Configure kernel window
- Window win;
- win.set(Window::DimX, Window::Dimension(0, total_valid_width + valid_height));
- win.set(Window::DimY, Window::Dimension(0, 1, 1));
- win.use_tensor_dimensions(tensor->info()->tensor_shape(), Window::DimZ);
-
- IGCKernel::configure(win);
-}
-
-void GCFillBorderKernel::run(const Window &window)
-{
- // Border mode undefined or border width == 0
- if(_kernel.get_program() == 0)
- {
- return;
- }
-
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_MISMATCHING_WINDOWS(IGCKernel::window(), window);
-
- _kernel.use();
- Window slice = window.first_slice_window_3D();
-
- do
- {
- unsigned int idx = 0;
- add_3D_tensor_argument(idx, _tensor, 1, slice);
-
- _kernel.update_shader_params();
-
- enqueue(*this, slice);
- }
- while(window.slide_window_slice_3D(slice));
-}
diff --git a/src/core/GLES_COMPUTE/kernels/GCGEMMInterleave4x4Kernel.cpp b/src/core/GLES_COMPUTE/kernels/GCGEMMInterleave4x4Kernel.cpp
deleted file mode 100644
index 28be710384..0000000000
--- a/src/core/GLES_COMPUTE/kernels/GCGEMMInterleave4x4Kernel.cpp
+++ /dev/null
@@ -1,125 +0,0 @@
-/*
- * Copyright (c) 2017-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/core/GLES_COMPUTE/kernels/GCGEMMInterleave4x4Kernel.h"
-
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h"
-#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h"
-#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h"
-#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/core/Utils.h"
-#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/Window.h"
-#include "support/StringSupport.h"
-
-using namespace arm_compute;
-
-GCGEMMInterleave4x4Kernel::GCGEMMInterleave4x4Kernel()
- : _input(nullptr), _output(nullptr)
-{
-}
-
-void GCGEMMInterleave4x4Kernel::configure(const IGCTensor *input, IGCTensor *output)
-{
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32);
- ARM_COMPUTE_ERROR_ON_NULLPTR(output);
-
- TensorShape output_shape = input->info()->tensor_shape();
- output_shape.set(0, input->info()->dimension(0) * 4);
- output_shape.set(1, std::ceil(input->info()->dimension(1) / 4.0f));
-
- // Output auto inizialitation if not yet initialized
- auto_init_if_empty(*output->info(), output_shape, 1, input->info()->data_type());
-
- ARM_COMPUTE_ERROR_ON_MISMATCHING_DIMENSIONS(output->info()->tensor_shape(), output_shape);
- ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
-
- _input = input;
- _output = output;
-
- std::set<std::string> build_opts;
- std::string dt_name = (input->info()->data_type() == DataType::F32) ? "DATA_TYPE_FP32" : "DATA_TYPE_FP16";
- build_opts.emplace(("#define " + dt_name));
- build_opts.emplace("#define LOCAL_SIZE_X " + support::cpp11::to_string(1));
- build_opts.emplace("#define LOCAL_SIZE_Y " + support::cpp11::to_string(1));
- build_opts.emplace("#define LOCAL_SIZE_Z " + support::cpp11::to_string(1));
-
- // Create kernel
- build_opts.emplace("#define GEMM_INTERLEAVE4x4");
- _kernel = static_cast<GCKernel>(GCKernelLibrary::get().create_kernel("gemm_interleave4x4", build_opts));
-
- // Configure kernel window
- const unsigned int num_elems_processed_per_iteration_x = max_gc_vector_width / data_size_from_type(input->info()->data_type());
- constexpr unsigned int num_elems_processed_per_iteration_y = 4;
- const unsigned int num_elems_written_per_iteration = num_elems_processed_per_iteration_x * num_elems_processed_per_iteration_y;
-
- Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration_x, num_elems_processed_per_iteration_y));
-
- AccessWindowRectangle input_access(input->info(), 0, 0, num_elems_processed_per_iteration_x, num_elems_processed_per_iteration_y);
- AccessWindowRectangle output_access(output->info(), 0, 0, num_elems_written_per_iteration, 1, 4.f, 0.25f);
-
- update_window_and_padding(win, input_access, output_access);
-
- output->info()->set_valid_region(ValidRegion(Coordinates(), output->info()->tensor_shape()));
-
- IGCKernel::configure(win);
-}
-
-void GCGEMMInterleave4x4Kernel::run(const Window &window)
-{
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IGCKernel::window(), window);
-
- _kernel.use();
-
- /*
- * This kernel puts the values in a 4x4 block of Matrix A on the same row (Interleaved values)
- * |a00 a01 a02 a03|
- * |a10 a11 a12 a13|
- * |a20 a21 a22 a23| = | a00 a10 a20 a30 || a01 a11 a21 a31 || a02 a12 a22 a32 || a03 a13 a23 a33 |
- * |a30 a31 a32 a33|
- *
- * After this operation, the output matrix will have the following shape: [ height * 4, width / 4 ]
- */
- Window in_slice = window.first_slice_window_2D();
- Window out_slice = window.first_slice_window_2D();
-
- // Change x and y steps for the slide of output tensor
- out_slice.scale(Window::DimX, 4.f);
- out_slice.scale(Window::DimY, 0.25f);
-
- do
- {
- unsigned int idx = 0;
- add_2D_tensor_argument(idx, _input, 1, in_slice);
- add_2D_tensor_argument(idx, _output, 2, out_slice);
-
- _kernel.update_shader_params();
-
- enqueue(*this, in_slice);
- }
- while(window.slide_window_slice_2D(in_slice) && window.slide_window_slice_2D(out_slice));
-}
diff --git a/src/core/GLES_COMPUTE/kernels/GCGEMMMatrixAccumulateBiasesKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCGEMMMatrixAccumulateBiasesKernel.cpp
deleted file mode 100644
index f4c84f3d66..0000000000
--- a/src/core/GLES_COMPUTE/kernels/GCGEMMMatrixAccumulateBiasesKernel.cpp
+++ /dev/null
@@ -1,129 +0,0 @@
-/*
- * Copyright (c) 2017-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixAccumulateBiasesKernel.h"
-
-#include "arm_compute/core/AccessWindowStatic.h"
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h"
-#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h"
-#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h"
-#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/core/Utils.h"
-#include "arm_compute/core/Validate.h"
-#include "support/StringSupport.h"
-
-using namespace arm_compute;
-
-GCGEMMMatrixAccumulateBiasesKernel::GCGEMMMatrixAccumulateBiasesKernel()
- : _accum(nullptr), _biases(nullptr), _lws(gles::NDRange(1U, 1U, 1U))
-{
-}
-
-void GCGEMMMatrixAccumulateBiasesKernel::configure(IGCTensor *accum, const IGCTensor *biases)
-{
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(accum, 1, DataType::F16, DataType::F32);
- ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(biases, accum);
- ARM_COMPUTE_ERROR_ON(biases->info()->num_dimensions() != 1);
-
- _biases = biases;
- _accum = accum;
-
- std::set<std::string> build_opts;
- build_opts.emplace("#define LOCAL_SIZE_X " + support::cpp11::to_string(_lws[0]));
- build_opts.emplace("#define LOCAL_SIZE_Y " + support::cpp11::to_string(_lws[1]));
- build_opts.emplace("#define LOCAL_SIZE_Z " + support::cpp11::to_string(_lws[2]));
-
- // Create kernel
- build_opts.emplace("#define GEMM_ACCUMULATE_BIASES");
-
-#define ACCUM_PROCESS_4X
-
-#if defined(ACCUM_PROCESS_4X)
- build_opts.emplace("#define ACCUM_PROCESS_4X");
-#elif defined(ACCUM_PROCESS_8X) /* ACCUM_PROCESS_4X */
- build_opts.emplace("#define ACCUM_PROCESS_8X");
-#endif /* ACCUM_PROCESS_4X */
- std::string dt_name = (accum->info()->data_type() == DataType::F32) ? "DATA_TYPE_FP32" : "DATA_TYPE_FP16";
- build_opts.emplace(("#define " + dt_name));
-
- _kernel = GCKernelLibrary::get().create_kernel("gemm_accumulate_biases", build_opts);
-
- // Configure kernel window
- unsigned int num_elems_processed_per_iteration = 1;
-
- if(_accum->info()->data_type() == DataType::F32)
- {
- num_elems_processed_per_iteration = 16;
- }
- else if(_accum->info()->data_type() == DataType::F16)
- {
-#if defined(ACCUM_PROCESS_4X)
- num_elems_processed_per_iteration = 4;
-#elif defined(ACCUM_PROCESS_8X) /* ACCUM_PROCESS_4X */
- num_elems_processed_per_iteration = 8;
-#endif /* ACCUM_PROCESS_4X */
- }
-
- const int accum_width = accum->info()->dimension(0);
- const int accum_padding_right = ceil_to_multiple(accum_width, num_elems_processed_per_iteration * _lws[0]) - accum_width;
- BorderSize border = BorderSize(0, accum_padding_right, 0, 0);
-
- Window win = calculate_max_enlarged_window(*_accum->info(), Steps(num_elems_processed_per_iteration), border);
-
- AccessWindowStatic biases_access(biases->info(), 0, 0, ceil_to_multiple(biases->info()->dimension(0), num_elems_processed_per_iteration * _lws[0]), biases->info()->dimension(1));
- AccessWindowStatic accum_access(_accum->info(), 0, 0, accum_width + accum_padding_right, _accum->info()->dimension(1));
-
- update_window_and_padding(win, biases_access, accum_access);
-
- IGCKernel::configure(win);
-}
-
-void GCGEMMMatrixAccumulateBiasesKernel::run(const Window &window)
-{
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_MISMATCHING_WINDOWS(IGCKernel::window(), window);
-
- _kernel.use();
-
- Window accum_slice = window.first_slice_window_2D();
-
- Window biases_slice(accum_slice);
- biases_slice.set(Window::DimY, Window::Dimension(0, 1, 1));
-
- // Run kernel
- do
- {
- // Set arguments
- unsigned int idx = 0;
-
- add_2D_tensor_argument(idx, _accum, 1, accum_slice);
- add_1D_tensor_argument(idx, _biases, 2, biases_slice);
- _kernel.update_shader_params();
-
- enqueue(*this, accum_slice, _lws);
- }
- while(window.slide_window_slice_2D(accum_slice));
-}
diff --git a/src/core/GLES_COMPUTE/kernels/GCGEMMMatrixAdditionKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCGEMMMatrixAdditionKernel.cpp
deleted file mode 100644
index 0429824b04..0000000000
--- a/src/core/GLES_COMPUTE/kernels/GCGEMMMatrixAdditionKernel.cpp
+++ /dev/null
@@ -1,101 +0,0 @@
-/*
- * Copyright (c) 2017-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixAdditionKernel.h"
-
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h"
-#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h"
-#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h"
-#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/Window.h"
-#include "support/StringSupport.h"
-
-using namespace arm_compute;
-
-GCGEMMMatrixAdditionKernel::GCGEMMMatrixAdditionKernel()
- : _input(nullptr), _output(nullptr)
-{
-}
-
-void GCGEMMMatrixAdditionKernel::configure(const IGCTensor *input, IGCTensor *output, float beta)
-{
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32);
- ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
- ARM_COMPUTE_ERROR_ON(input->info()->dimension(0) != output->info()->dimension(0));
- ARM_COMPUTE_ERROR_ON(input->info()->dimension(1) != output->info()->dimension(1));
-
- _input = input;
- _output = output;
- const unsigned int num_elems_processed_per_iteration = max_gc_vector_width / data_size_from_type(input->info()->data_type());
-
- std::set<std::string> build_opts;
- std::string dt_name = (input->info()->data_type() == DataType::F32) ? "DATA_TYPE_FP32" : "DATA_TYPE_FP16";
- build_opts.emplace(("#define " + dt_name));
- build_opts.emplace("#define LOCAL_SIZE_X " + support::cpp11::to_string(1));
- build_opts.emplace("#define LOCAL_SIZE_Y " + support::cpp11::to_string(1));
- build_opts.emplace("#define LOCAL_SIZE_Z " + support::cpp11::to_string(1));
- build_opts.emplace("#define BETA " + float_to_string_with_full_precision(beta));
-
- // Create kernel
- build_opts.emplace("#define GEMM_MATRIXADDITION");
- std::string data_type_name = lower_string(string_from_data_type(input->info()->data_type()));
- _kernel = GCKernelLibrary::get().create_kernel(("gemm_ma"), build_opts);
-
- // Configure kernel window
- Window win = calculate_max_window(*_input->info(), Steps(num_elems_processed_per_iteration));
-
- AccessWindowHorizontal input_access(input->info(), 0, num_elems_processed_per_iteration);
- AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration);
-
- update_window_and_padding(win, input_access, output_access);
-
- output_access.set_valid_region(win, input->info()->valid_region());
-
- IGCKernel::configure(win);
-}
-
-void GCGEMMMatrixAdditionKernel::run(const Window &window)
-{
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IGCKernel::window(), window);
-
- _kernel.use();
-
- Window slice = window.first_slice_window_2D();
-
- do
- {
- unsigned int idx = 0;
- add_2D_tensor_argument(idx, _input, 1, slice);
- add_2D_tensor_argument(idx, _output, 2, slice);
-
- _kernel.update_shader_params();
-
- enqueue(*this, slice);
- }
- while(window.slide_window_slice_2D(slice));
-}
diff --git a/src/core/GLES_COMPUTE/kernels/GCGEMMMatrixMultiplyKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCGEMMMatrixMultiplyKernel.cpp
deleted file mode 100644
index 2a85e0d77d..0000000000
--- a/src/core/GLES_COMPUTE/kernels/GCGEMMMatrixMultiplyKernel.cpp
+++ /dev/null
@@ -1,336 +0,0 @@
-/*
- * Copyright (c) 2017-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixMultiplyKernel.h"
-
-#include "arm_compute/core/AccessWindowStatic.h"
-#include "arm_compute/core/AccessWindowTranspose.h"
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h"
-#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h"
-#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h"
-#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/core/Utils.h"
-#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/Window.h"
-#include "arm_compute/core/utils/misc/ShapeCalculator.h"
-#include "support/StringSupport.h"
-
-#include <set>
-#include <string>
-
-using namespace arm_compute;
-using namespace arm_compute::misc::shape_calculator;
-
-namespace
-{
-using ElementsProcessed = Steps;
-
-inline Status validate_arguments(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *output, bool is_interleaved_transposed, const GEMMReshapeInfo &reshape_info)
-{
- ARM_COMPUTE_UNUSED(reshape_info);
- ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input0, input1, output);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input0, 1, DataType::F16, DataType::F32);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input0, input1);
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(input1->num_dimensions() > 3, "The number of dimensions for the matrix B must be <= 3");
-
- if(!is_interleaved_transposed)
- {
- ARM_COMPUTE_RETURN_ERROR_ON(input0->dimension(0) != input1->dimension(1));
-
- if(output->total_size() != 0)
- {
- ARM_COMPUTE_RETURN_ERROR_ON(input1->dimension(0) != output->dimension(0));
- ARM_COMPUTE_RETURN_ERROR_ON(input0->dimension(1) != output->dimension(1));
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input0, output);
- }
- }
- else
- {
- const int m = reshape_info.m();
- const int n = reshape_info.n();
- const int k = reshape_info.k();
- const int mult_transpose1xW_width = reshape_info.mult_transpose1xW_width();
- const int mult_interleave4x4_height = reshape_info.mult_interleave4x4_height();
-
- TensorShape tensor_shape0{ input0->tensor_shape() };
- tensor_shape0.set(0, k);
- tensor_shape0.set(1, m);
-
- TensorShape tensor_shape1{ input1->tensor_shape() };
- tensor_shape1.set(0, n);
- tensor_shape1.set(1, k);
-
- const TensorInfo tensor_info0 = input0->clone()->set_tensor_shape(tensor_shape0);
- const TensorInfo tensor_info1 = input1->clone()->set_tensor_shape(tensor_shape1);
-
- const TensorInfo tensor_info_reshaped0 = input0->clone()->set_tensor_shape(compute_interleaved_shape(tensor_info0, mult_interleave4x4_height));
- const TensorInfo tensor_info_reshaped1 = input1->clone()->set_tensor_shape(compute_transpose1xW_with_element_size_shape(tensor_info1, mult_transpose1xW_width));
-
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input0, &tensor_info_reshaped0);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input1, &tensor_info_reshaped1);
-
- if(output->total_size() != 0)
- {
- ARM_COMPUTE_RETURN_ERROR_ON(output->dimension(0) != static_cast<size_t>(n));
- ARM_COMPUTE_RETURN_ERROR_ON(output->dimension(1) != static_cast<size_t>(m));
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input0, output);
- }
- }
-
- return Status{};
-}
-
-inline std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input0, ITensorInfo *input1, ITensorInfo *output,
- bool is_interleaved_transposed, const GEMMReshapeInfo &reshape_info,
- GPUTarget gpu_target, ElementsProcessed &num_elements_processed)
-{
- ARM_COMPUTE_UNUSED(gpu_target);
-
- // Output tensor auto-initialization if not yet initialized
- TensorShape tensor_shape{ input0->tensor_shape() };
- tensor_shape.set(0, is_interleaved_transposed ? reshape_info.n() : input1->dimension(0));
- tensor_shape.set(1, is_interleaved_transposed ? reshape_info.m() : input0->dimension(1));
-
- auto_init_if_empty(*output, input0->clone()->set_tensor_shape(tensor_shape));
-
- bool window_changed = false;
- Window win{};
-
- const DataType data_type = input0->data_type();
- unsigned int &num_elems_processed_per_iteration_x = num_elements_processed[0];
- unsigned int &num_elems_processed_per_iteration_y = num_elements_processed[1];
-
- if(is_interleaved_transposed)
- {
- // Configure window kernel
- num_elems_processed_per_iteration_x = max_gc_vector_width / data_size_from_type(data_type);
- num_elems_processed_per_iteration_y = 4;
-
- win = calculate_max_window(*output, Steps(num_elems_processed_per_iteration_x, num_elems_processed_per_iteration_y));
-
- AccessWindowRectangle input0_access(input0, 0, 0, num_elems_processed_per_iteration_y, 1, 1.f, 0.25f);
- AccessWindowTranspose input1_access(input1, 0, 0, num_elems_processed_per_iteration_x, 1, 0.f, 0.25f);
- AccessWindowRectangle output_access(output, 0, 0, num_elems_processed_per_iteration_x, num_elems_processed_per_iteration_y);
-
- update_window_and_padding(win, input0_access, input1_access, output_access);
-
- output_access.set_valid_region(win, ValidRegion(Coordinates(), output->tensor_shape()));
- }
- else // The input tensors have not been reshaped
- {
- // Special case for 1xN, 2xN, 3xN and 4xN input0 tensor.
- num_elems_processed_per_iteration_y = std::min(static_cast<int>(output->dimension(1)), 4);
-
- switch(data_type)
- {
- case DataType::F16:
- num_elems_processed_per_iteration_x = 4;
- break;
-
- case DataType::F32:
- num_elems_processed_per_iteration_x = max_gc_vector_width / data_size_from_type(data_type);
- break;
-
- default:
- ARM_COMPUTE_ERROR("Current data type is not supported");
- break;
- }
-
- win = calculate_max_window(*output, Steps(num_elems_processed_per_iteration_x, num_elems_processed_per_iteration_y));
-
- AccessWindowStatic input0_access(input0, 0, 0, ceil_to_multiple(input0->dimension(0), 8), ceil_to_multiple(input0->dimension(1), num_elems_processed_per_iteration_y));
- AccessWindowStatic input1_access(input1, 0, 0, ceil_to_multiple(input1->dimension(0), num_elems_processed_per_iteration_x), input1->dimension(1));
- AccessWindowRectangle output_access(output, 0, 0, num_elems_processed_per_iteration_x, num_elems_processed_per_iteration_y);
-
- update_window_and_padding(win, input0_access, input1_access, output_access);
-
- Coordinates coord;
- coord.set_num_dimensions(output->num_dimensions());
- output_access.set_valid_region(win, ValidRegion(coord, output->tensor_shape()));
- }
-
- Status err = (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{};
- return std::make_pair(err, win);
-}
-} // namespace
-
-GCGEMMMatrixMultiplyKernel::GCGEMMMatrixMultiplyKernel()
- : _input0(nullptr), _input1(nullptr), _output(nullptr)
-{
-}
-
-void GCGEMMMatrixMultiplyKernel::configure(const IGCTensor *input0, const IGCTensor *input1, IGCTensor *output, float alpha, bool is_interleaved_transposed, const GEMMReshapeInfo &reshape_info)
-{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input0, input1, output);
-
- // Perform validate step
- ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input0->info(), input1->info(), output->info(), is_interleaved_transposed, reshape_info));
-
- _input0 = input0;
- _input1 = input1;
- _output = output;
-
- // Get target architecture
- GPUTarget gpu_target = get_target();
-
- ElementsProcessed num_elements_processed{};
-
- // Configure kernel window
- auto win_config = validate_and_configure_window(input0->info(), input1->info(), output->info(), is_interleaved_transposed, reshape_info, gpu_target, num_elements_processed);
- ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
- IGCKernel::configure(win_config.second);
-
- // Create build options
- std::set<std::string> build_opts;
- std::string kernel_name;
-
- build_opts.emplace("#define LOCAL_SIZE_X " + support::cpp11::to_string(1));
- build_opts.emplace("#define LOCAL_SIZE_Y " + support::cpp11::to_string(1));
- build_opts.emplace("#define LOCAL_SIZE_Z " + support::cpp11::to_string(1));
- build_opts.emplace("#define COLS_A " + support::cpp11::to_string(input0->info()->dimension(0)));
- build_opts.emplace("#define COLS_B " + support::cpp11::to_string(input1->info()->dimension(0)));
- build_opts.emplace("#define ALPHA " + float_to_string_with_full_precision(alpha));
-
- // Check if the output tensor is a vector. If so, the kernel runs the vector-matrix multiplication
- if(is_interleaved_transposed)
- {
- const int mult_transpose1xW_width = reshape_info.mult_transpose1xW_width();
- const int mult_interleave4x4_height = reshape_info.mult_interleave4x4_height();
-
- build_opts.emplace("#define MULT_TRANSPOSE1XW_WIDTH " + support::cpp11::to_string(mult_transpose1xW_width));
- build_opts.emplace("#define MULT_INTERLEAVE4X4_HEIGHT " + support::cpp11::to_string(mult_interleave4x4_height));
-
- switch(input0->info()->data_type())
- {
- case DataType::F16:
- build_opts.emplace("#define DATA_TYPE_FP16");
- break;
-
- case DataType::F32:
- build_opts.emplace("#define DATA_TYPE_FP32");
- break;
-
- default:
- ARM_COMPUTE_ERROR("Current data type is not supported");
- break;
- }
-
- build_opts.emplace("#define GEMM_MM_INTERLEAVED_TRANSPOSED");
-
- kernel_name = "gemm_mm_interleaved_transposed";
- }
- else
- {
- // Special case for 1xN, 2xN, 3xN and 4xN input0 tensor
-
- GPUTarget arch_target = get_arch_from_target(gpu_target);
- switch(input0->info()->data_type())
- {
- case DataType::F16:
- build_opts.emplace("#define DATA_TYPE_FP16");
- build_opts.emplace("#define MM_PROCESS_4X_OPTIMIZED");
- build_opts.emplace("#define GEMM_MM_FLOATING_POINT");
- break;
-
- case DataType::F32:
- build_opts.emplace("#define DATA_TYPE_FP32");
-
- if(arch_target == GPUTarget::BIFROST && input0->info()->num_dimensions() != 1)
- {
- build_opts.emplace("#define GEMM_MM_FLOATING_POINT_BIFROST");
- }
- else
- {
- build_opts.emplace("#define GEMM_MM_FLOATING_POINT");
- }
- break;
-
- default:
- ARM_COMPUTE_ERROR("Current data type is not supported");
- break;
- }
-
- build_opts.emplace("#define NUM_ELEMS_PROCESSED_PER_THREAD_X " + support::cpp11::to_string(num_elements_processed.x()));
- build_opts.emplace("#define NUM_ELEMS_PROCESSED_PER_THREAD_Y " + support::cpp11::to_string(num_elements_processed.y()));
-
- kernel_name = "gemm_mm_floating_point";
- }
-
- // Create kernel
- _kernel = GCKernelLibrary::get().create_kernel(kernel_name, build_opts);
-}
-
-Status GCGEMMMatrixMultiplyKernel::validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *output, float alpha, bool is_interleaved_transposed,
- const GEMMReshapeInfo &reshape_info, GPUTarget gpu_target)
-{
- ARM_COMPUTE_UNUSED(alpha);
- ElementsProcessed num_elements_processed{};
- ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input0, input1, output, is_interleaved_transposed, reshape_info));
- ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input0->clone().get(),
- input1->clone().get(),
- output->clone().get(),
- is_interleaved_transposed,
- reshape_info,
- gpu_target,
- num_elements_processed)
- .first);
- return Status{};
-}
-
-void GCGEMMMatrixMultiplyKernel::run(const Window &window)
-{
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IGCKernel::window(), window);
-
- _kernel.use();
-
- Window slice = window.first_slice_window_2D();
- Window slice_matrix_b = slice;
-
- slice_matrix_b.set(Window::DimX, Window::Dimension(0, 1, 1));
- slice_matrix_b.set(Window::DimY, Window::Dimension(0, 1, 1));
-
- do
- {
- Window slice_b = slice;
- // Don't slice matrix B along the z dimension if matrix B has just 2 dimensions and matrix A more than 2
- // This scenario can happen when the matrix multiplication is used to perform a convolution operation
- if(_input1->info()->num_dimensions() < 3)
- {
- slice_b = slice_matrix_b;
- }
-
- unsigned int idx = 0;
-
- add_2D_tensor_argument(idx, _input0, 1, slice);
- add_2D_tensor_argument(idx, _input1, 2, slice_b);
- add_2D_tensor_argument(idx, _output, 3, slice);
- _kernel.update_shader_params();
- enqueue(*this, slice);
- }
- while(window.slide_window_slice_2D(slice));
-}
diff --git a/src/core/GLES_COMPUTE/kernels/GCGEMMTranspose1xWKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCGEMMTranspose1xWKernel.cpp
deleted file mode 100644
index 6ebd8dd6e4..0000000000
--- a/src/core/GLES_COMPUTE/kernels/GCGEMMTranspose1xWKernel.cpp
+++ /dev/null
@@ -1,125 +0,0 @@
-/*
- * Copyright (c) 2017-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/core/GLES_COMPUTE/kernels/GCGEMMTranspose1xWKernel.h"
-
-#include "arm_compute/core/AccessWindowTranspose.h"
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h"
-#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h"
-#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h"
-#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/Window.h"
-#include "support/StringSupport.h"
-
-#include <cmath>
-
-using namespace arm_compute;
-
-void GCGEMMTranspose1xWKernel::configure(const IGCTensor *input, IGCTensor *output)
-{
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32);
- ARM_COMPUTE_ERROR_ON_NULLPTR(output);
-
- TensorShape output_shape{ input->info()->tensor_shape() };
- const size_t transpose_w = 16 / input->info()->element_size();
- output_shape.set(0, input->info()->dimension(1) * transpose_w);
- output_shape.set(1, static_cast<size_t>(std::ceil((input->info()->dimension(0) / static_cast<float>(transpose_w)))));
-
- // Output tensor auto-initialization if not yet initialized
- auto_init_if_empty(*output->info(), output_shape, 1, input->info()->data_type());
-
- ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
- ARM_COMPUTE_ERROR_ON_MISMATCHING_DIMENSIONS(output->info()->tensor_shape(), output_shape);
-
- const unsigned int num_elems_processed_per_iteration = 16 / input->info()->element_size();
- const int scale_x = num_elems_processed_per_iteration;
-
- _input = input;
- _output = output;
-
- std::set<std::string> build_opts;
- std::string dt_name = (input->info()->data_type() == DataType::F32) ? "DATA_TYPE_FP32" : "DATA_TYPE_FP16";
- build_opts.emplace(("#define " + dt_name));
- build_opts.emplace("#define LOCAL_SIZE_X " + support::cpp11::to_string(1));
- build_opts.emplace("#define LOCAL_SIZE_Y " + support::cpp11::to_string(1));
- build_opts.emplace("#define LOCAL_SIZE_Z " + support::cpp11::to_string(1));
- /*
- * The following is an example of how the transposition1xW works when the input data type is F32
- *
- * |a00 a01 a02 a03|
- * |a10 a11 a12 a13|
- * |a20 a21 a22 a23| = | a00 a01 a02 a03 || a10 a11 a12 a13 || a20 a21 a22 a23 || a30 a31 a32 a33 |
- * |a30 a31 a32 a33|
- *
- * The output matrix will have the following shape: [ height * W, ceil(width / W) ], where W = (16 / element size of the tensor)
- */
- // Create kernel
- build_opts.emplace("#define GEMM_TRANSPOSE1xW");
- _kernel = GCKernelLibrary::get().create_kernel("gemm_transpose1x4", build_opts);
-
- // Configure window
- Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration));
-
- ARM_COMPUTE_ERROR_ON_MSG((win.x().end() / scale_x) == 0, "Transposed shape would be 0 in the second dimension");
-
- AccessWindowHorizontal input_access(input->info(), 0, num_elems_processed_per_iteration);
- AccessWindowTranspose output_access(output->info(), 0, 0, num_elems_processed_per_iteration, 1, scale_x, 1.f / scale_x);
-
- update_window_and_padding(win, input_access, output_access);
-
- output->info()->set_valid_region(ValidRegion(Coordinates(), output->info()->tensor_shape()));
-
- IGCKernel::configure(win);
-}
-
-void GCGEMMTranspose1xWKernel::run(const Window &window)
-{
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IGCKernel::window(), window);
-
- _kernel.use();
-
- // Output is transposed
- Window out_window(window);
- out_window.set(Window::DimX, window.y());
- out_window.set(Window::DimY, window.x());
-
- Window in_slice = window.first_slice_window_2D();
- Window out_slice = out_window.first_slice_window_2D();
-
- do
- {
- unsigned int idx = 0;
- add_2D_tensor_argument(idx, _input, 1, in_slice);
- add_2D_tensor_argument(idx, _output, 2, out_slice);
-
- _kernel.update_shader_params();
-
- enqueue(*this, in_slice);
- }
- while(window.slide_window_slice_2D(in_slice) && out_window.slide_window_slice_2D(out_slice));
-}
diff --git a/src/core/GLES_COMPUTE/kernels/GCIm2ColKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCIm2ColKernel.cpp
deleted file mode 100644
index 1890cf7e04..0000000000
--- a/src/core/GLES_COMPUTE/kernels/GCIm2ColKernel.cpp
+++ /dev/null
@@ -1,302 +0,0 @@
-/*
- * Copyright (c) 2017-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "arm_compute/core/GLES_COMPUTE/kernels/GCIm2ColKernel.h"
-
-#include "arm_compute/core/AccessWindowStatic.h"
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h"
-#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h"
-#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h"
-#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/Size2D.h"
-#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/core/Validate.h"
-#include "support/StringSupport.h"
-
-#include <cmath>
-#include <tuple>
-
-using namespace arm_compute;
-
-namespace
-{
-Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output)
-{
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32);
- ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(output);
-
- // Checks performed when output is configured
- if(output->total_size() != 0)
- {
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
- }
-
- return Status{};
-}
-} // namespace
-
-GCIm2ColKernel::GCIm2ColKernel()
- : _input(nullptr), _output(nullptr), _convolved_dims(), _kernel_dims(), _num_elems_processed_per_iteration(1), _run_func(nullptr)
-{
-}
-
-void GCIm2ColKernel::configure(const IGCTensor *input, IGCTensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, const Size2D &dilation)
-{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
-
- // Perform validation step
- ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info()));
-
- _input = input;
- _output = output;
-
- // Create kernel
- std::set<std::string> build_opts;
- std::string dt_name = (input->info()->data_type() == DataType::F32) ? "DATA_TYPE_FP32" : "DATA_TYPE_FP16";
- build_opts.emplace("#define LOCAL_SIZE_X " + support::cpp11::to_string(1));
- build_opts.emplace("#define LOCAL_SIZE_Y " + support::cpp11::to_string(1));
- build_opts.emplace("#define LOCAL_SIZE_Z " + support::cpp11::to_string(1));
- build_opts.insert("#define " + dt_name);
-
- if(has_bias)
- {
- build_opts.emplace("#define HAS_BIAS");
- }
-
- int stride_x = 0;
- int stride_y = 0;
-
- std::tie(stride_x, stride_y) = conv_info.stride();
- _kernel_dims = std::make_pair(kernel_dims.width, kernel_dims.height);
-
- const bool run_img2col_reduced = (output->info()->dimension(0) == (input->info()->dimension(0) * input->info()->dimension(1) * input->info()->dimension(2))) && (TensorShape::num_max_dimensions >= 4)
- && (std::equal(input->info()->tensor_shape().cbegin() + 3,
- input->info()->tensor_shape().cend(),
- output->info()->tensor_shape().cbegin() + 1))
- && ((stride_x == 1) && (stride_y == 1) && !conv_info.has_padding())
- && (dilation == Size2D(1U, 1U));
-
- std::string kernel_name = "im2col_generic";
- if(!run_img2col_reduced)
- {
- if(input->info()->data_type() == DataType::F16 && _kernel_dims == std::pair<unsigned int, unsigned int>(1, 1))
- {
- build_opts.emplace("#define KERNEL_1x1");
- }
-
- build_opts.emplace("#define IM2COL_GENERIC");
- _convolved_dims = scaled_dimensions(input->info()->dimension(0), input->info()->dimension(1),
- kernel_dims.width, kernel_dims.height,
- conv_info, dilation);
- _num_elems_processed_per_iteration = (input->info()->data_type() == DataType::F32) ? 1 : 2;
-
- build_opts.emplace("#define KERNEL_WIDTH " + support::cpp11::to_string(kernel_dims.width));
- build_opts.emplace("#define KERNEL_HEIGHT " + support::cpp11::to_string(kernel_dims.height));
- build_opts.emplace("#define KERNEL_DEPTH " + support::cpp11::to_string(input->info()->dimension(2)));
- build_opts.emplace("#define CONVOLVED_WIDTH " + support::cpp11::to_string(_convolved_dims.first));
- build_opts.emplace("#define CONVOLVED_HEIGHT " + support::cpp11::to_string(_convolved_dims.second));
- build_opts.emplace("#define STRIDE_X " + support::cpp11::to_string(conv_info.stride().first));
- build_opts.emplace("#define STRIDE_Y " + support::cpp11::to_string(conv_info.stride().second));
- build_opts.emplace("#define PAD_LEFT " + support::cpp11::to_string(conv_info.pad_left()));
- build_opts.emplace("#define PAD_TOP " + support::cpp11::to_string(conv_info.pad_top()));
- build_opts.emplace("#define PAD_RIGHT " + support::cpp11::to_string(conv_info.pad_right()));
- build_opts.emplace("#define PAD_BOTTOM " + support::cpp11::to_string(conv_info.pad_bottom()));
- build_opts.emplace("#define SRC_WIDTH " + support::cpp11::to_string(input->info()->dimension(0)));
- build_opts.emplace("#define SRC_HEIGHT " + support::cpp11::to_string(input->info()->dimension(1)));
- build_opts.emplace("#define DILATION_X " + support::cpp11::to_string(dilation.x()));
- build_opts.emplace("#define DILATION_Y " + support::cpp11::to_string(dilation.y()));
-
- _run_func = &GCIm2ColKernel::run_generic;
- }
- else
- {
- build_opts.emplace("#define IM2COL_REDUCED");
- kernel_name = "im2col_reduced";
-
- if(input->info()->data_type() == DataType::F32)
- {
- _num_elems_processed_per_iteration = 4 / input->info()->element_size();
- }
- else if(input->info()->data_type() == DataType::F16)
- {
- int input_width = input->info()->dimension(0);
- int input_height = input->info()->dimension(1);
-
- build_opts.emplace("#define IMAGE_SIZE " + support::cpp11::to_string(input_width * input_height));
- if(input_width % 8 == 0)
- {
- _num_elems_processed_per_iteration = 8;
- build_opts.emplace("#define IM2COL_REDUCED_8X");
- }
- else if(input_width % 4 == 0)
- {
- _num_elems_processed_per_iteration = 4;
- build_opts.emplace("#define IM2COL_REDUCED_4X");
- }
- else if(input_width % 2 == 0)
- {
- _num_elems_processed_per_iteration = 2;
- build_opts.emplace("#define IM2COL_REDUCED_2X");
- }
- else
- {
- _num_elems_processed_per_iteration = 2;
- build_opts.emplace("#define IM2COL_REDUCED_GENERIC");
- }
- }
-
- _run_func = &GCIm2ColKernel::run_reduced;
- }
-
- // Create kernel
- _kernel = static_cast<GCKernel>(GCKernelLibrary::get().create_kernel(kernel_name, build_opts));
-
- // Configure kernel window
- Window win = calculate_max_window(*input->info(), Steps(_num_elems_processed_per_iteration));
-
- if(input->info()->data_type() == DataType::F16)
- {
- // Calculate input right and bottom border
- const int input_width = input->info()->dimension(0);
- const int input_height = input->info()->dimension(1);
- int input_total_width = input->info()->padding().left + input_width + input->info()->padding().right;
- int input_padding_right = ceil_to_multiple(input_total_width, _num_elems_processed_per_iteration) - input_total_width;
- input_total_width = input_width + input_padding_right + input->info()->padding().right;
- AccessWindowStatic input_access(input->info(), 0, 0, input_total_width, input_height);
-
- // Calculate output right and bottom border
- const int output_width = output->info()->dimension(0);
- const int output_height = output->info()->dimension(1);
- const int output_padding_right = ceil_to_multiple(output_width, _num_elems_processed_per_iteration) - output_width;
- AccessWindowStatic output_access(output->info(), 0, 0, output_width + output_padding_right, output_height);
-
- update_window_and_padding(win, input_access, output_access);
- }
-
- output->info()->set_valid_region(ValidRegion(Coordinates(), output->info()->tensor_shape()));
-
- if(!run_img2col_reduced)
- {
- // Set the Z dimension's step to the size of the whole dimension so that the window can't be split across the Z dimension
- win.set_dimension_step(Window::DimZ, win[Window::DimZ].end() - win[Window::DimZ].start());
- }
-
- IGCKernel::configure(win);
-}
-
-Status GCIm2ColKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, const Size2D &dilation)
-{
- ARM_COMPUTE_UNUSED(kernel_dims);
- ARM_COMPUTE_UNUSED(conv_info);
- ARM_COMPUTE_UNUSED(has_bias);
- ARM_COMPUTE_UNUSED(dilation);
- ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output));
- return Status{};
-}
-
-void GCIm2ColKernel::run(const Window &window)
-{
- ARM_COMPUTE_ERROR_ON(_run_func == nullptr);
- (this->*_run_func)(window);
-}
-
-void GCIm2ColKernel::run_generic(const Window &window)
-{
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_MISMATCHING_WINDOWS(IGCKernel::window(), window);
-
- // Get initial windows
- Window window_collapsed = window.collapse_if_possible(IGCKernel::window(), Window::DimZ);
-
- // Change the Z dimension's step back to 1
- window_collapsed.set_dimension_step(Window::DimZ, 1);
-
- Window slice = window_collapsed.first_slice_window_3D();
- Window slice_in = window_collapsed.first_slice_window_3D();
- Window slice_out = window_collapsed.first_slice_window_3D();
-
- // Setup slice
- slice.set(Window::DimX, Window::Dimension(0, static_cast<int>(_convolved_dims.first), 1));
- slice.set(Window::DimY, Window::Dimension(0, static_cast<int>(_convolved_dims.second), 1));
-
- // Setup output slice
- slice_out.set(Window::DimX, Window::Dimension(0, _output->info()->dimension(0), _num_elems_processed_per_iteration));
- slice_out.set(Window::DimY, Window::Dimension(0, _output->info()->dimension(1), 1));
- slice_out.set(Window::DimZ, Window::Dimension(0, 1, 1));
-
- // we need top/left pad to be included in valid region
- if(_input->info()->data_type() == DataType::F16)
- {
- (dynamic_cast<TensorInfo *>(_input->info()))->init(_input->info()->tensor_shape(), _input->info()->num_channels(), _input->info()->data_type(), _input->info()->strides_in_bytes(), 0,
- _input->info()->total_size());
- }
-
- _kernel.use();
-
- do
- {
- unsigned int idx = 0;
- add_3D_tensor_argument(idx, _input, 1, slice_in);
- add_2D_tensor_argument(idx, _output, 2, slice_out);
- _kernel.set_argument(idx++, static_cast<unsigned int>(_input->info()->strides_in_bytes()[3]));
- _kernel.set_argument(idx++, static_cast<unsigned int>(_output->info()->strides_in_bytes()[3]));
- _kernel.update_shader_params();
-
- enqueue(*this, slice);
- }
- while(window_collapsed.slide_window_slice_3D(slice) && window_collapsed.slide_window_slice_3D(slice_out) && window_collapsed.slide_window_slice_3D(slice_in));
-}
-
-void GCIm2ColKernel::run_reduced(const Window &window)
-{
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_MISMATCHING_WINDOWS(IGCKernel::window(), window);
-
- Window out_window;
- out_window.use_tensor_dimensions(_output->info()->tensor_shape());
-
- Window out_slice = out_window.first_slice_window_1D();
- Window in_slice = window.first_slice_window_3D();
-
- _kernel.use();
-
- // Run kernel
- do
- {
- // Set arguments
- unsigned int idx = 0;
-
- add_3D_tensor_argument(idx, _input, 1, in_slice);
- add_1D_tensor_argument(idx, _output, 2, out_slice);
- _kernel.set_argument(idx++, _input->info()->dimension(0));
- _kernel.set_argument(idx++, _input->info()->dimension(1));
- _kernel.update_shader_params();
-
- enqueue(*this, in_slice);
- }
- while(window.slide_window_slice_3D(in_slice) && out_window.slide_window_slice_1D(out_slice));
-}
diff --git a/src/core/GLES_COMPUTE/kernels/GCNormalizationLayerKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCNormalizationLayerKernel.cpp
deleted file mode 100644
index 094d895442..0000000000
--- a/src/core/GLES_COMPUTE/kernels/GCNormalizationLayerKernel.cpp
+++ /dev/null
@@ -1,121 +0,0 @@
-/*
- * Copyright (c) 2017-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/core/GLES_COMPUTE/kernels/GCNormalizationLayerKernel.h"
-
-#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h"
-#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h"
-#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Utils.h"
-#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/Window.h"
-#include "support/StringSupport.h"
-
-#include <string>
-
-using namespace arm_compute;
-
-GCNormalizationLayerKernel::GCNormalizationLayerKernel()
- : _input(nullptr), _squared_input(nullptr), _output(nullptr), _border_size(0)
-{
-}
-
-BorderSize GCNormalizationLayerKernel::border_size() const
-{
- return _border_size;
-}
-
-void GCNormalizationLayerKernel::configure(const IGCTensor *input, const IGCTensor *squared_input, IGCTensor *output, NormalizationLayerInfo norm_info)
-{
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32);
- ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
- ARM_COMPUTE_ERROR_ON_MSG(!(norm_info.norm_size() % 2), "Normalization size should be odd");
- ARM_COMPUTE_ERROR_ON_MSG(norm_info.type() == NormType::IN_MAP_2D, "2D In-Map Normalization not implemented");
-
- // Set build options
- std::set<std::string> build_opts;
-
- _input = input;
- _squared_input = squared_input;
- _output = output;
-
- const bool is_in_map = norm_info.is_in_map();
- const unsigned int border_width = is_in_map ? std::min(norm_info.norm_size() / 2, 3U) : 0;
- _border_size = BorderSize(0, border_width);
-
- // Set kernel static arguments
- std::string func_name = ((norm_info.type() == NormType::IN_MAP_1D) ? "IN_MAP_1D" : "CROSS_MAP");
- build_opts.emplace(("#define " + func_name));
- build_opts.emplace(("#define COEFF " + float_to_string_with_full_precision(norm_info.scale_coeff())));
- build_opts.emplace(("#define BETA " + float_to_string_with_full_precision(norm_info.beta())));
- build_opts.emplace(("#define KAPPA " + float_to_string_with_full_precision(norm_info.kappa())));
- build_opts.emplace(("#define RADIUS " + support::cpp11::to_string(norm_info.norm_size() / 2)));
- build_opts.emplace(("#define LOCAL_SIZE_X " + support::cpp11::to_string(1)));
- build_opts.emplace(("#define LOCAL_SIZE_Y " + support::cpp11::to_string(1)));
- build_opts.emplace(("#define LOCAL_SIZE_Z " + support::cpp11::to_string(1)));
-
- // Create kernel
- _kernel = static_cast<GCKernel>(GCKernelLibrary::get().create_kernel("normalization_layer", build_opts));
-
- // Configure kernel window
- const unsigned int num_elems_processed_per_iteration = 1;
- const unsigned int num_elems_read_per_iteration = num_elems_processed_per_iteration + 2 * (norm_info.norm_size() / 2);
-
- Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration));
-
- AccessWindowHorizontal input_access(input->info(), -_border_size.left, num_elems_read_per_iteration);
- AccessWindowHorizontal squared_input_access(squared_input->info(), -_border_size.left, num_elems_read_per_iteration);
- AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration);
-
- update_window_and_padding(win, input_access, squared_input_access, output_access);
-
- output_access.set_valid_region(win, input->info()->valid_region());
-
- IGCKernel::configure(win);
-}
-
-void GCNormalizationLayerKernel::run(const Window &window)
-{
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window);
-
- _kernel.use();
-
- Window slice = window.first_slice_window_3D();
-
- do
- {
- unsigned int idx = 0;
- unsigned int binding = 1;
- add_3D_tensor_argument(idx, _input, binding++, slice);
- add_3D_tensor_argument(idx, _squared_input, binding++, slice);
- add_3D_tensor_argument(idx, _output, binding++, slice);
-
- _kernel.update_shader_params();
-
- enqueue(*this, slice);
- }
- while(window.slide_window_slice_3D(slice));
-}
diff --git a/src/core/GLES_COMPUTE/kernels/GCNormalizePlanarYUVLayerKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCNormalizePlanarYUVLayerKernel.cpp
deleted file mode 100644
index ff885da879..0000000000
--- a/src/core/GLES_COMPUTE/kernels/GCNormalizePlanarYUVLayerKernel.cpp
+++ /dev/null
@@ -1,167 +0,0 @@
-/*
- * Copyright (c) 2017-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/core/GLES_COMPUTE/kernels/GCNormalizePlanarYUVLayerKernel.h"
-
-#include "arm_compute/core/AccessWindowStatic.h"
-#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h"
-#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h"
-#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/Utils.h"
-#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/Window.h"
-
-#include "support/StringSupport.h"
-
-using namespace arm_compute;
-
-namespace
-{
-Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *mean, const ITensorInfo *std)
-{
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16);
- ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(output);
- ARM_COMPUTE_RETURN_ERROR_ON(input->data_layout() != DataLayout::NCHW);
-
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, mean, std);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(mean, std);
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(mean->num_dimensions() > 1, "mean and std must be vectors");
-
- const unsigned int channel_idx = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::CHANNEL);
- ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(channel_idx) != mean->dimension(0));
-
- // Checks performed when output is configured
- if(output->total_size() != 0)
- {
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output);
- }
-
- return Status{};
-}
-
-std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *output, ITensorInfo *mean, ITensorInfo *std)
-{
- // Output tensor auto initialization if not yet initialized
- auto_init_if_empty(*output, *input->clone());
-
- const unsigned int num_elems_processed_per_iteration = 4;
-
- // Configure kernel window
- Window win = calculate_max_window(*input, Steps(num_elems_processed_per_iteration));
-
- AccessWindowHorizontal input_access(input, 0, num_elems_processed_per_iteration);
- AccessWindowHorizontal output_access(output, 0, num_elems_processed_per_iteration);
- const int mean_padding = ceil_to_multiple(mean->dimension(0), num_elems_processed_per_iteration) - mean->dimension(0);
- const int std_padding = ceil_to_multiple(std->dimension(0), num_elems_processed_per_iteration) - std->dimension(0);
- AccessWindowStatic mean_access(mean, 0, 0, mean->dimension(0) + mean_padding, mean->dimension(1));
- AccessWindowStatic std_access(std, 0, 0, std->dimension(0) + std_padding, std->dimension(1));
-
- const bool window_changed = update_window_and_padding(win, input_access, output_access, mean_access, std_access);
- output_access.set_valid_region(win, input->valid_region());
-
- Status err = (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{};
- return std::make_pair(err, win);
-}
-} // namespace
-
-GCNormalizePlanarYUVLayerKernel::GCNormalizePlanarYUVLayerKernel()
- : _input(nullptr), _output(nullptr), _mean(nullptr), _std(nullptr)
-{
-}
-
-void GCNormalizePlanarYUVLayerKernel::configure(const IGCTensor *input, IGCTensor *output, const IGCTensor *mean, const IGCTensor *std)
-{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input, output, mean, std);
-
- // Output tensor auto initialization if not yet initialized
- auto_init_if_empty(*output->info(), *input->info()->clone());
-
- // Perform validation step
- ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), mean->info(), std->info()));
-
- _input = input;
- _output = output;
- _mean = mean;
- _std = std;
-
- // Set build options
- std::set<std::string> build_opts;
- build_opts.emplace(("#define LOCAL_SIZE_X " + support::cpp11::to_string(1)));
- build_opts.emplace(("#define LOCAL_SIZE_Y " + support::cpp11::to_string(1)));
- build_opts.emplace(("#define LOCAL_SIZE_Z " + support::cpp11::to_string(1)));
-
- // Create kernel
- _kernel = static_cast<GCKernel>(GCKernelLibrary::get().create_kernel("normalize_planar_yuv_layer", build_opts));
-
- // Configure kernel window
- auto win_config = validate_and_configure_window(input->info(), output->info(), mean->info(), std->info());
- ARM_COMPUTE_ERROR_THROW_ON(std::get<0>(win_config));
-
- IGCKernel::configure(std::get<1>(win_config));
-}
-
-Status GCNormalizePlanarYUVLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *mean, const ITensorInfo *std)
-{
- ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, mean, std));
- ARM_COMPUTE_RETURN_ON_ERROR(std::get<0>(validate_and_configure_window(input->clone().get(), output->clone().get(), mean->clone().get(), std->clone().get())));
- return Status{};
-}
-
-void GCNormalizePlanarYUVLayerKernel::run(const Window &window)
-{
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window);
-
- _kernel.use();
-
- _output->set_needs_shifting(true);
-
- Window slice = window.first_slice_window_3D();
-
- Window slice_in;
- //slice_in.use_tensor_dimensions(_mean->info()->tensor_shape());
- slice_in = window.first_slice_window_1D();
- slice_in.set(Window::DimX, Window::Dimension(0, 0, 0));
-
- unsigned int idx = 2 * num_arguments_per_3D_tensor();
- add_1D_tensor_argument(idx, _mean, 3, slice_in);
- add_1D_tensor_argument(idx, _std, 4, slice_in);
-
- slice_in = window.first_slice_window_3D();
-
- slice.shift(Window::DimX, -(_output->info()->padding()).left);
-
- do
- {
- idx = 0;
- add_3D_tensor_argument(idx, _input, 1, slice_in);
- add_3D_tensor_argument(idx, _output, 2, slice);
-
- _kernel.update_shader_params();
-
- enqueue(*this, slice);
- }
- while(window.slide_window_slice_3D(slice) && window.slide_window_slice_3D(slice_in));
-}
diff --git a/src/core/GLES_COMPUTE/kernels/GCPixelWiseMultiplicationKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCPixelWiseMultiplicationKernel.cpp
deleted file mode 100644
index 69c97a846a..0000000000
--- a/src/core/GLES_COMPUTE/kernels/GCPixelWiseMultiplicationKernel.cpp
+++ /dev/null
@@ -1,123 +0,0 @@
-/*
- * Copyright (c) 2017-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/core/GLES_COMPUTE/kernels/GCPixelWiseMultiplicationKernel.h"
-
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h"
-#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h"
-#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h"
-#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/Window.h"
-#include "support/StringSupport.h"
-
-#include <cmath>
-#include <cstdlib>
-#include <set>
-#include <string>
-using namespace arm_compute;
-
-GCPixelWiseMultiplicationKernel::GCPixelWiseMultiplicationKernel()
- : _input1(nullptr), _input2(nullptr), _output(nullptr)
-{
-}
-
-void GCPixelWiseMultiplicationKernel::configure(const IGCTensor *input1, const IGCTensor *input2, IGCTensor *output, float scale)
-{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input1, input2, output);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input1, 1, DataType::F32);
- ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input1, input2);
- ARM_COMPUTE_ERROR_ON_NULLPTR(output);
- ARM_COMPUTE_ERROR_ON_MSG(scale < 0, "Scale cannot be negative. ");
-
- // Auto initialize output if not initialized
- {
- set_shape_if_empty(*output->info(), input1->info()->tensor_shape());
- set_format_if_unknown(*output->info(), Format::F32);
- }
-
- ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(input1, input2, output);
- ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input1, input2, output);
- ARM_COMPUTE_ERROR_ON_MSG(scale < 0, "Scale cannot be negative. ");
-
- _input1 = input1;
- _input2 = input2;
- _output = output;
-
- std::string data_type;
- std::string compute_type;
-
- // Set kernel build options
- std::set<std::string> build_opts;
- build_opts.emplace("#define LOCAL_SIZE_X " + support::cpp11::to_string(1));
- build_opts.emplace("#define LOCAL_SIZE_Y " + support::cpp11::to_string(1));
- build_opts.emplace("#define LOCAL_SIZE_Z " + support::cpp11::to_string(1));
-
- build_opts.emplace("#define SCALE " + support::cpp11::to_string(scale));
-
- // Create kernel
- _kernel = static_cast<GCKernel>(GCKernelLibrary::get().create_kernel("pixelwise_mul_float", build_opts));
-
- // Configure kernel window
- constexpr unsigned int num_elems_processed_per_iteration = 1;
-
- Window win = calculate_max_window(*input1->info(), Steps(num_elems_processed_per_iteration));
-
- AccessWindowHorizontal input1_access(input1->info(), 0, num_elems_processed_per_iteration);
- AccessWindowHorizontal input2_access(input2->info(), 0, num_elems_processed_per_iteration);
- AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration);
-
- update_window_and_padding(win, input1_access, input2_access, output_access);
-
- ValidRegion valid_region = intersect_valid_regions(input1->info()->valid_region(),
- input2->info()->valid_region());
- output_access.set_valid_region(win, valid_region);
-
- IGCKernel::configure(win);
-}
-
-void GCPixelWiseMultiplicationKernel::run(const Window &window)
-{
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IGCKernel::window(), window);
-
- _kernel.use();
-
- Window slice = window.first_slice_window_3D();
-
- do
- {
- unsigned int idx = 0;
- unsigned int binding = 1;
- add_3D_tensor_argument(idx, _input1, binding++, slice);
- add_3D_tensor_argument(idx, _input2, binding++, slice);
- add_3D_tensor_argument(idx, _output, binding++, slice);
-
- _kernel.update_shader_params();
- enqueue(*this, slice);
- }
- while(window.slide_window_slice_3D(slice));
-}
diff --git a/src/core/GLES_COMPUTE/kernels/GCPoolingLayerKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCPoolingLayerKernel.cpp
deleted file mode 100644
index 36499eb4fd..0000000000
--- a/src/core/GLES_COMPUTE/kernels/GCPoolingLayerKernel.cpp
+++ /dev/null
@@ -1,372 +0,0 @@
-/*
- * Copyright (c) 2017-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/core/GLES_COMPUTE/kernels/GCPoolingLayerKernel.h"
-
-#include "arm_compute/core/AccessWindowStatic.h"
-#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h"
-#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h"
-#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h"
-#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Utils.h"
-#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/Window.h"
-#include "support/StringSupport.h"
-
-#include <set>
-#include <string>
-#include <tuple>
-
-using namespace arm_compute;
-
-namespace
-{
-// Internal window config info
-using GCPoolingConfig = std::pair<unsigned int, BorderSize>; //num_elems_processed_per_iteration, border_size
-
-void auto_init(const ITensorInfo *input, ITensorInfo *output, unsigned int pooled_w, unsigned int pooled_h)
-{
- TensorShape output_shape{ input->tensor_shape() };
- output_shape.set(0, pooled_w);
- output_shape.set(1, pooled_h);
-
- auto_init_if_empty(*output, input->clone()->set_tensor_shape(output_shape));
-}
-
-Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const PoolingLayerInfo &pool_info, const ITensorInfo *indices)
-{
- ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(indices, "Indices not supported in GLES backend");
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32);
- ARM_COMPUTE_RETURN_ERROR_ON_MSG((is_data_type_quantized_asymmetric(input->data_type()) && pool_info.pool_type == PoolingType::L2),
- "Unsupported combination of parameters!");
- ARM_COMPUTE_RETURN_ERROR_ON(!pool_info.pad_stride_info.padding_is_symmetric());
-
- const bool is_global_pooling = pool_info.is_global_pooling;
- const unsigned int pool_size = is_global_pooling ? input->tensor_shape().x() : pool_info.pool_size.width;
-
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(is_global_pooling && (input->tensor_shape().x() != input->tensor_shape().y()),
- "Global pooling is supported only with rectangular inputs!");
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(!is_global_pooling && ((pool_info.pad_stride_info.pad().first >= pool_size) || (pool_info.pad_stride_info.pad().second >= pool_size)),
- "Invalid pool size and pool pad combination!");
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(pool_info.pool_size.width != pool_info.pool_size.height, "Invalid Pool size, width not equal to height!");
-
- // Checks performed when output is configured
- if(output->total_size() != 0)
- {
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
-
- unsigned int pooled_w = 0;
- unsigned int pooled_h = 0;
- std::tie(pooled_w, pooled_h) = scaled_dimensions(input->dimension(0),
- input->dimension(1),
- pool_size,
- pool_size,
- pool_info.pad_stride_info);
- ARM_COMPUTE_RETURN_ERROR_ON_MSG((output->dimension(0) != pooled_w) || (output->dimension(1) != pooled_h),
- "Invalid output pooling dimensions!");
- }
-
- return Status{};
-}
-
-std::tuple<Status, Window, GCPoolingConfig> validate_and_configure_window(ITensorInfo *input, ITensorInfo *output, const PoolingLayerInfo &pool_info)
-{
- int pool_pad_x = 0;
- int pool_pad_y = 0;
- int pool_stride_x = 0;
- int pool_stride_y = 0;
- unsigned int pooled_w = 0;
- unsigned int pooled_h = 0;
- int pool_size = pool_info.pool_size.width;
- const PadStrideInfo pad_stride_info = pool_info.pad_stride_info;
- std::tie(pool_pad_x, pool_pad_y) = pad_stride_info.pad();
- std::tie(pool_stride_x, pool_stride_y) = pad_stride_info.stride();
-
- ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
-
- // Update pool size in case of global pooling
- pool_size = pool_info.is_global_pooling ? input->dimension(0) : pool_size;
-
- // Check output dimensions
- std::tie(pooled_w, pooled_h) = scaled_dimensions(input->dimension(0),
- input->dimension(1),
- pool_size,
- pool_size,
- pad_stride_info);
-
- auto_init(input, output, pooled_w, pooled_h);
-
- BorderSize border_size = BorderSize(pool_pad_y, pool_pad_x);
-
- const int input_width = input->dimension(0);
- const int input_height = input->dimension(1);
-
- unsigned int num_elems_processed_per_iteration = 1;
-
- // Create kernel
- if(pool_size == 3)
- {
- // Check if we have pool3x3 with stride_x less equal than 3. In these cases, run an optimized OpenGLES kernel where
- // each thread computes 4 output elements
- const bool is_pool3x3_stride_le3 = (pool_size == 3) && (pool_stride_x <= 3);
-
- int num_elems_read_per_iteration = pool_size;
-
- if(input->data_type() == DataType::F32)
- {
- if(is_pool3x3_stride_le3)
- {
- // Change the number of elements processed and number of elements read per iteration for pooling 3x3 with stride less equal than 3
- num_elems_processed_per_iteration = 4;
- num_elems_read_per_iteration = pool_size * (pool_stride_x + 1);
- }
- }
- else
- {
- if(is_pool3x3_stride_le3)
- {
- num_elems_processed_per_iteration = 4;
- }
- else
- {
- num_elems_processed_per_iteration = 2;
- }
- }
-
- const int upper_bound_w = ((pooled_w - 1) * pool_stride_x - pool_pad_x + num_elems_read_per_iteration) - input_width;
- const int upper_bound_h = ((pooled_h - 1) * pool_stride_y - pool_pad_y + pool_size) - input_height;
-
- border_size.right = std::max(upper_bound_w, pool_pad_x);
- border_size.bottom = std::max(upper_bound_h, pool_pad_y);
- }
- else // Run general case
- {
- if(input->data_type() == DataType::F32)
- {
- num_elems_processed_per_iteration = 1;
- }
- else
- {
- num_elems_processed_per_iteration = 2;
- }
-
- const int upper_bound_w = ((pooled_w - 1) * pool_stride_x - pool_pad_x + pool_size) - input_width;
- const int upper_bound_h = ((pooled_h - 1) * pool_stride_y - pool_pad_y + pool_size) - input_height;
-
- border_size.right = std::max(upper_bound_w, pool_pad_x);
- border_size.bottom = std::max(upper_bound_h, pool_pad_y);
- }
- // Configure kernel window
- Window win = calculate_max_window(*output, Steps(num_elems_processed_per_iteration));
-
- if(input->data_type() == DataType::F32)
- {
- AccessWindowStatic input_access(input, -pool_pad_x, -pool_pad_y, input_width + border_size.right, input_height + border_size.bottom);
- AccessWindowHorizontal output_access(output, 0, num_elems_processed_per_iteration);
- bool window_changed = update_window_and_padding(win, input_access, output_access);
- output_access.set_valid_region(win, ValidRegion(Coordinates(), output->tensor_shape()));
- Status err = (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{};
- return std::make_tuple(err, win, GCPoolingConfig(num_elems_processed_per_iteration, border_size));
- }
- else
- {
- // Calculate output right and bottom border
- const int output_width = output->dimension(0);
- const int output_height = output->dimension(1);
- const int output_padding_right = ceil_to_multiple(output_width, num_elems_processed_per_iteration) - output_width;
- const int output_padding_bottom = ceil_to_multiple(output_height, 1) - output_height;
-
- const int input_total_width = std::max(int(input->padding().left), int(pool_pad_x)) + input_width + std::max(int(input->padding().right), int(pool_pad_x));
- const int input_padding_right = ceil_to_multiple(input_total_width, num_elems_processed_per_iteration) - input_width - pool_pad_x;
- const int input_total_height = std::max(int(input->padding().top), int(pool_pad_y)) + input_height + std::max(int(input->padding().bottom), int(pool_pad_y));
- const int input_padding_bottom = input_total_height - input_height - pool_pad_y;
-
- // Configure kernel window
- AccessWindowStatic input_access(input, -pool_pad_x, -pool_pad_y, input_width + input_padding_right, input_height + input_padding_bottom);
- AccessWindowStatic output_access(output, 0, 0, output_width + output_padding_right, output_height + output_padding_bottom);
- bool window_changed = update_window_and_padding(win, input_access, output_access);
- output_access.set_valid_region(win, ValidRegion(Coordinates(), output->tensor_shape()));
- Status err = (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{};
- return std::make_tuple(err, win, GCPoolingConfig(num_elems_processed_per_iteration, border_size));
- }
-}
-} // namespace
-
-GCPoolingLayerKernel::GCPoolingLayerKernel()
- : _input(nullptr), _output(nullptr), _indices(nullptr), _pool_info(), _border_size(0), _num_elems_processed_per_iteration(1)
-{
-}
-
-BorderSize GCPoolingLayerKernel::border_size() const
-{
- return _border_size;
-}
-
-void GCPoolingLayerKernel::configure(const IGCTensor *input, IGCTensor *output, const PoolingLayerInfo &pool_info, IGCTensor *indices)
-{
- int pool_pad_x = 0;
- int pool_pad_y = 0;
- int pool_stride_x = 0;
- int pool_stride_y = 0;
- unsigned int pooled_w = 0;
- unsigned int pooled_h = 0;
- const PoolingType pool_type = pool_info.pool_type;
- int pool_size = pool_info.pool_size.width;
- const PadStrideInfo pad_stride_info = pool_info.pad_stride_info;
- const bool exclude_padding = pool_info.exclude_padding;
- std::tie(pool_pad_x, pool_pad_y) = pad_stride_info.pad();
- std::tie(pool_stride_x, pool_stride_y) = pad_stride_info.stride();
-
- ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
-
- // Update pool size in case of global pooling
- pool_size = pool_info.is_global_pooling ? input->info()->dimension(0) : pool_size;
-
- // Check output dimensions
- std::tie(pooled_w, pooled_h) = scaled_dimensions(input->info()->dimension(0),
- input->info()->dimension(1),
- pool_size,
- pool_size,
- pad_stride_info);
-
- auto_init(input->info(), output->info(), pooled_w, pooled_h);
-
- ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), pool_info, (indices) ? indices->info() : nullptr));
-
- // Set instance variables
- _input = input;
- _output = output;
- _pool_info = pool_info;
- _indices = indices;
- // Set build options
- std::set<std::string> build_opts;
- build_opts.emplace("#define LOCAL_SIZE_X " + support::cpp11::to_string(1));
- build_opts.emplace("#define LOCAL_SIZE_Y " + support::cpp11::to_string(1));
- build_opts.emplace("#define LOCAL_SIZE_Z " + support::cpp11::to_string(1));
- if(input->info()->data_type() == DataType::F32)
- {
- build_opts.insert("#define DATA_TYPE_FP32");
- }
- else
- {
- build_opts.insert("#define DATA_TYPE_FP16");
- }
- if(exclude_padding)
- {
- build_opts.emplace("#define EXCLUDE_PADDING");
- }
- build_opts.emplace(("#define POOL_" + string_from_pooling_type(pool_type)));
- build_opts.emplace(("#define STRIDE_X " + support::cpp11::to_string(pool_stride_x)));
- build_opts.emplace(("#define MAX_WIDTH " + support::cpp11::to_string(input->info()->dimension(0) + (exclude_padding ? 0 : pool_pad_x))));
- build_opts.emplace(("#define MAX_HEIGHT " + support::cpp11::to_string(input->info()->dimension(1) + (exclude_padding ? 0 : pool_pad_y))));
- build_opts.emplace(("#define STRIDE_Y " + support::cpp11::to_string(pool_stride_y)));
- build_opts.emplace(("#define PAD_X " + support::cpp11::to_string(pool_pad_x)));
- build_opts.emplace(("#define PAD_Y " + support::cpp11::to_string(pool_pad_y)));
-
- // Create kernel
- if((pool_size == 2) || (pool_size == 3) || (pool_size == 7))
- {
- // Check if we have pool3x3 with stride_x less equal than 3. In these cases, run an optimized OpenGLES kernel where
- // each thread computes 4 output elements
- const bool is_pool3x3_stride_le3 = (pool_size == 3) && (pool_stride_x <= 3);
-
- std::string kernel_name = "pooling_layer_" + support::cpp11::to_string(pool_size);
- if(is_pool3x3_stride_le3)
- {
- build_opts.insert("#define POOLING_LAYER_3_OPTIMIZED");
- _kernel = static_cast<GCKernel>(GCKernelLibrary::get().create_kernel(kernel_name + "_optimized", build_opts));
- }
- else
- {
- build_opts.insert("#define POOLING_LAYER_" + support::cpp11::to_string(pool_size));
- _kernel = static_cast<GCKernel>(GCKernelLibrary::get().create_kernel(kernel_name, build_opts));
- }
- }
- else // Run general case
- {
- build_opts.emplace(("#define POOL_SIZE " + support::cpp11::to_string(pool_size)));
-
- build_opts.insert("#define POOLING_LAYER_N");
- _kernel = static_cast<GCKernel>(GCKernelLibrary::get().create_kernel("pooling_layer_n", build_opts));
- }
- // Configure kernel window
- auto win_config = validate_and_configure_window(input->info(), output->info(), pool_info);
- ARM_COMPUTE_ERROR_THROW_ON(std::get<0>(win_config));
-
- IGCKernel::configure(std::get<1>(win_config));
- GCPoolingConfig pooling_config = std::get<2>(win_config);
- _num_elems_processed_per_iteration = pooling_config.first;
- _border_size = pooling_config.second;
-}
-
-Status GCPoolingLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const PoolingLayerInfo &pool_info, const ITensorInfo *indices)
-{
- ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, pool_info, indices));
- ARM_COMPUTE_RETURN_ON_ERROR(std::get<0>(validate_and_configure_window(input->clone().get(), output->clone().get(), pool_info)));
-
- return Status{};
-}
-
-void GCPoolingLayerKernel::run(const Window &window)
-{
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window);
-
- unsigned int pool_pad_x;
- unsigned int pool_pad_y;
- unsigned int pool_stride_x;
- unsigned int pool_stride_y;
- std::tie(pool_pad_x, pool_pad_y) = _pool_info.pad_stride_info.pad();
- std::tie(pool_stride_x, pool_stride_y) = _pool_info.pad_stride_info.stride();
-
- _kernel.use();
-
- _output->set_needs_shifting(true);
-
- Window window_collapsed = window.collapse_if_possible(IGCKernel::window(), Window::DimZ);
-
- Window slice = window_collapsed.first_slice_window_3D();
- Window slice_in_orig = window_collapsed.first_slice_window_3D();
-
- slice.shift(Window::DimX, -(_output->info()->padding()).left);
-
- do
- {
- // Upsample input by pool size
- Window in_slice(slice_in_orig); // NOLINT
- in_slice.set(Window::DimX, Window::Dimension(in_slice.x().start() - pool_pad_x, in_slice.x().end() * pool_stride_x, pool_stride_x * _num_elems_processed_per_iteration));
- in_slice.set(Window::DimY, Window::Dimension(in_slice.y().start() - pool_pad_y, in_slice.y().end() * pool_stride_y, pool_stride_y));
-
- // Set inputs
- unsigned int idx = 0;
- add_3D_tensor_argument(idx, _input, 1, in_slice);
- add_3D_tensor_argument(idx, _output, 2, slice);
-
- _kernel.update_shader_params();
- enqueue(*this, slice);
- }
- while(window_collapsed.slide_window_slice_3D(slice) && window_collapsed.slide_window_slice_3D(slice_in_orig));
-}
diff --git a/src/core/GLES_COMPUTE/kernels/GCScaleKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCScaleKernel.cpp
deleted file mode 100644
index a85a0e7e98..0000000000
--- a/src/core/GLES_COMPUTE/kernels/GCScaleKernel.cpp
+++ /dev/null
@@ -1,169 +0,0 @@
-/*
- * Copyright (c) 2016-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/core/GLES_COMPUTE/kernels/GCScaleKernel.h"
-
-#include "arm_compute/core/AccessWindowStatic.h"
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h"
-#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h"
-#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h"
-#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h"
-#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Validate.h"
-#include "support/StringSupport.h"
-
-#include <set>
-#include <string>
-
-using namespace arm_compute;
-
-BorderSize GCScaleKernel::border_size() const
-{
- return BorderSize(1);
-}
-
-void GCScaleKernel::configure(const IGCTensor *input, IGCTensor *output, InterpolationPolicy policy, bool border_undefined, SamplingPolicy sampling_policy)
-{
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16);
- ARM_COMPUTE_ERROR_ON_NULLPTR(output);
- ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
- ARM_COMPUTE_ERROR_ON(output == input);
- ARM_COMPUTE_ERROR_ON(policy != InterpolationPolicy::NEAREST_NEIGHBOR);
-
- _input = input;
- _output = output;
-
- // Compute the ratio between source width/height and destination width/height
- const auto wr = static_cast<float>(input->info()->dimension(0)) / static_cast<float>(output->info()->dimension(0));
- const auto hr = static_cast<float>(input->info()->dimension(1)) / static_cast<float>(output->info()->dimension(1));
-
- // Compute actual border size
- BorderSize border = border_undefined ? BorderSize(0) : border_size();
-
- // Area interpolation behaves as Nearest Neighbour in case of up-sampling
- if(policy == InterpolationPolicy::AREA && wr <= 1.f && hr <= 1.f)
- {
- policy = InterpolationPolicy::NEAREST_NEIGHBOR;
- }
- else
- {
- ARM_COMPUTE_ERROR_ON(policy == InterpolationPolicy::AREA);
- }
-
- // Create kernel
- std::set<std::string> build_opts;
- build_opts.emplace("#define LOCAL_SIZE_X " + support::cpp11::to_string(1));
- build_opts.emplace("#define LOCAL_SIZE_Y " + support::cpp11::to_string(1));
- build_opts.emplace("#define LOCAL_SIZE_Z " + support::cpp11::to_string(1));
-
- build_opts.emplace("#define DATA_TYPE_FP16");
- build_opts.emplace("#define BORDER_SIZE " + support::cpp11::to_string(border.right));
- if(sampling_policy == SamplingPolicy::TOP_LEFT)
- {
- build_opts.emplace("#define SAMPLING_POLICY_TOP_LEFT");
- }
- else
- {
- build_opts.emplace("#define SAMPLING_POLICY_CENTER");
- }
-
- // Configure kernel window
- unsigned int num_elems_processed_per_iteration = 4;
- unsigned int input_width_alignment = 2;
-
- // performance optimization for 2x upscaling with no border
- if((fabs(wr - 0.5) < 1e-6) && (fabs(hr - 0.5) < 1e-6) && border_undefined)
- {
- num_elems_processed_per_iteration = 8;
- input_width_alignment = 4;
- build_opts.emplace("#define SCALE_NEAREST_8X");
- }
- else
- {
- build_opts.emplace("#define SCALE_NEAREST_GENERIC");
- }
-
- std::string interpolation_name = string_from_interpolation_policy(policy); // NOLINT
- std::transform(interpolation_name.begin(), interpolation_name.end(), interpolation_name.begin(), ::tolower);
- std::string kernel_name = "scale_" + interpolation_name;
- _kernel = GCKernelLibrary::get().create_kernel(kernel_name, build_opts);
-
- Window win = calculate_max_window(*output->info(), Steps(num_elems_processed_per_iteration));
-
- const ValidRegion &input_valid_region = input->info()->valid_region();
-
- const int total_width = border.left + input_valid_region.anchor[0] + input_valid_region.shape[0] + border.right;
- const int padding_right = ceil_to_multiple(total_width, input_width_alignment) - border.left - input_valid_region.anchor[0] - input_valid_region.shape[0];
-
- // Reads can occur within the valid region of the input
- AccessWindowStatic input_access(input->info(),
- input_valid_region.anchor[0] - border.left, input_valid_region.anchor[1] - border.top,
- input_valid_region.anchor[0] + input_valid_region.shape[0] + padding_right,
- input_valid_region.anchor[1] + input_valid_region.shape[1] + border.bottom);
-
- AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration);
-
- update_window_and_padding(win, input_access, output_access);
-
- output_access.set_valid_region(win, calculate_valid_region_scale(*(input->info()),
- output->info()->tensor_shape(),
- policy,
- sampling_policy,
- border_undefined));
-
- IGCKernel::configure(win);
-
- unsigned int idx = 2 * num_arguments_per_3D_tensor(); //Skip the tensor parameters
- _kernel.set_argument<float>(idx++, static_cast<float>(input->info()->dimension(0)));
- _kernel.set_argument<float>(idx++, static_cast<float>(input->info()->dimension(1)));
- _kernel.set_argument<float>(idx++, wr);
- _kernel.set_argument<float>(idx++, hr);
-}
-
-void GCScaleKernel::run(const Window &window)
-{
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window);
-
- _kernel.use();
-
- _output->set_needs_shifting(true);
-
- Window slice = window.first_slice_window_3D();
- Window slice_in = window.first_slice_window_3D();
-
- slice.shift(Window::DimX, -(_output->info()->padding()).left);
-
- do
- {
- unsigned int idx = 0;
- add_3D_tensor_argument(idx, _input, 1, slice_in);
- add_3D_tensor_argument(idx, _output, 2, slice);
- _kernel.update_shader_params();
- enqueue(*this, slice);
- }
- while(window.slide_window_slice_3D(slice) && window.slide_window_slice_3D(slice_in));
-}
diff --git a/src/core/GLES_COMPUTE/kernels/GCSoftmaxLayerKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCSoftmaxLayerKernel.cpp
deleted file mode 100644
index f250801eaf..0000000000
--- a/src/core/GLES_COMPUTE/kernels/GCSoftmaxLayerKernel.cpp
+++ /dev/null
@@ -1,276 +0,0 @@
-/*
- * Copyright (c) 2017-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/core/GLES_COMPUTE/kernels/GCSoftmaxLayerKernel.h"
-
-#include "arm_compute/core/AccessWindowStatic.h"
-#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h"
-#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h"
-#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h"
-#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Utils.h"
-#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/Window.h"
-#include "support/StringSupport.h"
-
-#include <set>
-#include <string>
-
-using namespace arm_compute;
-
-void GCLogits1DMaxKernel::configure(const IGCTensor *input, IGCTensor *output)
-{
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32);
- ARM_COMPUTE_ERROR_ON_NULLPTR(output);
-
- // Softmax across the x dimension
- TensorShape output_shape{ input->info()->tensor_shape() };
- output_shape.set(0, 1);
-
- // Output auto initialization if not yet initialized
- auto_init_if_empty(*output->info(), output_shape, 1, input->info()->data_type());
-
- ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
- ARM_COMPUTE_ERROR_ON_MISMATCHING_DIMENSIONS(output->info()->tensor_shape(), output_shape);
-
- _input = input;
- _output = output;
-
- // Set build options
- std::set<std::string> build_opts;
- std::string dt_name = (input->info()->data_type() == DataType::F32) ? "DATA_TYPE_FP32" : "DATA_TYPE_FP16";
- build_opts.insert("#define " + dt_name);
- build_opts.emplace("#define LOCAL_SIZE_X " + support::cpp11::to_string(1));
- build_opts.emplace("#define LOCAL_SIZE_Y " + support::cpp11::to_string(1));
- build_opts.emplace("#define LOCAL_SIZE_Z " + support::cpp11::to_string(1));
- build_opts.insert("#define SOFTMAX_LAYER_MAX");
-
- // Tell the kernel that the width is not a multiple of 8
- if((input->info()->dimension(0) % 8) != 0)
- {
- build_opts.insert("#define NON_MULTIPLE_OF_8");
- }
-
- // Create kernel
- _kernel = static_cast<GCKernel>(GCKernelLibrary::get().create_kernel("softmax_layer_max", build_opts));
-
- // Set fixed arguments
- unsigned int idx = 2 * num_arguments_per_3D_tensor(); //Skip the input and output parameters
- _kernel.set_argument(idx++, input->info()->dimension(0));
-
- // Configure kernel window
- // The kernel loops over all elements in steps of 8
- const unsigned int num_elems_processed_per_iteration = ceil_to_multiple(input->info()->dimension(0), 8);
- unsigned int num_elems_written_per_iteration = 1;
- if(input->info()->data_type() == DataType::F16)
- {
- num_elems_written_per_iteration = 2;
- }
-
- Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration));
- AccessWindowHorizontal input_access(input->info(), 0, num_elems_processed_per_iteration);
- AccessWindowHorizontal output_access(output->info(), 0, num_elems_written_per_iteration);
-
- update_window_and_padding(win, input_access, output_access);
-
- output_access.set_valid_region(win, ValidRegion(Coordinates(), output->info()->tensor_shape()));
-
- IGCKernel::configure(win);
-}
-
-GCLogits1DShiftExpSumKernel::GCLogits1DShiftExpSumKernel()
- : _input(nullptr), _max(nullptr), _output(nullptr), _sum(nullptr)
-{
-}
-
-void GCLogits1DShiftExpSumKernel::configure(const IGCTensor *input, const IGCTensor *max, IGCTensor *output, IGCTensor *sum)
-{
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32);
- ARM_COMPUTE_ERROR_ON_NULLPTR(max, sum, output);
-
- // Output auto initialization if not yet initialized
- auto_init_if_empty(*sum->info(), max->info()->tensor_shape(), 1, input->info()->data_type());
- auto_init_if_empty(*output->info(), input->info()->tensor_shape(), 1, input->info()->data_type());
-
- ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output, max, sum);
- ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(input, output);
- ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(max, sum);
-
- _input = input;
- _max = max;
- _output = output;
- _sum = sum;
-
- // Set build options
- std::set<std::string> build_opts;
- std::string dt_name = (input->info()->data_type() == DataType::F32) ? "DATA_TYPE_FP32" : "DATA_TYPE_FP16";
- build_opts.insert("#define " + dt_name);
- build_opts.emplace("#define LOCAL_SIZE_X " + support::cpp11::to_string(1));
- build_opts.emplace("#define LOCAL_SIZE_Y " + support::cpp11::to_string(1));
- build_opts.emplace("#define LOCAL_SIZE_Z " + support::cpp11::to_string(1));
- build_opts.insert("#define SOFTMAX_LAYER_SHIFT_EXP_SUM");
-
- // Tell the kernel that the width is not a multiple of 8
- if((input->info()->dimension(0) % 8) != 0)
- {
- build_opts.insert("#define NON_MULTIPLE_OF_8");
- }
-
- // Create kernel
- _kernel = static_cast<GCKernel>(GCKernelLibrary::get().create_kernel("softmax_layer_shift_exp_sum", build_opts));
-
- // Set fixed arguments
- unsigned int idx = 4 * num_arguments_per_3D_tensor(); //Skip the input and output parameters
- _kernel.set_argument(idx++, input->info()->dimension(0));
-
- // Configure window
- // The kernel loops over all elements in steps of 8
- const unsigned int num_elems_processed_per_iteration = ceil_to_multiple(input->info()->dimension(0), 8);
- unsigned int num_elems_written_per_iteration = 1;
- if(input->info()->data_type() == DataType::F16)
- {
- num_elems_written_per_iteration = 2;
- }
-
- Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration));
-
- AccessWindowHorizontal input_access(input->info(), 0, num_elems_processed_per_iteration);
- AccessWindowHorizontal max_access(max->info(), 0, num_elems_written_per_iteration);
- AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration);
- AccessWindowHorizontal sum_access(sum->info(), 0, num_elems_written_per_iteration);
-
- update_window_and_padding(win, input_access, max_access, output_access, sum_access);
-
- output_access.set_valid_region(win, input->info()->valid_region());
- sum_access.set_valid_region(win, ValidRegion(Coordinates(), sum->info()->tensor_shape()));
-
- IGCKernel::configure(win);
-}
-
-void GCLogits1DShiftExpSumKernel::run(const Window &window)
-{
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window);
-
- Window window_collapsed = window.collapse_if_possible(IGCKernel::window(), Window::DimZ);
- Window slice = window_collapsed.first_slice_window_3D();
-
- _kernel.use();
-
- do
- {
- unsigned int idx = 0;
- unsigned int binding = 1; // SSBO binding starts from 1.
- // Set inputs
- add_3D_tensor_argument(idx, _input, binding++, slice);
- add_3D_tensor_argument(idx, _max, binding++, slice);
- add_3D_tensor_argument(idx, _output, binding++, slice);
- add_3D_tensor_argument(idx, _sum, binding++, slice);
- _kernel.update_shader_params();
- enqueue(*this, slice);
- }
- while(window_collapsed.slide_window_slice_3D(slice));
-}
-
-GCLogits1DNormKernel::GCLogits1DNormKernel()
- : _input(nullptr), _sum(nullptr), _output(nullptr)
-{
-}
-
-void GCLogits1DNormKernel::configure(const IGCTensor *input, const IGCTensor *sum, IGCTensor *output)
-{
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32);
- ARM_COMPUTE_ERROR_ON_NULLPTR(sum, output);
-
- // Output auto initialization if not yet initialized
- auto_init_if_empty(*output->info(), input->info()->tensor_shape(), 1, input->info()->data_type());
-
- ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, sum, output);
- ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(input, output);
-
- _input = input;
- _sum = sum;
- _output = output;
-
- // Set build options
- std::set<std::string> build_opts;
- std::string dt_name = (input->info()->data_type() == DataType::F32) ? "DATA_TYPE_FP32" : "DATA_TYPE_FP16";
- build_opts.insert("#define " + dt_name);
- build_opts.emplace("#define LOCAL_SIZE_X " + support::cpp11::to_string(1));
- build_opts.emplace("#define LOCAL_SIZE_Y " + support::cpp11::to_string(1));
- build_opts.emplace("#define LOCAL_SIZE_Z " + support::cpp11::to_string(1));
- build_opts.insert("#define SOFTMAX_LAYER_NORM");
-
- // Create kernel
- _kernel = static_cast<GCKernel>(GCKernelLibrary::get().create_kernel("softmax_layer_norm", build_opts));
-
- // Configure window
- constexpr unsigned int num_elems_processed_per_iteration = 8;
- unsigned int num_elems_written_per_iteration = 1;
- if(input->info()->data_type() == DataType::F16)
- {
- num_elems_written_per_iteration = 2;
- }
-
- Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration));
-
- AccessWindowHorizontal input_access(input->info(), 0, num_elems_processed_per_iteration);
- AccessWindowStatic sum_access(sum->info(), 0, 0, num_elems_written_per_iteration, sum->info()->dimension(1));
- AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration);
-
- update_window_and_padding(win, input_access, sum_access, output_access);
-
- output_access.set_valid_region(win, input->info()->valid_region());
-
- IGCKernel::configure(win);
-}
-
-void GCLogits1DNormKernel::run(const Window &window)
-{
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window);
-
- Window window_collapsed = window.collapse_if_possible(IGCKernel::window(), Window::DimZ);
- Window slice = window_collapsed.first_slice_window_3D();
-
- _kernel.use();
-
- do
- {
- Window sum_slice = slice;
- sum_slice.set(Window::DimX, Window::Dimension(0, 1, 1));
-
- unsigned int idx = 0;
- unsigned int binding = 1; // SSBO binding starts from 1.
- // Set inputs
- add_3D_tensor_argument(idx, _input, binding++, slice);
- add_3D_tensor_argument(idx, _sum, binding++, slice);
- add_3D_tensor_argument(idx, _output, binding++, slice);
-
- _kernel.update_shader_params();
- enqueue(*this, slice);
- }
- while(window_collapsed.slide_window_slice_3D(slice));
-}
diff --git a/src/core/GLES_COMPUTE/kernels/GCTensorShiftKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCTensorShiftKernel.cpp
deleted file mode 100644
index 16dafaf543..0000000000
--- a/src/core/GLES_COMPUTE/kernels/GCTensorShiftKernel.cpp
+++ /dev/null
@@ -1,105 +0,0 @@
-/*
- * Copyright (c) 2017-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/core/GLES_COMPUTE/kernels/GCTensorShiftKernel.h"
-
-#include "arm_compute/core/AccessWindowStatic.h"
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h"
-#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h"
-#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/IAccessWindow.h"
-#include "arm_compute/core/ITensor.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/core/Validate.h"
-#include "support/StringSupport.h"
-
-using namespace arm_compute;
-
-GCTensorShiftKernel::GCTensorShiftKernel()
- : _input(nullptr), _lws(gles::NDRange(1U, 1U, 1U)), _left_padding(0)
-{
-}
-
-void GCTensorShiftKernel::configure(IGCTensor *input)
-{
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32);
-
- _input = input;
-
- std::set<std::string> options;
- options.emplace("#define LOCAL_SIZE_X " + support::cpp11::to_string(_lws[0]));
- options.emplace("#define LOCAL_SIZE_Y " + support::cpp11::to_string(_lws[1]));
- options.emplace("#define LOCAL_SIZE_Z " + support::cpp11::to_string(_lws[2]));
- options.emplace("#define WIDTH " + support::cpp11::to_string(input->info()->dimension(0)));
-
- std::string dt_name = (input->info()->data_type() == DataType::F32) ? "DATA_TYPE_FP32" : "DATA_TYPE_FP16";
- options.emplace(("#define " + dt_name));
-
- unsigned int num_elems_written_per_iteration_x = input->info()->dimension(0) + input->info()->padding().left + input->info()->padding().right;
-
- std::stringstream kernel_name;
- kernel_name << "tensorshift";
-
- _kernel = static_cast<GCKernel>(GCKernelLibrary::get().create_kernel(kernel_name.str(), options));
-
- Window win;
- win.set(Window::DimX, Window::Dimension(0, num_elems_written_per_iteration_x, num_elems_written_per_iteration_x));
- win.use_tensor_dimensions(input->info()->tensor_shape(), Window::DimY);
- win.use_tensor_dimensions(input->info()->tensor_shape(), Window::DimZ);
-
- _left_padding = _input->info()->padding().left;
-
- IGCKernel::configure(win);
-}
-
-void GCTensorShiftKernel::run(const Window &window)
-{
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window);
-
- if(int(_left_padding) == 0 || !_input->needs_shifting())
- {
- return;
- }
-
- _kernel.use();
-
- // Get initial windows
- Window slice = window.first_slice_window_3D();
- slice.shift(Window::DimX, -(_input->info()->padding()).left);
-
- do
- {
- unsigned int idx = 0;
-
- add_3D_tensor_argument(idx, _input, 1, slice);
-
- _kernel.set_argument(idx++, static_cast<unsigned int>(_left_padding));
-
- _kernel.update_shader_params();
- enqueue(*this, slice, _lws);
- }
- while(window.slide_window_slice_3D(slice));
-}
diff --git a/src/core/GLES_COMPUTE/kernels/GCTransposeKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCTransposeKernel.cpp
deleted file mode 100644
index ead50ce1f5..0000000000
--- a/src/core/GLES_COMPUTE/kernels/GCTransposeKernel.cpp
+++ /dev/null
@@ -1,143 +0,0 @@
-/*
- * Copyright (c) 2017-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/core/GLES_COMPUTE/kernels/GCTransposeKernel.h"
-
-#include "arm_compute/core/AccessWindowStatic.h"
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h"
-#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h"
-#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h"
-#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/Types.h"
-#include "support/StringSupport.h"
-
-#include <set>
-#include <string>
-
-using namespace arm_compute;
-
-void GCTransposeKernel::configure(const IGCTensor *input, IGCTensor *output)
-{
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32);
- ARM_COMPUTE_ERROR_ON_NULLPTR(output);
-
- TensorShape output_shape{ input->info()->tensor_shape() };
- const size_t w_out = input->info()->dimension(1);
- const size_t h_out = input->info()->dimension(0);
- output_shape.set(0, w_out);
- output_shape.set(1, h_out);
-
- // Output tensor auto inizialitation if not yet initialized
- auto_init_if_empty(*output->info(), output_shape, 1, input->info()->data_type());
-
- ARM_COMPUTE_ERROR_ON_MISMATCHING_DIMENSIONS(output->info()->tensor_shape(), output_shape);
- ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
-
- _input = input;
- _output = output;
-
- // for better performance
- if(w_out < 512 && h_out < 512)
- {
- _lws_hint = gles::NDRange(8U, 1U, 1U);
- }
- else
- {
- _lws_hint = gles::NDRange(1U, 8U, 1U);
- }
-
- std::set<std::string> build_opts;
- std::string dt_name = (input->info()->data_type() == DataType::F32) ? "DATA_TYPE_FP32" : "DATA_TYPE_FP16";
- build_opts.emplace(("#define " + dt_name));
- build_opts.emplace("#define LOCAL_SIZE_X " + support::cpp11::to_string(_lws_hint[0]));
- build_opts.emplace("#define LOCAL_SIZE_Y " + support::cpp11::to_string(_lws_hint[1]));
- build_opts.emplace("#define LOCAL_SIZE_Z " + support::cpp11::to_string(_lws_hint[2]));
-
- // Configure kernel window
- unsigned int num_elems_processed_per_iteration = 4;
-
- if(input->info()->data_type() == DataType::F16)
- {
-#define TRANSPOSE_8X8
-
-#if defined(TRANSPOSE_4X4)
- build_opts.emplace(("#define TRANSPOSE_4X4"));
- num_elems_processed_per_iteration = 4;
-#elif defined(TRANSPOSE_8X8) /* TRANSPOSE_4X4 */
- if(w_out != h_out)
- {
- build_opts.emplace("#define TRANSPOSE_8X8");
- num_elems_processed_per_iteration = 8;
- }
- else
- {
- build_opts.emplace("#define TRANSPOSE_8X8_SQUARE");
- num_elems_processed_per_iteration = 8;
- }
-#endif /* TRANSPOSE_4X4 */
- }
-
- // Create kernel
- _kernel = static_cast<GCKernel>(GCKernelLibrary::get().create_kernel("transpose", build_opts));
-
- const unsigned int width_aligned = num_elems_processed_per_iteration * static_cast<unsigned int>(_lws_hint[0]);
- const unsigned int height_aligned = num_elems_processed_per_iteration * static_cast<unsigned int>(_lws_hint[1]);
-
- AccessWindowStatic input_access(input->info(), 0, 0,
- ceil_to_multiple(input->info()->dimension(0), width_aligned),
- ceil_to_multiple(input->info()->dimension(1), height_aligned));
- AccessWindowStatic output_access(output->info(), 0, 0,
- ceil_to_multiple(output->info()->dimension(0), height_aligned),
- ceil_to_multiple(output->info()->dimension(1), width_aligned));
-
- Window win = calculate_max_window(*input->info(), Steps(width_aligned, height_aligned));
- win.set_dimension_step(Window::DimX, num_elems_processed_per_iteration);
- win.set_dimension_step(Window::DimY, num_elems_processed_per_iteration);
- update_window_and_padding(win, input_access, output_access);
- output_access.set_valid_region(win, output->info()->valid_region());
-
- IGCKernel::configure(win);
-}
-
-void GCTransposeKernel::run(const Window &window)
-{
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IGCKernel::window(), window);
-
- _kernel.use();
-
- Window slice = window.first_slice_window_2D();
-
- do
- {
- unsigned int idx = 0;
-
- add_2D_tensor_argument(idx, _input, 1, slice);
- add_2D_tensor_argument(idx, _output, 2, slice);
- _kernel.update_shader_params();
- enqueue(*this, slice, _lws_hint);
- }
- while(window.slide_window_slice_2D(slice));
-}
diff --git a/src/core/GLES_COMPUTE/kernels/GCWeightsReshapeKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCWeightsReshapeKernel.cpp
deleted file mode 100644
index 07c09fa4ea..0000000000
--- a/src/core/GLES_COMPUTE/kernels/GCWeightsReshapeKernel.cpp
+++ /dev/null
@@ -1,141 +0,0 @@
-/*
- * Copyright (c) 2017-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/core/GLES_COMPUTE/kernels/GCWeightsReshapeKernel.h"
-
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h"
-#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h"
-#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h"
-#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/utils/misc/ShapeCalculator.h"
-#include "support/StringSupport.h"
-
-#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h"
-
-using namespace arm_compute;
-using namespace arm_compute::misc::shape_calculator;
-
-GCWeightsReshapeKernel::GCWeightsReshapeKernel()
- : _input(nullptr), _biases(nullptr), _output(nullptr)
-{
-}
-
-void GCWeightsReshapeKernel::configure(const IGCTensor *input, const IGCTensor *biases, IGCTensor *output)
-{
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32);
- ARM_COMPUTE_ERROR_ON_NULLPTR(output);
-
- // Output tensor auto inizialitation if not yet initialized
- auto_init_if_empty(*output->info(), input->info()->clone()->set_tensor_shape(compute_weights_reshaped_shape(*input->info(), (biases != nullptr))));
-
- ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
-
- if(biases != nullptr)
- {
- ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, biases);
- ARM_COMPUTE_ERROR_ON((input->info()->num_dimensions() == 4) && (biases->info()->num_dimensions() != 1));
- ARM_COMPUTE_ERROR_ON((input->info()->num_dimensions() == 5) && (biases->info()->num_dimensions() != 2));
- ARM_COMPUTE_ERROR_ON((input->info()->num_dimensions() == 4) && (biases->info()->dimension(0) != input->info()->tensor_shape()[3]));
- ARM_COMPUTE_ERROR_ON((input->info()->num_dimensions() == 5) && (biases->info()->dimension(0) != input->info()->tensor_shape()[3] || biases->info()->dimension(1) != input->info()->tensor_shape()[4]));
- }
-
- _biases = biases;
- _output = output;
- _input = input;
-
- // Create build options
- std::set<std::string> build_opts;
- std::string dt_name = (input->info()->data_type() == DataType::F32) ? "DATA_TYPE_FP32" : "DATA_TYPE_FP16";
- build_opts.emplace("#define " + dt_name);
- build_opts.emplace("#define LOCAL_SIZE_X " + support::cpp11::to_string(1));
- build_opts.emplace("#define LOCAL_SIZE_Y " + support::cpp11::to_string(1));
- build_opts.emplace("#define LOCAL_SIZE_Z " + support::cpp11::to_string(1));
- build_opts.emplace("#define RESHAPE_TO_COLUMNS");
- if(biases != nullptr)
- {
- build_opts.emplace("#define HAS_BIAS");
- }
-
- // Create kernel
- _kernel = static_cast<GCKernel>(GCKernelLibrary::get().create_kernel("reshape_to_columns", build_opts));
-
- // Set static arguments
- unsigned int idx = num_arguments_per_3D_tensor() + num_arguments_per_2D_tensor();
- idx += (biases != nullptr) ? num_arguments_per_1D_tensor() : 0;
- _kernel.set_argument(idx++, _input->info()->dimension(0));
- _kernel.set_argument(idx++, _input->info()->dimension(1));
- _kernel.set_argument(idx++, _input->info()->dimension(2));
- _kernel.set_argument(idx++, _input->info()->dimension(3));
-
- // Configure window
- Window win = calculate_max_window(*input->info(), Steps());
-
- // The GCWeightsReshapeKernel doesn't need padding so update_window_and_padding() can be skipped
- output->info()->set_valid_region(ValidRegion(Coordinates(), output->info()->tensor_shape()));
- IGCKernel::configure(win);
-}
-
-void GCWeightsReshapeKernel::run(const Window &window)
-{
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_MISMATCHING_WINDOWS(IGCKernel::window(), window);
-
- Window out_window;
- out_window.use_tensor_dimensions(_output->info()->tensor_shape());
-
- Window in_slice = window.first_slice_window_3D();
- Window out_slice = out_window.first_slice_window_2D();
-
- Window biases_window;
- Window biases_slice;
-
- if(_biases != nullptr)
- {
- biases_window.use_tensor_dimensions(_biases->info()->tensor_shape());
- biases_slice = biases_window.first_slice_window_1D();
- }
-
- _kernel.use();
-
- do
- {
- // Set arguments
- unsigned idx = 0;
- add_3D_tensor_argument(idx, _input, 1, in_slice);
- add_2D_tensor_argument(idx, _output, 2, out_slice);
- if(_biases != nullptr)
- {
- add_1D_tensor_argument(idx, _biases, 3, biases_slice);
- biases_window.slide_window_slice_1D(biases_slice);
- }
-
- _kernel.update_shader_params();
- // Run kernel
- enqueue(*this, in_slice);
- }
- while(window.slide_window_slice_4D(in_slice) && out_window.slide_window_slice_2D(out_slice));
-}