diff options
Diffstat (limited to 'src/core/GLES_COMPUTE/kernels')
25 files changed, 0 insertions, 4638 deletions
diff --git a/src/core/GLES_COMPUTE/kernels/GCAbsoluteDifferenceKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCAbsoluteDifferenceKernel.cpp deleted file mode 100644 index 5e8accc95d..0000000000 --- a/src/core/GLES_COMPUTE/kernels/GCAbsoluteDifferenceKernel.cpp +++ /dev/null @@ -1,108 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/core/GLES_COMPUTE/kernels/GCAbsoluteDifferenceKernel.h" - -#include "arm_compute/core/Error.h" -#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h" -#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h" -#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h" -#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/core/Validate.h" -#include "arm_compute/core/Window.h" -#include "src/core/helpers/WindowHelpers.h" -#include "support/StringSupport.h" - -#include <set> -#include <string> - -using namespace arm_compute; - -GCAbsoluteDifferenceKernel::GCAbsoluteDifferenceKernel() - : _input1(nullptr), _input2(nullptr), _output(nullptr) -{ -} - -void GCAbsoluteDifferenceKernel::configure(const IGCTensor *input1, const IGCTensor *input2, IGCTensor *output) -{ - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input1, 1, DataType::U8); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input2, 1, DataType::U8); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8); - ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input1, input2, output); - - _input1 = input1; - _input2 = input2; - _output = output; - - constexpr unsigned int num_elems_processed_per_iteration = 4; - - // Set kernel build options - std::set<std::string> build_opts; - build_opts.emplace("#define LOCAL_SIZE_X " + support::cpp11::to_string(1)); - build_opts.emplace("#define LOCAL_SIZE_Y " + support::cpp11::to_string(1)); - build_opts.emplace("#define LOCAL_SIZE_Z " + support::cpp11::to_string(1)); - - // Create kernel - _kernel = static_cast<GCKernel>(GCKernelLibrary::get().create_kernel("absdiff", build_opts)); - - // Configure kernel window - Window win = calculate_max_window(*input1->info(), Steps(num_elems_processed_per_iteration)); - - AccessWindowRectangle input1_access(input1->info(), 0, 0, 4, 1); - AccessWindowRectangle input2_access(input2->info(), 0, 0, 4, 1); - AccessWindowRectangle output_access(output->info(), 0, 0, 4, 1); - - update_window_and_padding(win, input1_access, input2_access, output_access); - - ValidRegion valid_region = intersect_valid_regions(input1->info()->valid_region(), - input2->info()->valid_region()); - - output_access.set_valid_region(win, valid_region); - - IGCKernel::configure(win); -} - -void GCAbsoluteDifferenceKernel::run(const Window &window) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IGCKernel::window(), window); - - _kernel.use(); - - Window slice = window.first_slice_window_2D(); - do - { - unsigned int idx = 0; - unsigned int binding = 1; // SSBO binding starts from 1. - add_2D_tensor_argument(idx, _input1, binding++, slice); - add_2D_tensor_argument(idx, _input2, binding++, slice); - add_2D_tensor_argument(idx, _output, binding++, slice); - - _kernel.update_shader_params(); - - enqueue(*this, slice); - } - while(window.slide_window_slice_2D(slice)); -} diff --git a/src/core/GLES_COMPUTE/kernels/GCActivationLayerKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCActivationLayerKernel.cpp deleted file mode 100644 index 0173b81cf8..0000000000 --- a/src/core/GLES_COMPUTE/kernels/GCActivationLayerKernel.cpp +++ /dev/null @@ -1,136 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/core/GLES_COMPUTE/kernels/GCActivationLayerKernel.h" - -#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h" -#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h" -#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/IAccessWindow.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Utils.h" -#include "arm_compute/core/Validate.h" -#include "arm_compute/core/Window.h" -#include "src/core/helpers/AutoConfiguration.h" -#include "src/core/helpers/WindowHelpers.h" -#include "support/StringSupport.h" - -#include <set> -#include <string> - -using namespace arm_compute; - -GCActivationLayerKernel::GCActivationLayerKernel(GCCoreRuntimeContext *ctx) - : _input(nullptr), _output(nullptr), _ctx(ctx) -{ -} - -void GCActivationLayerKernel::configure(IGCTensor *input, IGCTensor *output, ActivationLayerInfo act_info) -{ - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32); - - // Make sure _kernel is initialized before calling the parent's configure - _input = input; - _output = input; - - if(output != nullptr) - { - // Output auto inizialitation if not yet initialized - auto_init_if_empty(*output->info(), input->info()->tensor_shape(), 1, input->info()->data_type()); - - ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(input, output); - ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); - - _output = output; - } - - unsigned int num_elems_processed_per_iteration = 4 / input->info()->element_size(); - - // Set build options - std::set<std::string> build_opts; - std::string dt_name = (input->info()->data_type() == DataType::F32) ? "DATA_TYPE_FP32" : "DATA_TYPE_FP16"; - build_opts.emplace(("#define " + string_from_activation_func(act_info.activation()))); - build_opts.emplace(("#define " + dt_name)); - build_opts.emplace(("#define A_VAL " + float_to_string_with_full_precision(act_info.a()))); - build_opts.emplace(("#define B_VAL " + float_to_string_with_full_precision(act_info.b()))); - build_opts.emplace(("#define LOCAL_SIZE_X " + support::cpp11::to_string(1))); - build_opts.emplace(("#define LOCAL_SIZE_Y " + support::cpp11::to_string(1))); - build_opts.emplace(("#define LOCAL_SIZE_Z " + support::cpp11::to_string(1))); - - // Create kernel - _kernel = create_opengl_kernel(_ctx, "activation_layer", build_opts); - - // Configure kernel window - Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration)); - - if(output != nullptr) - { - AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration); - - update_window_and_padding(win, - AccessWindowHorizontal(input->info(), 0, num_elems_processed_per_iteration), - output_access); - - output->info()->set_valid_region(ValidRegion(Coordinates(), output->info()->tensor_shape())); - } - else - { - update_window_and_padding(win, - AccessWindowHorizontal(input->info(), 0, num_elems_processed_per_iteration)); - } - - IGCKernel::configure(win); -} - -void GCActivationLayerKernel::run(const Window &window) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IGCKernel::window(), window); - - _kernel.use(); - - _output->set_needs_shifting(true); - - Window collapsed = window.collapse_if_possible(IGCKernel::window(), Window::DimZ); - Window slice = collapsed.first_slice_window_3D(); - Window slice_in = collapsed.first_slice_window_3D(); - - slice.shift(Window::DimX, -(_output->info()->padding()).left); - - if(_input == _output) - { - slice_in.shift(Window::DimX, -(_input->info()->padding()).left); - } - - do - { - unsigned int idx = 0; - unsigned int binding = 1; - add_3D_tensor_argument(idx, _input, binding++, slice); - add_3D_tensor_argument(idx, _output, binding++, slice_in); - _kernel.update_shader_params(); - enqueue(*this, slice); - } - while(collapsed.slide_window_slice_3D(slice) && collapsed.slide_window_slice_3D(slice_in)); -} diff --git a/src/core/GLES_COMPUTE/kernels/GCArithmeticAdditionKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCArithmeticAdditionKernel.cpp deleted file mode 100644 index f31c8ca156..0000000000 --- a/src/core/GLES_COMPUTE/kernels/GCArithmeticAdditionKernel.cpp +++ /dev/null @@ -1,161 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/core/GLES_COMPUTE/kernels/GCArithmeticAdditionKernel.h" - -#include "arm_compute/core/Error.h" -#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h" -#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h" -#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h" -#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/IAccessWindow.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Utils.h" -#include "arm_compute/core/Validate.h" -#include "arm_compute/core/Window.h" -#include "src/core/helpers/AutoConfiguration.h" -#include "src/core/helpers/WindowHelpers.h" -#include "support/StringSupport.h" - -#include <cstddef> -#include <set> -#include <string> - -using namespace arm_compute; - -namespace -{ -Status validate_arguments(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ConvertPolicy policy) -{ - ARM_COMPUTE_UNUSED(policy); - ARM_COMPUTE_ERROR_ON_NULLPTR(input1, input2, output); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input1, 1, DataType::F16); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input2, 1, DataType::F16); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input1, input2); - - // Validate in case of configured output - if((output != nullptr) && (output->total_size() != 0)) - { - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::F16); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input1, output); - } - - return Status{}; -} - -std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output) -{ - constexpr unsigned int num_elems_processed_per_iteration = 8; - - Window win = calculate_max_window(*input1, Steps(num_elems_processed_per_iteration)); - - AccessWindowHorizontal input1_access(input1, 0, num_elems_processed_per_iteration); - AccessWindowHorizontal input2_access(input2, 0, num_elems_processed_per_iteration); - AccessWindowHorizontal output_access(output, 0, num_elems_processed_per_iteration); - - bool window_changed = update_window_and_padding(win, input1_access, input2_access, output_access); - - ValidRegion valid_region = intersect_valid_regions(input1->valid_region(), - input2->valid_region()); - - output_access.set_valid_region(win, valid_region); - - Status err = (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{}; - return std::make_pair(err, win); -} -} // namespace - -GCArithmeticAdditionKernel::GCArithmeticAdditionKernel() - : _input1(nullptr), _input2(nullptr), _output(nullptr) -{ -} - -void GCArithmeticAdditionKernel::configure(const IGCTensor *input1, const IGCTensor *input2, IGCTensor *output, ConvertPolicy policy) -{ - ARM_COMPUTE_UNUSED(policy); - ARM_COMPUTE_ERROR_ON_NULLPTR(input1, input2, output); - - // Auto initialize output if not initialized - { - set_shape_if_empty(*output->info(), input1->info()->tensor_shape()); - set_format_if_unknown(*output->info(), Format::F16); - } - - ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input1->info(), input2->info(), output->info(), policy)); - - _input1 = input1; - _input2 = input2; - _output = output; - - // Set kernel build options - std::set<std::string> build_opts; - build_opts.emplace("#define LOCAL_SIZE_X " + support::cpp11::to_string(1)); - build_opts.emplace("#define LOCAL_SIZE_Y " + support::cpp11::to_string(1)); - build_opts.emplace("#define LOCAL_SIZE_Z " + support::cpp11::to_string(1)); - - // Create kernel - _kernel = static_cast<GCKernel>(GCKernelLibrary::get().create_kernel("arithmetic_add", build_opts)); - - // Configure kernel window - auto win_config = validate_and_configure_window(input1->info(), input2->info(), output->info()); - ARM_COMPUTE_ERROR_THROW_ON(win_config.first); - IGCKernel::configure(win_config.second); -} - -Status GCArithmeticAdditionKernel::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ConvertPolicy policy) -{ - ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input1, input2, output, policy)); - ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input1->clone().get(), input2->clone().get(), output->clone().get()).first); - - return Status{}; -} - -void GCArithmeticAdditionKernel::run(const Window &window) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IGCKernel::window(), window); - - _kernel.use(); - - _output->set_needs_shifting(true); - - Window slice = window.first_slice_window_3D(); - Window slice_in = window.first_slice_window_3D(); - - slice.shift(Window::DimX, -(_output->info()->padding()).left); - - do - { - unsigned int idx = 0; - unsigned int binding = 1; // SSBO binding starts from 1. - add_3D_tensor_argument(idx, _input1, binding++, slice_in); - add_3D_tensor_argument(idx, _input2, binding++, slice_in); - add_3D_tensor_argument(idx, _output, binding++, slice); - - _kernel.update_shader_params(); - - enqueue(*this, slice); - } - while(window.slide_window_slice_3D(slice) && window.slide_window_slice_3D(slice_in)); -} diff --git a/src/core/GLES_COMPUTE/kernels/GCBatchNormalizationLayerKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCBatchNormalizationLayerKernel.cpp deleted file mode 100644 index 9281ce5ffb..0000000000 --- a/src/core/GLES_COMPUTE/kernels/GCBatchNormalizationLayerKernel.cpp +++ /dev/null @@ -1,247 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/core/GLES_COMPUTE/kernels/GCBatchNormalizationLayerKernel.h" - -#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h" -#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h" -#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/Utils.h" -#include "arm_compute/core/Validate.h" -#include "arm_compute/core/Window.h" -#include "src/core/AccessWindowStatic.h" -#include "src/core/helpers/AutoConfiguration.h" -#include "src/core/helpers/WindowHelpers.h" - -#include "support/StringSupport.h" - -using namespace arm_compute; - -namespace -{ -Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, - const ITensorInfo *mean, const ITensorInfo *var, - const ITensorInfo *beta, const ITensorInfo *gamma, - float epsilon, ActivationLayerInfo act_info) -{ - ARM_COMPUTE_UNUSED(epsilon); - ARM_COMPUTE_UNUSED(var); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32); - - ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, mean, var); - ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(mean, var); - - if(output->total_size() != 0) - { - ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(input, output); - ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); - } - - if(beta != nullptr) - { - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(mean, beta); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, beta); - } - if(gamma != nullptr) - { - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(mean, gamma); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, gamma); - } - if(act_info.enabled()) - { - ARM_COMPUTE_ERROR_ON(input->data_type() != DataType::F32 && input->data_type() != DataType::F16); - ARM_COMPUTE_ERROR_ON(act_info.activation() != ActivationLayerInfo::ActivationLayerInfo::ActivationFunction::RELU - && act_info.activation() != ActivationLayerInfo::ActivationLayerInfo::ActivationFunction::BOUNDED_RELU - && act_info.activation() != ActivationLayerInfo::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU); - ARM_COMPUTE_ERROR_ON(act_info.b() > act_info.a()); - } - return Status{}; -} - -std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *output, - ITensorInfo *mean, ITensorInfo *var, - ITensorInfo *beta, ITensorInfo *gamma) -{ - // Output tensor auto initialization if not yet initialized - auto_init_if_empty(*output, input->tensor_shape(), 1, input->data_type()); - - unsigned int num_elems_processed_per_iteration = 1; - if(input->data_type() == DataType::F16) - { - num_elems_processed_per_iteration = 4; - } - - // Configure kernel window - Window win = calculate_max_window(*input, Steps(num_elems_processed_per_iteration)); - - AccessWindowHorizontal input_access(input, 0, num_elems_processed_per_iteration); - AccessWindowHorizontal output_access(output, 0, num_elems_processed_per_iteration); - AccessWindowStatic mean_access(mean, 0, 0, mean->dimension(0) + 3, mean->dimension(1)); - AccessWindowStatic var_access(var, 0, 0, var->dimension(0) + 3, var->dimension(1)); - - bool window_changed = false; - if(beta != nullptr) - { - AccessWindowStatic beta_access(beta, 0, 0, beta->dimension(0) + 3, beta->dimension(1)); - if(gamma != nullptr) - { - AccessWindowStatic gamma_access(gamma, 0, 0, gamma->dimension(0) + 3, gamma->dimension(1)); - window_changed = update_window_and_padding(win, input_access, output_access, mean_access, var_access, beta_access, gamma_access); - } - else - { - window_changed = update_window_and_padding(win, input_access, output_access, mean_access, var_access, beta_access); - } - } - else - { - if(gamma != nullptr) - { - AccessWindowStatic gamma_access(gamma, 0, 0, gamma->dimension(0) + 3, gamma->dimension(1)); - window_changed = update_window_and_padding(win, input_access, output_access, mean_access, var_access, gamma_access); - } - else - { - window_changed = update_window_and_padding(win, input_access, output_access, mean_access, var_access); - } - } - output_access.set_valid_region(win, input->valid_region()); - - Status err = (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{}; - return std::make_pair(err, win); -} -} // namespace - -GCBatchNormalizationLayerKernel::GCBatchNormalizationLayerKernel() - : _input(nullptr), _output(nullptr), _mean(nullptr), _var(nullptr), _beta(nullptr), _gamma(nullptr), _epsilon(0.0f) -{ -} - -void GCBatchNormalizationLayerKernel::configure(const IGCTensor *input, IGCTensor *output, const IGCTensor *mean, const IGCTensor *var, const IGCTensor *beta, const IGCTensor *gamma, - float epsilon, ActivationLayerInfo act_info) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(input, output, mean, var); - - ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), mean->info(), var->info(), - (beta != nullptr) ? beta->info() : nullptr, (gamma != nullptr) ? gamma->info() : nullptr, - epsilon, act_info)); - - _input = input; - _output = output; - _mean = mean; - _var = var; - _beta = beta; - _gamma = gamma; - _epsilon = epsilon; - - // Set build options - std::set<std::string> build_opts; - std::string dt_name = (input->info()->data_type() == DataType::F32) ? "DATA_TYPE_FP32" : "DATA_TYPE_FP16"; - build_opts.emplace(("#define " + dt_name)); - build_opts.emplace(("#define ESPILON " + float_to_string_with_full_precision(_epsilon))); - build_opts.emplace(("#define LOCAL_SIZE_X " + support::cpp11::to_string(1))); - build_opts.emplace(("#define LOCAL_SIZE_Y " + support::cpp11::to_string(1))); - build_opts.emplace(("#define LOCAL_SIZE_Z " + support::cpp11::to_string(1))); - if(beta == nullptr) - { - build_opts.emplace("#define USE_DEFAULT_BETA"); - } - if(gamma == nullptr) - { - build_opts.emplace("#define USE_DEFAULT_GAMMA"); - } - - if(act_info.enabled()) - { - build_opts.emplace("#define " + string_from_activation_func(act_info.activation())); - build_opts.emplace("#define A_VAL " + float_to_string_with_full_precision(act_info.a())); - build_opts.emplace("#define B_VAL " + float_to_string_with_full_precision(act_info.b())); - } - - // Create kernel - _kernel = static_cast<GCKernel>(GCKernelLibrary::get().create_kernel("batchnormalization_layer", build_opts)); - - // Configure kernel window - auto win_config = validate_and_configure_window(input->info(), output->info(), mean->info(), var->info(), - (beta != nullptr) ? beta->info() : nullptr, (gamma != nullptr) ? gamma->info() : nullptr); - ARM_COMPUTE_ERROR_THROW_ON(win_config.first); - - IGCKernel::configure(win_config.second); -} - -Status GCBatchNormalizationLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *output, - const ITensorInfo *mean, const ITensorInfo *var, - const ITensorInfo *beta, const ITensorInfo *gamma, - float epsilon, ActivationLayerInfo act_info) -{ - ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, mean, var, beta, gamma, epsilon, act_info)); - ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input->clone().get(), output->clone().get(), - mean->clone().get(), var->clone().get(), - beta->clone().get(), gamma->clone().get()) - .first); - - return Status{}; -} - -void GCBatchNormalizationLayerKernel::run(const Window &window) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window); - - _kernel.use(); - - _output->set_needs_shifting(true); - - Window slice = window.first_slice_window_3D(); - Window slice_in = window.first_slice_window_3D(); - - Window vector_slice = window.first_slice_window_1D(); - vector_slice.set(Window::DimX, Window::Dimension(0, 0, 0)); - - unsigned int idx = 2 * num_arguments_per_3D_tensor(); - unsigned int binding_point = 3; - add_1D_tensor_argument(idx, _mean, binding_point, vector_slice); - add_1D_tensor_argument(idx, _var, ++binding_point, vector_slice); - if(_beta != nullptr) - { - add_1D_tensor_argument(idx, _beta, ++binding_point, vector_slice); - } - if(_gamma != nullptr) - { - add_1D_tensor_argument(idx, _gamma, ++binding_point, vector_slice); - } - - slice.shift(Window::DimX, -(_output->info()->padding()).left); - - do - { - idx = 0; - add_3D_tensor_argument(idx, _input, 1, slice_in); - add_3D_tensor_argument(idx, _output, 2, slice); - - _kernel.update_shader_params(); - enqueue(*this, slice); - } - while(window.slide_window_slice_3D(slice) && window.slide_window_slice_3D(slice_in)); -} diff --git a/src/core/GLES_COMPUTE/kernels/GCCol2ImKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCCol2ImKernel.cpp deleted file mode 100644 index 5781c564ea..0000000000 --- a/src/core/GLES_COMPUTE/kernels/GCCol2ImKernel.cpp +++ /dev/null @@ -1,126 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "arm_compute/core/GLES_COMPUTE/kernels/GCCol2ImKernel.h" - -#include "arm_compute/core/Error.h" -#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h" -#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h" -#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h" -#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/core/Validate.h" -#include "src/core/AccessWindowStatic.h" -#include "src/core/helpers/AutoConfiguration.h" -#include "src/core/helpers/WindowHelpers.h" -#include "support/StringSupport.h" - -using namespace arm_compute; - -GCCol2ImKernel::GCCol2ImKernel() - : _input(nullptr), _output(nullptr), _convolved_dims() -{ -} - -void GCCol2ImKernel::configure(const IGCTensor *input, IGCTensor *output, - std::pair<unsigned int, unsigned int> convolved_dims) -{ - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32); - ARM_COMPUTE_ERROR_ON_NULLPTR(output); - - TensorShape output_shape = input->info()->tensor_shape(); - output_shape.set(0, convolved_dims.first); - output_shape.set(1, convolved_dims.second); - output_shape.set(2, input->info()->tensor_shape()[0]); - - // Output auto inizialitation if not yet initialized - auto_init_if_empty(*output->info(), input->info()->clone()->set_tensor_shape(output_shape)); - - ARM_COMPUTE_ERROR_ON_MISMATCHING_DIMENSIONS(output->info()->tensor_shape(), output_shape); - ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); - - _input = input; - _output = output; - _convolved_dims = convolved_dims; - - const DataType dt = input->info()->data_type(); - const unsigned int local_size = 1; - - // Create kernel - std::set<std::string> build_opts; - build_opts.emplace("#define COL2IM "); - build_opts.emplace("#define WIDTH_OUTPUT " + support::cpp11::to_string(_convolved_dims.first)); - const std::string dt_name = (dt == DataType::F32) ? "DATA_TYPE_FP32" : "DATA_TYPE_FP16"; - build_opts.emplace(("#define " + dt_name)); - build_opts.emplace("#define LOCAL_SIZE_X " + support::cpp11::to_string(local_size)); - build_opts.emplace("#define LOCAL_SIZE_Y " + support::cpp11::to_string(local_size)); - build_opts.emplace("#define LOCAL_SIZE_Z " + support::cpp11::to_string(local_size)); - - _kernel = static_cast<GCKernel>(GCKernelLibrary::get().create_kernel("col2im", build_opts)); - - // Configure window - const unsigned int num_elems_processed_per_iteration = (dt == DataType::F32) ? 1 : 2; - - Window win = calculate_max_window(*output->info(), Steps(num_elems_processed_per_iteration)); - - AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration); - const int input_padding = ceil_to_multiple(input->info()->dimension(0), 2) - input->info()->dimension(0); - - AccessWindowStatic input_access(input->info(), 0, 0, input->info()->dimension(0) + input_padding, input->info()->dimension(1) + 1); - - update_window_and_padding(win, input_access, output_access); - - output_access.set_valid_region(win, output->info()->valid_region()); - - IGCKernel::configure(win); -} - -void GCCol2ImKernel::run(const Window &window) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_MISMATCHING_WINDOWS(IGCKernel::window(), window); - - _kernel.use(); - - Window collapsed_window = window.collapse_if_possible(IGCKernel::window(), Window::DimZ); - Window slice = collapsed_window.first_slice_window_3D(); - - // Set static kernel arguments - unsigned int idx = 2 * num_arguments_per_3D_tensor(); - //_kernel.set_argument(idx++, _output->info()->strides_in_bytes()[3]); - _kernel.set_argument(idx++, uint(_output->info()->dimension(2))); - _kernel.set_argument(idx++, _input->info()->strides_in_bytes()[2]); - - do - { - // Set inputs - unsigned int idx = 0; - add_2D_tensor_argument(idx, _input, 1, slice); - add_3D_tensor_argument(idx, _output, 2, slice); - _kernel.update_shader_params(); - enqueue(*this, slice); - } - while(collapsed_window.slide_window_slice_3D(slice)); -} diff --git a/src/core/GLES_COMPUTE/kernels/GCDepthConcatenateLayerKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCDepthConcatenateLayerKernel.cpp deleted file mode 100644 index 3256f11e74..0000000000 --- a/src/core/GLES_COMPUTE/kernels/GCDepthConcatenateLayerKernel.cpp +++ /dev/null @@ -1,114 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/core/GLES_COMPUTE/kernels/GCDepthConcatenateLayerKernel.h" - -#include "arm_compute/core/Error.h" -#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h" -#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h" -#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h" -#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/Utils.h" -#include "arm_compute/core/Validate.h" -#include "arm_compute/core/Window.h" -#include "src/core/helpers/AutoConfiguration.h" -#include "src/core/helpers/WindowHelpers.h" - -#include "support/StringSupport.h" - -using namespace arm_compute; - -GCDepthConcatenateLayerKernel::GCDepthConcatenateLayerKernel() - : _input(nullptr), _output(nullptr), _depth_offset(0) -{ -} -void GCDepthConcatenateLayerKernel::configure(const IGCTensor *input, unsigned int depth_offset, IGCTensor *output) -{ - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32); - ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); - - ARM_COMPUTE_ERROR_ON(input->info()->dimension(Window::DimX) != output->info()->dimension(Window::DimX)); - ARM_COMPUTE_ERROR_ON(input->info()->dimension(Window::DimY) != output->info()->dimension(Window::DimY)); - ARM_COMPUTE_ERROR_ON(input->info()->dimension(2) + depth_offset > output->info()->dimension(2)); - ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(3, input, output); - - _input = input; - _output = output; - _depth_offset = depth_offset; - - // Add build options - std::set<std::string> build_opts; - std::string dt_name = (input->info()->data_type() == DataType::F32) ? "DATA_TYPE_FP32" : "DATA_TYPE_FP16"; - build_opts.emplace(("#define " + dt_name)); - build_opts.emplace("#define LOCAL_SIZE_X " + support::cpp11::to_string(1)); - build_opts.emplace("#define LOCAL_SIZE_Y " + support::cpp11::to_string(1)); - build_opts.emplace("#define LOCAL_SIZE_Z " + support::cpp11::to_string(1)); - - // Create kernel - _kernel = static_cast<GCKernel>(GCKernelLibrary::get().create_kernel("concatenate_depth", build_opts)); - - unsigned int num_elems_processed_per_iteration = 1; - if(input->info()->data_type() == DataType::F16) - { - num_elems_processed_per_iteration = 4; - } - - // The window needs to be based on input as we copy all the depths of input - Window win = calculate_max_window(*output->info(), Steps(num_elems_processed_per_iteration)); - win.set(Window::DimZ, Window::Dimension(0, input->info()->tensor_shape().z(), 1)); - - AccessWindowHorizontal input_access(input->info(), 0, num_elems_processed_per_iteration); - AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration); - update_window_and_padding(win, input_access, output_access); - output_access.set_valid_region(win, ValidRegion(Coordinates(), output->info()->tensor_shape())); - - IGCKernel::configure(win); -} - -void GCDepthConcatenateLayerKernel::run(const Window &window) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IGCKernel::window(), window); - - _kernel.use(); - - _output->set_needs_shifting(true); - - Window slice_in = window.first_slice_window_3D(); - Window slice_out = window.first_slice_window_3D(); - - slice_out.set(Window::DimZ, Window::Dimension(_depth_offset)); - - do - { - unsigned int idx = 0; - add_3D_tensor_argument(idx, _input, 1, slice_in); - add_3D_tensor_argument(idx, _output, 2, slice_out); - - _kernel.update_shader_params(); - - enqueue(*this, slice_in); - } - while(window.slide_window_slice_3D(slice_in)); -} diff --git a/src/core/GLES_COMPUTE/kernels/GCDepthwiseConvolutionLayer3x3Kernel.cpp b/src/core/GLES_COMPUTE/kernels/GCDepthwiseConvolutionLayer3x3Kernel.cpp deleted file mode 100644 index 95d487b4dd..0000000000 --- a/src/core/GLES_COMPUTE/kernels/GCDepthwiseConvolutionLayer3x3Kernel.cpp +++ /dev/null @@ -1,252 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/core/GLES_COMPUTE/kernels/GCDepthwiseConvolutionLayer3x3Kernel.h" - -#include "arm_compute/core/Error.h" -#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h" -#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h" -#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h" -#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/core/Utils.h" -#include "arm_compute/core/utils/misc/ShapeCalculator.h" -#include "src/core/AccessWindowStatic.h" -#include "src/core/helpers/AutoConfiguration.h" -#include "src/core/helpers/WindowHelpers.h" -#include "support/StringSupport.h" - -using namespace arm_compute; -using namespace arm_compute::misc::shape_calculator; - -GCDepthwiseConvolutionLayer3x3Kernel::GCDepthwiseConvolutionLayer3x3Kernel() - : _border_size(0), _input(), _output(), _weights(), _biases(), _conv_stride_x(0), _conv_stride_y(0), _conv_pad_left(0), _conv_pad_top(0), _lws(gles::NDRange(1U, 1U, 1U)) -{ -} - -BorderSize GCDepthwiseConvolutionLayer3x3Kernel::border_size() const -{ - return _border_size; -} - -void GCDepthwiseConvolutionLayer3x3Kernel::configure(const IGCTensor *input, const IGCTensor *weights, const IGCTensor *biases, IGCTensor *output, const PadStrideInfo &conv_info, - unsigned int depth_multiplier) -{ - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16); - ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights); - ARM_COMPUTE_ERROR_ON(weights->info()->dimension(0) != 3 || weights->info()->dimension(1) != 3); - - if(biases != nullptr) - { - ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(weights, biases); - ARM_COMPUTE_ERROR_ON(biases->info()->dimension(0) != weights->info()->dimension(2)); - ARM_COMPUTE_ERROR_ON(biases->info()->num_dimensions() > 1); - } - - // Get convolved dimensions - const TensorShape output_shape = compute_depthwise_convolution_shape(*input->info(), *weights->info(), conv_info, depth_multiplier); - - // Output auto inizialitation if not yet initialized - auto_init_if_empty(*output->info(), - output_shape, - 1, - input->info()->data_type()); - - ARM_COMPUTE_ERROR_ON_MISMATCHING_DIMENSIONS(output->info()->tensor_shape(), output_shape); - ARM_COMPUTE_ERROR_ON(output->info()->dimension(2) != weights->info()->dimension(2)); - - _input = input; - _output = output; - _weights = weights; - _biases = biases; - _conv_stride_x = conv_info.stride().first; - _conv_stride_y = conv_info.stride().second; - _conv_pad_left = conv_info.pad_left(); - _conv_pad_top = conv_info.pad_top(); - _border_size = BorderSize(_conv_pad_top, conv_info.pad_right(), conv_info.pad_bottom(), _conv_pad_left); - - // Set build options - ARM_COMPUTE_ERROR_ON(_conv_stride_x < 1 || _conv_stride_x > 3); - std::set<std::string> options; - - options.emplace("#define DEPTH_MULTIPLIER " + support::cpp11::to_string(depth_multiplier)); - options.emplace("#define LOCAL_SIZE_X " + support::cpp11::to_string(_lws[0])); - options.emplace("#define LOCAL_SIZE_Y " + support::cpp11::to_string(_lws[1])); - options.emplace("#define LOCAL_SIZE_Z " + support::cpp11::to_string(_lws[2])); - options.emplace("#define STRIDE_X " + support::cpp11::to_string(_conv_stride_x)); - options.emplace("#define STRIDE_Y " + support::cpp11::to_string(_conv_stride_y)); - - std::string dt_name = (input->info()->data_type() == DataType::F32) ? "DATA_TYPE_FP32" : "DATA_TYPE_FP16"; - options.emplace(("#define " + dt_name)); - - unsigned int num_elems_read_per_iteration_x = 8; - unsigned int num_elems_read_per_iteration_y = 1; - unsigned int num_elems_written_per_iteration_x = 4; - unsigned int num_elems_written_per_iteration_y = 1; - unsigned int num_elems_written_per_iteration_z = 1; - - if((_conv_stride_x == 1) && (_conv_stride_y == 1)) - { - switch(input->info()->data_type()) - { -#define PROCESS_4X_3Y_1Z - - case DataType::F16: -#if defined(PROCESS_4X_3Y_1Z) - options.emplace("#define PROCESS_4X_3Y_1Z"); - num_elems_read_per_iteration_y = 5; - num_elems_written_per_iteration_y = 3; -#endif /* PROCESS_4X_3Y_1Z */ -#undef PROCESS_4X_3Y_1Z - break; - - default: - ARM_COMPUTE_ERROR("Current data type is not supported"); - break; - } - } - else - { - switch(input->info()->data_type()) - { - case DataType::F16: - options.emplace("#define PROCESS_4X_1Y_1Z"); - break; - - default: - ARM_COMPUTE_ERROR("Current data type is not supported"); - break; - } - } - - if(_biases != nullptr) - { - options.emplace("#define BIAS"); - } - - // Create kernel - std::string kernel_name = "depthwise_convolution_3x3"; - _kernel = static_cast<GCKernel>(GCKernelLibrary::get().create_kernel(kernel_name, options)); - - // Calculate output right and bottom border - const int output_width = output->info()->dimension(0); - const int output_height = output->info()->dimension(1); - const int output_padding_right = ceil_to_multiple(output_width, num_elems_written_per_iteration_x * _lws[0]) - output_width; - const int output_padding_bottom = ceil_to_multiple(output_height, num_elems_written_per_iteration_y * _lws[1]) - output_height; - - // Calculate input right and bottom border - const int input_width = input->info()->dimension(0); - const int input_height = input->info()->dimension(1); - - const int input_total_width = std::max(int(input->info()->padding().left), int(_conv_pad_left)) + input_width + std::max(int(input->info()->padding().right), int(_conv_pad_left)); - const int input_total_height = std::max(int(input->info()->padding().top), int(_conv_pad_top)) + input_height + std::max(int(input->info()->padding().bottom), int(_conv_pad_top)); - - const int input_padding_right = ceil_to_multiple(input_total_width, num_elems_read_per_iteration_x * _lws[0]) - input_width - _conv_pad_left; - const int input_padding_bottom = ceil_to_multiple(input_total_height, num_elems_read_per_iteration_y * _lws[1]) - input_height - _conv_pad_top; - - BorderSize border = BorderSize(0, output_padding_right, output_padding_bottom, 0); - - Window win = calculate_max_enlarged_window(*output->info(), Steps(num_elems_written_per_iteration_x, num_elems_written_per_iteration_y, num_elems_written_per_iteration_z), border); - - AccessWindowStatic input_access(input->info(), -_conv_pad_left, -_conv_pad_top, input_width + input_padding_right, input_height + input_padding_bottom); - AccessWindowStatic weights_access = AccessWindowStatic(nullptr, 0, 0, 0, 0); - AccessWindowStatic bias_access = AccessWindowStatic(nullptr, 0, 0, 0, 1); - - switch(weights->info()->data_type()) - { - case DataType::F16: - weights_access = AccessWindowStatic(weights->info(), 0, 0, 4, 3); - if(_biases != nullptr) - { - bias_access = AccessWindowStatic(_biases->info(), 0, 0, _biases->info()->dimension(0) + 1, 1); - } - break; - - default: - ARM_COMPUTE_ERROR("Current data type is not supported"); - break; - } - - AccessWindowStatic output_access(output->info(), 0, 0, output_width + output_padding_right, output_height + output_padding_bottom); - - if(_biases != nullptr) - { - update_window_and_padding(win, input_access, weights_access, bias_access, output_access); - } - else - { - update_window_and_padding(win, input_access, weights_access, output_access); - } - - output_access.set_valid_region(win, ValidRegion(Coordinates(), output->info()->tensor_shape())); - - IGCKernel::configure(win); -} - -void GCDepthwiseConvolutionLayer3x3Kernel::run(const Window &window) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window); - - _kernel.use(); - - _output->set_needs_shifting(true); - - // Create input window and adjust - Window win_in = window; - win_in.adjust(Window::DimX, -_conv_pad_left, true); - win_in.adjust(Window::DimY, -_conv_pad_top, true); - win_in.set_dimension_step(Window::DimX, window.x().step() * _conv_stride_x); - win_in.set_dimension_step(Window::DimY, window.y().step() * _conv_stride_y); - - Window slice_in = win_in.first_slice_window_3D(); - Window slice_out = window.first_slice_window_3D(); - Window slice_weights = window.first_slice_window_3D(); - slice_weights.set_dimension_step(Window::DimX, 0); - slice_weights.set_dimension_step(Window::DimY, 0); - - // Set biases - if(_biases != nullptr) - { - unsigned int idx = 3 * num_arguments_per_3D_tensor(); - Window slice_biases; - slice_biases.use_tensor_dimensions(_biases->info()->tensor_shape()); - add_1D_tensor_argument(idx, _biases, 4, slice_biases); - } - - slice_out.shift(Window::DimX, -(_output->info()->padding()).left); - - do - { - unsigned int idx = 0; - add_3D_tensor_argument(idx, _input, 1, slice_in); - add_3D_tensor_argument(idx, _output, 2, slice_out); - add_3D_tensor_argument(idx, _weights, 3, slice_weights); - - _kernel.update_shader_params(); - enqueue(*this, slice_out, _lws); - } - while(window.slide_window_slice_3D(slice_out) && win_in.slide_window_slice_3D(slice_in)); -} diff --git a/src/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.cpp deleted file mode 100644 index 9ce8acea09..0000000000 --- a/src/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.cpp +++ /dev/null @@ -1,450 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.h" - -#include "arm_compute/core/Error.h" -#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h" -#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h" -#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/IAccessWindow.h" -#include "arm_compute/core/ITensor.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/core/Validate.h" -#include "src/core/AccessWindowStatic.h" -#include "src/core/helpers/AutoConfiguration.h" -#include "src/core/helpers/WindowHelpers.h" -#include "support/StringSupport.h" - -using namespace arm_compute; - -template <unsigned int kernel_size> -GCDirectConvolutionLayerKernel<kernel_size>::GCDirectConvolutionLayerKernel() - : _input(nullptr), _bias(nullptr), _weights(nullptr), _output(nullptr), _border_size(0), _conv_stride_x(0), _conv_stride_y(0), _conv_pad_x(0), _conv_pad_y(0), _lws(gles::NDRange(1U, 1U, 1U)) -{ -} - -template <unsigned int kernel_size> -BorderSize GCDirectConvolutionLayerKernel<kernel_size>::border_size() const -{ - return _border_size; -} - -template <unsigned int kernel_size> -void GCDirectConvolutionLayerKernel<kernel_size>::configure(const IGCTensor *input, const IGCTensor *weights, const IGCTensor *bias, IGCTensor *output, - const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info) -{ - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32); - ARM_COMPUTE_ERROR_ON(weights->info()->dimension(2) != input->info()->dimension(2)); - ARM_COMPUTE_ERROR_ON(weights->info()->dimension(0) != weights->info()->dimension(1)); - ARM_COMPUTE_ERROR_ON(weights->info()->num_dimensions() > 4); - ARM_COMPUTE_ERROR_ON_MSG((kernel_size == 3 && std::get<0>(conv_info.stride()) > 2), "Strides larger than 2 not supported in 3x3 direct convolution!"); - ARM_COMPUTE_ERROR_ON(kernel_size != weights->info()->dimension(0)); - ARM_COMPUTE_ERROR_ON(act_info.enabled() && act_info.activation() != ActivationLayerInfo::ActivationFunction::RELU && act_info.activation() != ActivationLayerInfo::ActivationFunction::LOGISTIC); - - if(bias != nullptr) - { - ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(weights, bias); - // FIXME: Bug in framework, workaround it in tests currently. - //ARM_COMPUTE_ERROR_ON(bias->info()->dimension(0) != weights->info()->dimension(3)); - ARM_COMPUTE_ERROR_ON(bias->info()->num_dimensions() > 1); - } - - // Get convolved dimensions - unsigned int owidth = 0; - unsigned int oheight = 0; - std::tie(owidth, oheight) = scaled_dimensions(input->info()->dimension(0), input->info()->dimension(1), kernel_size, kernel_size, conv_info); - - TensorShape output_shape = input->info()->tensor_shape(); - output_shape.set(0, owidth); - output_shape.set(1, oheight); - output_shape.set(2, weights->info()->dimension(3)); - - // Output auto inizialitation if not yet initialized - auto_init_if_empty(*output->info(), output_shape, 1, input->info()->data_type()); - - ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights, output); - ARM_COMPUTE_ERROR_ON_MISMATCHING_DIMENSIONS(output->info()->tensor_shape(), output_shape); - ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); - ARM_COMPUTE_ERROR_ON(!conv_info.padding_is_symmetric()); - - _conv_stride_x = std::get<0>(conv_info.stride()); - _conv_stride_y = std::get<1>(conv_info.stride()); - _conv_pad_x = std::get<0>(conv_info.pad()); - _conv_pad_y = std::get<1>(conv_info.pad()); - - _input = input; - _weights = weights; - _output = output; - _bias = bias; - _border_size = BorderSize(_conv_pad_y, _conv_pad_x); - - std::set<std::string> options; - - options.emplace("#define LOCAL_SIZE_X " + support::cpp11::to_string(_lws[0])); - options.emplace("#define LOCAL_SIZE_Y " + support::cpp11::to_string(_lws[1])); - options.emplace("#define LOCAL_SIZE_Z " + support::cpp11::to_string(_lws[2])); - options.emplace("#define STRIDE_X " + support::cpp11::to_string(_conv_stride_x)); - options.emplace("#define STRIDE_Y " + support::cpp11::to_string(_conv_stride_y)); - - std::string dt_name = (input->info()->data_type() == DataType::F32) ? "DATA_TYPE_FP32" : "DATA_TYPE_FP16"; - options.emplace(("#define " + dt_name)); - - // Activation information in case of a fused activation - if(act_info.enabled()) - { - options.emplace("#define FUSED_ACTIVATION"); - options.emplace(("#define " + string_from_activation_func(act_info.activation()))); - options.emplace(("#define ACT_OP " + lower_string(string_from_activation_func(act_info.activation())) + "_op")); - options.emplace(("#define A_VAL " + float_to_string_with_full_precision(act_info.a()))); - options.emplace(("#define B_VAL " + float_to_string_with_full_precision(act_info.b()))); - } - - unsigned int num_elems_read_per_iteration_x = kernel_size * _conv_stride_x; - unsigned int num_elems_read_per_iteration_y = 1; - unsigned int num_elems_written_per_iteration_x = 1; - unsigned int num_elems_written_per_iteration_y = 1; - unsigned int num_elems_written_per_iteration_z = 1; - - if(kernel_size == 3) - { - if((_conv_stride_x == 1) && (_conv_stride_y == 1)) - { - switch(input->info()->data_type()) - { - case DataType::F16: - // TODO(APPBROWSER-299): Choose the most optimal path and remove others. -#define PROCESS_4X_3Y_1Z - -#if defined(PROCESS_8X_3Y_1Z) - options.emplace("#define PROCESS_8X_3Y_1Z"); - num_elems_read_per_iteration_x = 16; - num_elems_read_per_iteration_y = 5; - num_elems_written_per_iteration_x = 8; - num_elems_written_per_iteration_y = 3; -#elif defined(PROCESS_4X_3Y_1Z) - options.emplace("#define PROCESS_4X_3Y_1Z"); - num_elems_read_per_iteration_x = 8; - num_elems_read_per_iteration_y = 5; - num_elems_written_per_iteration_x = 4; - num_elems_written_per_iteration_y = 3; -#elif defined(PROCESS_4X_4Y_1Z) - options.emplace("#define PROCESS_4X_4Y_1Z"); - num_elems_read_per_iteration_x = 8; - num_elems_read_per_iteration_y = 6; - num_elems_written_per_iteration_x = 4; - num_elems_written_per_iteration_y = 4; -#elif defined(PROCESS_4X_3Y_2Z) - options.emplace("#define PROCESS_4X_3Y_2Z"); - num_elems_read_per_iteration_x = 8; - num_elems_read_per_iteration_y = 5; - num_elems_written_per_iteration_x = 4; - num_elems_written_per_iteration_y = 3; - num_elems_written_per_iteration_z = 2; -#endif /* PROCESS_nX_nY_nZ */ -#undef PROCESS_8X_3Y_1Z -#undef PROCESS_4X_3Y_1Z -#undef PROCESS_4X_4Y_1Z -#undef PROCESS_4X_3Y_2Z - break; - - case DataType::F32: - options.emplace("#define PROCESS_4X_3Y_1Z"); - num_elems_read_per_iteration_x = 8; - num_elems_read_per_iteration_y = 5; - num_elems_written_per_iteration_x = 4; - num_elems_written_per_iteration_y = 3; - break; - - default: - ARM_COMPUTE_ERROR("Current data type is not supported"); - break; - } - } - // FIXME: Just keep one in release - else - { - switch(input->info()->data_type()) - { - case DataType::F16: - options.emplace("#define PROCESS_4X_1Y_1Z"); - num_elems_read_per_iteration_x = 8; - num_elems_written_per_iteration_x = 4; - break; - - case DataType::F32: - // TODO(APPBROWSER-299): Choose the most optimal path and remove others. -#define PROCESS_4X_1Y_1Z - -#if defined(PROCESS_1X_1Y_1Z) - options.emplace("#define PROCESS_1X_1Y_1Z"); - num_elems_read_per_iteration_x = 3; - num_elems_written_per_iteration_x = 1; -#elif defined(PROCESS_4X_1Y_1Z) - options.emplace("#define PROCESS_4X_1Y_1Z"); - num_elems_read_per_iteration_x = 8; - num_elems_written_per_iteration_x = 4; -#elif defined(PROCESS_8X_1Y_1Z) - options.emplace("#define PROCESS_8X_1Y_1Z"); - num_elems_read_per_iteration_x = 12; - num_elems_written_per_iteration_x = 8; -#else /* PROCESS_nX_nY_nZ */ -#error Have to declare how many elements to process in one thread. -#endif /* PROCESS_nX_nY_nZ */ -#undef PROCESS_1X_1Y_1Z -#undef PROCESS_4X_1Y_1Z -#undef PROCESS_8X_1Y_1Z - break; - - default: - ARM_COMPUTE_ERROR("Current data type is not supported"); - break; - } - } - } - else if(kernel_size == 1) - { - if(weights->info()->dimension(2) % 2 == 0) - { - options.emplace("#define WEIGHTS_OPTIMIZATION"); - } - switch(input->info()->data_type()) - { - case DataType::F16: -#define PROCESS_8X_2Y_1Z - -#if defined(PROCESS_4X_1Y_1Z) - options.emplace("#define PROCESS_4X_1Y_1Z"); - num_elems_read_per_iteration_x = 4; - num_elems_written_per_iteration_x = 4; -#elif defined(PROCESS_4X_2Y_1Z) - options.emplace("#define PROCESS_4X_2Y_1Z"); - num_elems_read_per_iteration_x = 4; - num_elems_read_per_iteration_y = 2; - num_elems_written_per_iteration_x = 4; - num_elems_written_per_iteration_y = 2; -#elif defined(PROCESS_4X_3Y_1Z) - options.emplace("#define PROCESS_4X_3Y_1Z"); - num_elems_read_per_iteration_x = 4; - num_elems_read_per_iteration_y = 3; - num_elems_written_per_iteration_x = 4; - num_elems_written_per_iteration_y = 3; -#elif defined(PROCESS_4X_4Y_1Z) - options.emplace("#define PROCESS_4X_4Y_1Z"); - num_elems_read_per_iteration_x = 4; - num_elems_read_per_iteration_y = 4; - num_elems_written_per_iteration_x = 4; - num_elems_written_per_iteration_y = 4; -#elif defined(PROCESS_4X_2Y_2Z) - ARM_COMPUTE_ERROR_ON_MSG((weights->info()->dimension(4) % 2) == 1, "Current 'weights->info()->dimension(4) % 2) == 1' is not supported"); - options.emplace("#define PROCESS_4X_2Y_2Z"); - num_elems_read_per_iteration_x = 4; - num_elems_read_per_iteration_y = 2; - num_elems_written_per_iteration_x = 4; - num_elems_written_per_iteration_y = 2; - num_elems_written_per_iteration_z = 2; -#elif defined(PROCESS_8X_1Y_1Z) - options.emplace("#define PROCESS_8X_1Y_1Z"); - num_elems_read_per_iteration_x = 8; - num_elems_written_per_iteration_x = 8; -#elif defined(PROCESS_8X_2Y_1Z) - options.emplace("#define PROCESS_8X_2Y_1Z"); - num_elems_read_per_iteration_x = 8; - num_elems_read_per_iteration_y = 2; - num_elems_written_per_iteration_x = 8; - num_elems_written_per_iteration_y = 2; -#else /* PROCESS_4X_1Y_1Z */ -#error Have to declare how many elements to process in one thread. -#endif /* PROCESS_4X_1Y_1Z */ -#undef PROCESS_4X_1Y_1Z -#undef PROCESS_4X_2Y_1Z -#undef PROCESS_4X_3Y_1Z -#undef PROCESS_4X_4Y_1Z -#undef PROCESS_4X_2Y_2Z -#undef PROCESS_8X_1Y_1Z -#undef PROCESS_8X_2Y_1Z - break; - - case DataType::F32: - num_elems_read_per_iteration_x = 1; - num_elems_written_per_iteration_x = 1; - break; - - default: - break; - } - } - else if(kernel_size == 5) - { - switch(input->info()->data_type()) - { - case DataType::F16: - options.emplace("#define PROCESS_4X_1Y_1Z"); - num_elems_read_per_iteration_x = 8; - num_elems_written_per_iteration_x = 4; - - default: - break; - } - } - else - { - } - - if(_bias != nullptr) - { - options.emplace("#define BIAS"); - } - - std::stringstream kernel_name; - kernel_name << "direct_convolution" << kernel_size << "x" << kernel_size; - - _kernel = static_cast<GCKernel>(GCKernelLibrary::get().create_kernel(kernel_name.str(), options)); - - unsigned int idx = (_bias == nullptr) ? 3 * num_arguments_per_3D_tensor() : (num_arguments_per_1D_tensor() + 3 * num_arguments_per_3D_tensor()); - - // Calculate output right and bottom border - const int output_width = output->info()->dimension(0); - const int output_height = output->info()->dimension(1); - const int output_padding_right = ceil_to_multiple(output_width, num_elems_written_per_iteration_x * _lws[0]) - output_width; - const int output_padding_bottom = ceil_to_multiple(output_height, num_elems_written_per_iteration_y * _lws[1]) - output_height; - - // Calculate input right and bottom border - const int input_width = input->info()->dimension(0); - const int input_height = input->info()->dimension(1); - const int input_total_width = std::max(int(input->info()->padding().left), int(_conv_pad_x)) + input_width + std::max(int(input->info()->padding().right), int(_conv_pad_x)); - const int input_total_height = std::max(int(input->info()->padding().top), int(_conv_pad_y)) + input_height + std::max(int(input->info()->padding().bottom), int(_conv_pad_y)); - const int padding_right1 = ceil_to_multiple(input_total_width, num_elems_read_per_iteration_x * _lws[0]) - input_width - _conv_pad_x; - const int padding_bottom1 = ceil_to_multiple(input_total_height, num_elems_read_per_iteration_y * _lws[1]) - input_height - _conv_pad_y; - - const int upper_bound_w = ceil_to_multiple(((output_width + output_padding_right) * _conv_stride_x + (kernel_size - 1)), num_elems_read_per_iteration_x * _lws[0]) - _conv_pad_x - input_width; - const int upper_bound_h = ceil_to_multiple(((output_height + output_padding_bottom) * _conv_stride_y + (kernel_size - 1)), num_elems_read_per_iteration_y * _lws[1]) - _conv_pad_y - input_height; - const int padding_right2 = std::max(upper_bound_w, _conv_pad_x); - const int padding_bottom2 = std::max(upper_bound_h, _conv_pad_y); - - const int padding_right = std::max(padding_right1, padding_right2); - const int padding_bottom = std::max(padding_bottom1, padding_bottom2); - - BorderSize border = BorderSize(0, output_padding_right, output_padding_bottom, 0); - - Window win = calculate_max_enlarged_window(*output->info(), Steps(num_elems_written_per_iteration_x, num_elems_written_per_iteration_y, num_elems_written_per_iteration_z), border); - - AccessWindowStatic input_access(input->info(), -_conv_pad_x, -_conv_pad_y, input_width + padding_right, input_height + padding_bottom); - AccessWindowStatic weights_access = AccessWindowStatic(nullptr, 0, 0, 0, 0); - AccessWindowStatic bias_access = AccessWindowStatic(nullptr, 0, 0, 0, 1); - - switch(weights->info()->data_type()) - { - case DataType::F16: - if((weights->info()->dimension(2) % 2 != 0) || (kernel_size != 1)) - { - weights_access = AccessWindowStatic(weights->info(), 0, 0, kernel_size + 1, kernel_size); - } - if(_bias != nullptr) - { - bias_access = AccessWindowStatic(_bias->info(), 0, 0, _bias->info()->dimension(0) + 1, 1); - } - break; - - case DataType::F32: - weights_access = AccessWindowStatic(weights->info(), 0, 0, kernel_size, kernel_size); - if(_bias != nullptr) - { - bias_access = AccessWindowStatic(_bias->info(), 0, 0, _bias->info()->dimension(0), 1); - } - break; - - default: - ARM_COMPUTE_ERROR("Current data type is not supported"); - break; - } - - AccessWindowStatic output_access(output->info(), 0, 0, output_width + output_padding_right, output_height + output_padding_bottom); - - if(_bias != nullptr) - { - update_window_and_padding(win, input_access, weights_access, bias_access, output_access); - } - else - { - update_window_and_padding(win, input_access, weights_access, output_access); - } - - output_access.set_valid_region(win, ValidRegion(Coordinates(), output->info()->tensor_shape())); - - _kernel.set_argument(idx++, _weights->info()->strides_in_bytes()[3]); // weights_stride_w - _kernel.set_argument(idx++, _weights->info()->dimension(2)); // weights_depth - - IGCKernel::configure(win); -} - -template <unsigned int kernel_size> -void GCDirectConvolutionLayerKernel<kernel_size>::run(const Window &window) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window); - - _kernel.use(); - - _output->set_needs_shifting(true); - - // Get initial windows - Window slice = window.first_slice_window_3D(); - Window win_in = window; - - win_in.adjust(Window::DimX, -_conv_pad_x, true); - win_in.adjust(Window::DimY, -_conv_pad_y, true); - win_in.set_dimension_step(Window::DimX, window.x().step() * _conv_stride_x); - win_in.set_dimension_step(Window::DimY, window.y().step() * _conv_stride_y); - - Window slice_in = win_in.first_slice_window_3D(); - - unsigned int idx1 = 2 * num_arguments_per_3D_tensor(); - add_3D_tensor_argument(idx1, _weights, 3, slice); - - if(_bias != nullptr) - { - Window slice_bias; - slice_bias.use_tensor_dimensions(_bias->info()->tensor_shape()); - add_1D_tensor_argument(idx1, _bias, 4, slice_bias); - } - - slice.shift(Window::DimX, -(_output->info()->padding()).left); - - do - { - unsigned int idx = 0; - - add_3D_tensor_argument(idx, _input, 1, slice_in); - add_3D_tensor_argument(idx, _output, 2, slice); - - _kernel.update_shader_params(); - enqueue(*this, slice, _lws); - } - while(window.slide_window_slice_3D(slice) && win_in.slide_window_slice_3D(slice_in)); -} - -template class arm_compute::GCDirectConvolutionLayerKernel<1>; -template class arm_compute::GCDirectConvolutionLayerKernel<3>; -template class arm_compute::GCDirectConvolutionLayerKernel<5>; diff --git a/src/core/GLES_COMPUTE/kernels/GCDropoutLayerKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCDropoutLayerKernel.cpp deleted file mode 100644 index bda6599f86..0000000000 --- a/src/core/GLES_COMPUTE/kernels/GCDropoutLayerKernel.cpp +++ /dev/null @@ -1,109 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "arm_compute/core/GLES_COMPUTE/kernels/GCDropoutLayerKernel.h" - -#include "arm_compute/core/Error.h" -#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h" -#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h" -#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h" -#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/core/Validate.h" -#include "src/core/helpers/AutoConfiguration.h" -#include "src/core/helpers/WindowHelpers.h" -#include "support/StringSupport.h" - -#include <cmath> -#include <random> -#include <tuple> - -using namespace arm_compute; - -GCDropoutLayerKernel::GCDropoutLayerKernel() - : _input(nullptr), _mask(nullptr), _output(nullptr), _num_elems_processed_per_iteration(0) -{ -} - -void GCDropoutLayerKernel::configure(const IGCTensor *input, IGCTensor *mask, IGCTensor *output, float ratio, bool forward) -{ - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32); - ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, mask, output); - - _input = input; - _mask = mask; - _output = output; - - std::set<std::string> build_opts; - std::string dt_name = (input->info()->data_type() == DataType::F32) ? "DATA_TYPE_FP32" : "DATA_TYPE_FP16"; - std::string fporbp = forward ? "FORWARD" : "BACKWARD"; - std::random_device rd; - std::mt19937 mt(rd()); - std::uniform_real_distribution<float> dist(0.f, 1.f); - - build_opts.emplace("#define LOCAL_SIZE_X " + support::cpp11::to_string(1)); - build_opts.emplace("#define LOCAL_SIZE_Y " + support::cpp11::to_string(1)); - build_opts.emplace("#define LOCAL_SIZE_Z " + support::cpp11::to_string(1)); - build_opts.emplace("#define RATIO " + support::cpp11::to_string(ratio)); - build_opts.emplace("#define SCALE " + support::cpp11::to_string(1. / (1. - ratio))); - build_opts.emplace("#define SEED " + support::cpp11::to_string(dist(mt))); - build_opts.emplace("#define " + dt_name); - build_opts.emplace("#define " + fporbp); - - _num_elems_processed_per_iteration = 4 / input->info()->element_size(); - - // Create kernel - _kernel = static_cast<GCKernel>(GCKernelLibrary::get().create_kernel("dropout", build_opts)); - - // Configure kernel window - Window win = calculate_max_window(*input->info(), Steps(_num_elems_processed_per_iteration)); - - output->info()->set_valid_region(ValidRegion(Coordinates(), output->info()->tensor_shape())); - - IGCKernel::configure(win); -} - -void GCDropoutLayerKernel::run(const Window &window) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_MISMATCHING_WINDOWS(IGCKernel::window(), window); - - _kernel.use(); - - Window slice = window.first_slice_window_3D(); - - do - { - unsigned int idx = 0; - - add_3D_tensor_argument(idx, _input, 1, slice); - add_3D_tensor_argument(idx, _mask, 2, slice); - add_3D_tensor_argument(idx, _output, 3, slice); - - _kernel.update_shader_params(); - enqueue(*this, slice); - } - while(window.slide_window_slice_3D(slice)); -} diff --git a/src/core/GLES_COMPUTE/kernels/GCFillBorderKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCFillBorderKernel.cpp deleted file mode 100644 index 7ffcdd2f3f..0000000000 --- a/src/core/GLES_COMPUTE/kernels/GCFillBorderKernel.cpp +++ /dev/null @@ -1,168 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/core/GLES_COMPUTE/kernels/GCFillBorderKernel.h" - -#include "arm_compute/core/Error.h" -#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h" -#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h" -#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h" -#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Utils.h" -#include "arm_compute/core/Validate.h" -#include "arm_compute/core/Window.h" -#include "src/core/helpers/AutoConfiguration.h" -#include "src/core/helpers/WindowHelpers.h" -#include "support/StringSupport.h" - -#include <cstdint> -#include <set> -#include <string> - -using namespace arm_compute; - -GCFillBorderKernel::GCFillBorderKernel() - : IGCKernel(), _tensor(nullptr) -{ -} - -bool GCFillBorderKernel::is_parallelisable() const -{ - return false; -} - -template <class T> -void GCFillBorderKernel::set_constant_border(unsigned int idx, const PixelValue &constant_border_value) -{ - T value; - constant_border_value.get(value); - _kernel.set_argument(idx, static_cast<T>(value)); -} - -void GCFillBorderKernel::configure(const IGCTensor *tensor, BorderSize border_size, BorderMode border_mode, const PixelValue &constant_border_value) -{ - ARM_COMPUTE_ERROR_ON(tensor == nullptr); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(tensor, 1, DataType::F32, DataType::F16); - ARM_COMPUTE_ERROR_ON(tensor->info()->num_channels() != 1); - - border_size.limit(tensor->info()->padding()); - - // If there is no border: early exit - if(border_size.empty() || border_mode == BorderMode::UNDEFINED) - { - return; - } - - // Select appropriate kernel - std::string kernel_name = "fill_image_borders_" + lower_string(string_from_border_mode(border_mode)); - - // Define build options - std::set<std::string> build_opts; - build_opts.emplace("#define LOCAL_SIZE_X " + support::cpp11::to_string(1)); - build_opts.emplace("#define LOCAL_SIZE_Y " + support::cpp11::to_string(1)); - build_opts.emplace("#define LOCAL_SIZE_Z " + support::cpp11::to_string(1)); - build_opts.emplace("#define BORDER_SIZE_TOP " + support::cpp11::to_string(border_size.top)); - build_opts.emplace("#define BORDER_SIZE_BOTTOM " + support::cpp11::to_string(border_size.bottom)); - build_opts.emplace("#define BORDER_SIZE_LEFT " + support::cpp11::to_string(border_size.left)); - build_opts.emplace("#define BORDER_SIZE_RIGHT " + support::cpp11::to_string(border_size.right)); - - if(border_mode == BorderMode::REPLICATE) - { - build_opts.emplace("#define FILL_IMAGE_BORDERS_REPLICATE\n"); - } - else - { - build_opts.emplace("#define FILL_IMAGE_BORDERS_CONSTANT\n"); - } - - switch(tensor->info()->data_type()) - { - case DataType::F16: - build_opts.emplace("#define DATA_TYPE_FP16"); - break; - - case DataType::F32: - build_opts.emplace("#define DATA_TYPE_FP32"); - break; - - default: - ARM_COMPUTE_ERROR("Current data type is not supported"); - break; - } - - // Create kernel - _kernel = static_cast<GCKernel>(GCKernelLibrary::get().create_kernel(kernel_name, build_opts)); - _tensor = tensor; - - // Create static kernel arguments - const unsigned int valid_width = tensor->info()->valid_region().shape[0]; - const unsigned int valid_height = tensor->info()->valid_region().shape[1]; - const unsigned int total_valid_width = border_size.left + valid_width + border_size.right; - - // Set static kernel arguments - unsigned int idx = num_arguments_per_3D_tensor(); //Skip the tensor parameters - _kernel.set_argument(idx++, valid_width); - _kernel.set_argument(idx++, valid_height); - _kernel.set_argument(idx++, tensor->info()->valid_region().anchor[0]); - _kernel.set_argument(idx++, tensor->info()->valid_region().anchor[1]); - - if(BorderMode::CONSTANT == border_mode) - { - set_constant_border<float>(idx++, constant_border_value); - } - - // Configure kernel window - Window win; - win.set(Window::DimX, Window::Dimension(0, total_valid_width + valid_height)); - win.set(Window::DimY, Window::Dimension(0, 1, 1)); - win.use_tensor_dimensions(tensor->info()->tensor_shape(), Window::DimZ); - - IGCKernel::configure(win); -} - -void GCFillBorderKernel::run(const Window &window) -{ - // Border mode undefined or border width == 0 - if(_kernel.get_program() == 0) - { - return; - } - - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_MISMATCHING_WINDOWS(IGCKernel::window(), window); - - _kernel.use(); - Window slice = window.first_slice_window_3D(); - - do - { - unsigned int idx = 0; - add_3D_tensor_argument(idx, _tensor, 1, slice); - - _kernel.update_shader_params(); - - enqueue(*this, slice); - } - while(window.slide_window_slice_3D(slice)); -} diff --git a/src/core/GLES_COMPUTE/kernels/GCGEMMInterleave4x4Kernel.cpp b/src/core/GLES_COMPUTE/kernels/GCGEMMInterleave4x4Kernel.cpp deleted file mode 100644 index d395759558..0000000000 --- a/src/core/GLES_COMPUTE/kernels/GCGEMMInterleave4x4Kernel.cpp +++ /dev/null @@ -1,127 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/core/GLES_COMPUTE/kernels/GCGEMMInterleave4x4Kernel.h" - -#include "arm_compute/core/Error.h" -#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h" -#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h" -#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h" -#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/core/Utils.h" -#include "arm_compute/core/Validate.h" -#include "arm_compute/core/Window.h" -#include "src/core/helpers/AutoConfiguration.h" -#include "src/core/helpers/WindowHelpers.h" -#include "support/StringSupport.h" - -using namespace arm_compute; - -GCGEMMInterleave4x4Kernel::GCGEMMInterleave4x4Kernel() - : _input(nullptr), _output(nullptr) -{ -} - -void GCGEMMInterleave4x4Kernel::configure(const IGCTensor *input, IGCTensor *output) -{ - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32); - ARM_COMPUTE_ERROR_ON_NULLPTR(output); - - TensorShape output_shape = input->info()->tensor_shape(); - output_shape.set(0, input->info()->dimension(0) * 4); - output_shape.set(1, std::ceil(input->info()->dimension(1) / 4.0f)); - - // Output auto inizialitation if not yet initialized - auto_init_if_empty(*output->info(), output_shape, 1, input->info()->data_type()); - - ARM_COMPUTE_ERROR_ON_MISMATCHING_DIMENSIONS(output->info()->tensor_shape(), output_shape); - ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); - - _input = input; - _output = output; - - std::set<std::string> build_opts; - std::string dt_name = (input->info()->data_type() == DataType::F32) ? "DATA_TYPE_FP32" : "DATA_TYPE_FP16"; - build_opts.emplace(("#define " + dt_name)); - build_opts.emplace("#define LOCAL_SIZE_X " + support::cpp11::to_string(1)); - build_opts.emplace("#define LOCAL_SIZE_Y " + support::cpp11::to_string(1)); - build_opts.emplace("#define LOCAL_SIZE_Z " + support::cpp11::to_string(1)); - - // Create kernel - build_opts.emplace("#define GEMM_INTERLEAVE4x4"); - _kernel = static_cast<GCKernel>(GCKernelLibrary::get().create_kernel("gemm_interleave4x4", build_opts)); - - // Configure kernel window - const unsigned int num_elems_processed_per_iteration_x = max_gc_vector_width / data_size_from_type(input->info()->data_type()); - constexpr unsigned int num_elems_processed_per_iteration_y = 4; - const unsigned int num_elems_written_per_iteration = num_elems_processed_per_iteration_x * num_elems_processed_per_iteration_y; - - Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration_x, num_elems_processed_per_iteration_y)); - - AccessWindowRectangle input_access(input->info(), 0, 0, num_elems_processed_per_iteration_x, num_elems_processed_per_iteration_y); - AccessWindowRectangle output_access(output->info(), 0, 0, num_elems_written_per_iteration, 1, 4.f, 0.25f); - - update_window_and_padding(win, input_access, output_access); - - output->info()->set_valid_region(ValidRegion(Coordinates(), output->info()->tensor_shape())); - - IGCKernel::configure(win); -} - -void GCGEMMInterleave4x4Kernel::run(const Window &window) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IGCKernel::window(), window); - - _kernel.use(); - - /* - * This kernel puts the values in a 4x4 block of Matrix A on the same row (Interleaved values) - * |a00 a01 a02 a03| - * |a10 a11 a12 a13| - * |a20 a21 a22 a23| = | a00 a10 a20 a30 || a01 a11 a21 a31 || a02 a12 a22 a32 || a03 a13 a23 a33 | - * |a30 a31 a32 a33| - * - * After this operation, the output matrix will have the following shape: [ height * 4, width / 4 ] - */ - Window in_slice = window.first_slice_window_2D(); - Window out_slice = window.first_slice_window_2D(); - - // Change x and y steps for the slide of output tensor - out_slice.scale(Window::DimX, 4.f); - out_slice.scale(Window::DimY, 0.25f); - - do - { - unsigned int idx = 0; - add_2D_tensor_argument(idx, _input, 1, in_slice); - add_2D_tensor_argument(idx, _output, 2, out_slice); - - _kernel.update_shader_params(); - - enqueue(*this, in_slice); - } - while(window.slide_window_slice_2D(in_slice) && window.slide_window_slice_2D(out_slice)); -} diff --git a/src/core/GLES_COMPUTE/kernels/GCGEMMMatrixAccumulateBiasesKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCGEMMMatrixAccumulateBiasesKernel.cpp deleted file mode 100644 index 66fdde5473..0000000000 --- a/src/core/GLES_COMPUTE/kernels/GCGEMMMatrixAccumulateBiasesKernel.cpp +++ /dev/null @@ -1,131 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixAccumulateBiasesKernel.h" - -#include "arm_compute/core/Error.h" -#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h" -#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h" -#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h" -#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/core/Utils.h" -#include "arm_compute/core/Validate.h" -#include "src/core/AccessWindowStatic.h" -#include "src/core/helpers/AutoConfiguration.h" -#include "src/core/helpers/WindowHelpers.h" -#include "support/StringSupport.h" - -using namespace arm_compute; - -GCGEMMMatrixAccumulateBiasesKernel::GCGEMMMatrixAccumulateBiasesKernel() - : _accum(nullptr), _biases(nullptr), _lws(gles::NDRange(1U, 1U, 1U)) -{ -} - -void GCGEMMMatrixAccumulateBiasesKernel::configure(IGCTensor *accum, const IGCTensor *biases) -{ - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(accum, 1, DataType::F16, DataType::F32); - ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(biases, accum); - ARM_COMPUTE_ERROR_ON(biases->info()->num_dimensions() != 1); - - _biases = biases; - _accum = accum; - - std::set<std::string> build_opts; - build_opts.emplace("#define LOCAL_SIZE_X " + support::cpp11::to_string(_lws[0])); - build_opts.emplace("#define LOCAL_SIZE_Y " + support::cpp11::to_string(_lws[1])); - build_opts.emplace("#define LOCAL_SIZE_Z " + support::cpp11::to_string(_lws[2])); - - // Create kernel - build_opts.emplace("#define GEMM_ACCUMULATE_BIASES"); - -#define ACCUM_PROCESS_4X - -#if defined(ACCUM_PROCESS_4X) - build_opts.emplace("#define ACCUM_PROCESS_4X"); -#elif defined(ACCUM_PROCESS_8X) /* ACCUM_PROCESS_4X */ - build_opts.emplace("#define ACCUM_PROCESS_8X"); -#endif /* ACCUM_PROCESS_4X */ - std::string dt_name = (accum->info()->data_type() == DataType::F32) ? "DATA_TYPE_FP32" : "DATA_TYPE_FP16"; - build_opts.emplace(("#define " + dt_name)); - - _kernel = GCKernelLibrary::get().create_kernel("gemm_accumulate_biases", build_opts); - - // Configure kernel window - unsigned int num_elems_processed_per_iteration = 1; - - if(_accum->info()->data_type() == DataType::F32) - { - num_elems_processed_per_iteration = 16; - } - else if(_accum->info()->data_type() == DataType::F16) - { -#if defined(ACCUM_PROCESS_4X) - num_elems_processed_per_iteration = 4; -#elif defined(ACCUM_PROCESS_8X) /* ACCUM_PROCESS_4X */ - num_elems_processed_per_iteration = 8; -#endif /* ACCUM_PROCESS_4X */ - } - - const int accum_width = accum->info()->dimension(0); - const int accum_padding_right = ceil_to_multiple(accum_width, num_elems_processed_per_iteration * _lws[0]) - accum_width; - BorderSize border = BorderSize(0, accum_padding_right, 0, 0); - - Window win = calculate_max_enlarged_window(*_accum->info(), Steps(num_elems_processed_per_iteration), border); - - AccessWindowStatic biases_access(biases->info(), 0, 0, ceil_to_multiple(biases->info()->dimension(0), num_elems_processed_per_iteration * _lws[0]), biases->info()->dimension(1)); - AccessWindowStatic accum_access(_accum->info(), 0, 0, accum_width + accum_padding_right, _accum->info()->dimension(1)); - - update_window_and_padding(win, biases_access, accum_access); - - IGCKernel::configure(win); -} - -void GCGEMMMatrixAccumulateBiasesKernel::run(const Window &window) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_MISMATCHING_WINDOWS(IGCKernel::window(), window); - - _kernel.use(); - - Window accum_slice = window.first_slice_window_2D(); - - Window biases_slice(accum_slice); - biases_slice.set(Window::DimY, Window::Dimension(0, 1, 1)); - - // Run kernel - do - { - // Set arguments - unsigned int idx = 0; - - add_2D_tensor_argument(idx, _accum, 1, accum_slice); - add_1D_tensor_argument(idx, _biases, 2, biases_slice); - _kernel.update_shader_params(); - - enqueue(*this, accum_slice, _lws); - } - while(window.slide_window_slice_2D(accum_slice)); -} diff --git a/src/core/GLES_COMPUTE/kernels/GCGEMMMatrixAdditionKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCGEMMMatrixAdditionKernel.cpp deleted file mode 100644 index daad70bba9..0000000000 --- a/src/core/GLES_COMPUTE/kernels/GCGEMMMatrixAdditionKernel.cpp +++ /dev/null @@ -1,103 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixAdditionKernel.h" - -#include "arm_compute/core/Error.h" -#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h" -#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h" -#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h" -#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/core/Validate.h" -#include "arm_compute/core/Window.h" -#include "src/core/helpers/AutoConfiguration.h" -#include "src/core/helpers/WindowHelpers.h" -#include "support/StringSupport.h" - -using namespace arm_compute; - -GCGEMMMatrixAdditionKernel::GCGEMMMatrixAdditionKernel() - : _input(nullptr), _output(nullptr) -{ -} - -void GCGEMMMatrixAdditionKernel::configure(const IGCTensor *input, IGCTensor *output, float beta) -{ - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32); - ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); - ARM_COMPUTE_ERROR_ON(input->info()->dimension(0) != output->info()->dimension(0)); - ARM_COMPUTE_ERROR_ON(input->info()->dimension(1) != output->info()->dimension(1)); - - _input = input; - _output = output; - const unsigned int num_elems_processed_per_iteration = max_gc_vector_width / data_size_from_type(input->info()->data_type()); - - std::set<std::string> build_opts; - std::string dt_name = (input->info()->data_type() == DataType::F32) ? "DATA_TYPE_FP32" : "DATA_TYPE_FP16"; - build_opts.emplace(("#define " + dt_name)); - build_opts.emplace("#define LOCAL_SIZE_X " + support::cpp11::to_string(1)); - build_opts.emplace("#define LOCAL_SIZE_Y " + support::cpp11::to_string(1)); - build_opts.emplace("#define LOCAL_SIZE_Z " + support::cpp11::to_string(1)); - build_opts.emplace("#define BETA " + float_to_string_with_full_precision(beta)); - - // Create kernel - build_opts.emplace("#define GEMM_MATRIXADDITION"); - std::string data_type_name = lower_string(string_from_data_type(input->info()->data_type())); - _kernel = GCKernelLibrary::get().create_kernel(("gemm_ma"), build_opts); - - // Configure kernel window - Window win = calculate_max_window(*_input->info(), Steps(num_elems_processed_per_iteration)); - - AccessWindowHorizontal input_access(input->info(), 0, num_elems_processed_per_iteration); - AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration); - - update_window_and_padding(win, input_access, output_access); - - output_access.set_valid_region(win, input->info()->valid_region()); - - IGCKernel::configure(win); -} - -void GCGEMMMatrixAdditionKernel::run(const Window &window) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IGCKernel::window(), window); - - _kernel.use(); - - Window slice = window.first_slice_window_2D(); - - do - { - unsigned int idx = 0; - add_2D_tensor_argument(idx, _input, 1, slice); - add_2D_tensor_argument(idx, _output, 2, slice); - - _kernel.update_shader_params(); - - enqueue(*this, slice); - } - while(window.slide_window_slice_2D(slice)); -} diff --git a/src/core/GLES_COMPUTE/kernels/GCGEMMMatrixMultiplyKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCGEMMMatrixMultiplyKernel.cpp deleted file mode 100644 index 2f69728b61..0000000000 --- a/src/core/GLES_COMPUTE/kernels/GCGEMMMatrixMultiplyKernel.cpp +++ /dev/null @@ -1,338 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixMultiplyKernel.h" - -#include "arm_compute/core/Error.h" -#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h" -#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h" -#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h" -#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/core/Utils.h" -#include "arm_compute/core/Validate.h" -#include "arm_compute/core/Window.h" -#include "arm_compute/core/utils/misc/ShapeCalculator.h" -#include "src/core/AccessWindowStatic.h" -#include "src/core/AccessWindowTranspose.h" -#include "src/core/helpers/AutoConfiguration.h" -#include "src/core/helpers/WindowHelpers.h" -#include "support/StringSupport.h" - -#include <set> -#include <string> - -using namespace arm_compute; -using namespace arm_compute::misc::shape_calculator; - -namespace -{ -using ElementsProcessed = Steps; - -inline Status validate_arguments(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *output, bool is_interleaved_transposed, const GEMMReshapeInfo &reshape_info) -{ - ARM_COMPUTE_UNUSED(reshape_info); - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input0, input1, output); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input0, 1, DataType::F16, DataType::F32); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input0, input1); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(input1->num_dimensions() > 3, "The number of dimensions for the matrix B must be <= 3"); - - if(!is_interleaved_transposed) - { - ARM_COMPUTE_RETURN_ERROR_ON(input0->dimension(0) != input1->dimension(1)); - - if(output->total_size() != 0) - { - ARM_COMPUTE_RETURN_ERROR_ON(input1->dimension(0) != output->dimension(0)); - ARM_COMPUTE_RETURN_ERROR_ON(input0->dimension(1) != output->dimension(1)); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input0, output); - } - } - else - { - const int m = reshape_info.m(); - const int n = reshape_info.n(); - const int k = reshape_info.k(); - const int mult_transpose1xW_width = reshape_info.mult_transpose1xW_width(); - const int mult_interleave4x4_height = reshape_info.mult_interleave4x4_height(); - - TensorShape tensor_shape0{ input0->tensor_shape() }; - tensor_shape0.set(0, k); - tensor_shape0.set(1, m); - - TensorShape tensor_shape1{ input1->tensor_shape() }; - tensor_shape1.set(0, n); - tensor_shape1.set(1, k); - - const TensorInfo tensor_info0 = input0->clone()->set_tensor_shape(tensor_shape0); - const TensorInfo tensor_info1 = input1->clone()->set_tensor_shape(tensor_shape1); - - const TensorInfo tensor_info_reshaped0 = input0->clone()->set_tensor_shape(compute_interleaved_shape(tensor_info0, mult_interleave4x4_height)); - const TensorInfo tensor_info_reshaped1 = input1->clone()->set_tensor_shape(compute_transpose1xW_with_element_size_shape(tensor_info1, mult_transpose1xW_width)); - - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input0, &tensor_info_reshaped0); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input1, &tensor_info_reshaped1); - - if(output->total_size() != 0) - { - ARM_COMPUTE_RETURN_ERROR_ON(output->dimension(0) != static_cast<size_t>(n)); - ARM_COMPUTE_RETURN_ERROR_ON(output->dimension(1) != static_cast<size_t>(m)); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input0, output); - } - } - - return Status{}; -} - -inline std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input0, ITensorInfo *input1, ITensorInfo *output, - bool is_interleaved_transposed, const GEMMReshapeInfo &reshape_info, - GPUTarget gpu_target, ElementsProcessed &num_elements_processed) -{ - ARM_COMPUTE_UNUSED(gpu_target); - - // Output tensor auto inizialitation if not yet initialized - TensorShape tensor_shape{ input0->tensor_shape() }; - tensor_shape.set(0, is_interleaved_transposed ? reshape_info.n() : input1->dimension(0)); - tensor_shape.set(1, is_interleaved_transposed ? reshape_info.m() : input0->dimension(1)); - - auto_init_if_empty(*output, input0->clone()->set_tensor_shape(tensor_shape)); - - bool window_changed = false; - Window win{}; - - const DataType data_type = input0->data_type(); - unsigned int &num_elems_processed_per_iteration_x = num_elements_processed[0]; - unsigned int &num_elems_processed_per_iteration_y = num_elements_processed[1]; - - if(is_interleaved_transposed) - { - // Configure window kernel - num_elems_processed_per_iteration_x = max_gc_vector_width / data_size_from_type(data_type); - num_elems_processed_per_iteration_y = 4; - - win = calculate_max_window(*output, Steps(num_elems_processed_per_iteration_x, num_elems_processed_per_iteration_y)); - - AccessWindowRectangle input0_access(input0, 0, 0, num_elems_processed_per_iteration_y, 1, 1.f, 0.25f); - AccessWindowTranspose input1_access(input1, 0, 0, num_elems_processed_per_iteration_x, 1, 0.f, 0.25f); - AccessWindowRectangle output_access(output, 0, 0, num_elems_processed_per_iteration_x, num_elems_processed_per_iteration_y); - - update_window_and_padding(win, input0_access, input1_access, output_access); - - output_access.set_valid_region(win, ValidRegion(Coordinates(), output->tensor_shape())); - } - else // The input tensors have not been reshaped - { - // Special case for 1xN, 2xN, 3xN and 4xN input0 tensor. - num_elems_processed_per_iteration_y = std::min(static_cast<int>(output->dimension(1)), 4); - - switch(data_type) - { - case DataType::F16: - num_elems_processed_per_iteration_x = 4; - break; - - case DataType::F32: - num_elems_processed_per_iteration_x = max_gc_vector_width / data_size_from_type(data_type); - break; - - default: - ARM_COMPUTE_ERROR("Current data type is not supported"); - break; - } - - win = calculate_max_window(*output, Steps(num_elems_processed_per_iteration_x, num_elems_processed_per_iteration_y)); - - AccessWindowStatic input0_access(input0, 0, 0, ceil_to_multiple(input0->dimension(0), 8), ceil_to_multiple(input0->dimension(1), num_elems_processed_per_iteration_y)); - AccessWindowStatic input1_access(input1, 0, 0, ceil_to_multiple(input1->dimension(0), num_elems_processed_per_iteration_x), input1->dimension(1)); - AccessWindowRectangle output_access(output, 0, 0, num_elems_processed_per_iteration_x, num_elems_processed_per_iteration_y); - - update_window_and_padding(win, input0_access, input1_access, output_access); - - Coordinates coord; - coord.set_num_dimensions(output->num_dimensions()); - output_access.set_valid_region(win, ValidRegion(coord, output->tensor_shape())); - } - - Status err = (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{}; - return std::make_pair(err, win); -} -} // namespace - -GCGEMMMatrixMultiplyKernel::GCGEMMMatrixMultiplyKernel() - : _input0(nullptr), _input1(nullptr), _output(nullptr) -{ -} - -void GCGEMMMatrixMultiplyKernel::configure(const IGCTensor *input0, const IGCTensor *input1, IGCTensor *output, float alpha, bool is_interleaved_transposed, const GEMMReshapeInfo &reshape_info) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(input0, input1, output); - - // Perform validate step - ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input0->info(), input1->info(), output->info(), is_interleaved_transposed, reshape_info)); - - _input0 = input0; - _input1 = input1; - _output = output; - - // Get target architecture - GPUTarget gpu_target = get_target(); - - ElementsProcessed num_elements_processed{}; - - // Configure kernel window - auto win_config = validate_and_configure_window(input0->info(), input1->info(), output->info(), is_interleaved_transposed, reshape_info, gpu_target, num_elements_processed); - ARM_COMPUTE_ERROR_THROW_ON(win_config.first); - IGCKernel::configure(win_config.second); - - // Create build options - std::set<std::string> build_opts; - std::string kernel_name; - - build_opts.emplace("#define LOCAL_SIZE_X " + support::cpp11::to_string(1)); - build_opts.emplace("#define LOCAL_SIZE_Y " + support::cpp11::to_string(1)); - build_opts.emplace("#define LOCAL_SIZE_Z " + support::cpp11::to_string(1)); - build_opts.emplace("#define COLS_A " + support::cpp11::to_string(input0->info()->dimension(0))); - build_opts.emplace("#define COLS_B " + support::cpp11::to_string(input1->info()->dimension(0))); - build_opts.emplace("#define ALPHA " + float_to_string_with_full_precision(alpha)); - - // Check if the output tensor is a vector. If so,the kernel runs the vector-matrix multiplication - if(is_interleaved_transposed) - { - const int mult_transpose1xW_width = reshape_info.mult_transpose1xW_width(); - const int mult_interleave4x4_height = reshape_info.mult_interleave4x4_height(); - - build_opts.emplace("#define MULT_TRANSPOSE1XW_WIDTH " + support::cpp11::to_string(mult_transpose1xW_width)); - build_opts.emplace("#define MULT_INTERLEAVE4X4_HEIGHT " + support::cpp11::to_string(mult_interleave4x4_height)); - - switch(input0->info()->data_type()) - { - case DataType::F16: - build_opts.emplace("#define DATA_TYPE_FP16"); - break; - - case DataType::F32: - build_opts.emplace("#define DATA_TYPE_FP32"); - break; - - default: - ARM_COMPUTE_ERROR("Current data type is not supported"); - break; - } - - build_opts.emplace("#define GEMM_MM_INTERLEAVED_TRANSPOSED"); - - kernel_name = "gemm_mm_interleaved_transposed"; - } - else - { - // Special case for 1xN, 2xN, 3xN and 4xN input0 tensor - - GPUTarget arch_target = get_arch_from_target(gpu_target); - switch(input0->info()->data_type()) - { - case DataType::F16: - build_opts.emplace("#define DATA_TYPE_FP16"); - build_opts.emplace("#define MM_PROCESS_4X_OPTIMIZED"); - build_opts.emplace("#define GEMM_MM_FLOATING_POINT"); - break; - - case DataType::F32: - build_opts.emplace("#define DATA_TYPE_FP32"); - - if(arch_target == GPUTarget::BIFROST && input0->info()->num_dimensions() != 1) - { - build_opts.emplace("#define GEMM_MM_FLOATING_POINT_BIFROST"); - } - else - { - build_opts.emplace("#define GEMM_MM_FLOATING_POINT"); - } - break; - - default: - ARM_COMPUTE_ERROR("Current data type is not supported"); - break; - } - - build_opts.emplace("#define NUM_ELEMS_PROCESSED_PER_THREAD_X " + support::cpp11::to_string(num_elements_processed.x())); - build_opts.emplace("#define NUM_ELEMS_PROCESSED_PER_THREAD_Y " + support::cpp11::to_string(num_elements_processed.y())); - - kernel_name = "gemm_mm_floating_point"; - } - - // Create kernel - _kernel = GCKernelLibrary::get().create_kernel(kernel_name, build_opts); -} - -Status GCGEMMMatrixMultiplyKernel::validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *output, float alpha, bool is_interleaved_transposed, - const GEMMReshapeInfo &reshape_info, GPUTarget gpu_target) -{ - ARM_COMPUTE_UNUSED(alpha); - ElementsProcessed num_elements_processed{}; - ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input0, input1, output, is_interleaved_transposed, reshape_info)); - ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input0->clone().get(), - input1->clone().get(), - output->clone().get(), - is_interleaved_transposed, - reshape_info, - gpu_target, - num_elements_processed) - .first); - return Status{}; -} - -void GCGEMMMatrixMultiplyKernel::run(const Window &window) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IGCKernel::window(), window); - - _kernel.use(); - - Window slice = window.first_slice_window_2D(); - Window slice_matrix_b = slice; - - slice_matrix_b.set(Window::DimX, Window::Dimension(0, 1, 1)); - slice_matrix_b.set(Window::DimY, Window::Dimension(0, 1, 1)); - - do - { - Window slice_b = slice; - // Don't slice matrix B along the z dimension if matrix B has just 2 dimensions and matrix A more than 2 - // This scenario can happen when the the matrix multiplication is used to perform a convolution operation - if(_input1->info()->num_dimensions() < 3) - { - slice_b = slice_matrix_b; - } - - unsigned int idx = 0; - - add_2D_tensor_argument(idx, _input0, 1, slice); - add_2D_tensor_argument(idx, _input1, 2, slice_b); - add_2D_tensor_argument(idx, _output, 3, slice); - _kernel.update_shader_params(); - enqueue(*this, slice); - } - while(window.slide_window_slice_2D(slice)); -} diff --git a/src/core/GLES_COMPUTE/kernels/GCGEMMTranspose1xWKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCGEMMTranspose1xWKernel.cpp deleted file mode 100644 index 1d6ef3d0e8..0000000000 --- a/src/core/GLES_COMPUTE/kernels/GCGEMMTranspose1xWKernel.cpp +++ /dev/null @@ -1,127 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/core/GLES_COMPUTE/kernels/GCGEMMTranspose1xWKernel.h" - -#include "arm_compute/core/Error.h" -#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h" -#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h" -#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h" -#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/core/Validate.h" -#include "arm_compute/core/Window.h" -#include "src/core/AccessWindowTranspose.h" -#include "src/core/helpers/AutoConfiguration.h" -#include "src/core/helpers/WindowHelpers.h" -#include "support/StringSupport.h" - -#include <cmath> - -using namespace arm_compute; - -void GCGEMMTranspose1xWKernel::configure(const IGCTensor *input, IGCTensor *output) -{ - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32); - ARM_COMPUTE_ERROR_ON_NULLPTR(output); - - TensorShape output_shape{ input->info()->tensor_shape() }; - const size_t transpose_w = 16 / input->info()->element_size(); - output_shape.set(0, input->info()->dimension(1) * transpose_w); - output_shape.set(1, static_cast<size_t>(std::ceil((input->info()->dimension(0) / static_cast<float>(transpose_w))))); - - // Output tensor auto inizialitation if not yet initialized - auto_init_if_empty(*output->info(), output_shape, 1, input->info()->data_type()); - - ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); - ARM_COMPUTE_ERROR_ON_MISMATCHING_DIMENSIONS(output->info()->tensor_shape(), output_shape); - - const unsigned int num_elems_processed_per_iteration = 16 / input->info()->element_size(); - const int scale_x = num_elems_processed_per_iteration; - - _input = input; - _output = output; - - std::set<std::string> build_opts; - std::string dt_name = (input->info()->data_type() == DataType::F32) ? "DATA_TYPE_FP32" : "DATA_TYPE_FP16"; - build_opts.emplace(("#define " + dt_name)); - build_opts.emplace("#define LOCAL_SIZE_X " + support::cpp11::to_string(1)); - build_opts.emplace("#define LOCAL_SIZE_Y " + support::cpp11::to_string(1)); - build_opts.emplace("#define LOCAL_SIZE_Z " + support::cpp11::to_string(1)); - /* - * Following an example of how the transposition1xW works when the input data type is F32 - * - * |a00 a01 a02 a03| - * |a10 a11 a12 a13| - * |a20 a21 a22 a23| = | a00 a01 a02 a03 || a10 a11 a12 a13 || a20 a21 a22 a23 || a30 a31 a32 a33 | - * |a30 a31 a32 a33| - * - * The output matrix will have the following shape: [ height * W, ceil(width / W) ], where W = (16 / element size of the tensor) - */ - // Create kernel - build_opts.emplace("#define GEMM_TRANSPOSE1xW"); - _kernel = GCKernelLibrary::get().create_kernel("gemm_transpose1x4", build_opts); - - // Configure window - Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration)); - - ARM_COMPUTE_ERROR_ON_MSG((win.x().end() / scale_x) == 0, "Transposed shape would be 0 in the second dimension"); - - AccessWindowHorizontal input_access(input->info(), 0, num_elems_processed_per_iteration); - AccessWindowTranspose output_access(output->info(), 0, 0, num_elems_processed_per_iteration, 1, scale_x, 1.f / scale_x); - - update_window_and_padding(win, input_access, output_access); - - output->info()->set_valid_region(ValidRegion(Coordinates(), output->info()->tensor_shape())); - - IGCKernel::configure(win); -} - -void GCGEMMTranspose1xWKernel::run(const Window &window) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IGCKernel::window(), window); - - _kernel.use(); - - // Output is transposed - Window out_window(window); - out_window.set(Window::DimX, window.y()); - out_window.set(Window::DimY, window.x()); - - Window in_slice = window.first_slice_window_2D(); - Window out_slice = out_window.first_slice_window_2D(); - - do - { - unsigned int idx = 0; - add_2D_tensor_argument(idx, _input, 1, in_slice); - add_2D_tensor_argument(idx, _output, 2, out_slice); - - _kernel.update_shader_params(); - - enqueue(*this, in_slice); - } - while(window.slide_window_slice_2D(in_slice) && out_window.slide_window_slice_2D(out_slice)); -} diff --git a/src/core/GLES_COMPUTE/kernels/GCIm2ColKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCIm2ColKernel.cpp deleted file mode 100644 index c12dd38cb4..0000000000 --- a/src/core/GLES_COMPUTE/kernels/GCIm2ColKernel.cpp +++ /dev/null @@ -1,304 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "arm_compute/core/GLES_COMPUTE/kernels/GCIm2ColKernel.h" - -#include "arm_compute/core/Error.h" -#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h" -#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h" -#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h" -#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/Size2D.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/core/Validate.h" -#include "src/core/AccessWindowStatic.h" -#include "src/core/helpers/AutoConfiguration.h" -#include "src/core/helpers/WindowHelpers.h" -#include "support/StringSupport.h" - -#include <cmath> -#include <tuple> - -using namespace arm_compute; - -namespace -{ -Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output) -{ - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32); - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(output); - - // Checks performed when output is configured - if(output->total_size() != 0) - { - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); - } - - return Status{}; -} -} // namespace - -GCIm2ColKernel::GCIm2ColKernel() - : _input(nullptr), _output(nullptr), _convolved_dims(), _kernel_dims(), _num_elems_processed_per_iteration(1), _run_func(nullptr) -{ -} - -void GCIm2ColKernel::configure(const IGCTensor *input, IGCTensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, const Size2D &dilation) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); - - // Perform validation step - ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info())); - - _input = input; - _output = output; - - // Create kernel - std::set<std::string> build_opts; - std::string dt_name = (input->info()->data_type() == DataType::F32) ? "DATA_TYPE_FP32" : "DATA_TYPE_FP16"; - build_opts.emplace("#define LOCAL_SIZE_X " + support::cpp11::to_string(1)); - build_opts.emplace("#define LOCAL_SIZE_Y " + support::cpp11::to_string(1)); - build_opts.emplace("#define LOCAL_SIZE_Z " + support::cpp11::to_string(1)); - build_opts.insert("#define " + dt_name); - - if(has_bias) - { - build_opts.emplace("#define HAS_BIAS"); - } - - int stride_x = 0; - int stride_y = 0; - - std::tie(stride_x, stride_y) = conv_info.stride(); - _kernel_dims = std::make_pair(kernel_dims.width, kernel_dims.height); - - const bool run_img2col_reduced = (output->info()->dimension(0) == (input->info()->dimension(0) * input->info()->dimension(1) * input->info()->dimension(2))) && (TensorShape::num_max_dimensions >= 4) - && (std::equal(input->info()->tensor_shape().cbegin() + 3, - input->info()->tensor_shape().cend(), - output->info()->tensor_shape().cbegin() + 1)) - && ((stride_x == 1) && (stride_y == 1) && !conv_info.has_padding()) - && (dilation == Size2D(1U, 1U)); - - std::string kernel_name = "im2col_generic"; - if(!run_img2col_reduced) - { - if(input->info()->data_type() == DataType::F16 && _kernel_dims == std::pair<unsigned int, unsigned int>(1, 1)) - { - build_opts.emplace("#define KERNEL_1x1"); - } - - build_opts.emplace("#define IM2COL_GENERIC"); - _convolved_dims = scaled_dimensions(input->info()->dimension(0), input->info()->dimension(1), - kernel_dims.width, kernel_dims.height, - conv_info, dilation); - _num_elems_processed_per_iteration = (input->info()->data_type() == DataType::F32) ? 1 : 2; - - build_opts.emplace("#define KERNEL_WIDTH " + support::cpp11::to_string(kernel_dims.width)); - build_opts.emplace("#define KERNEL_HEIGHT " + support::cpp11::to_string(kernel_dims.height)); - build_opts.emplace("#define KERNEL_DEPTH " + support::cpp11::to_string(input->info()->dimension(2))); - build_opts.emplace("#define CONVOLVED_WIDTH " + support::cpp11::to_string(_convolved_dims.first)); - build_opts.emplace("#define CONVOLVED_HEIGHT " + support::cpp11::to_string(_convolved_dims.second)); - build_opts.emplace("#define STRIDE_X " + support::cpp11::to_string(conv_info.stride().first)); - build_opts.emplace("#define STRIDE_Y " + support::cpp11::to_string(conv_info.stride().second)); - build_opts.emplace("#define PAD_LEFT " + support::cpp11::to_string(conv_info.pad_left())); - build_opts.emplace("#define PAD_TOP " + support::cpp11::to_string(conv_info.pad_top())); - build_opts.emplace("#define PAD_RIGHT " + support::cpp11::to_string(conv_info.pad_right())); - build_opts.emplace("#define PAD_BOTTOM " + support::cpp11::to_string(conv_info.pad_bottom())); - build_opts.emplace("#define SRC_WIDTH " + support::cpp11::to_string(input->info()->dimension(0))); - build_opts.emplace("#define SRC_HEIGHT " + support::cpp11::to_string(input->info()->dimension(1))); - build_opts.emplace("#define DILATION_X " + support::cpp11::to_string(dilation.x())); - build_opts.emplace("#define DILATION_Y " + support::cpp11::to_string(dilation.y())); - - _run_func = &GCIm2ColKernel::run_generic; - } - else - { - build_opts.emplace("#define IM2COL_REDUCED"); - kernel_name = "im2col_reduced"; - - if(input->info()->data_type() == DataType::F32) - { - _num_elems_processed_per_iteration = 4 / input->info()->element_size(); - } - else if(input->info()->data_type() == DataType::F16) - { - int input_width = input->info()->dimension(0); - int input_height = input->info()->dimension(1); - - build_opts.emplace("#define IMAGE_SIZE " + support::cpp11::to_string(input_width * input_height)); - if(input_width % 8 == 0) - { - _num_elems_processed_per_iteration = 8; - build_opts.emplace("#define IM2COL_REDUCED_8X"); - } - else if(input_width % 4 == 0) - { - _num_elems_processed_per_iteration = 4; - build_opts.emplace("#define IM2COL_REDUCED_4X"); - } - else if(input_width % 2 == 0) - { - _num_elems_processed_per_iteration = 2; - build_opts.emplace("#define IM2COL_REDUCED_2X"); - } - else - { - _num_elems_processed_per_iteration = 2; - build_opts.emplace("#define IM2COL_REDUCED_GENERIC"); - } - } - - _run_func = &GCIm2ColKernel::run_reduced; - } - - // Create kernel - _kernel = static_cast<GCKernel>(GCKernelLibrary::get().create_kernel(kernel_name, build_opts)); - - // Configure kernel window - Window win = calculate_max_window(*input->info(), Steps(_num_elems_processed_per_iteration)); - - if(input->info()->data_type() == DataType::F16) - { - // Calculate input right and bottom border - const int input_width = input->info()->dimension(0); - const int input_height = input->info()->dimension(1); - int input_total_width = input->info()->padding().left + input_width + input->info()->padding().right; - int input_padding_right = ceil_to_multiple(input_total_width, _num_elems_processed_per_iteration) - input_total_width; - input_total_width = input_width + input_padding_right + input->info()->padding().right; - AccessWindowStatic input_access(input->info(), 0, 0, input_total_width, input_height); - - // Calculate output right and bottom border - const int output_width = output->info()->dimension(0); - const int output_height = output->info()->dimension(1); - const int output_padding_right = ceil_to_multiple(output_width, _num_elems_processed_per_iteration) - output_width; - AccessWindowStatic output_access(output->info(), 0, 0, output_width + output_padding_right, output_height); - - update_window_and_padding(win, input_access, output_access); - } - - output->info()->set_valid_region(ValidRegion(Coordinates(), output->info()->tensor_shape())); - - if(!run_img2col_reduced) - { - // set the Z dimension's step same size as the whole dimension so that one can't split across the Z dimension - win.set_dimension_step(Window::DimZ, win[Window::DimZ].end() - win[Window::DimZ].start()); - } - - IGCKernel::configure(win); -} - -Status GCIm2ColKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, const Size2D &dilation) -{ - ARM_COMPUTE_UNUSED(kernel_dims); - ARM_COMPUTE_UNUSED(conv_info); - ARM_COMPUTE_UNUSED(has_bias); - ARM_COMPUTE_UNUSED(dilation); - ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output)); - return Status{}; -} - -void GCIm2ColKernel::run(const Window &window) -{ - ARM_COMPUTE_ERROR_ON(_run_func == nullptr); - (this->*_run_func)(window); -} - -void GCIm2ColKernel::run_generic(const Window &window) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_MISMATCHING_WINDOWS(IGCKernel::window(), window); - - // Get initial windows - Window window_collapsed = window.collapse_if_possible(IGCKernel::window(), Window::DimZ); - - // Change the Z dimension's step back to 1 - window_collapsed.set_dimension_step(Window::DimZ, 1); - - Window slice = window_collapsed.first_slice_window_3D(); - Window slice_in = window_collapsed.first_slice_window_3D(); - Window slice_out = window_collapsed.first_slice_window_3D(); - - // Setup slice - slice.set(Window::DimX, Window::Dimension(0, static_cast<int>(_convolved_dims.first), 1)); - slice.set(Window::DimY, Window::Dimension(0, static_cast<int>(_convolved_dims.second), 1)); - - // Setup output slice - slice_out.set(Window::DimX, Window::Dimension(0, _output->info()->dimension(0), _num_elems_processed_per_iteration)); - slice_out.set(Window::DimY, Window::Dimension(0, _output->info()->dimension(1), 1)); - slice_out.set(Window::DimZ, Window::Dimension(0, 1, 1)); - - // we need top/left pad to be included in valid region - if(_input->info()->data_type() == DataType::F16) - { - (dynamic_cast<TensorInfo *>(_input->info()))->init(_input->info()->tensor_shape(), _input->info()->num_channels(), _input->info()->data_type(), _input->info()->strides_in_bytes(), 0, - _input->info()->total_size()); - } - - _kernel.use(); - - do - { - unsigned int idx = 0; - add_3D_tensor_argument(idx, _input, 1, slice_in); - add_2D_tensor_argument(idx, _output, 2, slice_out); - _kernel.set_argument(idx++, static_cast<unsigned int>(_input->info()->strides_in_bytes()[3])); - _kernel.set_argument(idx++, static_cast<unsigned int>(_output->info()->strides_in_bytes()[3])); - _kernel.update_shader_params(); - - enqueue(*this, slice); - } - while(window_collapsed.slide_window_slice_3D(slice) && window_collapsed.slide_window_slice_3D(slice_out) && window_collapsed.slide_window_slice_3D(slice_in)); -} - -void GCIm2ColKernel::run_reduced(const Window &window) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_MISMATCHING_WINDOWS(IGCKernel::window(), window); - - Window out_window; - out_window.use_tensor_dimensions(_output->info()->tensor_shape()); - - Window out_slice = out_window.first_slice_window_1D(); - Window in_slice = window.first_slice_window_3D(); - - _kernel.use(); - - // Run kernel - do - { - // Set arguments - unsigned int idx = 0; - - add_3D_tensor_argument(idx, _input, 1, in_slice); - add_1D_tensor_argument(idx, _output, 2, out_slice); - _kernel.set_argument(idx++, _input->info()->dimension(0)); - _kernel.set_argument(idx++, _input->info()->dimension(1)); - _kernel.update_shader_params(); - - enqueue(*this, in_slice); - } - while(window.slide_window_slice_3D(in_slice) && out_window.slide_window_slice_1D(out_slice)); -} diff --git a/src/core/GLES_COMPUTE/kernels/GCNormalizationLayerKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCNormalizationLayerKernel.cpp deleted file mode 100644 index c29d9fc4d5..0000000000 --- a/src/core/GLES_COMPUTE/kernels/GCNormalizationLayerKernel.cpp +++ /dev/null @@ -1,123 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/core/GLES_COMPUTE/kernels/GCNormalizationLayerKernel.h" - -#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h" -#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h" -#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Utils.h" -#include "arm_compute/core/Validate.h" -#include "arm_compute/core/Window.h" -#include "src/core/helpers/AutoConfiguration.h" -#include "src/core/helpers/WindowHelpers.h" -#include "support/StringSupport.h" - -#include <string> - -using namespace arm_compute; - -GCNormalizationLayerKernel::GCNormalizationLayerKernel() - : _input(nullptr), _squared_input(nullptr), _output(nullptr), _border_size(0) -{ -} - -BorderSize GCNormalizationLayerKernel::border_size() const -{ - return _border_size; -} - -void GCNormalizationLayerKernel::configure(const IGCTensor *input, const IGCTensor *squared_input, IGCTensor *output, NormalizationLayerInfo norm_info) -{ - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32); - ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); - ARM_COMPUTE_ERROR_ON_MSG(!(norm_info.norm_size() % 2), "Normalization size should be odd"); - ARM_COMPUTE_ERROR_ON_MSG(norm_info.type() == NormType::IN_MAP_2D, "2D In-Map Normalization not implemented"); - - // Set build options - std::set<std::string> build_opts; - - _input = input; - _squared_input = squared_input; - _output = output; - - const bool is_in_map = norm_info.is_in_map(); - const unsigned int border_width = is_in_map ? std::min(norm_info.norm_size() / 2, 3U) : 0; - _border_size = BorderSize(0, border_width); - - // Set kernel static arguments - std::string func_name = ((norm_info.type() == NormType::IN_MAP_1D) ? "IN_MAP_1D" : "CROSS_MAP"); - build_opts.emplace(("#define " + func_name)); - build_opts.emplace(("#define COEFF " + float_to_string_with_full_precision(norm_info.scale_coeff()))); - build_opts.emplace(("#define BETA " + float_to_string_with_full_precision(norm_info.beta()))); - build_opts.emplace(("#define KAPPA " + float_to_string_with_full_precision(norm_info.kappa()))); - build_opts.emplace(("#define RADIUS " + support::cpp11::to_string(norm_info.norm_size() / 2))); - build_opts.emplace(("#define LOCAL_SIZE_X " + support::cpp11::to_string(1))); - build_opts.emplace(("#define LOCAL_SIZE_Y " + support::cpp11::to_string(1))); - build_opts.emplace(("#define LOCAL_SIZE_Z " + support::cpp11::to_string(1))); - - // Create kernel - _kernel = static_cast<GCKernel>(GCKernelLibrary::get().create_kernel("normalization_layer", build_opts)); - - // Configure kernel window - const unsigned int num_elems_processed_per_iteration = 1; - const unsigned int num_elems_read_per_iteration = num_elems_processed_per_iteration + 2 * (norm_info.norm_size() / 2); - - Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration)); - - AccessWindowHorizontal input_access(input->info(), -_border_size.left, num_elems_read_per_iteration); - AccessWindowHorizontal squared_input_access(squared_input->info(), -_border_size.left, num_elems_read_per_iteration); - AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration); - - update_window_and_padding(win, input_access, squared_input_access, output_access); - - output_access.set_valid_region(win, input->info()->valid_region()); - - IGCKernel::configure(win); -} - -void GCNormalizationLayerKernel::run(const Window &window) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window); - - _kernel.use(); - - Window slice = window.first_slice_window_3D(); - - do - { - unsigned int idx = 0; - unsigned int binding = 1; - add_3D_tensor_argument(idx, _input, binding++, slice); - add_3D_tensor_argument(idx, _squared_input, binding++, slice); - add_3D_tensor_argument(idx, _output, binding++, slice); - - _kernel.update_shader_params(); - - enqueue(*this, slice); - } - while(window.slide_window_slice_3D(slice)); -} diff --git a/src/core/GLES_COMPUTE/kernels/GCNormalizePlanarYUVLayerKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCNormalizePlanarYUVLayerKernel.cpp deleted file mode 100644 index 971b540a83..0000000000 --- a/src/core/GLES_COMPUTE/kernels/GCNormalizePlanarYUVLayerKernel.cpp +++ /dev/null @@ -1,169 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/core/GLES_COMPUTE/kernels/GCNormalizePlanarYUVLayerKernel.h" - -#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h" -#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h" -#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/Utils.h" -#include "arm_compute/core/Validate.h" -#include "arm_compute/core/Window.h" -#include "src/core/AccessWindowStatic.h" -#include "src/core/helpers/AutoConfiguration.h" -#include "src/core/helpers/WindowHelpers.h" - -#include "support/StringSupport.h" - -using namespace arm_compute; - -namespace -{ -Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *mean, const ITensorInfo *std) -{ - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16); - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(output); - ARM_COMPUTE_RETURN_ERROR_ON(input->data_layout() != DataLayout::NCHW); - - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, mean, std); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(mean, std); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(mean->num_dimensions() > 1, "mean and std must be vectors"); - - const unsigned int channel_idx = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::CHANNEL); - ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(channel_idx) != mean->dimension(0)); - - // Checks performed when output is configured - if(output->total_size() != 0) - { - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output); - } - - return Status{}; -} - -std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *output, ITensorInfo *mean, ITensorInfo *std) -{ - // Output tensor auto initialization if not yet initialized - auto_init_if_empty(*output, *input->clone()); - - const unsigned int num_elems_processed_per_iteration = 4; - - // Configure kernel window - Window win = calculate_max_window(*input, Steps(num_elems_processed_per_iteration)); - - AccessWindowHorizontal input_access(input, 0, num_elems_processed_per_iteration); - AccessWindowHorizontal output_access(output, 0, num_elems_processed_per_iteration); - const int mean_padding = ceil_to_multiple(mean->dimension(0), num_elems_processed_per_iteration) - mean->dimension(0); - const int std_padding = ceil_to_multiple(std->dimension(0), num_elems_processed_per_iteration) - std->dimension(0); - AccessWindowStatic mean_access(mean, 0, 0, mean->dimension(0) + mean_padding, mean->dimension(1)); - AccessWindowStatic std_access(std, 0, 0, std->dimension(0) + std_padding, std->dimension(1)); - - const bool window_changed = update_window_and_padding(win, input_access, output_access, mean_access, std_access); - output_access.set_valid_region(win, input->valid_region()); - - Status err = (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{}; - return std::make_pair(err, win); -} -} // namespace - -GCNormalizePlanarYUVLayerKernel::GCNormalizePlanarYUVLayerKernel() - : _input(nullptr), _output(nullptr), _mean(nullptr), _std(nullptr) -{ -} - -void GCNormalizePlanarYUVLayerKernel::configure(const IGCTensor *input, IGCTensor *output, const IGCTensor *mean, const IGCTensor *std) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(input, output, mean, std); - - // Output tensor auto initialization if not yet initialized - auto_init_if_empty(*output->info(), *input->info()->clone()); - - // Perform validation step - ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), mean->info(), std->info())); - - _input = input; - _output = output; - _mean = mean; - _std = std; - - // Set build options - std::set<std::string> build_opts; - build_opts.emplace(("#define LOCAL_SIZE_X " + support::cpp11::to_string(1))); - build_opts.emplace(("#define LOCAL_SIZE_Y " + support::cpp11::to_string(1))); - build_opts.emplace(("#define LOCAL_SIZE_Z " + support::cpp11::to_string(1))); - - // Create kernel - _kernel = static_cast<GCKernel>(GCKernelLibrary::get().create_kernel("normalize_planar_yuv_layer", build_opts)); - - // Configure kernel window - auto win_config = validate_and_configure_window(input->info(), output->info(), mean->info(), std->info()); - ARM_COMPUTE_ERROR_THROW_ON(std::get<0>(win_config)); - - IGCKernel::configure(std::get<1>(win_config)); -} - -Status GCNormalizePlanarYUVLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *mean, const ITensorInfo *std) -{ - ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, mean, std)); - ARM_COMPUTE_RETURN_ON_ERROR(std::get<0>(validate_and_configure_window(input->clone().get(), output->clone().get(), mean->clone().get(), std->clone().get()))); - return Status{}; -} - -void GCNormalizePlanarYUVLayerKernel::run(const Window &window) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window); - - _kernel.use(); - - _output->set_needs_shifting(true); - - Window slice = window.first_slice_window_3D(); - - Window slice_in; - //slice_in.use_tensor_dimensions(_mean->info()->tensor_shape()); - slice_in = window.first_slice_window_1D(); - slice_in.set(Window::DimX, Window::Dimension(0, 0, 0)); - - unsigned int idx = 2 * num_arguments_per_3D_tensor(); - add_1D_tensor_argument(idx, _mean, 3, slice_in); - add_1D_tensor_argument(idx, _std, 4, slice_in); - - slice_in = window.first_slice_window_3D(); - - slice.shift(Window::DimX, -(_output->info()->padding()).left); - - do - { - idx = 0; - add_3D_tensor_argument(idx, _input, 1, slice_in); - add_3D_tensor_argument(idx, _output, 2, slice); - - _kernel.update_shader_params(); - - enqueue(*this, slice); - } - while(window.slide_window_slice_3D(slice) && window.slide_window_slice_3D(slice_in)); -} diff --git a/src/core/GLES_COMPUTE/kernels/GCPixelWiseMultiplicationKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCPixelWiseMultiplicationKernel.cpp deleted file mode 100644 index 76559146ae..0000000000 --- a/src/core/GLES_COMPUTE/kernels/GCPixelWiseMultiplicationKernel.cpp +++ /dev/null @@ -1,125 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/core/GLES_COMPUTE/kernels/GCPixelWiseMultiplicationKernel.h" - -#include "arm_compute/core/Error.h" -#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h" -#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h" -#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h" -#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Validate.h" -#include "arm_compute/core/Window.h" -#include "src/core/helpers/AutoConfiguration.h" -#include "src/core/helpers/WindowHelpers.h" -#include "support/StringSupport.h" - -#include <cmath> -#include <cstdlib> -#include <set> -#include <string> -using namespace arm_compute; - -GCPixelWiseMultiplicationKernel::GCPixelWiseMultiplicationKernel() - : _input1(nullptr), _input2(nullptr), _output(nullptr) -{ -} - -void GCPixelWiseMultiplicationKernel::configure(const IGCTensor *input1, const IGCTensor *input2, IGCTensor *output, float scale) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(input1, input2, output); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input1, 1, DataType::F32); - ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input1, input2); - ARM_COMPUTE_ERROR_ON_NULLPTR(output); - ARM_COMPUTE_ERROR_ON_MSG(scale < 0, "Scale cannot be negative. "); - - // Auto initialize output if not initialized - { - set_shape_if_empty(*output->info(), input1->info()->tensor_shape()); - set_format_if_unknown(*output->info(), Format::F32); - } - - ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(input1, input2, output); - ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input1, input2, output); - ARM_COMPUTE_ERROR_ON_MSG(scale < 0, "Scale cannot be negative. "); - - _input1 = input1; - _input2 = input2; - _output = output; - - std::string data_type; - std::string compute_type; - - // Set kernel build options - std::set<std::string> build_opts; - build_opts.emplace("#define LOCAL_SIZE_X " + support::cpp11::to_string(1)); - build_opts.emplace("#define LOCAL_SIZE_Y " + support::cpp11::to_string(1)); - build_opts.emplace("#define LOCAL_SIZE_Z " + support::cpp11::to_string(1)); - - build_opts.emplace("#define SCALE " + support::cpp11::to_string(scale)); - - // Create kernel - _kernel = static_cast<GCKernel>(GCKernelLibrary::get().create_kernel("pixelwise_mul_float", build_opts)); - - // Configure kernel window - constexpr unsigned int num_elems_processed_per_iteration = 1; - - Window win = calculate_max_window(*input1->info(), Steps(num_elems_processed_per_iteration)); - - AccessWindowHorizontal input1_access(input1->info(), 0, num_elems_processed_per_iteration); - AccessWindowHorizontal input2_access(input2->info(), 0, num_elems_processed_per_iteration); - AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration); - - update_window_and_padding(win, input1_access, input2_access, output_access); - - ValidRegion valid_region = intersect_valid_regions(input1->info()->valid_region(), - input2->info()->valid_region()); - output_access.set_valid_region(win, valid_region); - - IGCKernel::configure(win); -} - -void GCPixelWiseMultiplicationKernel::run(const Window &window) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IGCKernel::window(), window); - - _kernel.use(); - - Window slice = window.first_slice_window_3D(); - - do - { - unsigned int idx = 0; - unsigned int binding = 1; - add_3D_tensor_argument(idx, _input1, binding++, slice); - add_3D_tensor_argument(idx, _input2, binding++, slice); - add_3D_tensor_argument(idx, _output, binding++, slice); - - _kernel.update_shader_params(); - enqueue(*this, slice); - } - while(window.slide_window_slice_3D(slice)); -} diff --git a/src/core/GLES_COMPUTE/kernels/GCPoolingLayerKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCPoolingLayerKernel.cpp deleted file mode 100644 index 13efd10532..0000000000 --- a/src/core/GLES_COMPUTE/kernels/GCPoolingLayerKernel.cpp +++ /dev/null @@ -1,374 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/core/GLES_COMPUTE/kernels/GCPoolingLayerKernel.h" - -#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h" -#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h" -#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h" -#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Utils.h" -#include "arm_compute/core/Validate.h" -#include "arm_compute/core/Window.h" -#include "src/core/AccessWindowStatic.h" -#include "src/core/helpers/AutoConfiguration.h" -#include "src/core/helpers/WindowHelpers.h" -#include "support/StringSupport.h" - -#include <set> -#include <string> -#include <tuple> - -using namespace arm_compute; - -namespace -{ -// Internal window config info -using GCPoolingConfig = std::pair<unsigned int, BorderSize>; //num_elems_processed_per_iteration, border_size - -void auto_init(const ITensorInfo *input, ITensorInfo *output, unsigned int pooled_w, unsigned int pooled_h) -{ - TensorShape output_shape{ input->tensor_shape() }; - output_shape.set(0, pooled_w); - output_shape.set(1, pooled_h); - - auto_init_if_empty(*output, input->clone()->set_tensor_shape(output_shape)); -} - -Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const PoolingLayerInfo &pool_info, const ITensorInfo *indices) -{ - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(indices, "Indices not supported in GLES backend"); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32); - ARM_COMPUTE_RETURN_ERROR_ON_MSG((is_data_type_quantized_asymmetric(input->data_type()) && pool_info.pool_type == PoolingType::L2), - "Unsupported combination of parameters!"); - ARM_COMPUTE_RETURN_ERROR_ON(!pool_info.pad_stride_info.padding_is_symmetric()); - - const bool is_global_pooling = pool_info.is_global_pooling; - const unsigned int pool_size = is_global_pooling ? input->tensor_shape().x() : pool_info.pool_size.width; - - ARM_COMPUTE_RETURN_ERROR_ON_MSG(is_global_pooling && (input->tensor_shape().x() != input->tensor_shape().y()), - "Global pooling is supported only with rectangular inputs!"); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(!is_global_pooling && ((pool_info.pad_stride_info.pad().first >= pool_size) || (pool_info.pad_stride_info.pad().second >= pool_size)), - "Invalid pool size and pool pad combination!"); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(pool_info.pool_size.width != pool_info.pool_size.height, "Invalid Pool size, width not equal to height!"); - - // Checks performed when output is configured - if(output->total_size() != 0) - { - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); - - unsigned int pooled_w = 0; - unsigned int pooled_h = 0; - std::tie(pooled_w, pooled_h) = scaled_dimensions(input->dimension(0), - input->dimension(1), - pool_size, - pool_size, - pool_info.pad_stride_info); - ARM_COMPUTE_RETURN_ERROR_ON_MSG((output->dimension(0) != pooled_w) || (output->dimension(1) != pooled_h), - "Invalid output pooling dimensions!"); - } - - return Status{}; -} - -std::tuple<Status, Window, GCPoolingConfig> validate_and_configure_window(ITensorInfo *input, ITensorInfo *output, const PoolingLayerInfo &pool_info) -{ - int pool_pad_x = 0; - int pool_pad_y = 0; - int pool_stride_x = 0; - int pool_stride_y = 0; - unsigned int pooled_w = 0; - unsigned int pooled_h = 0; - int pool_size = pool_info.pool_size.width; - const PadStrideInfo pad_stride_info = pool_info.pad_stride_info; - std::tie(pool_pad_x, pool_pad_y) = pad_stride_info.pad(); - std::tie(pool_stride_x, pool_stride_y) = pad_stride_info.stride(); - - ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); - - // Update pool size in case of global pooling - pool_size = pool_info.is_global_pooling ? input->dimension(0) : pool_size; - - // Check output dimensions - std::tie(pooled_w, pooled_h) = scaled_dimensions(input->dimension(0), - input->dimension(1), - pool_size, - pool_size, - pad_stride_info); - - auto_init(input, output, pooled_w, pooled_h); - - BorderSize border_size = BorderSize(pool_pad_y, pool_pad_x); - - const int input_width = input->dimension(0); - const int input_height = input->dimension(1); - - unsigned int num_elems_processed_per_iteration = 1; - - // Create kernel - if(pool_size == 3) - { - // Check if we have pool3x3 with stride_x less equal than 3. In these cases, run an optimized OpenGLES kernel where - // each thread computes 4 output elements - const bool is_pool3x3_stride_le3 = (pool_size == 3) && (pool_stride_x <= 3); - - int num_elems_read_per_iteration = pool_size; - - if(input->data_type() == DataType::F32) - { - if(is_pool3x3_stride_le3) - { - // Change the number of elements processed and number of elements read per iteration for pooling 3x3 with stride less equal than 3 - num_elems_processed_per_iteration = 4; - num_elems_read_per_iteration = pool_size * (pool_stride_x + 1); - } - } - else - { - if(is_pool3x3_stride_le3) - { - num_elems_processed_per_iteration = 4; - } - else - { - num_elems_processed_per_iteration = 2; - } - } - - const int upper_bound_w = ((pooled_w - 1) * pool_stride_x - pool_pad_x + num_elems_read_per_iteration) - input_width; - const int upper_bound_h = ((pooled_h - 1) * pool_stride_y - pool_pad_y + pool_size) - input_height; - - border_size.right = std::max(upper_bound_w, pool_pad_x); - border_size.bottom = std::max(upper_bound_h, pool_pad_y); - } - else // Run general case - { - if(input->data_type() == DataType::F32) - { - num_elems_processed_per_iteration = 1; - } - else - { - num_elems_processed_per_iteration = 2; - } - - const int upper_bound_w = ((pooled_w - 1) * pool_stride_x - pool_pad_x + pool_size) - input_width; - const int upper_bound_h = ((pooled_h - 1) * pool_stride_y - pool_pad_y + pool_size) - input_height; - - border_size.right = std::max(upper_bound_w, pool_pad_x); - border_size.bottom = std::max(upper_bound_h, pool_pad_y); - } - // Configure kernel window - Window win = calculate_max_window(*output, Steps(num_elems_processed_per_iteration)); - - if(input->data_type() == DataType::F32) - { - AccessWindowStatic input_access(input, -pool_pad_x, -pool_pad_y, input_width + border_size.right, input_height + border_size.bottom); - AccessWindowHorizontal output_access(output, 0, num_elems_processed_per_iteration); - bool window_changed = update_window_and_padding(win, input_access, output_access); - output_access.set_valid_region(win, ValidRegion(Coordinates(), output->tensor_shape())); - Status err = (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{}; - return std::make_tuple(err, win, GCPoolingConfig(num_elems_processed_per_iteration, border_size)); - } - else - { - // Calculate output right and bottom border - const int output_width = output->dimension(0); - const int output_height = output->dimension(1); - const int output_padding_right = ceil_to_multiple(output_width, num_elems_processed_per_iteration) - output_width; - const int output_padding_bottom = ceil_to_multiple(output_height, 1) - output_height; - - const int input_total_width = std::max(int(input->padding().left), int(pool_pad_x)) + input_width + std::max(int(input->padding().right), int(pool_pad_x)); - const int input_padding_right = ceil_to_multiple(input_total_width, num_elems_processed_per_iteration) - input_width - pool_pad_x; - const int input_total_height = std::max(int(input->padding().top), int(pool_pad_y)) + input_height + std::max(int(input->padding().bottom), int(pool_pad_y)); - const int input_padding_bottom = input_total_height - input_height - pool_pad_y; - - // Configure kernel window - AccessWindowStatic input_access(input, -pool_pad_x, -pool_pad_y, input_width + input_padding_right, input_height + input_padding_bottom); - AccessWindowStatic output_access(output, 0, 0, output_width + output_padding_right, output_height + output_padding_bottom); - bool window_changed = update_window_and_padding(win, input_access, output_access); - output_access.set_valid_region(win, ValidRegion(Coordinates(), output->tensor_shape())); - Status err = (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{}; - return std::make_tuple(err, win, GCPoolingConfig(num_elems_processed_per_iteration, border_size)); - } -} -} // namespace - -GCPoolingLayerKernel::GCPoolingLayerKernel() - : _input(nullptr), _output(nullptr), _indices(nullptr), _pool_info(), _border_size(0), _num_elems_processed_per_iteration(1) -{ -} - -BorderSize GCPoolingLayerKernel::border_size() const -{ - return _border_size; -} - -void GCPoolingLayerKernel::configure(const IGCTensor *input, IGCTensor *output, const PoolingLayerInfo &pool_info, IGCTensor *indices) -{ - int pool_pad_x = 0; - int pool_pad_y = 0; - int pool_stride_x = 0; - int pool_stride_y = 0; - unsigned int pooled_w = 0; - unsigned int pooled_h = 0; - const PoolingType pool_type = pool_info.pool_type; - int pool_size = pool_info.pool_size.width; - const PadStrideInfo pad_stride_info = pool_info.pad_stride_info; - const bool exclude_padding = pool_info.exclude_padding; - std::tie(pool_pad_x, pool_pad_y) = pad_stride_info.pad(); - std::tie(pool_stride_x, pool_stride_y) = pad_stride_info.stride(); - - ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); - - // Update pool size in case of global pooling - pool_size = pool_info.is_global_pooling ? input->info()->dimension(0) : pool_size; - - // Check output dimensions - std::tie(pooled_w, pooled_h) = scaled_dimensions(input->info()->dimension(0), - input->info()->dimension(1), - pool_size, - pool_size, - pad_stride_info); - - auto_init(input->info(), output->info(), pooled_w, pooled_h); - - ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), pool_info, (indices) ? indices->info() : nullptr)); - - // Set instance variables - _input = input; - _output = output; - _pool_info = pool_info; - _indices = indices; - // Set build options - std::set<std::string> build_opts; - build_opts.emplace("#define LOCAL_SIZE_X " + support::cpp11::to_string(1)); - build_opts.emplace("#define LOCAL_SIZE_Y " + support::cpp11::to_string(1)); - build_opts.emplace("#define LOCAL_SIZE_Z " + support::cpp11::to_string(1)); - if(input->info()->data_type() == DataType::F32) - { - build_opts.insert("#define DATA_TYPE_FP32"); - } - else - { - build_opts.insert("#define DATA_TYPE_FP16"); - } - if(exclude_padding) - { - build_opts.emplace("#define EXCLUDE_PADDING"); - } - build_opts.emplace(("#define POOL_" + string_from_pooling_type(pool_type))); - build_opts.emplace(("#define STRIDE_X " + support::cpp11::to_string(pool_stride_x))); - build_opts.emplace(("#define MAX_WIDTH " + support::cpp11::to_string(input->info()->dimension(0) + (exclude_padding ? 0 : pool_pad_x)))); - build_opts.emplace(("#define MAX_HEIGHT " + support::cpp11::to_string(input->info()->dimension(1) + (exclude_padding ? 0 : pool_pad_y)))); - build_opts.emplace(("#define STRIDE_Y " + support::cpp11::to_string(pool_stride_y))); - build_opts.emplace(("#define PAD_X " + support::cpp11::to_string(pool_pad_x))); - build_opts.emplace(("#define PAD_Y " + support::cpp11::to_string(pool_pad_y))); - - // Create kernel - if((pool_size == 2) || (pool_size == 3) || (pool_size == 7)) - { - // Check if we have pool3x3 with stride_x less equal than 3. In these cases, run an optimized OpenGLES kernel where - // each thread computes 4 output elements - const bool is_pool3x3_stride_le3 = (pool_size == 3) && (pool_stride_x <= 3); - - std::string kernel_name = "pooling_layer_" + support::cpp11::to_string(pool_size); - if(is_pool3x3_stride_le3) - { - build_opts.insert("#define POOLING_LAYER_3_OPTIMIZED"); - _kernel = static_cast<GCKernel>(GCKernelLibrary::get().create_kernel(kernel_name + "_optimized", build_opts)); - } - else - { - build_opts.insert("#define POOLING_LAYER_" + support::cpp11::to_string(pool_size)); - _kernel = static_cast<GCKernel>(GCKernelLibrary::get().create_kernel(kernel_name, build_opts)); - } - } - else // Run general case - { - build_opts.emplace(("#define POOL_SIZE " + support::cpp11::to_string(pool_size))); - - build_opts.insert("#define POOLING_LAYER_N"); - _kernel = static_cast<GCKernel>(GCKernelLibrary::get().create_kernel("pooling_layer_n", build_opts)); - } - // Configure kernel window - auto win_config = validate_and_configure_window(input->info(), output->info(), pool_info); - ARM_COMPUTE_ERROR_THROW_ON(std::get<0>(win_config)); - - IGCKernel::configure(std::get<1>(win_config)); - GCPoolingConfig pooling_config = std::get<2>(win_config); - _num_elems_processed_per_iteration = pooling_config.first; - _border_size = pooling_config.second; -} - -Status GCPoolingLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const PoolingLayerInfo &pool_info, const ITensorInfo *indices) -{ - ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, pool_info, indices)); - ARM_COMPUTE_RETURN_ON_ERROR(std::get<0>(validate_and_configure_window(input->clone().get(), output->clone().get(), pool_info))); - - return Status{}; -} - -void GCPoolingLayerKernel::run(const Window &window) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window); - - unsigned int pool_pad_x; - unsigned int pool_pad_y; - unsigned int pool_stride_x; - unsigned int pool_stride_y; - std::tie(pool_pad_x, pool_pad_y) = _pool_info.pad_stride_info.pad(); - std::tie(pool_stride_x, pool_stride_y) = _pool_info.pad_stride_info.stride(); - - _kernel.use(); - - _output->set_needs_shifting(true); - - Window window_collapsed = window.collapse_if_possible(IGCKernel::window(), Window::DimZ); - - Window slice = window_collapsed.first_slice_window_3D(); - Window slice_in_orig = window_collapsed.first_slice_window_3D(); - - slice.shift(Window::DimX, -(_output->info()->padding()).left); - - do - { - // Upsample input by pool size - Window in_slice(slice_in_orig); // NOLINT - in_slice.set(Window::DimX, Window::Dimension(in_slice.x().start() - pool_pad_x, in_slice.x().end() * pool_stride_x, pool_stride_x * _num_elems_processed_per_iteration)); - in_slice.set(Window::DimY, Window::Dimension(in_slice.y().start() - pool_pad_y, in_slice.y().end() * pool_stride_y, pool_stride_y)); - - // Set inputs - unsigned int idx = 0; - add_3D_tensor_argument(idx, _input, 1, in_slice); - add_3D_tensor_argument(idx, _output, 2, slice); - - _kernel.update_shader_params(); - enqueue(*this, slice); - } - while(window_collapsed.slide_window_slice_3D(slice) && window_collapsed.slide_window_slice_3D(slice_in_orig)); -} diff --git a/src/core/GLES_COMPUTE/kernels/GCScaleKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCScaleKernel.cpp deleted file mode 100644 index a0795c668f..0000000000 --- a/src/core/GLES_COMPUTE/kernels/GCScaleKernel.cpp +++ /dev/null @@ -1,173 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/core/GLES_COMPUTE/kernels/GCScaleKernel.h" - -#include "arm_compute/core/Error.h" -#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h" -#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h" -#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h" -#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h" -#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Validate.h" -#include "src/core/AccessWindowStatic.h" -#include "src/core/helpers/AutoConfiguration.h" -#include "src/core/helpers/WindowHelpers.h" -#include "support/StringSupport.h" - -#include <set> -#include <string> - -using namespace arm_compute; - -BorderSize GCScaleKernel::border_size() const -{ - return BorderSize(1); -} - -void GCScaleKernel::configure(const IGCTensor *input, IGCTensor *output, const ScaleKernelInfo &info) -{ - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16); - ARM_COMPUTE_ERROR_ON_NULLPTR(output); - ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); - ARM_COMPUTE_ERROR_ON(output == input); - ARM_COMPUTE_ERROR_ON(info.interpolation_policy != InterpolationPolicy::NEAREST_NEIGHBOR); - - _input = input; - _output = output; - - // Compute the ratio between source width/height and destination width/height - const auto wr = static_cast<float>(input->info()->dimension(0)) / static_cast<float>(output->info()->dimension(0)); - const auto hr = static_cast<float>(input->info()->dimension(1)) / static_cast<float>(output->info()->dimension(1)); - - // Compute actual border size - const bool border_undefined = info.border_mode == BorderMode::UNDEFINED; - BorderSize border = border_undefined ? BorderSize(0) : border_size(); - - // Area interpolation behaves as Nearest Neighbour in case of up-sampling - auto interpolation_policy_to_use = info.interpolation_policy; - if(interpolation_policy_to_use == InterpolationPolicy::AREA && wr <= 1.f && hr <= 1.f) - { - interpolation_policy_to_use = InterpolationPolicy::NEAREST_NEIGHBOR; - } - else - { - ARM_COMPUTE_ERROR_ON(interpolation_policy_to_use == InterpolationPolicy::AREA); - } - - // Create kernel - std::set<std::string> build_opts; - build_opts.emplace("#define LOCAL_SIZE_X " + support::cpp11::to_string(1)); - build_opts.emplace("#define LOCAL_SIZE_Y " + support::cpp11::to_string(1)); - build_opts.emplace("#define LOCAL_SIZE_Z " + support::cpp11::to_string(1)); - - build_opts.emplace("#define DATA_TYPE_FP16"); - build_opts.emplace("#define BORDER_SIZE " + support::cpp11::to_string(border.right)); - if(info.sampling_policy == SamplingPolicy::TOP_LEFT) - { - build_opts.emplace("#define SAMPLING_POLICY_TOP_LEFT"); - } - else - { - build_opts.emplace("#define SAMPLING_POLICY_CENTER"); - } - - // Configure kernel window - unsigned int num_elems_processed_per_iteration = 4; - unsigned int input_width_alignment = 2; - - // performance optimization for 2x upscaling with no border - if((fabs(wr - 0.5) < 1e-6) && (fabs(hr - 0.5) < 1e-6) && border_undefined) - { - num_elems_processed_per_iteration = 8; - input_width_alignment = 4; - build_opts.emplace("#define SCALE_NEAREST_8X"); - } - else - { - build_opts.emplace("#define SCALE_NEAREST_GENERIC"); - } - - std::string interpolation_name = string_from_interpolation_policy(interpolation_policy_to_use); // NOLINT - std::transform(interpolation_name.begin(), interpolation_name.end(), interpolation_name.begin(), ::tolower); - std::string kernel_name = "scale_" + interpolation_name; - _kernel = GCKernelLibrary::get().create_kernel(kernel_name, build_opts); - - Window win = calculate_max_window(*output->info(), Steps(num_elems_processed_per_iteration)); - - const ValidRegion &input_valid_region = input->info()->valid_region(); - - const int total_width = border.left + input_valid_region.anchor[0] + input_valid_region.shape[0] + border.right; - const int padding_right = ceil_to_multiple(total_width, input_width_alignment) - border.left - input_valid_region.anchor[0] - input_valid_region.shape[0]; - - // Reads can occur within the valid region of the input - AccessWindowStatic input_access(input->info(), - input_valid_region.anchor[0] - border.left, input_valid_region.anchor[1] - border.top, - input_valid_region.anchor[0] + input_valid_region.shape[0] + padding_right, - input_valid_region.anchor[1] + input_valid_region.shape[1] + border.bottom); - - AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration); - - update_window_and_padding(win, input_access, output_access); - - output_access.set_valid_region(win, calculate_valid_region_scale(*(input->info()), - output->info()->tensor_shape(), - info.interpolation_policy, - info.sampling_policy, - border_undefined)); - - IGCKernel::configure(win); - - unsigned int idx = 2 * num_arguments_per_3D_tensor(); //Skip the tensor parameters - _kernel.set_argument<float>(idx++, static_cast<float>(input->info()->dimension(0))); - _kernel.set_argument<float>(idx++, static_cast<float>(input->info()->dimension(1))); - _kernel.set_argument<float>(idx++, wr); - _kernel.set_argument<float>(idx++, hr); -} - -void GCScaleKernel::run(const Window &window) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window); - - _kernel.use(); - - _output->set_needs_shifting(true); - - Window slice = window.first_slice_window_3D(); - Window slice_in = window.first_slice_window_3D(); - - slice.shift(Window::DimX, -(_output->info()->padding()).left); - - do - { - unsigned int idx = 0; - add_3D_tensor_argument(idx, _input, 1, slice_in); - add_3D_tensor_argument(idx, _output, 2, slice); - _kernel.update_shader_params(); - enqueue(*this, slice); - } - while(window.slide_window_slice_3D(slice) && window.slide_window_slice_3D(slice_in)); -} diff --git a/src/core/GLES_COMPUTE/kernels/GCSoftmaxLayerKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCSoftmaxLayerKernel.cpp deleted file mode 100644 index 39d586da72..0000000000 --- a/src/core/GLES_COMPUTE/kernels/GCSoftmaxLayerKernel.cpp +++ /dev/null @@ -1,278 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/core/GLES_COMPUTE/kernels/GCSoftmaxLayerKernel.h" - -#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h" -#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h" -#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h" -#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Utils.h" -#include "arm_compute/core/Validate.h" -#include "arm_compute/core/Window.h" -#include "src/core/AccessWindowStatic.h" -#include "src/core/helpers/AutoConfiguration.h" -#include "src/core/helpers/WindowHelpers.h" -#include "support/StringSupport.h" - -#include <set> -#include <string> - -using namespace arm_compute; - -void GCLogits1DMaxKernel::configure(const IGCTensor *input, IGCTensor *output) -{ - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32); - ARM_COMPUTE_ERROR_ON_NULLPTR(output); - - // Softmax across the x dimension - TensorShape output_shape{ input->info()->tensor_shape() }; - output_shape.set(0, 1); - - // Output auto initialization if not yet initialized - auto_init_if_empty(*output->info(), output_shape, 1, input->info()->data_type()); - - ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); - ARM_COMPUTE_ERROR_ON_MISMATCHING_DIMENSIONS(output->info()->tensor_shape(), output_shape); - - _input = input; - _output = output; - - // Set build options - std::set<std::string> build_opts; - std::string dt_name = (input->info()->data_type() == DataType::F32) ? "DATA_TYPE_FP32" : "DATA_TYPE_FP16"; - build_opts.insert("#define " + dt_name); - build_opts.emplace("#define LOCAL_SIZE_X " + support::cpp11::to_string(1)); - build_opts.emplace("#define LOCAL_SIZE_Y " + support::cpp11::to_string(1)); - build_opts.emplace("#define LOCAL_SIZE_Z " + support::cpp11::to_string(1)); - build_opts.insert("#define SOFTMAX_LAYER_MAX"); - - // Tell the kernel that the width is not a multiple of 8 - if((input->info()->dimension(0) % 8) != 0) - { - build_opts.insert("#define NON_MULTIPLE_OF_8"); - } - - // Create kernel - _kernel = static_cast<GCKernel>(GCKernelLibrary::get().create_kernel("softmax_layer_max", build_opts)); - - // Set fixed arguments - unsigned int idx = 2 * num_arguments_per_3D_tensor(); //Skip the input and output parameters - _kernel.set_argument(idx++, input->info()->dimension(0)); - - // Configure kernel window - // The kernel loops over all elements in steps of 8 - const unsigned int num_elems_processed_per_iteration = ceil_to_multiple(input->info()->dimension(0), 8); - unsigned int num_elems_written_per_iteration = 1; - if(input->info()->data_type() == DataType::F16) - { - num_elems_written_per_iteration = 2; - } - - Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration)); - AccessWindowHorizontal input_access(input->info(), 0, num_elems_processed_per_iteration); - AccessWindowHorizontal output_access(output->info(), 0, num_elems_written_per_iteration); - - update_window_and_padding(win, input_access, output_access); - - output_access.set_valid_region(win, ValidRegion(Coordinates(), output->info()->tensor_shape())); - - IGCKernel::configure(win); -} - -GCLogits1DShiftExpSumKernel::GCLogits1DShiftExpSumKernel() - : _input(nullptr), _max(nullptr), _output(nullptr), _sum(nullptr) -{ -} - -void GCLogits1DShiftExpSumKernel::configure(const IGCTensor *input, const IGCTensor *max, IGCTensor *output, IGCTensor *sum) -{ - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32); - ARM_COMPUTE_ERROR_ON_NULLPTR(max, sum, output); - - // Output auto initialization if not yet initialized - auto_init_if_empty(*sum->info(), max->info()->tensor_shape(), 1, input->info()->data_type()); - auto_init_if_empty(*output->info(), input->info()->tensor_shape(), 1, input->info()->data_type()); - - ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output, max, sum); - ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(input, output); - ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(max, sum); - - _input = input; - _max = max; - _output = output; - _sum = sum; - - // Set build options - std::set<std::string> build_opts; - std::string dt_name = (input->info()->data_type() == DataType::F32) ? "DATA_TYPE_FP32" : "DATA_TYPE_FP16"; - build_opts.insert("#define " + dt_name); - build_opts.emplace("#define LOCAL_SIZE_X " + support::cpp11::to_string(1)); - build_opts.emplace("#define LOCAL_SIZE_Y " + support::cpp11::to_string(1)); - build_opts.emplace("#define LOCAL_SIZE_Z " + support::cpp11::to_string(1)); - build_opts.insert("#define SOFTMAX_LAYER_SHIFT_EXP_SUM"); - - // Tell the kernel that the width is not a multiple of 8 - if((input->info()->dimension(0) % 8) != 0) - { - build_opts.insert("#define NON_MULTIPLE_OF_8"); - } - - // Create kernel - _kernel = static_cast<GCKernel>(GCKernelLibrary::get().create_kernel("softmax_layer_shift_exp_sum", build_opts)); - - // Set fixed arguments - unsigned int idx = 4 * num_arguments_per_3D_tensor(); //Skip the input and output parameters - _kernel.set_argument(idx++, input->info()->dimension(0)); - - // Configure window - // The kernel loops over all elements in steps of 8 - const unsigned int num_elems_processed_per_iteration = ceil_to_multiple(input->info()->dimension(0), 8); - unsigned int num_elems_written_per_iteration = 1; - if(input->info()->data_type() == DataType::F16) - { - num_elems_written_per_iteration = 2; - } - - Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration)); - - AccessWindowHorizontal input_access(input->info(), 0, num_elems_processed_per_iteration); - AccessWindowHorizontal max_access(max->info(), 0, num_elems_written_per_iteration); - AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration); - AccessWindowHorizontal sum_access(sum->info(), 0, num_elems_written_per_iteration); - - update_window_and_padding(win, input_access, max_access, output_access, sum_access); - - output_access.set_valid_region(win, input->info()->valid_region()); - sum_access.set_valid_region(win, ValidRegion(Coordinates(), sum->info()->tensor_shape())); - - IGCKernel::configure(win); -} - -void GCLogits1DShiftExpSumKernel::run(const Window &window) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window); - - Window window_collapsed = window.collapse_if_possible(IGCKernel::window(), Window::DimZ); - Window slice = window_collapsed.first_slice_window_3D(); - - _kernel.use(); - - do - { - unsigned int idx = 0; - unsigned int binding = 1; // SSBO binding starts from 1. - // Set inputs - add_3D_tensor_argument(idx, _input, binding++, slice); - add_3D_tensor_argument(idx, _max, binding++, slice); - add_3D_tensor_argument(idx, _output, binding++, slice); - add_3D_tensor_argument(idx, _sum, binding++, slice); - _kernel.update_shader_params(); - enqueue(*this, slice); - } - while(window_collapsed.slide_window_slice_3D(slice)); -} - -GCLogits1DNormKernel::GCLogits1DNormKernel() - : _input(nullptr), _sum(nullptr), _output(nullptr) -{ -} - -void GCLogits1DNormKernel::configure(const IGCTensor *input, const IGCTensor *sum, IGCTensor *output) -{ - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32); - ARM_COMPUTE_ERROR_ON_NULLPTR(sum, output); - - // Output auto initialization if not yet initialized - auto_init_if_empty(*output->info(), input->info()->tensor_shape(), 1, input->info()->data_type()); - - ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, sum, output); - ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(input, output); - - _input = input; - _sum = sum; - _output = output; - - // Set build options - std::set<std::string> build_opts; - std::string dt_name = (input->info()->data_type() == DataType::F32) ? "DATA_TYPE_FP32" : "DATA_TYPE_FP16"; - build_opts.insert("#define " + dt_name); - build_opts.emplace("#define LOCAL_SIZE_X " + support::cpp11::to_string(1)); - build_opts.emplace("#define LOCAL_SIZE_Y " + support::cpp11::to_string(1)); - build_opts.emplace("#define LOCAL_SIZE_Z " + support::cpp11::to_string(1)); - build_opts.insert("#define SOFTMAX_LAYER_NORM"); - - // Create kernel - _kernel = static_cast<GCKernel>(GCKernelLibrary::get().create_kernel("softmax_layer_norm", build_opts)); - - // Configure window - constexpr unsigned int num_elems_processed_per_iteration = 8; - unsigned int num_elems_written_per_iteration = 1; - if(input->info()->data_type() == DataType::F16) - { - num_elems_written_per_iteration = 2; - } - - Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration)); - - AccessWindowHorizontal input_access(input->info(), 0, num_elems_processed_per_iteration); - AccessWindowStatic sum_access(sum->info(), 0, 0, num_elems_written_per_iteration, sum->info()->dimension(1)); - AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration); - - update_window_and_padding(win, input_access, sum_access, output_access); - - output_access.set_valid_region(win, input->info()->valid_region()); - - IGCKernel::configure(win); -} - -void GCLogits1DNormKernel::run(const Window &window) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window); - - Window window_collapsed = window.collapse_if_possible(IGCKernel::window(), Window::DimZ); - Window slice = window_collapsed.first_slice_window_3D(); - - _kernel.use(); - - do - { - Window sum_slice = slice; - sum_slice.set(Window::DimX, Window::Dimension(0, 1, 1)); - - unsigned int idx = 0; - unsigned int binding = 1; // SSBO binding starts from 1. - // Set inputs - add_3D_tensor_argument(idx, _input, binding++, slice); - add_3D_tensor_argument(idx, _sum, binding++, slice); - add_3D_tensor_argument(idx, _output, binding++, slice); - - _kernel.update_shader_params(); - enqueue(*this, slice); - } - while(window_collapsed.slide_window_slice_3D(slice)); -} diff --git a/src/core/GLES_COMPUTE/kernels/GCTensorShiftKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCTensorShiftKernel.cpp deleted file mode 100644 index 78b008484e..0000000000 --- a/src/core/GLES_COMPUTE/kernels/GCTensorShiftKernel.cpp +++ /dev/null @@ -1,107 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/core/GLES_COMPUTE/kernels/GCTensorShiftKernel.h" - -#include "arm_compute/core/Error.h" -#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h" -#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h" -#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/IAccessWindow.h" -#include "arm_compute/core/ITensor.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/core/Validate.h" -#include "src/core/AccessWindowStatic.h" -#include "src/core/helpers/AutoConfiguration.h" -#include "src/core/helpers/WindowHelpers.h" -#include "support/StringSupport.h" - -using namespace arm_compute; - -GCTensorShiftKernel::GCTensorShiftKernel() - : _input(nullptr), _lws(gles::NDRange(1U, 1U, 1U)), _left_padding(0) -{ -} - -void GCTensorShiftKernel::configure(IGCTensor *input) -{ - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32); - - _input = input; - - std::set<std::string> options; - options.emplace("#define LOCAL_SIZE_X " + support::cpp11::to_string(_lws[0])); - options.emplace("#define LOCAL_SIZE_Y " + support::cpp11::to_string(_lws[1])); - options.emplace("#define LOCAL_SIZE_Z " + support::cpp11::to_string(_lws[2])); - options.emplace("#define WIDTH " + support::cpp11::to_string(input->info()->dimension(0))); - - std::string dt_name = (input->info()->data_type() == DataType::F32) ? "DATA_TYPE_FP32" : "DATA_TYPE_FP16"; - options.emplace(("#define " + dt_name)); - - unsigned int num_elems_written_per_iteration_x = input->info()->dimension(0) + input->info()->padding().left + input->info()->padding().right; - - std::stringstream kernel_name; - kernel_name << "tensorshift"; - - _kernel = static_cast<GCKernel>(GCKernelLibrary::get().create_kernel(kernel_name.str(), options)); - - Window win; - win.set(Window::DimX, Window::Dimension(0, num_elems_written_per_iteration_x, num_elems_written_per_iteration_x)); - win.use_tensor_dimensions(input->info()->tensor_shape(), Window::DimY); - win.use_tensor_dimensions(input->info()->tensor_shape(), Window::DimZ); - - _left_padding = _input->info()->padding().left; - - IGCKernel::configure(win); -} - -void GCTensorShiftKernel::run(const Window &window) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window); - - if(int(_left_padding) == 0 || !_input->needs_shifting()) - { - return; - } - - _kernel.use(); - - // Get initial windows - Window slice = window.first_slice_window_3D(); - slice.shift(Window::DimX, -(_input->info()->padding()).left); - - do - { - unsigned int idx = 0; - - add_3D_tensor_argument(idx, _input, 1, slice); - - _kernel.set_argument(idx++, static_cast<unsigned int>(_left_padding)); - - _kernel.update_shader_params(); - enqueue(*this, slice, _lws); - } - while(window.slide_window_slice_3D(slice)); -} diff --git a/src/core/GLES_COMPUTE/kernels/GCTransposeKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCTransposeKernel.cpp deleted file mode 100644 index 3bec05b5f1..0000000000 --- a/src/core/GLES_COMPUTE/kernels/GCTransposeKernel.cpp +++ /dev/null @@ -1,145 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/core/GLES_COMPUTE/kernels/GCTransposeKernel.h" - -#include "arm_compute/core/Error.h" -#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h" -#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h" -#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h" -#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/Types.h" -#include "src/core/AccessWindowStatic.h" -#include "src/core/helpers/AutoConfiguration.h" -#include "src/core/helpers/WindowHelpers.h" -#include "support/StringSupport.h" - -#include <set> -#include <string> - -using namespace arm_compute; - -void GCTransposeKernel::configure(const IGCTensor *input, IGCTensor *output) -{ - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32); - ARM_COMPUTE_ERROR_ON_NULLPTR(output); - - TensorShape output_shape{ input->info()->tensor_shape() }; - const size_t w_out = input->info()->dimension(1); - const size_t h_out = input->info()->dimension(0); - output_shape.set(0, w_out); - output_shape.set(1, h_out); - - // Output tensor auto inizialitation if not yet initialized - auto_init_if_empty(*output->info(), output_shape, 1, input->info()->data_type()); - - ARM_COMPUTE_ERROR_ON_MISMATCHING_DIMENSIONS(output->info()->tensor_shape(), output_shape); - ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); - - _input = input; - _output = output; - - // for better performance - if(w_out < 512 && h_out < 512) - { - _lws_hint = gles::NDRange(8U, 1U, 1U); - } - else - { - _lws_hint = gles::NDRange(1U, 8U, 1U); - } - - std::set<std::string> build_opts; - std::string dt_name = (input->info()->data_type() == DataType::F32) ? "DATA_TYPE_FP32" : "DATA_TYPE_FP16"; - build_opts.emplace(("#define " + dt_name)); - build_opts.emplace("#define LOCAL_SIZE_X " + support::cpp11::to_string(_lws_hint[0])); - build_opts.emplace("#define LOCAL_SIZE_Y " + support::cpp11::to_string(_lws_hint[1])); - build_opts.emplace("#define LOCAL_SIZE_Z " + support::cpp11::to_string(_lws_hint[2])); - - // Configure kernel window - unsigned int num_elems_processed_per_iteration = 4; - - if(input->info()->data_type() == DataType::F16) - { -#define TRANSPOSE_8X8 - -#if defined(TRANSPOSE_4X4) - build_opts.emplace(("#define TRANSPOSE_4X4")); - num_elems_processed_per_iteration = 4; -#elif defined(TRANSPOSE_8X8) /* TRANSPOSE_4X4 */ - if(w_out != h_out) - { - build_opts.emplace("#define TRANSPOSE_8X8"); - num_elems_processed_per_iteration = 8; - } - else - { - build_opts.emplace("#define TRANSPOSE_8X8_SQUARE"); - num_elems_processed_per_iteration = 8; - } -#endif /* TRANSPOSE_4X4 */ - } - - // Create kernel - _kernel = static_cast<GCKernel>(GCKernelLibrary::get().create_kernel("transpose", build_opts)); - - const unsigned int width_aligned = num_elems_processed_per_iteration * static_cast<unsigned int>(_lws_hint[0]); - const unsigned int height_aligned = num_elems_processed_per_iteration * static_cast<unsigned int>(_lws_hint[1]); - - AccessWindowStatic input_access(input->info(), 0, 0, - ceil_to_multiple(input->info()->dimension(0), width_aligned), - ceil_to_multiple(input->info()->dimension(1), height_aligned)); - AccessWindowStatic output_access(output->info(), 0, 0, - ceil_to_multiple(output->info()->dimension(0), height_aligned), - ceil_to_multiple(output->info()->dimension(1), width_aligned)); - - Window win = calculate_max_window(*input->info(), Steps(width_aligned, height_aligned)); - win.set_dimension_step(Window::DimX, num_elems_processed_per_iteration); - win.set_dimension_step(Window::DimY, num_elems_processed_per_iteration); - update_window_and_padding(win, input_access, output_access); - output_access.set_valid_region(win, output->info()->valid_region()); - - IGCKernel::configure(win); -} - -void GCTransposeKernel::run(const Window &window) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IGCKernel::window(), window); - - _kernel.use(); - - Window slice = window.first_slice_window_2D(); - - do - { - unsigned int idx = 0; - - add_2D_tensor_argument(idx, _input, 1, slice); - add_2D_tensor_argument(idx, _output, 2, slice); - _kernel.update_shader_params(); - enqueue(*this, slice, _lws_hint); - } - while(window.slide_window_slice_2D(slice)); -} diff --git a/src/core/GLES_COMPUTE/kernels/GCWeightsReshapeKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCWeightsReshapeKernel.cpp deleted file mode 100644 index bcdbfb60dc..0000000000 --- a/src/core/GLES_COMPUTE/kernels/GCWeightsReshapeKernel.cpp +++ /dev/null @@ -1,143 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/core/GLES_COMPUTE/kernels/GCWeightsReshapeKernel.h" - -#include "arm_compute/core/Error.h" -#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h" -#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h" -#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h" -#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/core/Validate.h" -#include "arm_compute/core/utils/misc/ShapeCalculator.h" -#include "src/core/helpers/AutoConfiguration.h" -#include "src/core/helpers/WindowHelpers.h" -#include "support/StringSupport.h" - -#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h" - -using namespace arm_compute; -using namespace arm_compute::misc::shape_calculator; - -GCWeightsReshapeKernel::GCWeightsReshapeKernel() - : _input(nullptr), _biases(nullptr), _output(nullptr) -{ -} - -void GCWeightsReshapeKernel::configure(const IGCTensor *input, const IGCTensor *biases, IGCTensor *output) -{ - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32); - ARM_COMPUTE_ERROR_ON_NULLPTR(output); - - // Output tensor auto inizialitation if not yet initialized - auto_init_if_empty(*output->info(), input->info()->clone()->set_tensor_shape(compute_weights_reshaped_shape(*input->info(), (biases != nullptr)))); - - ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); - - if(biases != nullptr) - { - ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, biases); - ARM_COMPUTE_ERROR_ON((input->info()->num_dimensions() == 4) && (biases->info()->num_dimensions() != 1)); - ARM_COMPUTE_ERROR_ON((input->info()->num_dimensions() == 5) && (biases->info()->num_dimensions() != 2)); - ARM_COMPUTE_ERROR_ON((input->info()->num_dimensions() == 4) && (biases->info()->dimension(0) != input->info()->tensor_shape()[3])); - ARM_COMPUTE_ERROR_ON((input->info()->num_dimensions() == 5) && (biases->info()->dimension(0) != input->info()->tensor_shape()[3] || biases->info()->dimension(1) != input->info()->tensor_shape()[4])); - } - - _biases = biases; - _output = output; - _input = input; - - // Create build options - std::set<std::string> build_opts; - std::string dt_name = (input->info()->data_type() == DataType::F32) ? "DATA_TYPE_FP32" : "DATA_TYPE_FP16"; - build_opts.emplace("#define " + dt_name); - build_opts.emplace("#define LOCAL_SIZE_X " + support::cpp11::to_string(1)); - build_opts.emplace("#define LOCAL_SIZE_Y " + support::cpp11::to_string(1)); - build_opts.emplace("#define LOCAL_SIZE_Z " + support::cpp11::to_string(1)); - build_opts.emplace("#define RESHAPE_TO_COLUMNS"); - if(biases != nullptr) - { - build_opts.emplace("#define HAS_BIAS"); - } - - // Create kernel - _kernel = static_cast<GCKernel>(GCKernelLibrary::get().create_kernel("reshape_to_columns", build_opts)); - - // Set static arguments - unsigned int idx = num_arguments_per_3D_tensor() + num_arguments_per_2D_tensor(); - idx += (biases != nullptr) ? num_arguments_per_1D_tensor() : 0; - _kernel.set_argument(idx++, _input->info()->dimension(0)); - _kernel.set_argument(idx++, _input->info()->dimension(1)); - _kernel.set_argument(idx++, _input->info()->dimension(2)); - _kernel.set_argument(idx++, _input->info()->dimension(3)); - - // Configure window - Window win = calculate_max_window(*input->info(), Steps()); - - // The GCWeightsReshapeKernel doesn't need padding so update_window_and_padding() can be skipped - output->info()->set_valid_region(ValidRegion(Coordinates(), output->info()->tensor_shape())); - IGCKernel::configure(win); -} - -void GCWeightsReshapeKernel::run(const Window &window) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_MISMATCHING_WINDOWS(IGCKernel::window(), window); - - Window out_window; - out_window.use_tensor_dimensions(_output->info()->tensor_shape()); - - Window in_slice = window.first_slice_window_3D(); - Window out_slice = out_window.first_slice_window_2D(); - - Window biases_window; - Window biases_slice; - - if(_biases != nullptr) - { - biases_window.use_tensor_dimensions(_biases->info()->tensor_shape()); - biases_slice = biases_window.first_slice_window_1D(); - } - - _kernel.use(); - - do - { - // Set arguments - unsigned idx = 0; - add_3D_tensor_argument(idx, _input, 1, in_slice); - add_2D_tensor_argument(idx, _output, 2, out_slice); - if(_biases != nullptr) - { - add_1D_tensor_argument(idx, _biases, 3, biases_slice); - biases_window.slide_window_slice_1D(biases_slice); - } - - _kernel.update_shader_params(); - // Run kernel - enqueue(*this, in_slice); - } - while(window.slide_window_slice_4D(in_slice) && out_window.slide_window_slice_2D(out_slice)); -} |