From e855c237a5b61c4ed5a5ab79dd4af27385cf72f5 Mon Sep 17 00:00:00 2001 From: Stephen Li Date: Thu, 4 Jan 2018 14:13:22 +0800 Subject: APPBROWSER-377: GCConvolutionLayer support for FP16 Change-Id: I801b5e393a16a9f92c062826e6fcfd5982ca7bb3 Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/116584 Tested-by: Jenkins Reviewed-by: Anthony Barbier --- src/core/GLES_COMPUTE/kernels/GCIm2ColKernel.cpp | 118 +++++++++++++++-------- 1 file changed, 79 insertions(+), 39 deletions(-) (limited to 'src/core/GLES_COMPUTE/kernels/GCIm2ColKernel.cpp') diff --git a/src/core/GLES_COMPUTE/kernels/GCIm2ColKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCIm2ColKernel.cpp index e849891c7c..4ab6f3e89d 100644 --- a/src/core/GLES_COMPUTE/kernels/GCIm2ColKernel.cpp +++ b/src/core/GLES_COMPUTE/kernels/GCIm2ColKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -21,6 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ + #include "arm_compute/core/GLES_COMPUTE/kernels/GCIm2ColKernel.h" #include "arm_compute/core/AccessWindowStatic.h" @@ -30,6 +31,7 @@ #include "arm_compute/core/GLES_COMPUTE/IGCTensor.h" #include "arm_compute/core/GLES_COMPUTE/OpenGLES.h" #include "arm_compute/core/Helpers.h" +#include "arm_compute/core/Size2D.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" #include "support/ToolchainSupport.h" @@ -39,20 +41,40 @@ using namespace arm_compute; +namespace +{ +Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output) +{ + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32); + ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(output); + + // Checks performed when output is configured + if(output->total_size() != 0) + { + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT(input, output); + } + + return Status{}; +} +} // namespace + GCIm2ColKernel::GCIm2ColKernel() - : _input(nullptr), _output(nullptr), _convolved_dims(), _num_elems_processed_per_iteration(1), _run_func(nullptr) + : _input(nullptr), _output(nullptr), _convolved_dims(), _kernel_dims(), _num_elems_processed_per_iteration(1), _run_func(nullptr) { } -void GCIm2ColKernel::configure(const IGCTensor *input, IGCTensor *output, std::pair kernel_dims, const PadStrideInfo &conv_info, bool has_bias) +void GCIm2ColKernel::configure(const IGCTensor *input, IGCTensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias) { - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32); - ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); - ARM_COMPUTE_UNUSED(kernel_dims); + ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); + + // Perform validation step + ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info())); _input = input; _output = output; + // Create kernel std::set build_opts; 
std::string dt_name = (input->info()->data_type() == DataType::F32) ? "DATA_TYPE_FP32" : "DATA_TYPE_FP16"; build_opts.emplace("#define LOCAL_SIZE_X " + support::cpp11::to_string(1)); @@ -65,48 +87,52 @@ void GCIm2ColKernel::configure(const IGCTensor *input, IGCTensor *output, std::p build_opts.emplace("#define HAS_BIAS"); } - int pad_x = 0; - int pad_y = 0; int stride_x = 0; int stride_y = 0; - std::tie(pad_x, pad_y) = conv_info.pad(); + std::tie(stride_x, stride_y) = conv_info.stride(); + _kernel_dims = std::make_pair(kernel_dims.width, kernel_dims.height); const bool run_img2col_reduced = (output->info()->dimension(0) == (input->info()->dimension(0) * input->info()->dimension(1) * input->info()->dimension(2))) && (TensorShape::num_max_dimensions >= 4) && (std::equal(input->info()->tensor_shape().cbegin() + 3, input->info()->tensor_shape().cend(), output->info()->tensor_shape().cbegin() + 1)) - && ((stride_x == 1) && (stride_y == 1) && (pad_x == 0) && (pad_y == 0)); + && ((stride_x == 1) && (stride_y == 1) && !conv_info.has_padding()); + std::string kernel_name = "im2col_generic"; if(!run_img2col_reduced) { - // this path is currently not used and not validated - build_opts.insert("#define IM2COL_GENERIC"); + if(input->info()->data_type() == DataType::F16 && _kernel_dims == std::pair(1, 1)) + { + build_opts.emplace("#define KERNEL_1x1"); + } + + build_opts.emplace("#define IM2COL_GENERIC"); _convolved_dims = scaled_dimensions(input->info()->dimension(0), input->info()->dimension(1), - kernel_dims.first, kernel_dims.second, + kernel_dims.width, kernel_dims.height, conv_info); - _num_elems_processed_per_iteration = output->info()->dimension(0); + _num_elems_processed_per_iteration = 2; - build_opts.emplace("#define KERNEL_WIDTH " + support::cpp11::to_string(kernel_dims.first)); - build_opts.emplace("#define KERNEL_HEIGHT " + support::cpp11::to_string(kernel_dims.second)); + build_opts.emplace("#define KERNEL_WIDTH " + support::cpp11::to_string(kernel_dims.width)); + 
build_opts.emplace("#define KERNEL_HEIGHT " + support::cpp11::to_string(kernel_dims.height)); build_opts.emplace("#define KERNEL_DEPTH " + support::cpp11::to_string(input->info()->dimension(2))); build_opts.emplace("#define CONVOLVED_WIDTH " + support::cpp11::to_string(_convolved_dims.first)); build_opts.emplace("#define CONVOLVED_HEIGHT " + support::cpp11::to_string(_convolved_dims.second)); build_opts.emplace("#define STRIDE_X " + support::cpp11::to_string(conv_info.stride().first)); build_opts.emplace("#define STRIDE_Y " + support::cpp11::to_string(conv_info.stride().second)); - build_opts.emplace("#define PAD_X " + support::cpp11::to_string(conv_info.pad().first)); - build_opts.emplace("#define PAD_Y " + support::cpp11::to_string(conv_info.pad().second)); + build_opts.emplace("#define PAD_LEFT " + support::cpp11::to_string(conv_info.pad_left())); + build_opts.emplace("#define PAD_TOP " + support::cpp11::to_string(conv_info.pad_top())); + build_opts.emplace("#define PAD_RIGHT " + support::cpp11::to_string(conv_info.pad_right())); + build_opts.emplace("#define PAD_BOTTOM " + support::cpp11::to_string(conv_info.pad_bottom())); build_opts.emplace("#define SRC_WIDTH " + support::cpp11::to_string(input->info()->dimension(0))); build_opts.emplace("#define SRC_HEIGHT " + support::cpp11::to_string(input->info()->dimension(1))); - // Create kernel - _kernel = static_cast(GCKernelLibrary::get().create_kernel("im2col_generic", build_opts)); - _run_func = &GCIm2ColKernel::run_generic; } else { - build_opts.insert("#define IM2COL_REDUCED"); + build_opts.emplace("#define IM2COL_REDUCED"); + kernel_name = "im2col_reduced"; if(input->info()->data_type() == DataType::F32) { @@ -117,42 +143,47 @@ void GCIm2ColKernel::configure(const IGCTensor *input, IGCTensor *output, std::p int input_width = input->info()->dimension(0); int input_height = input->info()->dimension(1); - build_opts.insert("#define IMAGE_SIZE " + support::cpp11::to_string(input_width * input_height)); + 
build_opts.emplace("#define IMAGE_SIZE " + support::cpp11::to_string(input_width * input_height)); if(input_width % 8 == 0) { _num_elems_processed_per_iteration = 8; - build_opts.insert("#define IM2COL_REDUCED_8X"); + build_opts.emplace("#define IM2COL_REDUCED_8X"); } else if(input_width % 4 == 0) { _num_elems_processed_per_iteration = 4; - build_opts.insert("#define IM2COL_REDUCED_4X"); + build_opts.emplace("#define IM2COL_REDUCED_4X"); } else if(input_width % 2 == 0) { _num_elems_processed_per_iteration = 2; - build_opts.insert("#define IM2COL_REDUCED_2X"); + build_opts.emplace("#define IM2COL_REDUCED_2X"); } else { _num_elems_processed_per_iteration = 2; - build_opts.insert("#define IM2COL_REDUCED_GENERIC"); + build_opts.emplace("#define IM2COL_REDUCED_GENERIC"); } } - // Create kernel - _kernel = static_cast(GCKernelLibrary::get().create_kernel("im2col_reduced", build_opts)); - _run_func = &GCIm2ColKernel::run_reduced; } + // Create kernel + _kernel = static_cast(GCKernelLibrary::get().create_kernel(kernel_name, build_opts)); + // Configure kernel window Window win = calculate_max_window(*input->info(), Steps(_num_elems_processed_per_iteration)); if(input->info()->data_type() == DataType::F16) { // Calculate input right and bottom border - AccessWindowHorizontal input_access(input->info(), 0, _num_elems_processed_per_iteration); + const int input_width = input->info()->dimension(0); + const int input_height = input->info()->dimension(1); + int input_total_width = input->info()->padding().left + input_width + input->info()->padding().right; + int input_padding_right = ceil_to_multiple(input_total_width, _num_elems_processed_per_iteration) - input_total_width; + input_total_width = input_width + input_padding_right + input->info()->padding().right; + AccessWindowStatic input_access(input->info(), 0, 0, input_total_width, input_height); // Calculate output right and bottom border const int output_width = output->info()->dimension(0); @@ -174,6 +205,15 @@ void 
GCIm2ColKernel::configure(const IGCTensor *input, IGCTensor *output, std::p IGCKernel::configure(win); } +Status GCIm2ColKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias) +{ + ARM_COMPUTE_UNUSED(kernel_dims); + ARM_COMPUTE_UNUSED(conv_info); + ARM_COMPUTE_UNUSED(has_bias); + ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output)); + return Status{}; +} + void GCIm2ColKernel::run(const Window &window) { ARM_COMPUTE_ERROR_ON(_run_func == nullptr); @@ -187,6 +227,7 @@ void GCIm2ColKernel::run_generic(const Window &window) // Get initial windows Window window_collapsed = window.collapse_if_possible(IGCKernel::window(), Window::DimZ); + // Change the Z dimension's step back to 1 window_collapsed.set_dimension_step(Window::DimZ, 1); @@ -198,17 +239,18 @@ void GCIm2ColKernel::run_generic(const Window &window) slice.set(Window::DimX, Window::Dimension(0, static_cast(_convolved_dims.first), 1)); slice.set(Window::DimY, Window::Dimension(0, static_cast(_convolved_dims.second), 1)); - // Setup input slice - // The first three dimensions of the input are increased by the inner loops - slice_in.set(Window::DimX, Window::Dimension(0, 0, 0)); - slice_in.set(Window::DimY, Window::Dimension(0, 0, 0)); - slice_in.set(Window::DimZ, Window::Dimension(0, 0, 0)); - // Setup output slice slice_out.set(Window::DimX, Window::Dimension(0, _output->info()->dimension(0), _num_elems_processed_per_iteration)); slice_out.set(Window::DimY, Window::Dimension(0, _output->info()->dimension(1), 1)); slice_out.set(Window::DimZ, Window::Dimension(0, 1, 1)); + // we need top/left pad to be included in valid region + if(_input->info()->data_type() == DataType::F16) + { + (dynamic_cast(_input->info()))->init(_input->info()->tensor_shape(), _input->info()->num_channels(), _input->info()->data_type(), _input->info()->strides_in_bytes(), 0, + _input->info()->total_size(), 
_input->info()->fixed_point_position()); + } + _kernel.use(); do @@ -216,8 +258,6 @@ void GCIm2ColKernel::run_generic(const Window &window) unsigned int idx = 0; add_3D_tensor_argument(idx, _input, 1, slice_in); add_2D_tensor_argument(idx, _output, 2, slice_out); - - _kernel.set_argument(idx++, static_cast(_input->info()->dimension(2))); _kernel.set_argument(idx++, static_cast(_input->info()->strides_in_bytes()[3])); _kernel.set_argument(idx++, static_cast(_output->info()->strides_in_bytes()[3])); _kernel.update_shader_params(); -- cgit v1.2.1