From d05dce46a14a7b67f322328ecd95bf96bdd30bae Mon Sep 17 00:00:00 2001
From: Georgios Pinitas
Date: Mon, 22 Jan 2018 16:29:17 +0000
Subject: COMPMID-791: Generic Depthwise Convolution Layer NEON QASYMM8

Change-Id: I33cf54e68f6c097ac58b6f16c3f9a720978f09cd
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/117289
Tested-by: Jenkins
Reviewed-by: Michalis Spyrou
Reviewed-by: Anthony Barbier
---
Summary of the change:
  * The element copy loop moves out of run() into a new templated member,
    vector_to_tensor<T>(), so the element type is no longer fixed.
  * configure() now also accepts QASYMM8 and S32, selects the matching
    instantiation once and stores it in the new _func member.
  * run() additionally validates the sub-window and dispatches through _func.
  * Output auto-initialisation clones the input tensor info and only
    overrides the shape.

NOTE(review): this copy was rebuilt from a flattened web rendering.
  * Line breaks were restored; blank lines were placed so that every hunk
    matches its @@ counts and the diffstat below.
  * Angle-bracket contents had been stripped. <typename T>, <T *> and
    <size_t, size_t> are inferred from the surrounding code; the removed-side
    <float *> casts and the <uint8_t>/<int32_t>/<half>/<float>
    instantiations are reconstructed -- TODO confirm against upstream
    commit d05dce46.
  * Column alignment on context/removed lines is best-effort; use
    `git apply --ignore-whitespace` if it does not apply cleanly.
  * Author/reviewer e-mail addresses were stripped as well and are left out.

 .../kernels/NEDepthwiseVectorToTensorKernel.cpp    | 89 ++++++++++++++--------
 1 file changed, 59 insertions(+), 30 deletions(-)

(limited to 'src/core/NEON/kernels/NEDepthwiseVectorToTensorKernel.cpp')

diff --git a/src/core/NEON/kernels/NEDepthwiseVectorToTensorKernel.cpp b/src/core/NEON/kernels/NEDepthwiseVectorToTensorKernel.cpp
index 9b36df3c39..8960d8a8af 100644
--- a/src/core/NEON/kernels/NEDepthwiseVectorToTensorKernel.cpp
+++ b/src/core/NEON/kernels/NEDepthwiseVectorToTensorKernel.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -37,14 +37,46 @@
 
 using namespace arm_compute;
 
+template <typename T>
+void NEDepthwiseVectorToTensorKernel::vector_to_tensor(const Window &window)
+{
+    // const int input_w         = _input->info()->dimension(0);
+    const int output_stride_x = _output->info()->strides_in_bytes().x();
+    const int output_stride_y = _output->info()->strides_in_bytes().y();
+    const int output_stride_z = _output->info()->strides_in_bytes().z();
+
+    // Setup output window
+    Window window_out(window);
+    window_out.set(Window::DimX, Window::Dimension(0, 0, 0));
+    window_out.set(Window::DimY, Window::Dimension(0, 0, 0));
+    window_out.set(Window::DimZ, Window::Dimension(0, 0, 0));
+
+    Iterator in(_input, window);
+    Iterator out(_output, window_out);
+
+    const int patch_size = _conv_dims.first * _conv_dims.second;
+
+    execute_window_loop(window, [&](const Coordinates & id)
+    {
+        const int z       = id.x() / patch_size;
+        const int index2D = id.x() - z * patch_size;
+
+        auto input_ptr  = reinterpret_cast<T *>(in.ptr());
+        auto output_ptr = reinterpret_cast<T *>(out.ptr() + index2D % _conv_dims.first * output_stride_x + index2D / _conv_dims.first * output_stride_y + z * output_stride_z);
+
+        *output_ptr = *input_ptr;
+    },
+    in, out);
+}
+
 NEDepthwiseVectorToTensorKernel::NEDepthwiseVectorToTensorKernel()
-    : _input(nullptr), _output(nullptr), _conv_dims()
+    : _func(nullptr), _input(nullptr), _output(nullptr), _conv_dims()
 {
 }
 
 void NEDepthwiseVectorToTensorKernel::configure(const ITensor *input, ITensor *output, size_t conv_w, size_t conv_h)
 {
-    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32);
+    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::S32, DataType::F16, DataType::F32);
     ARM_COMPUTE_ERROR_ON_NULLPTR(output);
 
     TensorShape output_shape = input->info()->tensor_shape();
@@ -53,7 +85,7 @@ void NEDepthwiseVectorToTensorKernel::configure(const ITensor *input, ITensor *o
     output_shape.set(2, input->info()->tensor_shape()[0] / (conv_w * conv_h));
 
     // Output auto inizialitation if not yet initialized
-    auto_init_if_empty(*output->info(), output_shape, 1, input->info()->data_type(), input->info()->fixed_point_position());
+    auto_init_if_empty(*output->info(), input->info()->clone()->set_tensor_shape(output_shape));
 
     ARM_COMPUTE_ERROR_ON_MISMATCHING_DIMENSIONS(output->info()->tensor_shape(), output_shape);
     ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
@@ -63,6 +95,25 @@ void NEDepthwiseVectorToTensorKernel::configure(const ITensor *input, ITensor *o
     _output    = output;
     _conv_dims = std::pair<size_t, size_t>(conv_w, conv_h);
 
+    // Set appropriate function to run
+    switch(input->info()->data_type())
+    {
+        case DataType::QASYMM8:
+            _func = &NEDepthwiseVectorToTensorKernel::vector_to_tensor<uint8_t>;
+            break;
+        case DataType::S32:
+            _func = &NEDepthwiseVectorToTensorKernel::vector_to_tensor<int32_t>;
+            break;
+        case DataType::F16:
+            _func = &NEDepthwiseVectorToTensorKernel::vector_to_tensor<half>;
+            break;
+        case DataType::F32:
+            _func = &NEDepthwiseVectorToTensorKernel::vector_to_tensor<float>;
+            break;
+        default:
+            ARM_COMPUTE_ERROR("Unsupported data type");
+    }
+
     // Configure kernel window
     Window win = calculate_max_window(*input->info(), Steps());
     // The NEDepthwisevectorToTensorKernel doesn't need padding so update_window_and_padding() can be skipped
@@ -75,32 +126,10 @@ void NEDepthwiseVectorToTensorKernel::run(const Window &window, const ThreadInfo
 {
     ARM_COMPUTE_UNUSED(info);
     ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
+    ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);
 
-    // const int input_w         = _input->info()->dimension(0);
-    const int output_stride_x = _output->info()->strides_in_bytes().x();
-    const int output_stride_y = _output->info()->strides_in_bytes().y();
-    const int output_stride_z = _output->info()->strides_in_bytes().z();
-
-    // Setup output window
-    Window window_out(window);
-    window_out.set(Window::DimX, Window::Dimension(0, 0, 0));
-    window_out.set(Window::DimY, Window::Dimension(0, 0, 0));
-    window_out.set(Window::DimZ, Window::Dimension(0, 0, 0));
-
-    Iterator in(_input, window);
-    Iterator out(_output, window_out);
-
-    const int patch_size = _conv_dims.first * _conv_dims.second;
-
-    execute_window_loop(window, [&](const Coordinates & id)
+    if(_func != nullptr)
     {
-        const int z       = id.x() / patch_size;
-        const int index2D = id.x() - z * patch_size;
-
-        auto input_ptr  = reinterpret_cast<float *>(in.ptr());
-        auto output_ptr = reinterpret_cast<float *>(out.ptr() + index2D % _conv_dims.first * output_stride_x + index2D / _conv_dims.first * output_stride_y + z * output_stride_z);
-
-        *output_ptr = *input_ptr;
-    },
-    in, out);
+        (this->*_func)(window);
+    }
 }
-- 
cgit v1.2.1