From 05fb448bf48e31d723dfd9f4bbf3899ff65f0fba Mon Sep 17 00:00:00 2001 From: giuros01 Date: Tue, 26 Mar 2019 17:44:40 +0000 Subject: COMPMID-1963: Implement FFT (2D) on NEON Change-Id: I3b564be8d7949e00c6544071ef62dd51de838c96 Signed-off-by: giuros01 Reviewed-on: https://review.mlplatform.org/c/1048 Comments-Addressed: Arm Jenkins Tested-by: Arm Jenkins Reviewed-by: Georgios Pinitas --- src/runtime/NEON/functions/NEFFT1D.cpp | 41 +++++++++------ src/runtime/NEON/functions/NEFFT2D.cpp | 95 ++++++++++++++++++++++++++++++++++ 2 files changed, 121 insertions(+), 15 deletions(-) create mode 100644 src/runtime/NEON/functions/NEFFT2D.cpp (limited to 'src/runtime/NEON') diff --git a/src/runtime/NEON/functions/NEFFT1D.cpp b/src/runtime/NEON/functions/NEFFT1D.cpp index d3ff674a2a..665efeb440 100644 --- a/src/runtime/NEON/functions/NEFFT1D.cpp +++ b/src/runtime/NEON/functions/NEFFT1D.cpp @@ -31,7 +31,7 @@ namespace arm_compute { NEFFT1D::NEFFT1D(std::shared_ptr memory_manager) - : _memory_group(std::move(memory_manager)), _digit_reversed_input(), _digit_reverse_indices(), _digit_reverse_kernel(), _fft_kernels(), _n_ffts(0) + : _memory_group(std::move(memory_manager)), _digit_reverse_kernel(), _fft_kernels(), _scale_kernel(), _digit_reversed_input(), _digit_reverse_indices(), _num_ffts(0), _axis(0), _run_scale(false) { } @@ -43,6 +43,11 @@ void NEFFT1D::configure(const ITensor *input, ITensor *output, const FFT1DInfo & const auto decomposed_vector = arm_compute::helpers::fft::decompose_stages(N, supported_radix); ARM_COMPUTE_ERROR_ON(decomposed_vector.empty()); + // Flags + _run_scale = config.direction == FFTDirection::Inverse; + _axis = config.axis; + const bool is_c2r = input->info()->num_channels() == 2 && output->info()->num_channels() == 1; + // Configure digit reverse TensorInfo digit_reverse_indices_info(TensorShape(input->info()->tensor_shape()[config.axis]), 1, DataType::U32); _digit_reverse_indices.allocator()->init(digit_reverse_indices_info); @@ -51,19 +56,19 @@ 
void NEFFT1D::configure(const ITensor *input, ITensor *output, const FFT1DInfo & // Create and configure FFT kernels unsigned int Nx = 1; - _n_ffts = decomposed_vector.size(); - _fft_kernels.resize(_n_ffts); - for(unsigned int i = 0; i < _n_ffts; ++i) + + _num_ffts = decomposed_vector.size(); + _fft_kernels.resize(_num_ffts); + for(unsigned int i = 0; i < _num_ffts; ++i) { const unsigned int radix_for_stage = decomposed_vector.at(i); - FFTRadixStageKernelInfo fft_kernel_desc; - fft_kernel_desc.axis = config.axis; - fft_kernel_desc.radix = radix_for_stage; - fft_kernel_desc.Nx = Nx; - fft_kernel_desc.is_first_stage = (i == 0); - _fft_kernels[i].configure(&_digit_reversed_input, i == (_n_ffts - 1) ? output : nullptr, fft_kernel_desc); - + FFTRadixStageKernelInfo fft_kernel_info; + fft_kernel_info.axis = config.axis; + fft_kernel_info.radix = radix_for_stage; + fft_kernel_info.Nx = Nx; + fft_kernel_info.is_first_stage = (i == 0); + _fft_kernels[i].configure(&_digit_reversed_input, i == (_num_ffts - 1) && !is_c2r ? output : nullptr, fft_kernel_info); Nx *= radix_for_stage; } @@ -80,7 +85,7 @@ Status NEFFT1D::validate(const ITensorInfo *input, const ITensorInfo *output, co { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output); ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 2, DataType::F32); - ARM_COMPUTE_RETURN_ERROR_ON(config.axis != 0); + ARM_COMPUTE_RETURN_ERROR_ON(config.axis > 1); // Check if FFT is decomposable const auto supported_radix = NEFFTRadixStageKernel::supported_radix(); @@ -102,11 +107,17 @@ void NEFFT1D::run() { MemoryGroupResourceScope scope_mg(_memory_group); - NEScheduler::get().schedule(&_digit_reverse_kernel, Window::DimY); + NEScheduler::get().schedule(&_digit_reverse_kernel, (_axis == 0 ? Window::DimY : Window::DimX)); + + for(unsigned int i = 0; i < _num_ffts; ++i) + { + NEScheduler::get().schedule(&_fft_kernels[i], (_axis == 0 ? 
Window::DimY : Window::DimX)); + } - for(unsigned int i = 0; i < _n_ffts; ++i) + // Run output scaling + if(_run_scale) { - NEScheduler::get().schedule(&_fft_kernels[i], Window::DimY); + NEScheduler::get().schedule(&_scale_kernel, Window::DimY); } } } // namespace arm_compute diff --git a/src/runtime/NEON/functions/NEFFT2D.cpp b/src/runtime/NEON/functions/NEFFT2D.cpp new file mode 100644 index 0000000000..9210ecfa2e --- /dev/null +++ b/src/runtime/NEON/functions/NEFFT2D.cpp @@ -0,0 +1,95 @@ +/* + * Copyright (c) 2019 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */
+#include "arm_compute/runtime/NEON/functions/NEFFT2D.h"
+
+#include "arm_compute/core/ITensor.h"
+#include "arm_compute/core/Validate.h"
+#include "arm_compute/runtime/Scheduler.h"
+
+namespace arm_compute
+{
+/** Construct the 2D FFT function.
+ *
+ * The same memory manager is handed to this function's memory group and to
+ * both 1D FFT passes.
+ *
+ * @param[in] memory_manager Memory manager shared by the memory group and the two passes.
+ */
+NEFFT2D::NEFFT2D(std::shared_ptr<IMemoryManager> memory_manager)
+    : _memory_group(memory_manager), _first_pass_func(memory_manager), _second_pass_func(memory_manager), _first_pass_tensor()
+{
+}
+
+/** Configure the 2D FFT as two chained 1D FFTs.
+ *
+ * The first pass transforms @p input along config.axes.first into an internal
+ * intermediate tensor; the second pass transforms that tensor along
+ * config.axes.second into @p output. Both passes use config.direction.
+ *
+ * @param[in]  input  Source tensor. Must not be nullptr.
+ * @param[out] output Destination tensor. Must not be nullptr.
+ * @param[in]  config FFT descriptor providing the two axes and the direction.
+ */
+void NEFFT2D::configure(const ITensor *input, ITensor *output, const FFT2DInfo &config)
+{
+    ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
+    ARM_COMPUTE_ERROR_THROW_ON(NEFFT2D::validate(input->info(), output->info(), config));
+
+    // Setup first pass
+    FFT1DInfo first_pass_config;
+    first_pass_config.axis      = config.axes.first;
+    first_pass_config.direction = config.direction;
+    // The intermediate tensor is registered with the memory group before any
+    // function that uses it is configured
+    _memory_group.manage(&_first_pass_tensor);
+    _first_pass_func.configure(input, &_first_pass_tensor, first_pass_config);
+
+    // Setup second pass
+    FFT1DInfo second_pass_config;
+    second_pass_config.axis      = config.axes.second;
+    second_pass_config.direction = config.direction;
+    _second_pass_func.configure(&_first_pass_tensor, output, second_pass_config);
+
+    // Allocate the intermediate tensor only once both of its users are configured
+    _first_pass_tensor.allocator()->allocate();
+}
+
+/** Check whether the given tensor infos and descriptor form a valid 2D FFT.
+ *
+ * Each 1D pass is validated through NEFFT1D::validate(), using a two-channel
+ * clone of @p input as the intermediate tensor info.
+ *
+ * @param[in] input  Source tensor info. Must not be nullptr.
+ * @param[in] output Destination tensor info. Must not be nullptr.
+ * @param[in] config FFT descriptor providing the two axes and the direction.
+ *
+ * @return An empty Status when every check passes, otherwise the first error found.
+ */
+Status NEFFT2D::validate(const ITensorInfo *input, const ITensorInfo *output, const FFT2DInfo &config)
+{
+    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
+
+    // Create intermediate tensor info
+    TensorInfo first_pass_tensor(input->clone()->set_is_resizable(true).reset_padding().set_num_channels(2));
+
+    // Validate first pass
+    FFT1DInfo first_pass_config;
+    first_pass_config.axis      = config.axes.first;
+    first_pass_config.direction = config.direction;
+    ARM_COMPUTE_RETURN_ON_ERROR(NEFFT1D::validate(input, &first_pass_tensor, first_pass_config));
+
+    // Validate second pass
+    FFT1DInfo second_pass_config;
+    second_pass_config.axis      = config.axes.second;
+    second_pass_config.direction = config.direction;
+    ARM_COMPUTE_RETURN_ON_ERROR(NEFFT1D::validate(&first_pass_tensor, output, second_pass_config));
+
+    // Checks performed when output is configured (non-zero total size):
+    // shape and data type must then match the input
+    if((output != nullptr) && (output->total_size() != 0))
+    {
+        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output);
+        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
+    }
+
+    return Status{};
+}
+
+/** Run the two 1D FFT passes in order: first pass, then second pass. */
+void NEFFT2D::run()
+{
+    // Scoped helper, used the same way by NEFFT1D::run(), in place of a manual
+    // acquire()/release() pair: the release can no longer be skipped if one of
+    // the passes leaves run() early.
+    MemoryGroupResourceScope scope_mg(_memory_group);
+
+    _first_pass_func.run();
+    _second_pass_func.run();
+}
+} // namespace arm_compute
-- cgit v1.2.1