diff options
Diffstat (limited to 'src/runtime/CL/functions/CLFFT1D.cpp')
-rw-r--r-- | src/runtime/CL/functions/CLFFT1D.cpp | 49 |
1 files changed, 38 insertions, 11 deletions
diff --git a/src/runtime/CL/functions/CLFFT1D.cpp b/src/runtime/CL/functions/CLFFT1D.cpp index d893cd3d1b..67111e7e5c 100644 --- a/src/runtime/CL/functions/CLFFT1D.cpp +++ b/src/runtime/CL/functions/CLFFT1D.cpp @@ -31,7 +31,7 @@ namespace arm_compute { CLFFT1D::CLFFT1D(std::shared_ptr<IMemoryManager> memory_manager) - : _memory_group(std::move(memory_manager)), _digit_reversed_input(), _digit_reverse_indices(), _digit_reverse_kernel(), _fft_kernels(), _num_ffts(0) + : _memory_group(std::move(memory_manager)), _digit_reverse_kernel(), _fft_kernels(), _scale_kernel(), _digit_reversed_input(), _digit_reverse_indices(), _num_ffts(0), _run_scale(false) { } @@ -46,11 +46,18 @@ void CLFFT1D::configure(const ICLTensor *input, ICLTensor *output, const FFT1DIn const auto decomposed_vector = arm_compute::helpers::fft::decompose_stages(N, supported_radix); ARM_COMPUTE_ERROR_ON(decomposed_vector.empty()); + // Flags + _run_scale = config.direction == FFTDirection::Inverse; + const bool is_c2r = input->info()->num_channels() == 2 && output->info()->num_channels() == 1; + // Configure digit reverse + FFTDigitReverseKernelInfo digit_reverse_config; + digit_reverse_config.axis = config.axis; + digit_reverse_config.conjugate = config.direction == FFTDirection::Inverse; TensorInfo digit_reverse_indices_info(TensorShape(input->info()->tensor_shape()[config.axis]), 1, DataType::U32); _digit_reverse_indices.allocator()->init(digit_reverse_indices_info); _memory_group.manage(&_digit_reversed_input); - _digit_reverse_kernel.configure(input, &_digit_reversed_input, &_digit_reverse_indices, config.axis); + _digit_reverse_kernel.configure(input, &_digit_reversed_input, &_digit_reverse_indices, digit_reverse_config); // Create and configure FFT kernels unsigned int Nx = 1; @@ -60,16 +67,25 @@ void CLFFT1D::configure(const ICLTensor *input, ICLTensor *output, const FFT1DIn { const unsigned int radix_for_stage = decomposed_vector.at(i); - FFTRadixStageKernelDescriptor fft_kernel_desc; - fft_kernel_desc.axis = config.axis; - fft_kernel_desc.radix = radix_for_stage; - fft_kernel_desc.Nx = Nx; - fft_kernel_desc.is_first_stage = (i == 0); - _fft_kernels[i].configure(&_digit_reversed_input, i == (_num_ffts - 1) ? output : nullptr, fft_kernel_desc); + FFTRadixStageKernelInfo fft_kernel_info; + fft_kernel_info.axis = config.axis; + fft_kernel_info.radix = radix_for_stage; + fft_kernel_info.Nx = Nx; + fft_kernel_info.is_first_stage = (i == 0); + _fft_kernels[i].configure(&_digit_reversed_input, ((i == (_num_ffts - 1)) && !is_c2r) ? output : nullptr, fft_kernel_info); Nx *= radix_for_stage; } + // Configure scale kernel + if(_run_scale) + { + FFTScaleKernelInfo scale_config; + scale_config.scale = static_cast<float>(N); + scale_config.conjugate = config.direction == FFTDirection::Inverse; + is_c2r ? _scale_kernel.configure(&_digit_reversed_input, output, scale_config) : _scale_kernel.configure(output, nullptr, scale_config); + } + // Allocate tensors _digit_reversed_input.allocator()->allocate(); _digit_reverse_indices.allocator()->allocate(); @@ -84,8 +100,9 @@ void CLFFT1D::configure(const ICLTensor *input, ICLTensor *output, const FFT1DIn Status CLFFT1D::validate(const ITensorInfo *input, const ITensorInfo *output, const FFT1DInfo &config) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 2, DataType::F32); - ARM_COMPUTE_RETURN_ERROR_ON(config.axis != 0); + ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() != DataType::F32); + ARM_COMPUTE_RETURN_ERROR_ON(input->num_channels() != 1 && input->num_channels() != 2); + ARM_COMPUTE_RETURN_ERROR_ON(std::set<unsigned int>({ 0, 1 }).count(config.axis) == 0); // Check if FFT is decomposable const auto supported_radix = CLFFTRadixStageKernel::supported_radix(); @@ -96,6 +113,8 @@ Status CLFFT1D::validate(const ITensorInfo *input, const ITensorInfo *output, co // Checks performed when output is configured if((output != nullptr) && (output->total_size() != 0)) { + ARM_COMPUTE_RETURN_ERROR_ON(output->num_channels() == 1 && input->num_channels() == 1); + ARM_COMPUTE_RETURN_ERROR_ON(output->num_channels() != 1 && output->num_channels() != 2); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); } @@ -107,11 +126,19 @@ void CLFFT1D::run() { MemoryGroupResourceScope scope_mg(_memory_group); + // Run digit reverse CLScheduler::get().enqueue(_digit_reverse_kernel, false); + // Run radix kernels for(unsigned int i = 0; i < _num_ffts; ++i) { - CLScheduler::get().enqueue(_fft_kernels[i], i == (_num_ffts - 1)); + CLScheduler::get().enqueue(_fft_kernels[i], i == (_num_ffts - 1) && !_run_scale); + } + + // Run output scaling + if(_run_scale) + { + CLScheduler::get().enqueue(_scale_kernel, true); } } } // namespace arm_compute |