aboutsummaryrefslogtreecommitdiff
path: root/arm_compute
diff options
context:
space:
mode:
authorGeorgios Pinitas <georgios.pinitas@arm.com>2019-03-26 17:23:28 +0000
committerGian Marco Iodice <gianmarco.iodice@arm.com>2019-04-11 09:34:26 +0000
commit8be9148814b88e5b0cabd5a4d2b1f4ff470a8c1c (patch)
tree760658b8c7b8917379467bd3fc119a5502faa850 /arm_compute
parenta50e702289af66944e860eafc7f3b32f6c5f30be (diff)
downloadComputeLibrary-8be9148814b88e5b0cabd5a4d2b1f4ff470a8c1c.tar.gz
COMPMID-1959: Implements 2D FFT on OpenCL
Change-Id: I73cf3984a5463acc854c8a59dc2bd9a5234cd99c Signed-off-by: Georgios Pinitas <georgios.pinitas@arm.com> Reviewed-on: https://review.mlplatform.org/c/936 Comments-Addressed: Arm Jenkins <bsgcomp@arm.com> Tested-by: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Diffstat (limited to 'arm_compute')
-rw-r--r--arm_compute/core/CL/CLKernels.h1
-rw-r--r--arm_compute/core/CL/kernels/CLFFTDigitReverseKernel.h10
-rw-r--r--arm_compute/core/CL/kernels/CLFFTRadixStageKernel.h4
-rw-r--r--arm_compute/core/CL/kernels/CLFFTScaleKernel.h78
-rw-r--r--arm_compute/core/CL/kernels/CLPixelWiseMultiplicationKernel.h48
-rw-r--r--arm_compute/core/KernelDescriptors.h18
-rw-r--r--arm_compute/runtime/CL/CLFunctions.h2
-rw-r--r--arm_compute/runtime/CL/functions/CLFFT1D.h12
-rw-r--r--arm_compute/runtime/CL/functions/CLFFT2D.h76
-rw-r--r--arm_compute/runtime/CL/functions/CLFFTConvolutionLayer.h154
-rw-r--r--arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h27
-rw-r--r--arm_compute/runtime/FunctionDescriptors.h21
12 files changed, 431 insertions, 20 deletions
diff --git a/arm_compute/core/CL/CLKernels.h b/arm_compute/core/CL/CLKernels.h
index e3ffcd0704..57498715c8 100644
--- a/arm_compute/core/CL/CLKernels.h
+++ b/arm_compute/core/CL/CLKernels.h
@@ -66,6 +66,7 @@
#include "arm_compute/core/CL/kernels/CLErodeKernel.h"
#include "arm_compute/core/CL/kernels/CLFFTDigitReverseKernel.h"
#include "arm_compute/core/CL/kernels/CLFFTRadixStageKernel.h"
+#include "arm_compute/core/CL/kernels/CLFFTScaleKernel.h"
#include "arm_compute/core/CL/kernels/CLFastCornersKernel.h"
#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h"
#include "arm_compute/core/CL/kernels/CLFlattenLayerKernel.h"
diff --git a/arm_compute/core/CL/kernels/CLFFTDigitReverseKernel.h b/arm_compute/core/CL/kernels/CLFFTDigitReverseKernel.h
index 10652cdb4d..3082cb186f 100644
--- a/arm_compute/core/CL/kernels/CLFFTDigitReverseKernel.h
+++ b/arm_compute/core/CL/kernels/CLFFTDigitReverseKernel.h
@@ -26,6 +26,8 @@
#include "arm_compute/core/CL/ICLKernel.h"
+#include "arm_compute/core/KernelDescriptors.h"
+
namespace arm_compute
{
// Forward declarations
@@ -52,19 +54,19 @@ public:
* @param[in] input Source tensor. Data types supported: F32.
* @param[out] output Destination tensor. Data type supported: same as @p input
* @param[in] idx Digit reverse index tensor. Data type supported: U32
- * @param[in] axis Axis to perform digit reverse on.
+ * @param[in] config Kernel configuration.
*/
- void configure(const ICLTensor *input, ICLTensor *output, const ICLTensor *idx, unsigned int axis);
+ void configure(const ICLTensor *input, ICLTensor *output, const ICLTensor *idx, const FFTDigitReverseKernelInfo &config);
/** Static function to check if given info will lead to a valid configuration of @ref CLFFTDigitReverseKernel
*
* @param[in] input Source tensor info. Data types supported: F32.
* @param[in] output Destination tensor info. Data type supported: same as @p input
* @param[in] idx Digit reverse index tensor info. Data type supported: U32
- * @param[in] axis Axis to perform digit reverse on.
+ * @param[in] config Kernel configuration.
*
* @return a status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *idx, unsigned int axis);
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *idx, const FFTDigitReverseKernelInfo &config);
// Inherited methods overridden:
void run(const Window &window, cl::CommandQueue &queue) override;
diff --git a/arm_compute/core/CL/kernels/CLFFTRadixStageKernel.h b/arm_compute/core/CL/kernels/CLFFTRadixStageKernel.h
index 9de775eafa..16fa390e5d 100644
--- a/arm_compute/core/CL/kernels/CLFFTRadixStageKernel.h
+++ b/arm_compute/core/CL/kernels/CLFFTRadixStageKernel.h
@@ -59,7 +59,7 @@ public:
* @param[out] output Destination tensor. Can be nullptr. Data type supported: same as @p input
* @param[in] config FFT descriptor metadata.
*/
- void configure(ICLTensor *input, ICLTensor *output, const FFTRadixStageKernelDescriptor &config);
+ void configure(ICLTensor *input, ICLTensor *output, const FFTRadixStageKernelInfo &config);
/** Static function to check if given info will lead to a valid configuration of @ref CLFFTRadixStageKernel
*
* @param[in] input Source tensor info. Data types supported: F32.
@@ -68,7 +68,7 @@ public:
*
* @return a status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const FFTRadixStageKernelDescriptor &config);
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output, const FFTRadixStageKernelInfo &config);
/** Returns the radix that are support by the FFT kernel
*
* @return A set of supported radix
diff --git a/arm_compute/core/CL/kernels/CLFFTScaleKernel.h b/arm_compute/core/CL/kernels/CLFFTScaleKernel.h
new file mode 100644
index 0000000000..39ecac42af
--- /dev/null
+++ b/arm_compute/core/CL/kernels/CLFFTScaleKernel.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLFFTSCALEKERNEL_H__
+#define __ARM_COMPUTE_CLFFTSCALEKERNEL_H__
+
+#include "arm_compute/core/CL/ICLKernel.h"
+
+#include "arm_compute/core/KernelDescriptors.h"
+
+namespace arm_compute
+{
+// Forward declarations
+class ICLTensor;
+
+/** Interface for the inverse FFT scale kernel, configured through @ref FFTScaleKernelInfo. */
+class CLFFTScaleKernel : public ICLKernel
+{
+public:
+ /** Constructor */
+ CLFFTScaleKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLFFTScaleKernel(const CLFFTScaleKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLFFTScaleKernel &operator=(const CLFFTScaleKernel &) = delete;
+ /** Default move constructor */
+ CLFFTScaleKernel(CLFFTScaleKernel &&) = default;
+ /** Default move assignment operator */
+ CLFFTScaleKernel &operator=(CLFFTScaleKernel &&) = default;
+ /** Default destructor */
+ ~CLFFTScaleKernel() = default;
+ /** Set the input and output tensors.
+ *
+ * @param[in,out] input Source tensor. Data types supported: F32.
+ * @param[out] output Destination tensor. Data type supported: same as @p input
+ * @param[in] config Kernel configuration, see @ref FFTScaleKernelInfo.
+ */
+ void configure(ICLTensor *input, ICLTensor *output, const FFTScaleKernelInfo &config);
+ /** Static function to check if given info will lead to a valid configuration of @ref CLFFTScaleKernel
+ *
+ * @param[in] input Source tensor info. Data types supported: F32.
+ * @param[in] output Destination tensor info. Data type supported: same as @p input
+ * @param[in] config Kernel configuration, see @ref FFTScaleKernelInfo.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output, const FFTScaleKernelInfo &config);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ ICLTensor *_input; /**< Source tensor; raw pointer, not released by this class. */
+ ICLTensor *_output; /**< Destination tensor; raw pointer, not released by this class. */
+ bool _run_in_place; /**< NOTE(review): presumably set when the result is written back into the input - confirm in the implementation. */
+};
+} // namespace arm_compute
+#endif /*__ARM_COMPUTE_CLFFTSCALEKERNEL_H__ */
diff --git a/arm_compute/core/CL/kernels/CLPixelWiseMultiplicationKernel.h b/arm_compute/core/CL/kernels/CLPixelWiseMultiplicationKernel.h
index b835aa701b..804182b187 100644
--- a/arm_compute/core/CL/kernels/CLPixelWiseMultiplicationKernel.h
+++ b/arm_compute/core/CL/kernels/CLPixelWiseMultiplicationKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2018 ARM Limited.
+ * Copyright (c) 2016-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -29,11 +29,10 @@
namespace arm_compute
{
+// Forward declarations
class ICLTensor;
-/** Interface for the pixelwise multiplication kernel.
- *
- */
+/** Interface for the pixelwise multiplication kernel. */
class CLPixelWiseMultiplicationKernel : public ICLKernel
{
public:
@@ -83,5 +82,46 @@ private:
const ICLTensor *_input2;
ICLTensor *_output;
};
+
+/** Interface for the complex pixelwise multiplication kernel. */
+class CLComplexPixelWiseMultiplicationKernel : public ICLKernel
+{
+public:
+ /** Default constructor. */
+ CLComplexPixelWiseMultiplicationKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLComplexPixelWiseMultiplicationKernel(const CLComplexPixelWiseMultiplicationKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLComplexPixelWiseMultiplicationKernel &operator=(const CLComplexPixelWiseMultiplicationKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLComplexPixelWiseMultiplicationKernel(CLComplexPixelWiseMultiplicationKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLComplexPixelWiseMultiplicationKernel &operator=(CLComplexPixelWiseMultiplicationKernel &&) = default;
+ /** Initialise the kernel's inputs and output.
+ *
+ * @param[in] input1 An input tensor. Data types supported: F32. Number of channels supported: 2.
+ * @param[in] input2 An input tensor. Data types supported: same as @p input1. Number of channels supported: same as @p input1.
+ * @param[out] output The output tensor. Data types supported: same as @p input1. Number of channels supported: same as @p input1.
+ */
+ void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output);
+ /** Static function to check if given info will lead to a valid configuration of @ref CLComplexPixelWiseMultiplicationKernel
+ *
+ * @param[in] input1 An input tensor info. Data types supported: F32. Number of channels supported: 2.
+ * @param[in] input2 An input tensor info. Data types supported: same as @p input1. Number of channels supported: same as @p input1.
+ * @param[in] output The output tensor info. Data types supported: same as @p input1. Number of channels supported: same as @p input1.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+ BorderSize border_size() const override;
+
+private:
+ const ICLTensor *_input1;
+ const ICLTensor *_input2;
+ ICLTensor *_output;
+};
} // namespace arm_compute
#endif /*__ARM_COMPUTE_CLPIXELWISEMULTIPLICATIONKERNEL_H__ */
diff --git a/arm_compute/core/KernelDescriptors.h b/arm_compute/core/KernelDescriptors.h
index 186dbfb6d8..83131f4296 100644
--- a/arm_compute/core/KernelDescriptors.h
+++ b/arm_compute/core/KernelDescriptors.h
@@ -26,10 +26,24 @@
namespace arm_compute
{
+/** Descriptor for FFT scale kernels */
+struct FFTScaleKernelInfo
+{
+ float scale{ 0.f }; /**< Scale factor used by the kernel. */
+ bool conjugate{ true }; /**< Flag to conjugate the output. */
+};
+
+/** Descriptor for FFT digit reverse kernels */
+struct FFTDigitReverseKernelInfo
+{
+ unsigned int axis{ 0 }; /**< Axis to perform the kernel on. */
+ bool conjugate{ false }; /**< Flag to conjugate the output. */
+};
+
/** Descriptor used by the FFT core kernels */
-struct FFTRadixStageKernelDescriptor
+struct FFTRadixStageKernelInfo
{
- unsigned int axis{ 0 }; /**< Axis to run the FFT on. */
+ unsigned int axis{ 0 }; /**< Axis to run the kernel on. */
unsigned int radix{ 0 }; /**< Radix to use. */
unsigned int Nx{ 0 }; /**< Nx coefficient. */
bool is_first_stage{ false }; /**< Flags if the FFT kernels is the first stage of a decomposed FFT. */
diff --git a/arm_compute/runtime/CL/CLFunctions.h b/arm_compute/runtime/CL/CLFunctions.h
index f1021843a0..a4fcdc27ac 100644
--- a/arm_compute/runtime/CL/CLFunctions.h
+++ b/arm_compute/runtime/CL/CLFunctions.h
@@ -67,6 +67,8 @@
#include "arm_compute/runtime/CL/functions/CLEqualizeHistogram.h"
#include "arm_compute/runtime/CL/functions/CLErode.h"
#include "arm_compute/runtime/CL/functions/CLFFT1D.h"
+#include "arm_compute/runtime/CL/functions/CLFFT2D.h"
+#include "arm_compute/runtime/CL/functions/CLFFTConvolutionLayer.h"
#include "arm_compute/runtime/CL/functions/CLFastCorners.h"
#include "arm_compute/runtime/CL/functions/CLFillBorder.h"
#include "arm_compute/runtime/CL/functions/CLFlattenLayer.h"
diff --git a/arm_compute/runtime/CL/functions/CLFFT1D.h b/arm_compute/runtime/CL/functions/CLFFT1D.h
index 1612cf7f50..029023c524 100644
--- a/arm_compute/runtime/CL/functions/CLFFT1D.h
+++ b/arm_compute/runtime/CL/functions/CLFFT1D.h
@@ -28,6 +28,7 @@
#include "arm_compute/core/CL/kernels/CLFFTDigitReverseKernel.h"
#include "arm_compute/core/CL/kernels/CLFFTRadixStageKernel.h"
+#include "arm_compute/core/CL/kernels/CLFFTScaleKernel.h"
#include "arm_compute/runtime/CL/CLMemoryGroup.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/FunctionDescriptors.h"
@@ -39,8 +40,9 @@ class ICLTensor;
/** Basic function to execute one dimensional FFT. This function calls the following OpenCL kernels:
*
- * -# @ref CLFFTDigitReverseKernel Performs digit reverse
- * -# @ref CLFFTRadixStageKernel A list of FFT kernels depending on the radix decomposition
+ * -# @ref CLFFTDigitReverseKernel Performs digit reverse.
+ * -# @ref CLFFTRadixStageKernel A list of FFT kernels depending on the radix decomposition.
+ * -# @ref CLFFTScaleKernel Performs output scaling in case of an inverse FFT.
*/
class CLFFT1D : public IFunction
{
@@ -69,11 +71,13 @@ public:
protected:
CLMemoryGroup _memory_group;
- CLTensor _digit_reversed_input;
- CLTensor _digit_reverse_indices;
CLFFTDigitReverseKernel _digit_reverse_kernel;
std::unique_ptr<CLFFTRadixStageKernel[]> _fft_kernels;
+ CLFFTScaleKernel _scale_kernel;
+ CLTensor _digit_reversed_input;
+ CLTensor _digit_reverse_indices;
unsigned int _num_ffts;
+ bool _run_scale;
};
} // namespace arm_compute
#endif /*__ARM_COMPUTE_CLFFT1D_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLFFT2D.h b/arm_compute/runtime/CL/functions/CLFFT2D.h
new file mode 100644
index 0000000000..a0673ecc96
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLFFT2D.h
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLFFT2D_H__
+#define __ARM_COMPUTE_CLFFT2D_H__
+
+#include "arm_compute/runtime/IFunction.h"
+
+#include "arm_compute/runtime/CL/CLMemoryGroup.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/CL/functions/CLFFT1D.h"
+#include "arm_compute/runtime/FunctionDescriptors.h"
+
+namespace arm_compute
+{
+// Forward declaration
+class ICLTensor;
+
+/** Basic function to execute two dimensional FFT. This function calls the following OpenCL functions:
+ *
+ * -# @ref CLFFT1D 1D FFT is performed on the first given axis
+ * -# @ref CLFFT1D 1D FFT is performed on the second given axis
+ */
+class CLFFT2D : public IFunction
+{
+public:
+ /** Default Constructor */
+ CLFFT2D(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+ /** Initialise the function's source and destination.
+ *
+ * @param[in] input Source tensor. Data types supported: F32.
+ * @param[out] output Destination tensor. Data types and data layouts supported: Same as @p input.
+ * @param[in] config FFT related configuration, see @ref FFT2DInfo.
+ */
+ void configure(const ICLTensor *input, ICLTensor *output, const FFT2DInfo &config);
+ /** Static function to check if given info will lead to a valid configuration of @ref CLFFT2D.
+ *
+ * @param[in] input Source tensor info. Data types supported: F32.
+ * @param[in] output Destination tensor info. Data types and data layouts supported: Same as @p input.
+ * @param[in] config FFT related configuration, see @ref FFT2DInfo.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output, const FFT2DInfo &config);
+
+ // Inherited methods overridden:
+ void run() override;
+
+protected:
+ CLMemoryGroup _memory_group;
+ CLFFT1D _first_pass_func;
+ CLFFT1D _second_pass_func;
+ CLTensor _first_pass_tensor;
+};
+} // namespace arm_compute
+#endif /*__ARM_COMPUTE_CLFFT2D_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLFFTConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLFFTConvolutionLayer.h
new file mode 100644
index 0000000000..0fd2cf3cb1
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLFFTConvolutionLayer.h
@@ -0,0 +1,154 @@
+/*
+ * Copyright (c) 2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLFFTCONVOLUTIONLAYER_H__
+#define __ARM_COMPUTE_CLFFTCONVOLUTIONLAYER_H__
+
+#include "arm_compute/runtime/IFunction.h"
+
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/functions/CLActivationLayer.h"
+#include "arm_compute/runtime/CL/functions/CLElementwiseOperations.h"
+#include "arm_compute/runtime/CL/functions/CLFFT2D.h"
+#include "arm_compute/runtime/CL/functions/CLPadLayer.h"
+#include "arm_compute/runtime/CL/functions/CLPermute.h"
+#include "arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h"
+#include "arm_compute/runtime/CL/functions/CLReductionOperation.h"
+#include "arm_compute/runtime/CL/functions/CLReshapeLayer.h"
+#include "arm_compute/runtime/CL/functions/CLReverse.h"
+#include "arm_compute/runtime/CL/functions/CLSlice.h"
+
+namespace arm_compute
+{
+// Forward declarations
+class ICLTensor;
+
+/** Basic function to execute FFT-based convolution on OpenCL. This function calls the following OpenCL functions/kernels:
+ *
+ * -# @ref CLPermute Permute input if NHWC(only NCHW is supported).
+ * -# @ref CLPadLayer Pad input.
+ * -# @ref CLFFT2D Forward transform to the frequency domain.
+ * -# @ref CLComplexPixelWiseMultiplication Complex element-wise product of input and the weights.
+ * -# @ref CLReductionOperation Reduction across channels.
+ * -# @ref CLFFT2D Inverse transform back to the time domain.
+ * -# @ref CLSlice Extract valid output.
+ * -# @ref CLArithmeticAddition Add bias.
+ * -# @ref CLActivationLayer Perform activation.
+ * -# @ref CLPermute Permute output if NHWC(only NCHW is supported).
+ */
+class CLFFTConvolutionLayer : public IFunction
+{
+public:
+ /** Default constructor */
+ CLFFTConvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLFFTConvolutionLayer(const CLFFTConvolutionLayer &) = delete;
+ /** Default move constructor */
+ CLFFTConvolutionLayer(CLFFTConvolutionLayer &&) = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLFFTConvolutionLayer &operator=(const CLFFTConvolutionLayer &) = delete;
+ /** Default move assignment operator */
+ CLFFTConvolutionLayer &operator=(CLFFTConvolutionLayer &&) = default;
+ /** Set the input and output tensors.
+ *
+ * @note: This function only supports square kernels (of any size) and unit strides, for both NCHW and NHWC data layouts
+ *
+ * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
+ * while every optional dimension from 4 and above represent a batch of inputs.
+ * Data types supported: F32.
+ * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported: Same as @p input.
+ * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input
+ * @param[out] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
+ * Data types supported: Same as @p input.
+ * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
+ * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
+ */
+ void configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info,
+ const ActivationLayerInfo &act_info = ActivationLayerInfo());
+ /** Static function to check if given info will lead to a valid configuration of @ref CLFFTConvolutionLayer
+ *
+ * @note: This function only supports square kernels (of any size) and unit strides, for both NCHW and NHWC data layouts
+ *
+ * @param[in] input Source tensor info. 3 lower dimensions represent a single input [width, height, IFM],
+ * while every optional dimension from 4 and above represent a batch of inputs.
+ * Data types supported: F32.
+ * @param[in] weights Weights tensor info. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported: Same as @p input.
+ * @param[in] biases Biases tensor info. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input
+ * @param[in] output Destination tensor info. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
+ * Data types supported: Same as @p input.
+ * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
+ * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
+ const ActivationLayerInfo &act_info = ActivationLayerInfo());
+
+ // Inherited methods overridden:
+ void run() override;
+ void prepare() override;
+
+private:
+ CLMemoryGroup _memory_group;
+ CLReverse _flip_weights_func;
+ CLPermute _permute_input_func;
+ CLPermute _permute_output_func;
+ CLPermute _permute_weights_func;
+ CLPermute _permute_bias_func;
+ CLPadLayer _pad_input_func;
+ CLPadLayer _pad_weights_func;
+ CLFFT2D _transform_input_func;
+ CLFFT2D _transform_weights_func;
+ CLFFT2D _itransform_output_func;
+ CLComplexPixelWiseMultiplication _prod_func;
+ CLReductionOperation _reduce_func;
+ CLSlice _extract_output_func;
+ CLArithmeticAddition _bias_add_func;
+ CLActivationLayer _activation_layer_func;
+
+ CLTensor _permuted_input;
+ CLTensor _permuted_weights;
+ CLTensor _permuted_bias;
+ CLTensor _permuted_output;
+ CLTensor _padded_input;
+ CLTensor _padded_weights;
+ CLTensor _flip_axis;
+ CLTensor _flipped_weights;
+ CLTensor _transformed_input;
+ CLTensor _transformed_weights;
+ CLTensor _input_weights_product;
+ CLTensor _output_product;
+ CLTensor _output_reduced;
+ CLTensor _itransformed_output;
+ CLTensor _reshaped_output;
+ CLTensor _bias_output;
+
+ const ICLTensor *_original_weights;
+ const ICLTensor *_original_bias;
+ bool _is_activationlayer_enabled;
+ bool _needs_permute;
+ bool _has_bias;
+ bool _is_prepared;
+};
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_CLFFTCONVOLUTIONLAYER_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h b/arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h
index a59fb4aba8..0fa40a77f2 100644
--- a/arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h
+++ b/arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2018 ARM Limited.
+ * Copyright (c) 2016-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -29,6 +29,7 @@
namespace arm_compute
{
+// Forward declaration
class ICLTensor;
/** Basic function to run @ref CLPixelWiseMultiplicationKernel. */
@@ -64,5 +65,27 @@ public:
static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, float scale,
ConvertPolicy overflow_policy, RoundingPolicy rounding_policy);
};
-}
+
+/** Basic function to run @ref CLComplexPixelWiseMultiplicationKernel. */
+class CLComplexPixelWiseMultiplication : public ICLSimpleFunction
+{
+public:
+ /** Initialise the kernel's inputs and output.
+ *
+ * @param[in, out] input1 An input tensor. Data types supported: F32. Number of channels supported: 2.
+ * The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
+ * @param[in, out] input2 An input tensor. Data types supported: same as @p input1. Number of channels supported: same as @p input1.
+ * The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
+ * @param[out] output The output tensor. Data types supported: same as @p input1. Number of channels supported: same as @p input1.
+ */
+ void configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output);
+ /** Static function to check if given info will lead to a valid configuration of @ref CLComplexPixelWiseMultiplication
+ *
+ * @param[in] input1 An input tensor info. Data types supported: F32. Number of channels supported: 2.
+ * @param[in] input2 An input tensor info. Data types supported: same as @p input1. Number of channels supported: same as @p input1.
+ * @param[in] output The output tensor info. Data types supported: same as @p input1. Number of channels supported: same as @p input1.
+ */
+ static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output);
+};
+} // namespace arm_compute
#endif /*__ARM_COMPUTE_CLPIXELWISEMULTIPLICATION_H__ */
diff --git a/arm_compute/runtime/FunctionDescriptors.h b/arm_compute/runtime/FunctionDescriptors.h
index 7ff25019e6..f9b16e4218 100644
--- a/arm_compute/runtime/FunctionDescriptors.h
+++ b/arm_compute/runtime/FunctionDescriptors.h
@@ -24,12 +24,29 @@
#ifndef __ARM_COMPUTE_RUNTIME_FUNCTION_DESCRIPTORS_H__
#define __ARM_COMPUTE_RUNTIME_FUNCTION_DESCRIPTORS_H__
+#include <utility>
+
namespace arm_compute
{
-/** Descriptor used by the FFT1d function */
+/** FFT direction to use */
+enum class FFTDirection
+{
+ Forward, /**< Forward FFT. */
+ Inverse /**< Inverse FFT. */
+};
+
+/** Descriptor used by the FFT1D function */
struct FFT1DInfo
{
- unsigned int axis{ 0 }; /**< Axis to run the FFT on. */
+ unsigned int axis{ 0 }; /**< Axis to run the FFT on. */
+ FFTDirection direction{ FFTDirection::Forward }; /**< Direction of the FFT. */
+};
+
+/** Descriptor used by the FFT2D function */
+struct FFT2DInfo
+{
+ std::pair<unsigned int, unsigned int> axes{ 0, 1 }; /**< Axes to run on. If both are the same, multiple transforms are performed on that single axis. */
+ FFTDirection direction{ FFTDirection::Forward }; /**< Direction of the FFT. */
};
} // namespace arm_compute
#endif /* __ARM_COMPUTE_RUNTIME_FUNCTION_DESCRIPTORS_H__ */