From 8be9148814b88e5b0cabd5a4d2b1f4ff470a8c1c Mon Sep 17 00:00:00 2001 From: Georgios Pinitas Date: Tue, 26 Mar 2019 17:23:28 +0000 Subject: COMPMID-1959: Implements 2D FFT on OpenCL Change-Id: I73cf3984a5463acc854c8a59dc2bd9a5234cd99c Signed-off-by: Georgios Pinitas Reviewed-on: https://review.mlplatform.org/c/936 Comments-Addressed: Arm Jenkins Tested-by: Arm Jenkins Reviewed-by: Gian Marco Iodice --- arm_compute/core/CL/CLKernels.h | 1 + .../core/CL/kernels/CLFFTDigitReverseKernel.h | 10 +- .../core/CL/kernels/CLFFTRadixStageKernel.h | 4 +- arm_compute/core/CL/kernels/CLFFTScaleKernel.h | 78 +++++++++++ .../CL/kernels/CLPixelWiseMultiplicationKernel.h | 48 ++++++- arm_compute/core/KernelDescriptors.h | 18 ++- arm_compute/runtime/CL/CLFunctions.h | 2 + arm_compute/runtime/CL/functions/CLFFT1D.h | 12 +- arm_compute/runtime/CL/functions/CLFFT2D.h | 76 ++++++++++ .../runtime/CL/functions/CLFFTConvolutionLayer.h | 154 +++++++++++++++++++++ .../CL/functions/CLPixelWiseMultiplication.h | 27 +++- arm_compute/runtime/FunctionDescriptors.h | 21 ++- 12 files changed, 431 insertions(+), 20 deletions(-) create mode 100644 arm_compute/core/CL/kernels/CLFFTScaleKernel.h create mode 100644 arm_compute/runtime/CL/functions/CLFFT2D.h create mode 100644 arm_compute/runtime/CL/functions/CLFFTConvolutionLayer.h (limited to 'arm_compute') diff --git a/arm_compute/core/CL/CLKernels.h b/arm_compute/core/CL/CLKernels.h index e3ffcd0704..57498715c8 100644 --- a/arm_compute/core/CL/CLKernels.h +++ b/arm_compute/core/CL/CLKernels.h @@ -66,6 +66,7 @@ #include "arm_compute/core/CL/kernels/CLErodeKernel.h" #include "arm_compute/core/CL/kernels/CLFFTDigitReverseKernel.h" #include "arm_compute/core/CL/kernels/CLFFTRadixStageKernel.h" +#include "arm_compute/core/CL/kernels/CLFFTScaleKernel.h" #include "arm_compute/core/CL/kernels/CLFastCornersKernel.h" #include "arm_compute/core/CL/kernels/CLFillBorderKernel.h" #include "arm_compute/core/CL/kernels/CLFlattenLayerKernel.h" diff --git 
a/arm_compute/core/CL/kernels/CLFFTDigitReverseKernel.h b/arm_compute/core/CL/kernels/CLFFTDigitReverseKernel.h index 10652cdb4d..3082cb186f 100644 --- a/arm_compute/core/CL/kernels/CLFFTDigitReverseKernel.h +++ b/arm_compute/core/CL/kernels/CLFFTDigitReverseKernel.h @@ -26,6 +26,8 @@ #include "arm_compute/core/CL/ICLKernel.h" +#include "arm_compute/core/KernelDescriptors.h" + namespace arm_compute { // Forward declarations @@ -52,19 +54,19 @@ public: * @param[in] input Source tensor. Data types supported: F32. * @param[out] output Destination tensor. Data type supported: same as @p input * @param[in] idx Digit reverse index tensor. Data type supported: U32 - * @param[in] axis Axis to perform digit reverse on. + * @param[in] config Kernel configuration. */ - void configure(const ICLTensor *input, ICLTensor *output, const ICLTensor *idx, unsigned int axis); + void configure(const ICLTensor *input, ICLTensor *output, const ICLTensor *idx, const FFTDigitReverseKernelInfo &config); /** Static function to check if given info will lead to a valid configuration of @ref CLFFTDigitReverseKernel * * @param[in] input Source tensor info. Data types supported: F32. * @param[in] output Destination tensor info. Data type supported: same as @p input * @param[in] idx Digit reverse index tensor info. Data type supported: U32 - * @param[in] axis Axis to perform digit reverse on. + * @param[in] config Kernel configuration. 
* * @return a status */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *idx, unsigned int axis); + static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *idx, const FFTDigitReverseKernelInfo &config); // Inherited methods overridden: void run(const Window &window, cl::CommandQueue &queue) override; diff --git a/arm_compute/core/CL/kernels/CLFFTRadixStageKernel.h b/arm_compute/core/CL/kernels/CLFFTRadixStageKernel.h index 9de775eafa..16fa390e5d 100644 --- a/arm_compute/core/CL/kernels/CLFFTRadixStageKernel.h +++ b/arm_compute/core/CL/kernels/CLFFTRadixStageKernel.h @@ -59,7 +59,7 @@ public: * @param[out] output Destination tensor. Can be nullptr. Data type supported: same as @p input * @param[in] config FFT descriptor metadata. */ - void configure(ICLTensor *input, ICLTensor *output, const FFTRadixStageKernelDescriptor &config); + void configure(ICLTensor *input, ICLTensor *output, const FFTRadixStageKernelInfo &config); /** Static function to check if given info will lead to a valid configuration of @ref CLFFTRadixStageKernel * * @param[in] input Source tensor info. Data types supported: F32. @@ -68,7 +68,7 @@ public: * * @return a status */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const FFTRadixStageKernelDescriptor &config); + static Status validate(const ITensorInfo *input, const ITensorInfo *output, const FFTRadixStageKernelInfo &config); /** Returns the radix that are support by the FFT kernel * * @return A set of supported radix diff --git a/arm_compute/core/CL/kernels/CLFFTScaleKernel.h b/arm_compute/core/CL/kernels/CLFFTScaleKernel.h new file mode 100644 index 0000000000..39ecac42af --- /dev/null +++ b/arm_compute/core/CL/kernels/CLFFTScaleKernel.h @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2019 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLFFTSCALEKERNEL_H__ +#define __ARM_COMPUTE_CLFFTSCALEKERNEL_H__ + +#include "arm_compute/core/CL/ICLKernel.h" + +#include "arm_compute/core/KernelDescriptors.h" + +namespace arm_compute +{ +// Forward declarations +class ICLTensor; + +/** Interface for the inverse fft scale kernel. */ +class CLFFTScaleKernel : public ICLKernel +{ +public: + /** Constructor */ + CLFFTScaleKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLFFTScaleKernel(const CLFFTScaleKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLFFTScaleKernel &operator=(const CLFFTScaleKernel &) = delete; + /** Default Move Constructor. 
*/ + CLFFTScaleKernel(CLFFTScaleKernel &&) = default; + /** Default move assignment operator */ + CLFFTScaleKernel &operator=(CLFFTScaleKernel &&) = default; + /** Default destructor */ + ~CLFFTScaleKernel() = default; + /** Set the input and output tensors. + * + * @param[in,out] input Source tensor. Data types supported: F32. + * @param[out] output Destination tensor. Data type supported: same as @p input + * @param[in] config Kernel configuration + */ + void configure(ICLTensor *input, ICLTensor *output, const FFTScaleKernelInfo &config); + /** Static function to check if given info will lead to a valid configuration of @ref CLFFTScaleKernel + * + * @param[in] input Source tensor info. Data types supported: F32. + * @param[in] output Destination tensor info. Data type supported: same as @p input + * @param[in] config Kernel configuration + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, const FFTScaleKernelInfo &config); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + ICLTensor *_input; + ICLTensor *_output; + bool _run_in_place; +}; +} // namespace arm_compute +#endif /*__ARM_COMPUTE_CLFFTSCALEKERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLPixelWiseMultiplicationKernel.h b/arm_compute/core/CL/kernels/CLPixelWiseMultiplicationKernel.h index b835aa701b..804182b187 100644 --- a/arm_compute/core/CL/kernels/CLPixelWiseMultiplicationKernel.h +++ b/arm_compute/core/CL/kernels/CLPixelWiseMultiplicationKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2018 ARM Limited. + * Copyright (c) 2016-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -29,11 +29,10 @@ namespace arm_compute { +// Forward declarations class ICLTensor; -/** Interface for the pixelwise multiplication kernel. - * - */ +/** Interface for the pixelwise multiplication kernel. 
*/ class CLPixelWiseMultiplicationKernel : public ICLKernel { public: @@ -78,6 +77,47 @@ public: void run(const Window &window, cl::CommandQueue &queue) override; BorderSize border_size() const override; +private: + const ICLTensor *_input1; + const ICLTensor *_input2; + ICLTensor *_output; +}; + +/** Interface for the complex pixelwise multiplication kernel. */ +class CLComplexPixelWiseMultiplicationKernel : public ICLKernel +{ +public: + /** Default constructor.*/ + CLComplexPixelWiseMultiplicationKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLComplexPixelWiseMultiplicationKernel(const CLComplexPixelWiseMultiplicationKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLComplexPixelWiseMultiplicationKernel &operator=(const CLComplexPixelWiseMultiplicationKernel &) = delete; + /** Allow instances of this class to be moved */ + CLComplexPixelWiseMultiplicationKernel(CLComplexPixelWiseMultiplicationKernel &&) = default; + /** Allow instances of this class to be moved */ + CLComplexPixelWiseMultiplicationKernel &operator=(CLComplexPixelWiseMultiplicationKernel &&) = default; + /** Initialise the kernel's input, output and border mode. + * + * @param[in] input1 An input tensor. Data types supported: F32. Number of channels supported: 2. + * @param[in] input2 An input tensor. Data types supported: same as @p input1. Number of channels supported: same as @p input1. + * @param[out] output The output tensor, Data types supported: same as @p input1. Number of channels supported: same as @p input1. + */ + void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output); + /** Static function to check if given info will lead to a valid configuration of @ref CLComplexPixelWiseMultiplicationKernel + * + * @param[in] input1 An input tensor info. Data types supported: F32. Number of channels supported: 2. 
+ * @param[in] input2 An input tensor info. Data types supported: same as @p input1. Number of channels supported: same as @p input1. + * @param[in] output The output tensor info. Data types supported: same as @p input1. Number of channels supported: same as @p input1. + * + * @return a status + */ + static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + BorderSize border_size() const override; + private: const ICLTensor *_input1; const ICLTensor *_input2; diff --git a/arm_compute/core/KernelDescriptors.h b/arm_compute/core/KernelDescriptors.h index 186dbfb6d8..83131f4296 100644 --- a/arm_compute/core/KernelDescriptors.h +++ b/arm_compute/core/KernelDescriptors.h @@ -26,10 +26,24 @@ namespace arm_compute { +/** Descriptor for FFT scale kernels */ +struct FFTScaleKernelInfo +{ + float scale{ 0.f }; /**< Scale factor to apply. */ + bool conjugate{ true }; /**< Flag to conjugate the output. */ +}; + +/** Descriptor for FFT digit reverse kernels */ +struct FFTDigitReverseKernelInfo +{ + unsigned int axis{ 0 }; /**< Axis to perform the kernel on. */ + bool conjugate{ false }; /**< Flag to conjugate the output. */ +}; + /** Descriptor used by the FFT core kernels */ -struct FFTRadixStageKernelDescriptor +struct FFTRadixStageKernelInfo { - unsigned int axis{ 0 }; /**< Axis to run the FFT on. */ + unsigned int axis{ 0 }; /**< Axis to run the kernel on. */ unsigned int radix{ 0 }; /**< Radix to use. */ unsigned int Nx{ 0 }; /**< Nx coefficient. */ bool is_first_stage{ false }; /**< Flags if the FFT kernels is the first stage of a decomposed FFT. 
*/ diff --git a/arm_compute/runtime/CL/CLFunctions.h b/arm_compute/runtime/CL/CLFunctions.h index f1021843a0..a4fcdc27ac 100644 --- a/arm_compute/runtime/CL/CLFunctions.h +++ b/arm_compute/runtime/CL/CLFunctions.h @@ -67,6 +67,8 @@ #include "arm_compute/runtime/CL/functions/CLEqualizeHistogram.h" #include "arm_compute/runtime/CL/functions/CLErode.h" #include "arm_compute/runtime/CL/functions/CLFFT1D.h" +#include "arm_compute/runtime/CL/functions/CLFFT2D.h" +#include "arm_compute/runtime/CL/functions/CLFFTConvolutionLayer.h" #include "arm_compute/runtime/CL/functions/CLFastCorners.h" #include "arm_compute/runtime/CL/functions/CLFillBorder.h" #include "arm_compute/runtime/CL/functions/CLFlattenLayer.h" diff --git a/arm_compute/runtime/CL/functions/CLFFT1D.h b/arm_compute/runtime/CL/functions/CLFFT1D.h index 1612cf7f50..029023c524 100644 --- a/arm_compute/runtime/CL/functions/CLFFT1D.h +++ b/arm_compute/runtime/CL/functions/CLFFT1D.h @@ -28,6 +28,7 @@ #include "arm_compute/core/CL/kernels/CLFFTDigitReverseKernel.h" #include "arm_compute/core/CL/kernels/CLFFTRadixStageKernel.h" +#include "arm_compute/core/CL/kernels/CLFFTScaleKernel.h" #include "arm_compute/runtime/CL/CLMemoryGroup.h" #include "arm_compute/runtime/CL/CLTensor.h" #include "arm_compute/runtime/FunctionDescriptors.h" @@ -39,8 +40,9 @@ class ICLTensor; /** Basic function to execute one dimensional FFT. This function calls the following OpenCL kernels: * - * -# @ref CLFFTDigitReverseKernel Performs digit reverse - * -# @ref CLFFTRadixStageKernel A list of FFT kernels depending on the radix decomposition + * -# @ref CLFFTDigitReverseKernel Performs digit reverse. + * -# @ref CLFFTRadixStageKernel A list of FFT kernels depending on the radix decomposition. + * -# @ref CLFFTScaleKernel Performs output scaling in case of an inverse FFT. 
*/ class CLFFT1D : public IFunction { @@ -69,11 +71,13 @@ public: protected: CLMemoryGroup _memory_group; - CLTensor _digit_reversed_input; - CLTensor _digit_reverse_indices; CLFFTDigitReverseKernel _digit_reverse_kernel; std::unique_ptr<CLFFTRadixStageKernel[]> _fft_kernels; + CLFFTScaleKernel _scale_kernel; + CLTensor _digit_reversed_input; + CLTensor _digit_reverse_indices; unsigned int _num_ffts; + bool _run_scale; }; } // namespace arm_compute #endif /*__ARM_COMPUTE_CLFFT1D_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLFFT2D.h b/arm_compute/runtime/CL/functions/CLFFT2D.h new file mode 100644 index 0000000000..a0673ecc96 --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLFFT2D.h @@ -0,0 +1,76 @@ +/* + * Copyright (c) 2019 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_CLFFT2D_H__ +#define __ARM_COMPUTE_CLFFT2D_H__ + +#include "arm_compute/runtime/IFunction.h" + +#include "arm_compute/runtime/CL/CLMemoryGroup.h" +#include "arm_compute/runtime/CL/CLTensor.h" +#include "arm_compute/runtime/CL/functions/CLFFT1D.h" +#include "arm_compute/runtime/FunctionDescriptors.h" + +namespace arm_compute +{ +// Forward declaration +class ICLTensor; + +/** Basic function to execute two dimensional FFT. This function calls the following OpenCL kernels: + * + * -# @ref CLFFT1D 1D FFT is performed on the first given axis + * -# @ref CLFFT1D 1D FFT is performed on the second given axis + */ +class CLFFT2D : public IFunction +{ +public: + /** Default Constructor */ + CLFFT2D(std::shared_ptr<IMemoryManager> memory_manager = nullptr); + /** Initialise the function's source and destination. + * + * @param[in] input Source tensor. Data types supported: F32. + * @param[out] output Destination tensor. Data types and data layouts supported: Same as @p input. + * @param[in] config FFT related configuration + */ + void configure(const ICLTensor *input, ICLTensor *output, const FFT2DInfo &config); + /** Static function to check if given info will lead to a valid configuration of @ref CLFFT2D. + * + * @param[in] input Source tensor info. Data types supported: F32. + * @param[in] output Destination tensor info. Data types and data layouts supported: Same as @p input. 
+ * @param[in] config FFT related configuration + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, const FFT2DInfo &config); + + // Inherited methods overridden: + void run() override; + +protected: + CLMemoryGroup _memory_group; + CLFFT1D _first_pass_func; + CLFFT1D _second_pass_func; + CLTensor _first_pass_tensor; +}; +} // namespace arm_compute +#endif /*__ARM_COMPUTE_CLFFT2D_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLFFTConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLFFTConvolutionLayer.h new file mode 100644 index 0000000000..0fd2cf3cb1 --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLFFTConvolutionLayer.h @@ -0,0 +1,154 @@ +/* + * Copyright (c) 2019 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_CLFFTCONVOLUTIONLAYER_H__ +#define __ARM_COMPUTE_CLFFTCONVOLUTIONLAYER_H__ + +#include "arm_compute/runtime/IFunction.h" + +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/functions/CLActivationLayer.h" +#include "arm_compute/runtime/CL/functions/CLElementwiseOperations.h" +#include "arm_compute/runtime/CL/functions/CLFFT2D.h" +#include "arm_compute/runtime/CL/functions/CLPadLayer.h" +#include "arm_compute/runtime/CL/functions/CLPermute.h" +#include "arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h" +#include "arm_compute/runtime/CL/functions/CLReductionOperation.h" +#include "arm_compute/runtime/CL/functions/CLReshapeLayer.h" +#include "arm_compute/runtime/CL/functions/CLReverse.h" +#include "arm_compute/runtime/CL/functions/CLSlice.h" + +namespace arm_compute +{ +// Forward declarations +class ICLTensor; + +/** Basic function to execute FFT-based convolution on OpenCL. This function calls the following OpenCL functions/kernels: + * + * -# @ref CLPermute Permute input if NHWC(only NCHW is supported). + * -# @ref CLPadLayer Pad input. + * -# @ref CLFFT2D Forward transform to the frequency domain. + * -# @ref CLComplexPixelWiseMultiplication Complex element-wise product of input and the weights. + * -# @ref CLReductionOperation Reduction across channels. + * -# @ref CLFFT2D Inverse transform back to the time domain. + * -# @ref CLStridedSlice Extract valid output. + * -# @ref CLArithmeticAddition Add bias. + * -# @ref CLActivationLayer Perform activation. + * -# @ref CLPermute Permute output if NHWC(only NCHW is supported). 
+ */ +class CLFFTConvolutionLayer : public IFunction +{ +public: + /** Default constructor */ + CLFFTConvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLFFTConvolutionLayer(const CLFFTConvolutionLayer &) = delete; + /** Default move constructor */ + CLFFTConvolutionLayer(CLFFTConvolutionLayer &&) = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLFFTConvolutionLayer &operator=(const CLFFTConvolutionLayer &) = delete; + /** Default move assignment operator */ + CLFFTConvolutionLayer &operator=(CLFFTConvolutionLayer &&) = default; + /** Set the input and output tensors. + * + * @note: This function only works with any square kernel size and unit strides for both NCHW and NHWC data layout + * + * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM], + * while every optional dimension from 4 and above represent a batch of inputs. + * Data types supported: F32. + * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported:Same as @p input. + * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM].Data type supported: Same as @p input + * @param[out] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs. + * Data types supported: Same as @p input. + * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. + * @param[in] act_info (Optional) Activation layer information in case of a fused activation. 
+ */ + void configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, + const ActivationLayerInfo &act_info = ActivationLayerInfo()); + /** Static function to check if given info will lead to a valid configuration of @ref CLFFTConvolutionLayer + * + * @note: This function only works with any square kernel size and unit strides for both NCHW and NHWC data layout + * + * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM], + * while every optional dimension from 4 and above represent a batch of inputs. + * Data types supported: F32. + * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported:Same as @p input. + * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM].Data type supported: Same as @p input + * @param[out] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs. + * Data types supported: Same as @p input. + * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. + * @param[in] act_info (Optional) Activation layer information in case of a fused activation. 
+ * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, + const ActivationLayerInfo &act_info = ActivationLayerInfo()); + + // Inherited methods overridden: + void run() override; + void prepare() override; + +private: + CLMemoryGroup _memory_group; + CLReverse _flip_weights_func; + CLPermute _permute_input_func; + CLPermute _permute_output_func; + CLPermute _permute_weights_func; + CLPermute _permute_bias_func; + CLPadLayer _pad_input_func; + CLPadLayer _pad_weights_func; + CLFFT2D _transform_input_func; + CLFFT2D _transform_weights_func; + CLFFT2D _itransform_output_func; + CLComplexPixelWiseMultiplication _prod_func; + CLReductionOperation _reduce_func; + CLSlice _extract_output_func; + CLArithmeticAddition _bias_add_func; + CLActivationLayer _activation_layer_func; + + CLTensor _permuted_input; + CLTensor _permuted_weights; + CLTensor _permuted_bias; + CLTensor _permuted_output; + CLTensor _padded_input; + CLTensor _padded_weights; + CLTensor _flip_axis; + CLTensor _flipped_weights; + CLTensor _transformed_input; + CLTensor _transformed_weights; + CLTensor _input_weights_product; + CLTensor _output_product; + CLTensor _output_reduced; + CLTensor _itransformed_output; + CLTensor _reshaped_output; + CLTensor _bias_output; + + const ICLTensor *_original_weights; + const ICLTensor *_original_bias; + bool _is_activationlayer_enabled; + bool _needs_permute; + bool _has_bias; + bool _is_prepared; +}; +} // namespace arm_compute +#endif /* __ARM_COMPUTE_CLFFTCONVOLUTIONLAYER_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h b/arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h index a59fb4aba8..0fa40a77f2 100644 --- a/arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h +++ b/arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2018 ARM 
Limited. + * Copyright (c) 2016-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -29,6 +29,7 @@ namespace arm_compute { +// Forward declaration class ICLTensor; /** Basic function to run @ref CLPixelWiseMultiplicationKernel. */ @@ -64,5 +65,27 @@ public: static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, float scale, ConvertPolicy overflow_policy, RoundingPolicy rounding_policy); }; -} + +/** Basic function to run @ref CLComplexPixelWiseMultiplicationKernel. */ +class CLComplexPixelWiseMultiplication : public ICLSimpleFunction +{ +public: + /** Initialise the kernel's inputs, output. + * + * @param[in, out] input1 An input tensor. Data types supported: F32. Number of channels supported: 2. + * The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0. + * @param[in, out] input2 An input tensor. Data types supported: same as @p input1. Number of channels supported: same as @p input1. + * The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0. + * @param[out] output The output tensor, Data types supported: same as @p input1. Number of channels supported: same as @p input1. + */ + void configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output); + /** Static function to check if given info will lead to a valid configuration of @ref CLComplexPixelWiseMultiplication + * + * @param[in] input1 An input tensor info. Data types supported: F32. Number of channels supported: 2. + * @param[in] input2 An input tensor info. Data types supported: same as @p input1. Number of channels supported: same as @p input1. + * @param[in] output The output tensor info, Data types supported: same as @p input1. Number of channels supported: same as @p input1. 
+ */ + static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output); +}; +} // namespace arm_compute #endif /*__ARM_COMPUTE_CLPIXELWISEMULTIPLICATION_H__ */ diff --git a/arm_compute/runtime/FunctionDescriptors.h b/arm_compute/runtime/FunctionDescriptors.h index 7ff25019e6..f9b16e4218 100644 --- a/arm_compute/runtime/FunctionDescriptors.h +++ b/arm_compute/runtime/FunctionDescriptors.h @@ -24,12 +24,29 @@ #ifndef __ARM_COMPUTE_RUNTIME_FUNCTION_DESCRIPTORS_H__ #define __ARM_COMPUTE_RUNTIME_FUNCTION_DESCRIPTORS_H__ +#include <utility> + namespace arm_compute { -/** Descriptor used by the FFT1d function */ +/** FFT direction to use */ +enum class FFTDirection +{ + Forward, + Inverse +}; + +/** Descriptor used by the FFT1D function */ struct FFT1DInfo { - unsigned int axis{ 0 }; /**< Axis to run the FFT on. */ + unsigned int axis{ 0 }; /**< Axis to run the FFT on. */ + FFTDirection direction{ FFTDirection::Forward }; /**< Direction of the FFT. */ +}; + +/** Descriptor used by the FFT2D function */ +struct FFT2DInfo +{ + std::pair<unsigned int, unsigned int> axes{ 0, 1 }; /**< Axes to run on. If same, multiple transforms are performed on a single axis. */ + FFTDirection direction{ FFTDirection::Forward }; /**< Direction of the FFT. */ }; } // namespace arm_compute #endif /* __ARM_COMPUTE_RUNTIME_FUNCTION_DESCRIPTORS_H__ */ -- cgit v1.2.1