Enable FFT for FP16

Resolves: COMPMID-4051
Change-Id: I0c0bf97212dd281c19d5081e6247e7dc0c23cd6b
Signed-off-by: Giorgio Arena <giorgio.arena@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4687
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
author: Giorgio Arena <giorgio.arena@arm.com> 2020-12-10 16:49:39 +0000
committer: Giorgio Arena <giorgio.arena@arm.com> 2020-12-14 13:58:17 +0000
commit: ea7de7babc319e2fa31c5e1c986e48d6c5370689 (patch)
tree: 2303791668c67eda76dfb14d07b912af1cb54a17 /src/core/CL/kernels
parent: ec241b48ea7481e797285788fd68e5e1d42382bb (diff)
download: ComputeLibrary-ea7de7babc319e2fa31c5e1c986e48d6c5370689.tar.gz
9 files changed, 23 insertions, 17 deletions
diff --git a/src/core/CL/kernels/CLFFTDigitReverseKernel.cpp b/src/core/CL/kernels/CLFFTDigitReverseKernel.cpp
index 922e50aa73..448f5a9c1e 100644
--- a/src/core/CL/kernels/CLFFTDigitReverseKernel.cpp
+++ b/src/core/CL/kernels/CLFFTDigitReverseKernel.cpp
@@ -38,7 +38,7 @@ namespace
 Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *idx, const FFTDigitReverseKernelInfo &config)
 {
     ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input);
-    ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() != DataType::F32);
+    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_NOT_IN(input, DataType::F16, DataType::F32);
     ARM_COMPUTE_RETURN_ERROR_ON(input->num_channels() != 1 && input->num_channels() != 2);
     ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(idx, 1, DataType::U32);
     ARM_COMPUTE_RETURN_ERROR_ON(std::set<unsigned int>({ 0, 1 }).count(config.axis) == 0);
@@ -90,6 +90,7 @@ void CLFFTDigitReverseKernel::configure(const CLCompileContext &compile_context,
     // Create kernel
     CLBuildOptions build_opts;
     build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(input->info()->num_channels()));
+    build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type()));
     build_opts.add_option_if(config.conjugate, "-DCONJ");
     std::string kernel_name = "fft_digit_reverse_axis_" + support::cpp11::to_string(config.axis);
     _kernel                 = create_kernel(compile_context, kernel_name, build_opts.options());
diff --git a/src/core/CL/kernels/CLFFTDigitReverseKernel.h b/src/core/CL/kernels/CLFFTDigitReverseKernel.h
index 2e2f1bdff4..e5583a4c22 100644
--- a/src/core/CL/kernels/CLFFTDigitReverseKernel.h
+++ b/src/core/CL/kernels/CLFFTDigitReverseKernel.h
@@ -51,7 +51,7 @@ public:
     ~CLFFTDigitReverseKernel() = default;
     /** Set the input and output tensors.
      *
-     * @param[in]  input  Source tensor. Data types supported: F32.
+     * @param[in]  input  Source tensor. Data types supported: F16/F32.
      * @param[out] output Destination tensor. Data type supported: same as @p input
      * @param[in]  idx    Digit reverse index tensor. Data type supported: U32
      * @param[in]  config Kernel configuration.
@@ -60,7 +60,7 @@ public:
     /** Set the input and output tensors.
      *
      * @param[in]  compile_context The compile context to be used.
-     * @param[in]  input           Source tensor. Data types supported: F32.
+     * @param[in]  input           Source tensor. Data types supported: F16/F32.
      * @param[out] output          Destination tensor. Data type supported: same as @p input
      * @param[in]  idx             Digit reverse index tensor. Data type supported: U32
      * @param[in]  config          Kernel configuration.
@@ -68,7 +68,7 @@ public:
     void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const ICLTensor *idx, const FFTDigitReverseKernelInfo &config);
     /** Static function to check if given info will lead to a valid configuration of @ref CLFFTDigitReverseKernel
      *
-     * @param[in] input  Source tensor info. Data types supported: F32.
+     * @param[in] input  Source tensor info. Data types supported: F16/F32.
      * @param[in] output Destination tensor info. Data type supported: same as @p input
      * @param[in] idx    Digit reverse index tensor info. Data type supported: U32
      * @param[in] config Kernel configuration.
diff --git a/src/core/CL/kernels/CLFFTRadixStageKernel.cpp b/src/core/CL/kernels/CLFFTRadixStageKernel.cpp
index 0f06640b64..68ccb5e8e6 100644
--- a/src/core/CL/kernels/CLFFTRadixStageKernel.cpp
+++ b/src/core/CL/kernels/CLFFTRadixStageKernel.cpp
@@ -42,7 +42,7 @@ namespace
 Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const FFTRadixStageKernelInfo &config)
 {
     ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input);
-    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 2, DataType::F32);
+    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 2, DataType::F16, DataType::F32);
     ARM_COMPUTE_RETURN_ERROR_ON(CLFFTRadixStageKernel::supported_radix().count(config.radix) == 0);
     ARM_COMPUTE_RETURN_ERROR_ON(std::set<unsigned int>({ 0, 1 }).count(config.axis) == 0);
     ARM_COMPUTE_RETURN_ERROR_ON(input->tensor_shape()[config.axis] % config.radix);
@@ -99,6 +99,7 @@ void CLFFTRadixStageKernel::configure(const CLCompileContext &compile_context, I
 
     // Create build options
     CLBuildOptions build_opts;
+    build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type()));
     build_opts.add_option_if(_run_in_place, "-DIN_PLACE");
 
     // Create kernel
diff --git a/src/core/CL/kernels/CLFFTRadixStageKernel.h b/src/core/CL/kernels/CLFFTRadixStageKernel.h
index c3cc510bdd..9bb310db83 100644
--- a/src/core/CL/kernels/CLFFTRadixStageKernel.h
+++ b/src/core/CL/kernels/CLFFTRadixStageKernel.h
@@ -55,7 +55,7 @@ public:
      *
      * @note If the output tensor is nullptr, the FFT will be performed in-place
      *
-     * @param[in,out] input  Source tensor. Data types supported: F32.
+     * @param[in,out] input  Source tensor. Data types supported: F16/F32.
      * @param[out]    output Destination tensor. Can be nullptr. Data type supported: same as @p input
      * @param[in]     config FFT descriptor metadata.
      */
@@ -65,14 +65,14 @@ public:
      * @note If the output tensor is nullptr, the FFT will be performed in-place
      *
      * @param[in]     compile_context The compile context to be used.
-     * @param[in,out] input           Source tensor. Data types supported: F32.
+     * @param[in,out] input           Source tensor. Data types supported: F16/F32.
      * @param[out]    output          Destination tensor. Can be nullptr. Data type supported: same as @p input
      * @param[in]     config          FFT descriptor metadata.
      */
     void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const FFTRadixStageKernelInfo &config);
     /** Static function to check if given info will lead to a valid configuration of @ref CLFFTRadixStageKernel
      *
-     * @param[in] input  Source tensor info. Data types supported: F32.
+     * @param[in] input  Source tensor info. Data types supported: F16/F32.
      * @param[in] output Destination tensor info. Can be nullptr. Data type supported: same as @p input
      * @param[in] config FFT descriptor metadata.
      *
diff --git a/src/core/CL/kernels/CLFFTScaleKernel.cpp b/src/core/CL/kernels/CLFFTScaleKernel.cpp
index 4dbe8d2e86..f82aeca34b 100644
--- a/src/core/CL/kernels/CLFFTScaleKernel.cpp
+++ b/src/core/CL/kernels/CLFFTScaleKernel.cpp
@@ -38,7 +38,7 @@ namespace
 Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output)
 {
     ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input);
-    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 2, DataType::F32);
+    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 2, DataType::F16, DataType::F32);
 
     // Checks performed when output is configured
     if((output != nullptr) && (output->total_size() != 0))
@@ -94,6 +94,7 @@ void CLFFTScaleKernel::configure(const CLCompileContext &compile_context, ICLTen
     CLBuildOptions build_opts;
     build_opts.add_option_if(_run_in_place, "-DIN_PLACE");
     build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(output != nullptr ? output->info()->num_channels() : input->info()->num_channels()));
+    build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type()));
     build_opts.add_option_if(config.conjugate, "-DCONJ");
     std::string kernel_name = "fft_scale_conj";
     _kernel                 = create_kernel(compile_context, kernel_name, build_opts.options());
diff --git a/src/core/CL/kernels/CLFFTScaleKernel.h b/src/core/CL/kernels/CLFFTScaleKernel.h
index cb007e5307..cc518be193 100644
--- a/src/core/CL/kernels/CLFFTScaleKernel.h
+++ b/src/core/CL/kernels/CLFFTScaleKernel.h
@@ -51,7 +51,7 @@ public:
     ~CLFFTScaleKernel() = default;
     /** Set the input and output tensors.
      *
-     * @param[in,out] input  Source tensor. Data types supported: F32.
+     * @param[in,out] input  Source tensor. Data types supported: F16/F32.
      * @param[out]    output Destination tensor. Data type supported: same as @p input
      * @param[in]     config Kernel configuration
      */
@@ -59,14 +59,14 @@ public:
     /** Set the input and output tensors.
      *
      * @param[in]     compile_context The compile context to be used.
-     * @param[in,out] input           Source tensor. Data types supported: F32.
+     * @param[in,out] input           Source tensor. Data types supported: F16/F32.
      * @param[out]    output          Destination tensor. Data type supported: same as @p input
      * @param[in]     config          Kernel configuration
      */
     void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const FFTScaleKernelInfo &config);
     /** Static function to check if given info will lead to a valid configuration of @ref CLFFTScaleKernel
      *
-     * @param[in] input  Source tensor info. Data types supported: F32.
+     * @param[in] input  Source tensor info. Data types supported: F16/F32.
      * @param[in] output Destination tensor info. Data type supported: same as @p input
      * @param[in] config Kernel configuration
      *
diff --git a/src/core/CL/kernels/CLPixelWiseMultiplicationKernel.cpp b/src/core/CL/kernels/CLPixelWiseMultiplicationKernel.cpp
index a6255f8018..c68c526ec9 100644
--- a/src/core/CL/kernels/CLPixelWiseMultiplicationKernel.cpp
+++ b/src/core/CL/kernels/CLPixelWiseMultiplicationKernel.cpp
@@ -329,8 +329,9 @@ constexpr unsigned int num_elems_processed_per_iteration_complex = 1;
 
 Status validate_arguments_complex(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info)
 {
-    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input1, 2, DataType::F32);
-    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input2, 2, DataType::F32);
+    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input1, 2, DataType::F16, DataType::F32);
+    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input2, 2, DataType::F16, DataType::F32);
+    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input1, input2);
 
     const TensorShape &out_shape = TensorShape::broadcast_shape(input1->tensor_shape(), input2->tensor_shape());
 
@@ -340,7 +341,8 @@ Status validate_arguments_complex(const ITensorInfo *input1, const ITensorInfo *
     // Validate in case of configured output
     if(output->total_size() > 0)
     {
-        ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 2, DataType::F32);
+        ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 2, DataType::F16, DataType::F32);
+        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input1, output);
         ARM_COMPUTE_RETURN_ERROR_ON_MSG(detail::have_different_dimensions(out_shape, output->tensor_shape(), 0), "Wrong shape for output");
     }
 
@@ -400,6 +402,7 @@ void CLComplexPixelWiseMultiplicationKernel::configure(const CLCompileContext &c
     _output = output;
 
     CLBuildOptions build_opts;
+    build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(_output->data_type()));
     if(act_info.enabled())
     {
         build_opts.add_option("-DACTIVATION_TYPE=" + lower_string(string_from_activation_func(act_info.activation())));
diff --git a/src/core/CL/kernels/CLPixelWiseMultiplicationKernel.h b/src/core/CL/kernels/CLPixelWiseMultiplicationKernel.h
index 0cc4005875..74102fd397 100644
--- a/src/core/CL/kernels/CLPixelWiseMultiplicationKernel.h
+++ b/src/core/CL/kernels/CLPixelWiseMultiplicationKernel.h
@@ -157,7 +157,7 @@ public:
     CLComplexPixelWiseMultiplicationKernel &operator=(CLComplexPixelWiseMultiplicationKernel &&) = default;
     /** Initialise the kernel's input, output and border mode.
      *
-     * @param[in]  input1   An input tensor info. Data types supported: F32. Number of channels supported: 2.
+     * @param[in]  input1   An input tensor info. Data types supported: F16/F32. Number of channels supported: 2.
      * @param[in]  input2   An input tensor info. Data types supported: same as @p input1. Number of channels supported: same as @p input1.
      * @param[out] output   The output tensor info. Data types supported: same as @p input1. Number of channels supported: same as @p input1.
      * @param[in]  act_info (Optional) Activation layer information in case of a fused activation.
diff --git a/src/core/CL/kernels/CLReductionOperationKernel.cpp b/src/core/CL/kernels/CLReductionOperationKernel.cpp
index 9d49a2193a..2697a0df98 100644
--- a/src/core/CL/kernels/CLReductionOperationKernel.cpp
+++ b/src/core/CL/kernels/CLReductionOperationKernel.cpp
@@ -55,7 +55,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, u
     }
     else
     {
-        ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 2, DataType::F32);
+        ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 2, DataType::F16, DataType::F32);
     }
     ARM_COMPUTE_RETURN_ERROR_ON_MSG(op == ReductionOperation::SUM_SQUARE && input->data_type() == DataType::QASYMM8, "Not supported reduction operation for QASYMM8");
     ARM_COMPUTE_RETURN_ERROR_ON_MSG(axis >= TensorShape::num_max_dimensions, "Reduction axis greater than max number of dimensions");
author	Giorgio Arena <giorgio.arena@arm.com>	2020-12-10 16:49:39 +0000
committer	Giorgio Arena <giorgio.arena@arm.com>	2020-12-14 13:58:17 +0000
commit	ea7de7babc319e2fa31c5e1c986e48d6c5370689 (patch)
tree	2303791668c67eda76dfb14d07b912af1cb54a17 /src/core/CL/kernels
parent	ec241b48ea7481e797285788fd68e5e1d42382bb (diff)
download	ComputeLibrary-ea7de7babc319e2fa31c5e1c986e48d6c5370689.tar.gz