aboutsummaryrefslogtreecommitdiff
path: root/src/core/CL/kernels
diff options
context:
space:
mode:
authorGiorgio Arena <giorgio.arena@arm.com>2020-12-10 16:49:39 +0000
committerGiorgio Arena <giorgio.arena@arm.com>2020-12-14 13:58:17 +0000
commitea7de7babc319e2fa31c5e1c986e48d6c5370689 (patch)
tree2303791668c67eda76dfb14d07b912af1cb54a17 /src/core/CL/kernels
parentec241b48ea7481e797285788fd68e5e1d42382bb (diff)
downloadComputeLibrary-ea7de7babc319e2fa31c5e1c986e48d6c5370689.tar.gz
Enable FFT for FP16
Resolves: COMPMID-4051 Change-Id: I0c0bf97212dd281c19d5081e6247e7dc0c23cd6b Signed-off-by: Giorgio Arena <giorgio.arena@arm.com> Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4687 Tested-by: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com> Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core/CL/kernels')
-rw-r--r--src/core/CL/kernels/CLFFTDigitReverseKernel.cpp3
-rw-r--r--src/core/CL/kernels/CLFFTDigitReverseKernel.h6
-rw-r--r--src/core/CL/kernels/CLFFTRadixStageKernel.cpp3
-rw-r--r--src/core/CL/kernels/CLFFTRadixStageKernel.h6
-rw-r--r--src/core/CL/kernels/CLFFTScaleKernel.cpp3
-rw-r--r--src/core/CL/kernels/CLFFTScaleKernel.h6
-rw-r--r--src/core/CL/kernels/CLPixelWiseMultiplicationKernel.cpp9
-rw-r--r--src/core/CL/kernels/CLPixelWiseMultiplicationKernel.h2
-rw-r--r--src/core/CL/kernels/CLReductionOperationKernel.cpp2
9 files changed, 23 insertions, 17 deletions
diff --git a/src/core/CL/kernels/CLFFTDigitReverseKernel.cpp b/src/core/CL/kernels/CLFFTDigitReverseKernel.cpp
index 922e50aa73..448f5a9c1e 100644
--- a/src/core/CL/kernels/CLFFTDigitReverseKernel.cpp
+++ b/src/core/CL/kernels/CLFFTDigitReverseKernel.cpp
@@ -38,7 +38,7 @@ namespace
Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *idx, const FFTDigitReverseKernelInfo &config)
{
ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input);
- ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() != DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_NOT_IN(input, DataType::F16, DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON(input->num_channels() != 1 && input->num_channels() != 2);
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(idx, 1, DataType::U32);
ARM_COMPUTE_RETURN_ERROR_ON(std::set<unsigned int>({ 0, 1 }).count(config.axis) == 0);
@@ -90,6 +90,7 @@ void CLFFTDigitReverseKernel::configure(const CLCompileContext &compile_context,
// Create kernel
CLBuildOptions build_opts;
build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(input->info()->num_channels()));
+ build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type()));
build_opts.add_option_if(config.conjugate, "-DCONJ");
std::string kernel_name = "fft_digit_reverse_axis_" + support::cpp11::to_string(config.axis);
_kernel = create_kernel(compile_context, kernel_name, build_opts.options());
diff --git a/src/core/CL/kernels/CLFFTDigitReverseKernel.h b/src/core/CL/kernels/CLFFTDigitReverseKernel.h
index 2e2f1bdff4..e5583a4c22 100644
--- a/src/core/CL/kernels/CLFFTDigitReverseKernel.h
+++ b/src/core/CL/kernels/CLFFTDigitReverseKernel.h
@@ -51,7 +51,7 @@ public:
~CLFFTDigitReverseKernel() = default;
/** Set the input and output tensors.
*
- * @param[in] input Source tensor. Data types supported: F32.
+ * @param[in] input Source tensor. Data types supported: F16/F32.
* @param[out] output Destination tensor. Data type supported: same as @p input
* @param[in] idx Digit reverse index tensor. Data type supported: U32
* @param[in] config Kernel configuration.
@@ -60,7 +60,7 @@ public:
/** Set the input and output tensors.
*
* @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data types supported: F32.
+ * @param[in] input Source tensor. Data types supported: F16/F32.
* @param[out] output Destination tensor. Data type supported: same as @p input
* @param[in] idx Digit reverse index tensor. Data type supported: U32
* @param[in] config Kernel configuration.
@@ -68,7 +68,7 @@ public:
void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const ICLTensor *idx, const FFTDigitReverseKernelInfo &config);
/** Static function to check if given info will lead to a valid configuration of @ref CLFFTDigitReverseKernel
*
- * @param[in] input Source tensor info. Data types supported: F32.
+ * @param[in] input Source tensor info. Data types supported: F16/F32.
* @param[in] output Destination tensor info. Data type supported: same as @p input
* @param[in] idx Digit reverse index tensor info. Data type supported: U32
* @param[in] config Kernel configuration.
diff --git a/src/core/CL/kernels/CLFFTRadixStageKernel.cpp b/src/core/CL/kernels/CLFFTRadixStageKernel.cpp
index 0f06640b64..68ccb5e8e6 100644
--- a/src/core/CL/kernels/CLFFTRadixStageKernel.cpp
+++ b/src/core/CL/kernels/CLFFTRadixStageKernel.cpp
@@ -42,7 +42,7 @@ namespace
Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const FFTRadixStageKernelInfo &config)
{
ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 2, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 2, DataType::F16, DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON(CLFFTRadixStageKernel::supported_radix().count(config.radix) == 0);
ARM_COMPUTE_RETURN_ERROR_ON(std::set<unsigned int>({ 0, 1 }).count(config.axis) == 0);
ARM_COMPUTE_RETURN_ERROR_ON(input->tensor_shape()[config.axis] % config.radix);
@@ -99,6 +99,7 @@ void CLFFTRadixStageKernel::configure(const CLCompileContext &compile_context, I
// Create build options
CLBuildOptions build_opts;
+ build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type()));
build_opts.add_option_if(_run_in_place, "-DIN_PLACE");
// Create kernel
diff --git a/src/core/CL/kernels/CLFFTRadixStageKernel.h b/src/core/CL/kernels/CLFFTRadixStageKernel.h
index c3cc510bdd..9bb310db83 100644
--- a/src/core/CL/kernels/CLFFTRadixStageKernel.h
+++ b/src/core/CL/kernels/CLFFTRadixStageKernel.h
@@ -55,7 +55,7 @@ public:
*
* @note If the output tensor is nullptr, the FFT will be performed in-place
*
- * @param[in,out] input Source tensor. Data types supported: F32.
+ * @param[in,out] input Source tensor. Data types supported: F16/F32.
* @param[out] output Destination tensor. Can be nullptr. Data type supported: same as @p input
* @param[in] config FFT descriptor metadata.
*/
@@ -65,14 +65,14 @@ public:
* @note If the output tensor is nullptr, the FFT will be performed in-place
*
* @param[in] compile_context The compile context to be used.
- * @param[in,out] input Source tensor. Data types supported: F32.
+ * @param[in,out] input Source tensor. Data types supported: F16/F32.
* @param[out] output Destination tensor. Can be nullptr. Data type supported: same as @p input
* @param[in] config FFT descriptor metadata.
*/
void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const FFTRadixStageKernelInfo &config);
/** Static function to check if given info will lead to a valid configuration of @ref CLFFTRadixStageKernel
*
- * @param[in] input Source tensor info. Data types supported: F32.
+ * @param[in] input Source tensor info. Data types supported: F16/F32.
* @param[in] output Destination tensor info. Can be nullptr. Data type supported: same as @p input
* @param[in] config FFT descriptor metadata.
*
diff --git a/src/core/CL/kernels/CLFFTScaleKernel.cpp b/src/core/CL/kernels/CLFFTScaleKernel.cpp
index 4dbe8d2e86..f82aeca34b 100644
--- a/src/core/CL/kernels/CLFFTScaleKernel.cpp
+++ b/src/core/CL/kernels/CLFFTScaleKernel.cpp
@@ -38,7 +38,7 @@ namespace
Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output)
{
ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 2, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 2, DataType::F16, DataType::F32);
// Checks performed when output is configured
if((output != nullptr) && (output->total_size() != 0))
@@ -94,6 +94,7 @@ void CLFFTScaleKernel::configure(const CLCompileContext &compile_context, ICLTen
CLBuildOptions build_opts;
build_opts.add_option_if(_run_in_place, "-DIN_PLACE");
build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(output != nullptr ? output->info()->num_channels() : input->info()->num_channels()));
+ build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type()));
build_opts.add_option_if(config.conjugate, "-DCONJ");
std::string kernel_name = "fft_scale_conj";
_kernel = create_kernel(compile_context, kernel_name, build_opts.options());
diff --git a/src/core/CL/kernels/CLFFTScaleKernel.h b/src/core/CL/kernels/CLFFTScaleKernel.h
index cb007e5307..cc518be193 100644
--- a/src/core/CL/kernels/CLFFTScaleKernel.h
+++ b/src/core/CL/kernels/CLFFTScaleKernel.h
@@ -51,7 +51,7 @@ public:
~CLFFTScaleKernel() = default;
/** Set the input and output tensors.
*
- * @param[in,out] input Source tensor. Data types supported: F32.
+ * @param[in,out] input Source tensor. Data types supported: F16/F32.
* @param[out] output Destination tensor. Data type supported: same as @p input
* @param[in] config Kernel configuration
*/
@@ -59,14 +59,14 @@ public:
/** Set the input and output tensors.
*
* @param[in] compile_context The compile context to be used.
- * @param[in,out] input Source tensor. Data types supported: F32.
+ * @param[in,out] input Source tensor. Data types supported: F16/F32.
* @param[out] output Destination tensor. Data type supported: same as @p input
* @param[in] config Kernel configuration
*/
void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const FFTScaleKernelInfo &config);
/** Static function to check if given info will lead to a valid configuration of @ref CLFFTScaleKernel
*
- * @param[in] input Source tensor info. Data types supported: F32.
+ * @param[in] input Source tensor info. Data types supported: F16/F32.
* @param[in] output Destination tensor info. Data type supported: same as @p input
* @param[in] config Kernel configuration
*
diff --git a/src/core/CL/kernels/CLPixelWiseMultiplicationKernel.cpp b/src/core/CL/kernels/CLPixelWiseMultiplicationKernel.cpp
index a6255f8018..c68c526ec9 100644
--- a/src/core/CL/kernels/CLPixelWiseMultiplicationKernel.cpp
+++ b/src/core/CL/kernels/CLPixelWiseMultiplicationKernel.cpp
@@ -329,8 +329,9 @@ constexpr unsigned int num_elems_processed_per_iteration_complex = 1;
Status validate_arguments_complex(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info)
{
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input1, 2, DataType::F32);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input2, 2, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input1, 2, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input2, 2, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input1, input2);
const TensorShape &out_shape = TensorShape::broadcast_shape(input1->tensor_shape(), input2->tensor_shape());
@@ -340,7 +341,8 @@ Status validate_arguments_complex(const ITensorInfo *input1, const ITensorInfo *
// Validate in case of configured output
if(output->total_size() > 0)
{
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 2, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 2, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input1, output);
ARM_COMPUTE_RETURN_ERROR_ON_MSG(detail::have_different_dimensions(out_shape, output->tensor_shape(), 0), "Wrong shape for output");
}
@@ -400,6 +402,7 @@ void CLComplexPixelWiseMultiplicationKernel::configure(const CLCompileContext &c
_output = output;
CLBuildOptions build_opts;
+ build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(_output->data_type()));
if(act_info.enabled())
{
build_opts.add_option("-DACTIVATION_TYPE=" + lower_string(string_from_activation_func(act_info.activation())));
diff --git a/src/core/CL/kernels/CLPixelWiseMultiplicationKernel.h b/src/core/CL/kernels/CLPixelWiseMultiplicationKernel.h
index 0cc4005875..74102fd397 100644
--- a/src/core/CL/kernels/CLPixelWiseMultiplicationKernel.h
+++ b/src/core/CL/kernels/CLPixelWiseMultiplicationKernel.h
@@ -157,7 +157,7 @@ public:
CLComplexPixelWiseMultiplicationKernel &operator=(CLComplexPixelWiseMultiplicationKernel &&) = default;
/** Initialise the kernel's input, output and border mode.
*
- * @param[in] input1 An input tensor info. Data types supported: F32. Number of channels supported: 2.
+ * @param[in] input1 An input tensor info. Data types supported: F16/F32. Number of channels supported: 2.
* @param[in] input2 An input tensor info. Data types supported: same as @p input1. Number of channels supported: same as @p input1.
* @param[out] output The output tensor info. Data types supported: same as @p input1. Number of channels supported: same as @p input1.
* @param[in] act_info (Optional) Activation layer information in case of a fused activation.
diff --git a/src/core/CL/kernels/CLReductionOperationKernel.cpp b/src/core/CL/kernels/CLReductionOperationKernel.cpp
index 9d49a2193a..2697a0df98 100644
--- a/src/core/CL/kernels/CLReductionOperationKernel.cpp
+++ b/src/core/CL/kernels/CLReductionOperationKernel.cpp
@@ -55,7 +55,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, u
}
else
{
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 2, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 2, DataType::F16, DataType::F32);
}
ARM_COMPUTE_RETURN_ERROR_ON_MSG(op == ReductionOperation::SUM_SQUARE && input->data_type() == DataType::QASYMM8, "Not supported reduction operation for QASYMM8");
ARM_COMPUTE_RETURN_ERROR_ON_MSG(axis >= TensorShape::num_max_dimensions, "Reduction axis greater than max number of dimensions");