aboutsummaryrefslogtreecommitdiff
path: root/arm_compute
diff options
context:
space:
mode:
authorGiorgio Arena <giorgio.arena@arm.com>2020-12-10 16:49:39 +0000
committerGiorgio Arena <giorgio.arena@arm.com>2020-12-14 13:58:17 +0000
commitea7de7babc319e2fa31c5e1c986e48d6c5370689 (patch)
tree2303791668c67eda76dfb14d07b912af1cb54a17 /arm_compute
parentec241b48ea7481e797285788fd68e5e1d42382bb (diff)
downloadComputeLibrary-ea7de7babc319e2fa31c5e1c986e48d6c5370689.tar.gz
Enable FFT for FP16
Resolves: COMPMID-4051 Change-Id: I0c0bf97212dd281c19d5081e6247e7dc0c23cd6b Signed-off-by: Giorgio Arena <giorgio.arena@arm.com> Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4687 Tested-by: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com> Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'arm_compute')
-rw-r--r--arm_compute/runtime/CL/functions/CLFFT1D.h6
-rw-r--r--arm_compute/runtime/CL/functions/CLFFT2D.h6
-rw-r--r--arm_compute/runtime/CL/functions/CLFFTConvolutionLayer.h68
-rw-r--r--arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h10
-rw-r--r--arm_compute/runtime/NEON/functions/NEFFTConvolutionLayer.h42
5 files changed, 70 insertions, 62 deletions
diff --git a/arm_compute/runtime/CL/functions/CLFFT1D.h b/arm_compute/runtime/CL/functions/CLFFT1D.h
index e88ee7650d..731bad5c32 100644
--- a/arm_compute/runtime/CL/functions/CLFFT1D.h
+++ b/arm_compute/runtime/CL/functions/CLFFT1D.h
@@ -61,7 +61,7 @@ public:
~CLFFT1D();
/** Initialise the function's source, destinations and border mode.
*
- * @param[in] input Source tensor. Data types supported: F32.
+ * @param[in] input Source tensor. Data types supported: F16/F32.
* @param[out] output Destination tensor. Data types and data layouts supported: Same as @p input.
* @param[in] config FFT related configuration
*/
@@ -69,14 +69,14 @@ public:
/** Initialise the function's source, destinations and border mode.
*
* @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data types supported: F32.
+ * @param[in] input Source tensor. Data types supported: F16/F32.
* @param[out] output Destination tensor. Data types and data layouts supported: Same as @p input.
* @param[in] config FFT related configuration
*/
void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const FFT1DInfo &config);
/** Static function to check if given info will lead to a valid configuration of @ref CLFFT1D.
*
- * @param[in] input Source tensor info. Data types supported: F32.
+ * @param[in] input Source tensor info. Data types supported: F16/F32.
* @param[in] output Destination tensor info. Data types and data layouts supported: Same as @p input.
* @param[in] config FFT related configuration
*
diff --git a/arm_compute/runtime/CL/functions/CLFFT2D.h b/arm_compute/runtime/CL/functions/CLFFT2D.h
index c54127f209..adc8e46cb2 100644
--- a/arm_compute/runtime/CL/functions/CLFFT2D.h
+++ b/arm_compute/runtime/CL/functions/CLFFT2D.h
@@ -58,7 +58,7 @@ public:
~CLFFT2D();
/** Initialise the function's source, destinations and border mode.
*
- * @param[in] input Source tensor. Data types supported: F32.
+ * @param[in] input Source tensor. Data types supported: F16/F32.
* @param[out] output Destination tensor. Data types and data layouts supported: Same as @p input.
* @param[in] config FFT related configuration
*/
@@ -66,14 +66,14 @@ public:
/** Initialise the function's source, destinations and border mode.
*
* @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data types supported: F32.
+ * @param[in] input Source tensor. Data types supported: F16/F32.
* @param[out] output Destination tensor. Data types and data layouts supported: Same as @p input.
* @param[in] config FFT related configuration
*/
void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const FFT2DInfo &config);
/** Static function to check if given info will lead to a valid configuration of @ref CLFFT2D.
*
- * @param[in] input Source tensor info. Data types supported: F32.
+ * @param[in] input Source tensor info. Data types supported: F16/F32.
* @param[in] output Destination tensor info. Data types and data layouts supported: Same as @p input.
* @param[in] config FFT related configuration
*
diff --git a/arm_compute/runtime/CL/functions/CLFFTConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLFFTConvolutionLayer.h
index 53ce63333b..5085f5a66c 100644
--- a/arm_compute/runtime/CL/functions/CLFFTConvolutionLayer.h
+++ b/arm_compute/runtime/CL/functions/CLFFTConvolutionLayer.h
@@ -73,53 +73,59 @@ public:
*
* @note: This function only works with any square kernel size and unit strides for both NCHW and NHWC data layout
*
- * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
- * while every optional dimension from 4 and above represent a batch of inputs.
- * Data types supported: F32.
- * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported:Same as @p input.
- * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM].Data type supported: Same as @p input
- * @param[out] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
- * Data types supported: Same as @p input.
- * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
+ * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
+ * while every optional dimension from 4 and above represent a batch of inputs.
+ * Data types supported: F16/F32.
+ * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported:Same as @p input.
+ * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM].Data type supported: Same as @p input
+ * @param[out] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
+ * Data types supported: Same as @p input.
+ * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
+ * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
+ * @param[in] enable_fast_math (Optional) Enable fast math computation. In case this flag were set, the function could dispatch the fastest implementation
+ * available which may introduce a drop of accuracy as well. Default is false
*/
void configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info,
- const ActivationLayerInfo &act_info = ActivationLayerInfo());
+ const ActivationLayerInfo &act_info = ActivationLayerInfo(), bool enable_fast_math = false);
/** Set the input and output tensors.
*
* @note: This function only works with any square kernel size and unit strides for both NCHW and NHWC data layout
*
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
- * while every optional dimension from 4 and above represent a batch of inputs.
- * Data types supported: F32.
- * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported:Same as @p input.
- * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM].Data type supported: Same as @p input
- * @param[out] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
- * Data types supported: Same as @p input.
- * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
+ * while every optional dimension from 4 and above represent a batch of inputs.
+ * Data types supported: F16/F32.
+ * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported:Same as @p input.
+ * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM].Data type supported: Same as @p input
+ * @param[out] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
+ * Data types supported: Same as @p input.
+ * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
+ * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
+ * @param[in] enable_fast_math (Optional) Enable fast math computation. In case this flag were set, the function could dispatch the fastest implementation
+ * available which may introduce a drop of accuracy as well. Default is false
*/
void configure(const CLCompileContext &compile_context, ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info,
- const ActivationLayerInfo &act_info = ActivationLayerInfo());
+ const ActivationLayerInfo &act_info = ActivationLayerInfo(), bool enable_fast_math = false);
/** Static function to check if given info will lead to a valid configuration of @ref CLFFTConvolutionLayer
*
* @note: This function only works with any square kernel size and unit strides for both NCHW and NHWC data layout
*
- * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
- * while every optional dimension from 4 and above represent a batch of inputs.
- * Data types supported: F32.
- * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported:Same as @p input.
- * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM].Data type supported: Same as @p input
- * @param[out] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
- * Data types supported: Same as @p input.
- * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
+ * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
+ * while every optional dimension from 4 and above represent a batch of inputs.
+ * Data types supported: F16/F32.
+ * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported:Same as @p input.
+ * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM].Data type supported: Same as @p input
+ * @param[out] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
+ * Data types supported: Same as @p input.
+ * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
+ * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
+ * @param[in] enable_fast_math (Optional) Enable fast math computation. In case this flag were set, the function could dispatch the fastest implementation
+ * available which may introduce a drop of accuracy as well. Default is false
*
* @return a status
*/
static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
- const ActivationLayerInfo &act_info = ActivationLayerInfo());
+ const ActivationLayerInfo &act_info = ActivationLayerInfo(), bool enable_fast_math = false);
// Inherited methods overridden:
void run() override;
diff --git a/arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h b/arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h
index 6432cd040d..b48f6eb6cc 100644
--- a/arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h
+++ b/arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h
@@ -120,7 +120,7 @@ public:
/** Initialise the kernel's inputs, output.
*
* @param[in] compile_context The compile context to be used.
- * @param[in, out] input1 An input tensor. Data types supported: F32. Number of channels supported: 2.
+ * @param[in, out] input1 An input tensor. Data types supported: F16/F32. Number of channels supported: 2.
* The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
* @param[in, out] input2 An input tensor. Data types supported: same as @p input1. Number of channels supported: same as @p input1.
* The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
@@ -130,7 +130,7 @@ public:
void configure(const CLCompileContext &compile_context, ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output, const ActivationLayerInfo &act_info = ActivationLayerInfo());
/** Static function to check if given info will lead to a valid configuration of @ref CLComplexPixelWiseMultiplication
*
- * @param[in] input1 An input tensor info. Data types supported: F32. Number of channels supported: 2.
+ * @param[in] input1 An input tensor info. Data types supported: F16/F32. Number of channels supported: 2.
* @param[in] input2 An input tensor info. Data types supported: same as @p input1. Number of channels supported: same as @p input1.
* @param[in] output The output tensor info, Data types supported: same as @p input1. Number of channels supported: same as @p input1.
* @param[in] act_info (Optional) Activation layer information in case of a fused activation.
@@ -277,7 +277,7 @@ public:
CLComplexPixelWiseMultiplication &operator=(CLComplexPixelWiseMultiplication &&);
/** Initialise the kernel's inputs, output.
*
- * @param[in, out] input1 An input tensor. Data types supported: F32. Number of channels supported: 2.
+ * @param[in, out] input1 An input tensor. Data types supported: F16/F32. Number of channels supported: 2.
* The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
* @param[in, out] input2 An input tensor. Data types supported: same as @p input1. Number of channels supported: same as @p input1.
* The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
@@ -288,7 +288,7 @@ public:
/** Initialise the kernel's inputs, output.
*
* @param[in] compile_context The compile context to be used.
- * @param[in, out] input1 An input tensor. Data types supported: F32. Number of channels supported: 2.
+ * @param[in, out] input1 An input tensor. Data types supported: F16/F32. Number of channels supported: 2.
* The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
* @param[in, out] input2 An input tensor. Data types supported: same as @p input1. Number of channels supported: same as @p input1.
* The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
@@ -298,7 +298,7 @@ public:
void configure(const CLCompileContext &compile_context, ICLTensor *input1, ICLTensor *input2, ICLTensor *output, const ActivationLayerInfo &act_info = ActivationLayerInfo());
/** Static function to check if given info will lead to a valid configuration of @ref CLComplexPixelWiseMultiplication
*
- * @param[in] input1 An input tensor info. Data types supported: F32. Number of channels supported: 2.
+ * @param[in] input1 An input tensor info. Data types supported: F16/F32. Number of channels supported: 2.
* @param[in] input2 An input tensor info. Data types supported: same as @p input1. Number of channels supported: same as @p input1.
* @param[in] output The output tensor info, Data types supported: same as @p input1. Number of channels supported: same as @p input1.
* @param[in] act_info (Optional) Activation layer information in case of a fused activation.
diff --git a/arm_compute/runtime/NEON/functions/NEFFTConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEFFTConvolutionLayer.h
index 37750e243b..b181e05c1a 100644
--- a/arm_compute/runtime/NEON/functions/NEFFTConvolutionLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEFFTConvolutionLayer.h
@@ -75,36 +75,38 @@ public:
*
* @note: This function only works with any square kernel size and unit strides for both NCHW and NHWC data layout
*
- * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
- * while every optional dimension from 4 and above represent a batch of inputs.
- * Data types supported: F32.
- * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported:Same as @p input.
- * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM].Data type supported: Same as @p input
- * @param[out] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
- * Data types supported: Same as @p input.
- * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
+ * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
+ * while every optional dimension from 4 and above represent a batch of inputs.
+ * Data types supported: F32.
+ * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported:Same as @p input.
+ * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM].Data type supported: Same as @p input
+ * @param[out] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
+ * Data types supported: Same as @p input.
+ * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
+ * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
+ * @param[in] enable_fast_math (Optional) Enable fast math computation. Unused for NEON backend.
*/
void configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info,
- const ActivationLayerInfo &act_info = ActivationLayerInfo());
+ const ActivationLayerInfo &act_info = ActivationLayerInfo(), bool enable_fast_math = false);
/** Static function to check if given info will lead to a valid configuration of @ref NEFFTConvolutionLayer
*
* @note: This function only works with any square kernel size and unit strides for both NCHW and NHWC data layout
*
- * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
- * while every optional dimension from 4 and above represent a batch of inputs.
- * Data types supported: F32.
- * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported:Same as @p input.
- * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM].Data type supported: Same as @p input
- * @param[in] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
- * Data types supported: Same as @p input.
- * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
+ * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
+ * while every optional dimension from 4 and above represent a batch of inputs.
+ * Data types supported: F32.
+ * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported:Same as @p input.
+ * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM].Data type supported: Same as @p input
+ * @param[in] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
+ * Data types supported: Same as @p input.
+ * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
+ * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
+ * @param[in] enable_fast_math (Optional) Enable fast math computation. Unused for NEON backend.
*
* @return a status
*/
static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
- const ActivationLayerInfo &act_info = ActivationLayerInfo());
+ const ActivationLayerInfo &act_info = ActivationLayerInfo(), bool enable_fast_math = false);
// Inherited methods overridden:
void run() override;