aboutsummaryrefslogtreecommitdiff
path: root/arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h
diff options
context:
space:
mode:
authorGiorgio Arena <giorgio.arena@arm.com>2018-05-03 15:57:48 +0100
committerAnthony Barbier <anthony.barbier@arm.com>2018-11-02 16:52:35 +0000
commita3221e6772dc371cf5de7e525bf5c22b58ad6d08 (patch)
tree14d224e07d92dbbd97966de0b6b0aa8e6a288022 /arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h
parent20b4313365ea2ed31f59fd757f68f791f076e6bc (diff)
downloadComputeLibrary-a3221e6772dc371cf5de7e525bf5c22b58ad6d08.tar.gz
COMPMID-1106 Add fast math support in NEWinogradConvolutionLayer
Change-Id: I5fcbbb3b6f22204f0aaebbc319dfdf03593577e8 Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/130067 Tested-by: Jenkins <bsgcomp@arm.com> Reviewed-by: Anthony Barbier <anthony.barbier@arm.com> Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Diffstat (limited to 'arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h')
-rw-r--r--arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h51
1 files changed, 29 insertions, 22 deletions
diff --git a/arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h
index 037c74c1a8..55921f78f3 100644
--- a/arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h
@@ -45,6 +45,8 @@ class ITensor;
* -# @ref NEWinogradLayerTransformOutputKernel
* -# @ref NEWinogradLayerBatchedGEMMKernel
* -# @ref CPPPermute (three times: weights, input and output)
+ *
+ * @note Some Winograd configurations (i.e. F(2x2, 5x5), F(4x4, 5x5)) are supported only with enable_fast_math = true
*/
class NEWinogradConvolutionLayer : public IFunction
{
@@ -54,39 +56,44 @@ public:
/** Set the input and output tensors.
*
- * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
- * while every optional dimension from 4 and above represent a batch of inputs.
- * Data types supported: F32.
- * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported: Same as @p input.
- * Currently only 3x3 and 5x5 kernels are supported.
- * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p weights.
- * @param[out] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
- * Data types supported: Same as @p input.
- * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. Currently only unit strides are supported.
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
+ * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
+ * while every optional dimension from 4 and above represent a batch of inputs.
+ * Data types supported: F32.
+ * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported: Same as @p input.
+ * Currently only 3x3 and 5x5 kernels are supported.
+ * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p weights.
+ * @param[out] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
+ * Data types supported: Same as @p input.
+ * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. Currently only unit strides are supported.
+ * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
+ * @param[in] enable_fast_math (Optional) Enable fast math computation. In case this flag were set, the function could dispatch the fastest implementation
+ * available which may introduce a drop of accuracy as well. Default is false
*/
- void configure(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info = ActivationLayerInfo());
+ void configure(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info = ActivationLayerInfo(),
+ bool enable_fast_math = false);
// Inherited methods overridden:
void run() override;
/** Static function to check if given info will lead to a valid configuration of @ref NEGEMMConvolutionLayer
*
- * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
- * while every optional dimension from 4 and above represent a batch of inputs.
- * Data types supported: F32.
- * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported:Same as @p input.
- * Currently only 3x3 and 5x5 kernels are supported.
- * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p weights.
- * @param[in] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
- * Data types supported: Same as @p input.
- * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. Currently only unit strides are supported.
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
+ * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
+ * while every optional dimension from 4 and above represent a batch of inputs.
+ * Data types supported: F32.
+ * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported:Same as @p input.
+ * Currently only 3x3 and 5x5 kernels are supported.
+ * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p weights.
+ * @param[in] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
+ * Data types supported: Same as @p input.
+ * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. Currently only unit strides are supported.
+ * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
+ * @param[in] enable_fast_math (Optional) Enable fast math computation. In case this flag were set, the function could dispatch the fastest implementation
+ * available which may introduce a drop of accuracy as well. Default is false
*
* @return a status
*/
static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
- const ActivationLayerInfo &act_info = ActivationLayerInfo());
+ const ActivationLayerInfo &act_info = ActivationLayerInfo(), bool enable_fast_math = false);
/** Prevent instances of this class from being copied (As this class contains pointers) */
NEWinogradConvolutionLayer(const NEWinogradConvolutionLayer &) = delete;