author      Georgios Pinitas <georgios.pinitas@arm.com>    2020-03-06 18:12:09 +0000
committer   Georgios Pinitas <georgios.pinitas@arm.com>    2020-03-12 12:12:30 +0000
commit      c7b183ab741650653289f8ce3bdeb4926521fdbd (patch)
tree        991e9f20340c91c288d52d8f9a64a3729e4a40b0 /arm_compute
parent      6800117df3be825f0ec5c6cc71c4377322f51b99 (diff)
download    ComputeLibrary-c7b183ab741650653289f8ce3bdeb4926521fdbd.tar.gz
COMPMID-3160: Add Bfloat16 support in NEGEMMConvolutionLayer
Signed-off-by: Georgios Pinitas <georgios.pinitas@arm.com>
Change-Id: I0e449306c138a562ffc1455e76ec44b2fd059d85
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/2860
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'arm_compute')
-rw-r--r--   arm_compute/core/NEON/kernels/NEIm2ColKernel.h                 6
-rw-r--r--   arm_compute/core/NEON/kernels/NEWeightsReshapeKernel.h         6
-rw-r--r--   arm_compute/core/Utils.h                                       2
-rw-r--r--   arm_compute/runtime/NEON/functions/NEGEMM.h                    6
-rw-r--r--   arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h   42
-rw-r--r--   arm_compute/runtime/NEON/functions/NEIm2Col.h                  8
6 files changed, 40 insertions, 30 deletions
diff --git a/arm_compute/core/NEON/kernels/NEIm2ColKernel.h b/arm_compute/core/NEON/kernels/NEIm2ColKernel.h
index 292e941272..1c358b379d 100644
--- a/arm_compute/core/NEON/kernels/NEIm2ColKernel.h
+++ b/arm_compute/core/NEON/kernels/NEIm2ColKernel.h
@@ -77,7 +77,8 @@ public:
/** Set the input and output of the kernel.
*
* @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM],
- * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32
+ * while every optional dimension from 4 and above represent a batch of inputs.
+ * Data types supported: QASYMM8/QASYMM8_SIGNED/BFLOAT16/F16/F32
* Note: QASYMM8 works only for has_bias = false
* @param[out] output The output tensor. Data types supported: Same as @p input
* @param[in] kernel_dims The kernel dimensions (width and height).
@@ -91,7 +92,8 @@ public:
/** Static function to check if given info will lead to a valid configuration of @ref NEIm2ColKernel
*
* @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM],
- * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32
+ * while every optional dimension from 4 and above represent a batch of inputs.
+ * Data types supported: QASYMM8/QASYMM8_SIGNED/BFLOAT16/F16/F32
* Note: QASYMM8 works only for has_bias = false
* @param[in] output The output tensor. Data types supported: Same as @p input
* @param[in] kernel_dims The kernel dimensions (width and height).
diff --git a/arm_compute/core/NEON/kernels/NEWeightsReshapeKernel.h b/arm_compute/core/NEON/kernels/NEWeightsReshapeKernel.h
index 46be5233eb..b68cb50c7b 100644
--- a/arm_compute/core/NEON/kernels/NEWeightsReshapeKernel.h
+++ b/arm_compute/core/NEON/kernels/NEWeightsReshapeKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -76,7 +76,7 @@ public:
*
* @param[in] input The input tensor to convert. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM] if shared,
* and 5D tensor with dimensions [kernel_x, kernel_y, IFM, OFM, num_patches] if unshared.
- * Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/FP16/F32
+ * Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/BFLOAT16/FP16/F32
* @param[in] bias The shared biases tensor to append. Bias is 1D tensor with dimensions [OFM] if shared and 2D tensor with
* dimensions [OFM, num_patches] if unshared. Data types supported: Same as @p input
* @warning Appending biases to weights reshaped matrix is not supported for quantized asymmetric types.
@@ -87,7 +87,7 @@ public:
*
* @param[in] input The input tensor to convert. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM] if shared,
* and 5D tensor with dimensions [kernel_x, kernel_y, IFM, OFM, num_patches] if unshared.
- * Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/F16/F32
+ * Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/BFLOAT16/F16/F32
* @param[in] biases The shared biases tensor to append. Bias is 1D tensor with dimensions [OFM] if shared and 2D tensor with
* dimensions [OFM, num_patches] if unshared. Data types supported: Same as @p input
* @warning Appending biases to weights reshaped matrix is not supported for quantized asymmetric types.
diff --git a/arm_compute/core/Utils.h b/arm_compute/core/Utils.h
index 8577046af0..eff6157b1f 100644
--- a/arm_compute/core/Utils.h
+++ b/arm_compute/core/Utils.h
@@ -1342,7 +1342,7 @@ void print_consecutive_elements_impl(std::ostream &s, const T *ptr, unsigned int
}
else if(std::is_same<typename std::decay<T>::type, bfloat16>::value)
{
- // We use T instead of print_type here is because the std::is_floating_point<bfloat> returns false and then the print_type becomes int.
+ // We use T instead of print_type here because std::is_floating_point<bfloat16> returns false, so print_type would become int.
s << std::right << float(ptr[i]) << element_delim;
}
else
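For context on the comment above: std::is_floating_point is only true for the built-in floating-point types, so a class type such as bfloat16 reports false and a trait-driven print type would fall back to int, which is why the value is cast to float before printing. A minimal, self-contained sketch (my_bf16 is a hypothetical stand-in for arm_compute::bfloat16):

```cpp
#include <cstdint>
#include <type_traits>

// Hypothetical stand-in for arm_compute::bfloat16, which wraps 16 bits of storage.
struct my_bf16
{
    std::uint16_t bits{0};
};

// Class types are never "floating point" to the standard trait, so code that selects a
// print type via std::is_floating_point would pick int for bfloat16; converting the
// value to float first sidesteps that.
static_assert(!std::is_floating_point<my_bf16>::value, "user-defined types report false");
static_assert(std::is_floating_point<float>::value, "built-in float reports true");

int main()
{
    return 0;
}
```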
diff --git a/arm_compute/runtime/NEON/functions/NEGEMM.h b/arm_compute/runtime/NEON/functions/NEGEMM.h
index f08bd9fac5..c87e806d0c 100644
--- a/arm_compute/runtime/NEON/functions/NEGEMM.h
+++ b/arm_compute/runtime/NEON/functions/NEGEMM.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -74,7 +74,7 @@ public:
* @note GEMM: General Matrix Multiply - [alpha * A * B + beta * C].
* @note GEMM: The tensors a, b, c, d must have the same data type. You should not mix data types when calling this function.
*
- * @param[in] a First input tensor (Matrix A or Vector A). Data type supported: F16/F32
+ * @param[in] a First input tensor (Matrix A or Vector A). Data type supported: BFLOAT16/F16/F32
* @param[in] b Second input tensor (Matrix B). Data type supported: same as @p a
* @param[in] c Third input tensor (Matrix C). It can be a nullptr if just the multiplication between @p a and @p b is needed. Data type supported: same as @p a
* @param[out] d Output tensor. Data type supported: same as @p a
@@ -86,7 +86,7 @@ public:
void configure(const ITensor *a, const ITensor *b, const ITensor *c, ITensor *d, float alpha, float beta, const GEMMInfo &gemm_info = GEMMInfo());
/** Static function to check if given info will lead to a valid configuration of @ref NEGEMM.
*
- * @param[in] a First input tensor info (Matrix or Vector A). Data types supported: F16/F32
+ * @param[in] a First input tensor info (Matrix or Vector A). Data types supported: BFLOAT16/F16/F32
* @param[in] b Second input tensor info (Matrix B). Data type supported: same as @p a.
* @param[in] c Third input tensor info (Matrix C). It can be a nullptr if just the multiplication between @p a and @p b is needed. Data type supported: same as @p a.
* @param[out] output Output tensor info. Data type supported: same as @p a
diff --git a/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h
index 660f55953e..5368384b19 100644
--- a/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -63,18 +63,22 @@ public:
NEConvolutionLayerReshapeWeights &operator=(NEConvolutionLayerReshapeWeights &&) = default;
/** Set the input and output tensors.
*
- * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/F16/F32.
+ * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM].
+ * Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/BFLOAT16/F16/F32.
* @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
- * Data type supported: Same as @p weights, S32 if @p weights is QASYMM8/QASYMM8_SIGNED.
- * @param[out] output Destination tensor. Data types supported: Same as @p weights.
+ * Data type supported: Same as @p weights, S32 if @p weights is QASYMM8/QASYMM8_SIGNED, FP32 if @p weights is BFLOAT16
+ * @param[out] output Destination tensor.
+ * Data types supported: Same as @p weights, FP32 if @p weights is BFLOAT16
*/
void configure(const ITensor *weights, const ITensor *biases, ITensor *output);
/** Static function to check if given info will lead to a valid configuration of @ref NEConvolutionLayerReshapeWeights
*
- * @param[in] weights Weights tensor info. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/F16/F32.
+ * @param[in] weights Weights tensor info. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM].
+ * Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/BFLOAT16/F16/F32.
* @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
- * Data type supported: Same as @p weights, S32 if @p weights is QASYMM8/QASYMM8_SIGNED.
- * @param[in] output Destination tensor. Data types supported: Same as @p weights.
+ * Data type supported: Same as @p weights, S32 if @p weights is QASYMM8/QASYMM8_SIGNED, FP32 if @p weights is BFLOAT16
+ * @param[in] output Destination tensor.
+ * Data types supported: Same as @p weights, FP32 if @p weights is BFLOAT16
*
* @return an error status
*/
@@ -136,7 +140,7 @@ private:
/** Basic function to compute the convolution layer. This function calls the following NEON kernels/functions:
*
* -# @ref NEIm2ColKernel
- * -# @ref NEGEMM (if the data type is FP32 or FP16)
+ * -# @ref NEGEMM (if the data type is BFLOAT16/FP16/FP32)
* -# @ref NEGEMMLowpMatrixMultiplyCore (if the data type is QASYMM8/QASYMM8_SIGNED)
* -# @ref NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint (if the data type is QASYMM8/QASYMM8_SIGNED)
* -# @ref NEArithmeticAdditionKernel (if biases != nullptr and we have a 1x1 convolution with the NHWC data layout)
@@ -160,8 +164,9 @@ public:
*
* @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
* while every optional dimension from 4 and above represent a batch of inputs.
- * Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
- * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/F16/F32.
+ * Data types supported: QASYMM8/QASYMM8_SIGNED/BFLOAT16/F16/F32.
+ * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM].
+ * Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/BFLOAT16/F16/F32.
* @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
* Data type supported: Should match @p input data type, except for input of QASYMM8/QASYMM8_SIGNED type where biases should be of S32 type.
* @param[out] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
@@ -179,8 +184,9 @@ public:
*
* @param[in] input Source tensor info. 3 lower dimensions represent a single input [width, height, IFM],
* while every optional dimension from 4 and above represent a batch of inputs.
- * Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
- * @param[in] weights Weights tensor info. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/F16/F32.
+ * Data types supported: QASYMM8/QASYMM8_SIGNED/BFLOAT16/F16/F32.
+ * @param[in] weights Weights tensor info. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM].
+ * Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/BFLOAT16/F16/F32.
* @param[in] biases Biases tensor info. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
* Data type supported: Should match @p input data type, except for input of QASYMM8/QASYMM8_SIGNED type where biases should be of S32 type.
* @param[in] output Destination tensor info. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
@@ -204,8 +210,8 @@ public:
private:
/** Configures the appropriate matrix multiply routine
*
- * @param[in] input Input tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
- * @param[in] weights Weights tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/F16/F32.
+ * @param[in] input Input tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/BFLOAT16/F16/F32.
+ * @param[in] weights Weights tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/BFLOAT16/F16/F32.
* @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
* Data type supported: Should match @p input data type, except for input of QASYMM8/QASYMM8_SIGNED type where biases should be of S32 type.
* @param[out] output Output tensor. Data types supported: Same as @p input,
@@ -216,8 +222,8 @@ private:
void configure_mm(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const ActivationLayerInfo &act_info = ActivationLayerInfo(), int gemm_3d_depth = 1);
/** Static function to check if given info will lead to a valid configuration of @ref NEGEMMConvolutionLayer matrix multiply routines
*
- * @param[in] input Input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
- * @param[in] weights Weights tensor info. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/F16/F32.
+ * @param[in] input Input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/BFLOAT16/F16/F32.
+ * @param[in] weights Weights tensor info. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/BFLOAT16/F16/F32.
* @param[in] biases Biases tensor info. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
* Data type supported: Should match @p input data type, except for input of QASYMM8/QASYMM8_SIGNED type where biases should be of S32 type.
* @param[in] output Output tensor info. Data types supported: Same as @p input,
@@ -232,8 +238,8 @@ private:
int gemm_3d_depth = 1, bool skip_im2col = false);
/** Static function to check if GEMM3D is supported in @ref NEGEMM or in @ref NEGEMMLowpMatrixMultiplyCore
*
- * @param[in] input_info Input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
- * @param[in] weights_info Weights tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
+ * @param[in] input_info Input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/BFLOAT16/F16/F32.
+ * @param[in] weights_info Weights tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/BFLOAT16/F16/F32.
* @param[in] act_info Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported.
* @param[in] gemm_3d_depth Depth of GEMM 3D
* @param[in] skip_im2col Flag which specifies if im2col has to be skipped. i.e. 1x1 convolution with NHWC data layout
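For reference, a minimal sketch of configuring NEGEMMConvolutionLayer with BFLOAT16 input and weights; the shapes, the omitted bias, and the assumed F32 destination are illustrative only and do not claim a guaranteed-valid combination:

```cpp
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

int main()
{
    Tensor src, weights, dst;

    // Hypothetical NCHW shapes: 32x32 input with 16 channels, 3x3 kernel, 8 output feature maps.
    src.allocator()->init(TensorInfo(TensorShape(32U, 32U, 16U), 1, DataType::BFLOAT16));
    weights.allocator()->init(TensorInfo(TensorShape(3U, 3U, 16U, 8U), 1, DataType::BFLOAT16));
    dst.allocator()->init(TensorInfo(TensorShape(32U, 32U, 8U), 1, DataType::F32)); // assumed F32 output

    NEGEMMConvolutionLayer conv;
    // Stride 1, padding 1 keeps the spatial size; no bias tensor for simplicity.
    conv.configure(&src, &weights, nullptr, &dst, PadStrideInfo(1, 1, 1, 1));

    src.allocator()->allocate();
    weights.allocator()->allocate();
    dst.allocator()->allocate();

    // ... fill src and weights ...

    conv.run();
    return 0;
}
```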
diff --git a/arm_compute/runtime/NEON/functions/NEIm2Col.h b/arm_compute/runtime/NEON/functions/NEIm2Col.h
index ac1b2674a5..cb905a3652 100644
--- a/arm_compute/runtime/NEON/functions/NEIm2Col.h
+++ b/arm_compute/runtime/NEON/functions/NEIm2Col.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -44,7 +44,8 @@ public:
/** Configure the im2col NEON kernel
*
* @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM],
- * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32
+ * while every optional dimension from 4 and above represent a batch of inputs.
+ * Data types supported: QASYMM8/QASYMM8_SIGNED/BFLOAT16/F16/F32
* Note: QASYMM8 works only for has_bias = false
* @param[out] output The output tensor. Data types supported: Same as @p input
* @param[in] kernel_dims The kernel dimensions (width and height).
@@ -58,7 +59,8 @@ public:
/** Static function to check if given info will lead to a valid configuration of @ref NEIm2Col
*
* @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM],
- * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32
+ * while every optional dimension from 4 and above represent a batch of inputs.
+ * Data types supported: QASYMM8/QASYMM8_SIGNED/BFLOAT16/F16/F32
* Note: QASYMM8 works only for has_bias = false
* @param[in] output The output tensor. Data types supported: Same as @p input
* @param[in] kernel_dims The kernel dimensions (width and height).
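To round off, a sketch of validating an NEIm2Col configuration for a BFLOAT16 input; the hand-worked output shape (3x3 kernel over 16 channels with no bias gives 144 columns, and a 32x32 input with stride 1 and padding 1 gives 1024 convolution positions) is an assumption for illustration, and real code would normally derive it with the library's shape-calculation helpers:

```cpp
#include <iostream>

#include "arm_compute/core/Error.h"
#include "arm_compute/core/Size2D.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/functions/NEIm2Col.h"

using namespace arm_compute;

int main()
{
    // Hypothetical NCHW input: 32x32 with 16 channels, single batch.
    const TensorInfo src_info(TensorShape(32U, 32U, 16U), 1, DataType::BFLOAT16);

    // Assumed im2col layout: one column per kernel element and channel (3*3*16 = 144),
    // one row per convolution position (32*32 = 1024 with stride 1 and padding 1).
    const TensorInfo dst_info(TensorShape(144U, 1024U), 1, DataType::BFLOAT16);

    const Status status = NEIm2Col::validate(&src_info, &dst_info, Size2D(3U, 3U),
                                             PadStrideInfo(1, 1, 1, 1), /* has_bias = */ false);

    if(status.error_code() == ErrorCode::OK)
    {
        std::cout << "im2col configuration accepted" << std::endl;
    }
    else
    {
        std::cout << "im2col configuration rejected: " << status.error_description() << std::endl;
    }
    return 0;
}
```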