author    Georgios Pinitas <georgios.pinitas@arm.com>  2021-07-22 13:30:13 +0100
committer Georgios Pinitas <georgios.pinitas@arm.com>  2021-07-22 14:15:30 +0000
commit    69a9ac4e7b870a0904c14217c2c6093136b276e7 (patch)
tree      d1ff631e0c340ff5c71cfd7f735dd0b51f54a4d8
parent    1fa27ad2f3047a1e466f5757d64938ee8db90957 (diff)
download  ComputeLibrary-69a9ac4e7b870a0904c14217c2c6093136b276e7.tar.gz
Expose fast_math mode for GEMM through BFloat16
Resolves: COMPMID-4641

Signed-off-by: Georgios Pinitas <georgios.pinitas@arm.com>
Change-Id: I7ccc663b2692d40c370794caa906b5be8fd25a32
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5977
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
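Usage sketch (not part of the patch): the change inserts a new optional enable_fast_math argument between act_info and num_groups in NEGEMMConvolutionLayer::configure()/validate(), and NEConvolutionLayer forwards its own enable_fast_math flag to the GEMM path (see the NEConvolutionLayer.cpp hunk below). The helper below is illustrative only; tensor creation, shapes and allocation are assumed to happen elsewhere.

    // Illustrative helper (not from the patch): tensors are assumed to be
    // already configured and allocated by the caller.
    #include "arm_compute/core/ITensor.h"
    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h"

    void run_gemm_conv_fast_math(arm_compute::ITensor *src, arm_compute::ITensor *weights,
                                 arm_compute::ITensor *biases, arm_compute::ITensor *dst)
    {
        using namespace arm_compute;

        NEGEMMConvolutionLayer conv{};
        const PadStrideInfo    conv_info(1 /* stride_x */, 1 /* stride_y */, 0 /* pad_x */, 0 /* pad_y */);

        // enable_fast_math = true lets the function dispatch a faster, potentially
        // less accurate implementation (e.g. a BFloat16 GEMM path on capable
        // hardware); it defaults to false, preserving the previous behaviour.
        conv.configure(src, weights, biases, dst, conv_info,
                       WeightsInfo(), Size2D(1U, 1U), ActivationLayerInfo(),
                       /* enable_fast_math */ true /*, num_groups = 1 */);

        conv.run();
    }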
-rw-r--r--  arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h  69
-rw-r--r--  src/runtime/NEON/functions/NEConvolutionLayer.cpp              2
-rw-r--r--  src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp          8
-rw-r--r--  src/runtime/cpu/operators/CpuGemmConvolution.cpp              26
-rw-r--r--  src/runtime/cpu/operators/CpuGemmConvolution.h                79
5 files changed, 99 insertions(+), 85 deletions(-)
diff --git a/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h
index 2ebb80bef2..fe866dd941 100644
--- a/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h
@@ -76,46 +76,51 @@ public:
* |QASYMM8_SIGNED |QASYMM8_SIGNED |S32 |QASYMM8_SIGNED |
* |QASYMM8_SIGNED |QSYMM8_PER_CHANNEL |S32 |QASYMM8_SIGNED |
*
- * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
- * while every optional dimension from 4 and above represent a batch of inputs.
- * Data types supported: QASYMM8/QASYMM8_SIGNED/BFLOAT16/F16/F32.
- * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM].
- * Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/BFLOAT16/F16/F32.
- * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
- * Data type supported: Should match @p input data type, except for input of QASYMM8/QASYMM8_SIGNED type where biases should be of S32 type.
- * @param[out] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
- * Data types supported: Same as @p input.
- * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
- * @param[in] weights_info Specifies if the weights tensor has been reshaped with NEWeightsReshapeKernel. If this is not part of the fully connected layer the weights
- * tensor has also been transposed with cpu::kernels::CpuGemmTranspose1xWKernel. Data type supported: Same as @p input.
- * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported.
- * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is not supported
+ * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
+ * while every optional dimension from 4 and above represent a batch of inputs.
+ * Data types supported: QASYMM8/QASYMM8_SIGNED/BFLOAT16/F16/F32.
+ * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM].
+ * Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/BFLOAT16/F16/F32.
+ * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
+ * Data type supported: Should match @p input data type, except for input of QASYMM8/QASYMM8_SIGNED type where biases should be of S32 type.
+ * @param[out] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
+ * Data types supported: Same as @p input.
+ * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
+ * @param[in] weights_info Specifies if the weights tensor has been reshaped with NEWeightsReshapeKernel. If this is not part of the fully connected layer the weights
+ * tensor has also been transposed with cpu::kernels::CpuGemmTranspose1xWKernel. Data type supported: Same as @p input.
+ * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
+ * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported.
+ * @param[in] enable_fast_math (Optional) Enable fast math computation. In case this flag were set, the function could dispatch the fastest implementation
+ * available which may introduce a drop of accuracy as well. Default is false
+ * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is not supported
*/
void configure(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info = WeightsInfo(),
- const Size2D &dilation = Size2D(1U, 1U), const ActivationLayerInfo &act_info = ActivationLayerInfo(), unsigned int num_groups = 1);
+ const Size2D &dilation = Size2D(1U, 1U), const ActivationLayerInfo &act_info = ActivationLayerInfo(), bool enable_fast_math = false, unsigned int num_groups = 1);
/** Static function to check if given info will lead to a valid configuration of @ref NEGEMMConvolutionLayer
*
- * @param[in] input Source tensor info. 3 lower dimensions represent a single input [width, height, IFM],
- * while every optional dimension from 4 and above represent a batch of inputs.
- * Data types supported: QASYMM8/QASYMM8_SIGNED/BFLOAT16/F16/F32.
- * @param[in] weights Weights tensor info. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM].
- * Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/BFLOAT16/F16/F32.
- * @param[in] biases Biases tensor info. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
- * Data type supported: Should match @p input data type, except for input of QASYMM8/QASYMM8_SIGNED type where biases should be of S32 type.
- * @param[in] output Destination tensor info. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
- * Data types supported: Same as @p input.
- * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
- * @param[in] weights_info Specifies if the weights tensor has been reshaped with NEWeightsReshapeKernel. If this is not part of the fully connected layer the weights
- * tensor has also been transposed with cpu::kernels::CpuGemmTranspose1xWKernel. Data type supported: Same as @p input.
- * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported.
- * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is not supported
+ * @param[in] input Source tensor info. 3 lower dimensions represent a single input [width, height, IFM],
+ * while every optional dimension from 4 and above represent a batch of inputs.
+ * Data types supported: QASYMM8/QASYMM8_SIGNED/BFLOAT16/F16/F32.
+ * @param[in] weights Weights tensor info. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM].
+ * Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/BFLOAT16/F16/F32.
+ * @param[in] biases Biases tensor info. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
+ * Data type supported: Should match @p input data type, except for input of QASYMM8/QASYMM8_SIGNED type where biases should be of S32 type.
+ * @param[in] output Destination tensor info. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
+ * Data types supported: Same as @p input.
+ * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
+ * @param[in] weights_info Specifies if the weights tensor has been reshaped with NEWeightsReshapeKernel. If this is not part of the fully connected layer the weights
+ * tensor has also been transposed with cpu::kernels::CpuGemmTranspose1xWKernel. Data type supported: Same as @p input.
+ * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
+ * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported.
+ * @param[in] enable_fast_math (Optional) Enable fast math computation. In case this flag were set, the function could dispatch the fastest implementation
+ * available which may introduce a drop of accuracy as well. Default is false
+ * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is not supported
*
* @return a status
*/
static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
- const WeightsInfo &weights_info = WeightsInfo(), const Size2D &dilation = Size2D(1U, 1U), const ActivationLayerInfo &act_info = ActivationLayerInfo(), unsigned int num_groups = 1);
+ const WeightsInfo &weights_info = WeightsInfo(), const Size2D &dilation = Size2D(1U, 1U), const ActivationLayerInfo &act_info = ActivationLayerInfo(),
+ bool enable_fast_math = false, unsigned int num_groups = 1);
// Inherited methods overridden:
void run() override;
diff --git a/src/runtime/NEON/functions/NEConvolutionLayer.cpp b/src/runtime/NEON/functions/NEConvolutionLayer.cpp
index e88bd3b5d4..ade717805d 100644
--- a/src/runtime/NEON/functions/NEConvolutionLayer.cpp
+++ b/src/runtime/NEON/functions/NEConvolutionLayer.cpp
@@ -110,7 +110,7 @@ Status NEConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo
ARM_COMPUTE_RETURN_ON_ERROR(NEWinogradConvolutionLayer::validate(input, weights, biases, output, conv_info, act_info, enable_fast_math));
break;
case ConvolutionMethod::GEMM:
- ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMConvolutionLayer::validate(input, weights, biases, output, conv_info, weights_info, dilation, act_info));
+ ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMConvolutionLayer::validate(input, weights, biases, output, conv_info, weights_info, dilation, act_info, enable_fast_math));
break;
case ConvolutionMethod::GEMM_CONV2D:
ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMConv2d::validate(input, weights, biases, output, info));
diff --git a/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp b/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp
index 23ffbce954..c405786c80 100644
--- a/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp
+++ b/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp
@@ -56,12 +56,12 @@ NEGEMMConvolutionLayer::NEGEMMConvolutionLayer(const std::shared_ptr<IMemoryMana
NEGEMMConvolutionLayer::~NEGEMMConvolutionLayer() = default;
void NEGEMMConvolutionLayer::configure(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info,
- const Size2D &dilation, const ActivationLayerInfo &act_info, unsigned int num_groups)
+ const Size2D &dilation, const ActivationLayerInfo &act_info, bool enable_fast_math, unsigned int num_groups)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
_impl->weights = weights;
_impl->op = std::make_unique<cpu::CpuGemmConvolution>();
- _impl->op->configure(input->info(), weights->info(), (biases != nullptr ? biases->info() : nullptr), output->info(), conv_info, weights_info, dilation, act_info, num_groups);
+ _impl->op->configure(input->info(), weights->info(), (biases != nullptr ? biases->info() : nullptr), output->info(), conv_info, weights_info, dilation, act_info, enable_fast_math, num_groups);
_impl->run_pack =
{
@@ -80,9 +80,9 @@ void NEGEMMConvolutionLayer::configure(const ITensor *input, const ITensor *weig
}
Status NEGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
- const WeightsInfo &weights_info, const Size2D &dilation, const ActivationLayerInfo &act_info, unsigned int num_groups)
+ const WeightsInfo &weights_info, const Size2D &dilation, const ActivationLayerInfo &act_info, bool enable_fast_math, unsigned int num_groups)
{
- return cpu::CpuGemmConvolution::validate(input, weights, biases, output, conv_info, weights_info, dilation, act_info, num_groups);
+ return cpu::CpuGemmConvolution::validate(input, weights, biases, output, conv_info, weights_info, dilation, act_info, enable_fast_math, num_groups);
}
void NEGEMMConvolutionLayer::run()
diff --git a/src/runtime/cpu/operators/CpuGemmConvolution.cpp b/src/runtime/cpu/operators/CpuGemmConvolution.cpp
index fcdf8aa8f6..7defc13b20 100644
--- a/src/runtime/cpu/operators/CpuGemmConvolution.cpp
+++ b/src/runtime/cpu/operators/CpuGemmConvolution.cpp
@@ -58,15 +58,16 @@ CpuGemmConvolution::CpuGemmConvolution()
}
CpuGemmConvolution::~CpuGemmConvolution() = default;
-void CpuGemmConvolution::configure_mm(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, ITensorInfo *dst, const ActivationLayerInfo &act_info, int gemm_3d_depth)
+void CpuGemmConvolution::configure_mm(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, ITensorInfo *dst, const ActivationLayerInfo &act_info,
+ bool enable_fast_math, int gemm_3d_depth)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(src, weights);
- ARM_COMPUTE_ERROR_THROW_ON(validate_mm(src, weights, biases, dst, act_info, gemm_3d_depth, _skip_im2col));
+ ARM_COMPUTE_ERROR_THROW_ON(validate_mm(src, weights, biases, dst, act_info, enable_fast_math, gemm_3d_depth, _skip_im2col));
// Create GEMMInfo structure
const GEMMInfo &gemm_info = GEMMInfo(false, false, true /* Reshape weights only for the first run */,
gemm_3d_depth, _skip_im2col /* Reinterpret the input as 3D if im2col is skipped */,
- false, GEMMLowpOutputStageInfo(), false, false, false, act_info);
+ false, GEMMLowpOutputStageInfo(), false, enable_fast_math, false, act_info);
// Supported activations in GEMM
const std::set<ActivationLayerInfo::ActivationFunction> supported_acts = { ActivationLayerInfo::ActivationFunction::RELU,
@@ -115,7 +116,7 @@ void CpuGemmConvolution::configure_mm(const ITensorInfo *src, const ITensorInfo
quantization::calculate_quantized_multipliers(iqinfo, wqinfo, oqinfo, output_info);
_mm_gemmlowp = std::make_unique<CpuGemmLowpMatrixMultiplyCore>();
- _mm_gemmlowp->configure(&tmp_src, &tmp_weights, biases, dst, GEMMInfo(false, false, true, gemm_3d_depth, _skip_im2col, false, output_info, false, false, false, act_info));
+ _mm_gemmlowp->configure(&tmp_src, &tmp_weights, biases, dst, GEMMInfo(false, false, true, gemm_3d_depth, _skip_im2col, false, output_info, false, enable_fast_math, false, act_info));
auto mm_mem_req = _mm_gemmlowp->workspace();
for(unsigned int cont = 0; cont < mm_mem_req.size(); ++cont)
@@ -137,7 +138,7 @@ void CpuGemmConvolution::configure_mm(const ITensorInfo *src, const ITensorInfo
}
Status CpuGemmConvolution::validate_mm(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst,
- const ActivationLayerInfo &act_info, int gemm_3d_depth, bool skip_im2col)
+ const ActivationLayerInfo &act_info, bool enable_fast_math, int gemm_3d_depth, bool skip_im2col)
{
const DataType data_type = src->data_type();
const bool is_quantized = is_data_type_quantized_asymmetric(data_type);
@@ -146,7 +147,7 @@ Status CpuGemmConvolution::validate_mm(const ITensorInfo *src, const ITensorInfo
// Create GEMMInfo structure
const GEMMInfo gemm_info = GEMMInfo(false, false, true /* Reshape weights only for the first run */,
gemm_3d_depth, skip_im2col /* Reinterpret the input as 3D if im2col is skipped */,
- false, GEMMLowpOutputStageInfo(), false, false, false, act_info);
+ false, GEMMLowpOutputStageInfo(), false, enable_fast_math, false, act_info);
if(is_quantized)
{
@@ -186,8 +187,8 @@ Status CpuGemmConvolution::validate_mm(const ITensorInfo *src, const ITensorInfo
std::unique_ptr<ITensorInfo> weights_qa = weights->clone();
input_qa->set_quantization_info(QuantizationInfo(iqinfo.uniform().scale, -iqinfo.uniform().offset));
weights_qa->set_quantization_info(QuantizationInfo(wqinfo.uniform().scale, -wqinfo.uniform().offset));
- return CpuGemmLowpMatrixMultiplyCore::validate(input_qa.get(), weights_qa.get(), biases, dst, GEMMInfo(false, false, true, gemm_3d_depth, skip_im2col, false, output_info, false, false, false,
- act_info));
+ return CpuGemmLowpMatrixMultiplyCore::validate(input_qa.get(), weights_qa.get(), biases, dst, GEMMInfo(false, false, true, gemm_3d_depth, skip_im2col, false, output_info,
+ false, enable_fast_math, false, act_info));
}
else
{
@@ -211,7 +212,7 @@ Status CpuGemmConvolution::validate_gemm3d(const ITensorInfo *input_info, const
}
void CpuGemmConvolution::configure(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, ITensorInfo *dst, const PadStrideInfo &conv_info, const WeightsInfo &weights_info,
- const Size2D &dilation, const ActivationLayerInfo &act_info, unsigned int num_groups)
+ const Size2D &dilation, const ActivationLayerInfo &act_info, bool enable_fast_math, unsigned int num_groups)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(src, weights, dst);
ARM_COMPUTE_UNUSED(num_groups, weights_info);
@@ -223,6 +224,7 @@ void CpuGemmConvolution::configure(const ITensorInfo *src, const ITensorInfo *we
weights_info,
dilation,
act_info,
+ enable_fast_math,
num_groups));
const DataType data_type = src->data_type();
@@ -324,7 +326,7 @@ void CpuGemmConvolution::configure(const ITensorInfo *src, const ITensorInfo *we
// Configure GEMM
// In case we need to skip col2im, GEMM3D (gemm_3d_depth != 0) must be called in order to avoid reshaping the output matrix
const unsigned int gemm_3d_depth = _skip_col2im ? conv_h : 0;
- configure_mm(gemm_input_to_use, &_weights_reshaped, biases, gemm_output_to_use, act_info, gemm_3d_depth);
+ configure_mm(gemm_input_to_use, &_weights_reshaped, biases, gemm_output_to_use, act_info, enable_fast_math, gemm_3d_depth);
if(!_skip_col2im && _data_layout == DataLayout::NCHW)
{
@@ -346,7 +348,7 @@ void CpuGemmConvolution::configure(const ITensorInfo *src, const ITensorInfo *we
}
Status CpuGemmConvolution::validate(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst, const PadStrideInfo &conv_info,
- const WeightsInfo &weights_info, const Size2D &dilation, const ActivationLayerInfo &act_info, unsigned int num_groups)
+ const WeightsInfo &weights_info, const Size2D &dilation, const ActivationLayerInfo &act_info, bool enable_fast_math, unsigned int num_groups)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, weights, dst);
ARM_COMPUTE_RETURN_ERROR_ON_MSG(weights_info.are_reshaped(), "Weights already reshaped are not supported!");
@@ -470,7 +472,7 @@ Status CpuGemmConvolution::validate(const ITensorInfo *src, const ITensorInfo *w
}
info_gemm.set_quantization_info(dst->quantization_info()).set_data_layout(src->data_layout());
gemm_output_to_use = &info_gemm;
- ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(gemm_input_to_use, weights_to_use, biases, gemm_output_to_use, act_info, skip_col2im ? conv_h : 0, skip_im2col));
+ ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(gemm_input_to_use, weights_to_use, biases, gemm_output_to_use, act_info, enable_fast_math, skip_col2im ? conv_h : 0, skip_im2col));
// Validate Col2Im/ReshapeLayer
if(!skip_col2im && (data_layout == DataLayout::NCHW))
diff --git a/src/runtime/cpu/operators/CpuGemmConvolution.h b/src/runtime/cpu/operators/CpuGemmConvolution.h
index 8b41cb4a91..578586e7d1 100644
--- a/src/runtime/cpu/operators/CpuGemmConvolution.h
+++ b/src/runtime/cpu/operators/CpuGemmConvolution.h
@@ -87,24 +87,26 @@ public:
* |QASYMM8_SIGNED |QASYMM8_SIGNED |S32 |QASYMM8_SIGNED |
* |QASYMM8_SIGNED |QSYMM8_PER_CHANNEL |S32 |QASYMM8_SIGNED |
*
- * @param[in] src Source tensor info. 3 lower dimensions represent a single input [width, height, IFM],
- * while every optional dimension from 4 and above represent a batch of inputs.
- * Data types supported: QASYMM8/QASYMM8_SIGNED/BFLOAT16/F16/F32.
- * @param[in] weights Weights tensor info. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM].
- * Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/BFLOAT16/F16/F32.
- * @param[in] biases Biases tensor info. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
- * Data type supported: Should match @p input data type, except for input of QASYMM8/QASYMM8_SIGNED type where biases should be of S32 type.
- * @param[out] dst Destination tensor info. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
- * Data types supported: Same as @p input.
- * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
- * @param[in] weights_info Specifies if the weights tensor has been reshaped with NEWeightsReshapeKernel. If this is not part of the fully connected layer the weights
- * tensor has also been transposed with cpu::kernels::CpuGemmTranspose1xWKernel. Data type supported: Same as @p input.
- * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported.
- * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is not supported
+ * @param[in] src Source tensor info. 3 lower dimensions represent a single input [width, height, IFM],
+ * while every optional dimension from 4 and above represent a batch of inputs.
+ * Data types supported: QASYMM8/QASYMM8_SIGNED/BFLOAT16/F16/F32.
+ * @param[in] weights Weights tensor info. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM].
+ * Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/BFLOAT16/F16/F32.
+ * @param[in] biases Biases tensor info. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
+ * Data type supported: Should match @p input data type, except for input of QASYMM8/QASYMM8_SIGNED type where biases should be of S32 type.
+ * @param[out] dst Destination tensor info. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
+ * Data types supported: Same as @p input.
+ * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
+ * @param[in] weights_info Specifies if the weights tensor has been reshaped with NEWeightsReshapeKernel. If this is not part of the fully connected layer the weights
+ * tensor has also been transposed with cpu::kernels::CpuGemmTranspose1xWKernel. Data type supported: Same as @p input.
+ * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
+ * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported.
+ * @param[in] enable_fast_math (Optional) Enable fast math computation. In case this flag were set, the function could dispatch the fastest implementation
+ * available which may introduce a drop of accuracy as well. Default is false
+ * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is not supported
*/
void configure(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, ITensorInfo *dst, const PadStrideInfo &conv_info, const WeightsInfo &weights_info = WeightsInfo(),
- const Size2D &dilation = Size2D(1U, 1U), const ActivationLayerInfo &act_info = ActivationLayerInfo(), unsigned int num_groups = 1);
+ const Size2D &dilation = Size2D(1U, 1U), const ActivationLayerInfo &act_info = ActivationLayerInfo(), bool enable_fast_math = false, unsigned int num_groups = 1);
/** Static function to check if given info will lead to a valid configuration
*
* Similar to CpuGemmConvolution::configure()
@@ -112,7 +114,8 @@ public:
* @return a status
*/
static Status validate(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
- const WeightsInfo &weights_info = WeightsInfo(), const Size2D &dilation = Size2D(1U, 1U), const ActivationLayerInfo &act_info = ActivationLayerInfo(), unsigned int num_groups = 1);
+ const WeightsInfo &weights_info = WeightsInfo(), const Size2D &dilation = Size2D(1U, 1U), const ActivationLayerInfo &act_info = ActivationLayerInfo(),
+ bool enable_fast_math = false, unsigned int num_groups = 1);
// Inherited methods overridden:
void run(ITensorPack &tensors) override;
@@ -122,33 +125,37 @@ public:
private:
/** Configures the appropriate matrix multiply routine
*
- * @param[in] src Input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/BFLOAT16/F16/F32.
- * @param[in] weights Weights tensor info. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/BFLOAT16/F16/F32.
- * @param[in] biases Biases tensor info. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
- * Data type supported: Should match @p input data type, except for input of QASYMM8/QASYMM8_SIGNED type where biases should be of S32 type.
- * @param[out] dst Output tensor info. Data types supported: Same as @p input,
- * except for input of QASYMM8/QASYMM8_SIGNED type where output should be of S32 type.
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported.
- * @param[in] gemm_3d_depth (Optional) Depth of GEMM 3D (Defaults to 1)
+ * @param[in] src Input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/BFLOAT16/F16/F32.
+ * @param[in] weights Weights tensor info. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/BFLOAT16/F16/F32.
+ * @param[in] biases Biases tensor info. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
+ * Data type supported: Should match @p input data type, except for input of QASYMM8/QASYMM8_SIGNED type where biases should be of S32 type.
+ * @param[out] dst Output tensor info. Data types supported: Same as @p input,
+ * except for input of QASYMM8/QASYMM8_SIGNED type where output should be of S32 type.
+ * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported.
+ * @param[in] enable_fast_math (Optional) Enable fast math computation. In case this flag were set, the function could dispatch the fastest implementation
+ * available which may introduce a drop of accuracy as well. Default is false
+ * @param[in] gemm_3d_depth (Optional) Depth of GEMM 3D (Defaults to 1)
*/
void configure_mm(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, ITensorInfo *output, const ActivationLayerInfo &act_info = ActivationLayerInfo(),
- int gemm_3d_depth = 1);
+ bool enable_fast_math = false, int gemm_3d_depth = 1);
/** Static function to check if given info will lead to a valid configuration of @ref NEGEMMConvolutionLayer matrix multiply routines
*
- * @param[in] src Input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/BFLOAT16/F16/F32.
- * @param[in] weights Weights tensor info. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/BFLOAT16/F16/F32.
- * @param[in] biases Biases tensor info. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
- * Data type supported: Should match @p input data type, except for input of QASYMM8/QASYMM8_SIGNED type where biases should be of S32 type.
- * @param[in] dst Output tensor info. Data types supported: Same as @p input,
- * except for input of QASYMM8/QASYMM8_SIGNED type where output should be of S32 type.
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported.
- * @param[in] gemm_3d_depth (Optional) Depth of GEMM 3D (Defaults to 1)
- * @param[in] skip_im2col (Optional) Flag which specifies if im2col has to be skipped. i.e. 1x1 convolution with NHWC data layout. (Default to false)
+ * @param[in] src Input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/BFLOAT16/F16/F32.
+ * @param[in] weights Weights tensor info. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/BFLOAT16/F16/F32.
+ * @param[in] biases Biases tensor info. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
+ * Data type supported: Should match @p input data type, except for input of QASYMM8/QASYMM8_SIGNED type where biases should be of S32 type.
+ * @param[in] dst Output tensor info. Data types supported: Same as @p input,
+ * except for input of QASYMM8/QASYMM8_SIGNED type where output should be of S32 type.
+ * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported.
+ * @param[in] enable_fast_math (Optional) Enable fast math computation. In case this flag were set, the function could dispatch the fastest implementation
+ * available which may introduce a drop of accuracy as well. Default is false
+ * @param[in] gemm_3d_depth (Optional) Depth of GEMM 3D (Defaults to 1)
+ * @param[in] skip_im2col (Optional) Flag which specifies if im2col has to be skipped. i.e. 1x1 convolution with NHWC data layout. (Default to false)
*
* @return a status
*/
static Status validate_mm(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst, const ActivationLayerInfo &act_info = ActivationLayerInfo(),
- int gemm_3d_depth = 1, bool skip_im2col = false);
+ bool enable_fast_math = false, int gemm_3d_depth = 1, bool skip_im2col = false);
/** Static function to check if GEMM3D is supported in @ref NEGEMM or in @ref CpuGemmMLowpMatrixMultiplyCore
*
* @param[in] src Input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/BFLOAT16/F16/F32.