From afc630fee1c019bfbc191c37d9d7fdf805b0b1d7 Mon Sep 17 00:00:00 2001
From: Georgios Pinitas
Date: Mon, 30 Mar 2020 14:09:27 +0100
Subject: COMPMID-3069: Resolve Bfloat16 validation issues.

Signed-off-by: Georgios Pinitas
Change-Id: Ic1c610cc01064071a7ff8b69ea598c4b0ddbe0ff
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/2953
Reviewed-by: Sang-Hoon Park
Reviewed-by: Pablo Marquez
Comments-Addressed: Arm Jenkins
Tested-by: Arm Jenkins
---
 src/runtime/NEON/functions/NEGEMM.cpp | 9 +++++++--
 src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp | 5 +++++
 tests/validation/reference/GEMMLowp.cpp | 4 +++-
 tests/validation/reference/QLSTMLayerNormalization.cpp | 4 +++-
 4 files changed, 18 insertions(+), 4 deletions(-)

diff --git a/src/runtime/NEON/functions/NEGEMM.cpp b/src/runtime/NEON/functions/NEGEMM.cpp
index 873145de12..2bd459a389 100644
--- a/src/runtime/NEON/functions/NEGEMM.cpp
+++ b/src/runtime/NEON/functions/NEGEMM.cpp
@@ -175,17 +175,22 @@ Status NEGEMM::validate(const ITensorInfo *a, const ITensorInfo *b, const ITenso
     const bool is_c_bias = gemm_info.reshape_b_only_on_first_run();
 
     ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(a);
+    ARM_COMPUTE_RETURN_ERROR_ON_CPU_BF16_UNSUPPORTED(a);
     ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(a, 1, DataType::BFLOAT16, DataType::F16, DataType::F32);
-    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(a, b, output);
+    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(a, b);
     ARM_COMPUTE_RETURN_ERROR_ON_MSG(a->dimension(0) != b->dimension(1), "The product AB is defined only if the number of columns in A is equal to the number of rows in B");
     ARM_COMPUTE_RETURN_ERROR_ON_MSG(gemm_info.is_a_reshaped(), "Matrix A already reshaped is not supported");
     ARM_COMPUTE_RETURN_ERROR_ON_MSG(gemm_info.is_b_reshaped(), "Matrix B already reshaped is not supported");
+    if(a->data_type() != DataType::BFLOAT16)
+    {
+        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(a, output);
+    }
 
     if(c != nullptr && !is_c_bias)
     {
         ARM_COMPUTE_RETURN_ERROR_ON(gemm_info.depth_output_gemm3d() != 0);
         ARM_COMPUTE_RETURN_ERROR_ON(gemm_info.reinterpret_input_as_3d());
-        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(a, c);
+        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(c, output);
         ARM_COMPUTE_RETURN_ERROR_ON_MSG(a->dimension(1) != c->dimension(1), "The C matrix must have the same number of rows as the matrix A");
         ARM_COMPUTE_RETURN_ERROR_ON_MSG(b->dimension(0) != c->dimension(0), "The C matrix must have the same number of columns as the matrix B");
     }
diff --git a/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp b/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp
index f6dc3a8f43..a41d23f8d7 100644
--- a/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp
+++ b/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp
@@ -418,6 +418,7 @@ Status NEGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorI
 
     const bool append_bias = false;
     const bool is_quantized = is_data_type_quantized_asymmetric(data_type);
+    const bool is_bf16 = data_type == DataType::BFLOAT16;
     bool skip_im2col = (data_layout == DataLayout::NHWC && kernel_width == 1 && kernel_height == 1 && conv_info.stride().first == 1 && conv_info.stride().second == 1);
 
     // Get convolved dimensions
@@ -463,6 +464,10 @@ Status NEGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorI
         {
             ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(biases, 1, DataType::S32);
         }
+        else if(is_bf16)
+        {
+            ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(biases, 1, DataType::F32);
+        }
         else
         {
             ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, biases);
diff --git a/tests/validation/reference/GEMMLowp.cpp b/tests/validation/reference/GEMMLowp.cpp
index 61617c8aae..36d86d1532 100644
--- a/tests/validation/reference/GEMMLowp.cpp
+++ b/tests/validation/reference/GEMMLowp.cpp
@@ -26,6 +26,8 @@
 #include "arm_compute/core/Types.h"
 #include "tests/validation/reference/UtilsQuantizedAsymm.h"
 
+#include "support/ToolchainSupport.h"
+
 #include
 
 namespace arm_compute
@@ -152,7 +154,7 @@ void quantize_down_scale_by_float(const SimpleTensor *in, const SimpleTenso
         const float_t multiplier = (is_per_channel) ? result_real_multiplier[i % cols_in] : result_real_multiplier[0];
 
         float_t result_f = static_cast(result) * multiplier + static_cast(result_offset);
-        result = static_cast(std::round(result_f));
+        result = static_cast(support::cpp11::round(result_f));
 
         // Bounded ReLu
         if(min != max)
diff --git a/tests/validation/reference/QLSTMLayerNormalization.cpp b/tests/validation/reference/QLSTMLayerNormalization.cpp
index 6764a81617..90d59b93ad 100644
--- a/tests/validation/reference/QLSTMLayerNormalization.cpp
+++ b/tests/validation/reference/QLSTMLayerNormalization.cpp
@@ -28,6 +28,8 @@
 #include "PixelWiseMultiplication.h"
 #include "src/core/utils/quantization/AsymmHelpers.cpp"
 
+#include "support/ToolchainSupport.h"
+
 namespace arm_compute
 {
 namespace test
@@ -79,7 +81,7 @@ SimpleTensor qlstm_layer_normalization(const SimpleTensor &src
 
     for(int i = 0; i < output.num_elements(); i++)
     {
-        const auto output_val_s32 = static_cast(std::round(output_float[i] * std::pow(2, 12)));
+        const auto output_val_s32 = static_cast(support::cpp11::round(output_float[i] * std::pow(2, 12)));
         output[i] = utility::clamp(output_val_s32, std::numeric_limits::min());
     }
 
-- 
cgit v1.2.1