From c7b183ab741650653289f8ce3bdeb4926521fdbd Mon Sep 17 00:00:00 2001
From: Georgios Pinitas
Date: Fri, 6 Mar 2020 18:12:09 +0000
Subject: COMPMID-3160: Add Bfloat16 support in NEGEMMConvolutionLayer

Signed-off-by: Georgios Pinitas
Change-Id: I0e449306c138a562ffc1455e76ec44b2fd059d85
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/2860
Tested-by: Arm Jenkins
Reviewed-by: Michele Di Giorgio
Comments-Addressed: Arm Jenkins
---
 src/runtime/NEON/functions/NEGEMM.cpp                 |  4 ++--
 src/runtime/NEON/functions/NEGEMMAssemblyDispatch.cpp | 14 ++++++++++----
 src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp | 14 ++++++++------
 3 files changed, 20 insertions(+), 12 deletions(-)

diff --git a/src/runtime/NEON/functions/NEGEMM.cpp b/src/runtime/NEON/functions/NEGEMM.cpp
index baa22b7d32..be964457fc 100644
--- a/src/runtime/NEON/functions/NEGEMM.cpp
+++ b/src/runtime/NEON/functions/NEGEMM.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -175,7 +175,7 @@ Status NEGEMM::validate(const ITensorInfo *a, const ITensorInfo *b, const ITenso
     const bool is_c_bias = gemm_info.reshape_b_only_on_first_run();
 
     ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(a);
-    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(a, 1, DataType::F16, DataType::F32);
+    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(a, 1, DataType::BFLOAT16, DataType::F16, DataType::F32);
     ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(a, b, output);
     ARM_COMPUTE_RETURN_ERROR_ON_MSG(a->dimension(0) != b->dimension(1), "The product AB is defined only if the number of columns in A is equal to the number of rows in B");
     ARM_COMPUTE_RETURN_ERROR_ON_MSG(gemm_info.is_a_reshaped(), "Matrix A already reshaped is not supported");
diff --git a/src/runtime/NEON/functions/NEGEMMAssemblyDispatch.cpp b/src/runtime/NEON/functions/NEGEMMAssemblyDispatch.cpp
index 91b91d6668..a3080e7f29 100644
--- a/src/runtime/NEON/functions/NEGEMMAssemblyDispatch.cpp
+++ b/src/runtime/NEON/functions/NEGEMMAssemblyDispatch.cpp
@@ -505,17 +505,17 @@ NEGEMMAssemblyDispatch::NEGEMMAssemblyDispatch(std::shared_ptr<IMemoryManager> m
 Status NEGEMMAssemblyDispatch::validate(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *d, const GEMMInfo &gemm_info)
 {
-    ARM_COMPUTE_UNUSED(gemm_info);
-    ARM_COMPUTE_UNUSED(c);
+    ARM_COMPUTE_UNUSED(gemm_info, c);
     ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(a, b, d);
     ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(a);
+    ARM_COMPUTE_RETURN_ERROR_ON_CPU_BF16_UNSUPPORTED(a);
 #ifndef __aarch64__
     ARM_COMPUTE_RETURN_ERROR_ON_MSG(a->element_size() == 1, "8bit integer types only supported for aarch64");
 #endif /* __aarch64__ */
     ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(a, 1, DataType::U8, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::S8,
-                                                         DataType::F16, DataType::F32);
+                                                         DataType::BFLOAT16, DataType::F16, DataType::F32);
     ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(b, 1, DataType::U8, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::QSYMM8_PER_CHANNEL, DataType::S8,
-                                                         DataType::F16, DataType::F32);
+                                                         DataType::BFLOAT16, DataType::F16, DataType::F32);
     if(is_data_type_quantized_per_channel(b->data_type()))
     {
         ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(a, 1, DataType::QASYMM8_SIGNED, DataType::S8);
@@ -526,6 +526,7 @@ Status NEGEMMAssemblyDispatch::validate(const ITensorInfo *a, const ITensorInfo
     }
     ARM_COMPUTE_RETURN_ERROR_ON_MSG(a->data_type() == DataType::F32 && d->data_type() != DataType::F32, "Only F32 output supported for F32 input");
     ARM_COMPUTE_RETURN_ERROR_ON_MSG(a->data_type() == DataType::F16 && d->data_type() != DataType::F16, "Only F16 output supported for F16 input");
+    ARM_COMPUTE_RETURN_ERROR_ON_MSG(a->data_type() == DataType::BFLOAT16 && d->data_type() != DataType::F32, "Only F32 output supported for BFLOAT16 input");
     ARM_COMPUTE_RETURN_ERROR_ON_MSG(a->data_type() == DataType::U8 && d->data_type() != DataType::U32, "Only U32 output supported for U8 input");
     ARM_COMPUTE_RETURN_ERROR_ON_MSG(a->data_type() == DataType::S8 && d->data_type() != DataType::S32, "Only S32 output supported for S8 input");
     ARM_COMPUTE_RETURN_ERROR_ON_MSG(a->data_type() == DataType::QASYMM8 && d->data_type() != DataType::QASYMM8, "Only QASYMM8 output supported for QASYMM8 input");
@@ -578,6 +579,11 @@ void NEGEMMAssemblyDispatch::configure(const ITensor *a, const ITensor *b, const
         }
         break;
 #endif /* __aarch64__ */
+#if defined(__ARM_FEATURE_BF16_VECTOR_ARITHMETIC) || defined(ARM_COMPUTE_FORCE_BF16)
+        case DataType::BFLOAT16:
+            create_arm_gemm<bfloat16, float>(_arm_gemm, _memory_group, a, b, c, d, act, gemm_info, _weights_manager);
+            break;
+#endif /* defined(__ARM_FEATURE_BF16_VECTOR_ARITHMETIC) || defined(ARM_COMPUTE_FORCE_BF16) */
 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
         case DataType::F16:
             create_arm_gemm<float16_t, float16_t>(_arm_gemm, _memory_group, a, b, c, d, act, gemm_info, _weights_manager);
diff --git a/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp b/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp
index 5701d60208..f6dc3a8f43 100644
--- a/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp
+++ b/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp
@@ -62,7 +62,7 @@ Status NEConvolutionLayerReshapeWeights::validate(const ITensorInfo *weights, co
     ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(weights);
     ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(weights, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::QSYMM8_PER_CHANNEL,
-                                                         DataType::F16, DataType::F32);
+                                                         DataType::BFLOAT16, DataType::F16, DataType::F32);
     ARM_COMPUTE_RETURN_ERROR_ON(weights->num_dimensions() > 4);
 
     if(biases != nullptr)
@@ -330,6 +330,7 @@ void NEGEMMConvolutionLayer::configure(const ITensor *input, const ITensor *weig
     }
 
     // Create temporary GEMM output tensor in case we cannot skip col2im
+    const DataType output_data_type = data_type == DataType::BFLOAT16 ? DataType::F32 : data_type;
     if(!_skip_col2im)
     {
         TensorShape shape_gemm;
@@ -340,7 +341,7 @@ void NEGEMMConvolutionLayer::configure(const ITensor *input, const ITensor *weig
         shape_gemm.set(1, conv_w * conv_h);
 
         // FIXME: input->clone() doesn't work with subtensors for grouped convolutions.
-        TensorInfo info_gemm(shape_gemm, 1, data_type);
+        TensorInfo info_gemm(shape_gemm, 1, output_data_type);
         info_gemm.set_quantization_info(output->info()->quantization_info()).set_data_layout(input->info()->data_layout());
         _gemm_output.allocator()->init(info_gemm);
         _memory_group.manage(&_gemm_output);
@@ -392,8 +393,8 @@ Status NEGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorI
 {
     ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
     ARM_COMPUTE_RETURN_ERROR_ON_MSG(weights_info.are_reshaped(), "Weights already reshaped are not supported!");
-    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::F16, DataType::F32);
-    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(weights, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::QSYMM8_PER_CHANNEL, DataType::F16, DataType::F32);
+    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::BFLOAT16, DataType::F16, DataType::F32);
+    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(weights, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::QSYMM8_PER_CHANNEL, DataType::BFLOAT16, DataType::F16, DataType::F32);
     ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(input, weights);
     ARM_COMPUTE_RETURN_ERROR_ON_MSG(num_groups > 1, "Grouping (num_groups != 1) is not supported on NEON");
@@ -497,16 +498,17 @@ Status NEGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorI
     }
 
     // Create temporary GEMM output tensor in case we cannot skip col2im
+    const DataType output_data_type = data_type == DataType::BFLOAT16 ? DataType::F32 : data_type;
     if(!skip_col2im)
     {
         TensorShape shape_gemm = gemm_input_to_use->tensor_shape();
         shape_gemm.set(0, mat_weights_cols);
         shape_gemm.set(1, conv_w * conv_h);
-        info_gemm = TensorInfo(shape_gemm, 1, data_type);
+        info_gemm = TensorInfo(shape_gemm, 1, output_data_type);
     }
     else
     {
-        info_gemm = TensorInfo(output->tensor_shape(), 1, data_type);
+        info_gemm = TensorInfo(output->tensor_shape(), 1, output_data_type);
     }
     info_gemm.set_quantization_info(output->quantization_info()).set_data_layout(input->data_layout());
     gemm_output_to_use = &info_gemm;
-- 
cgit v1.2.1
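
For reference, a minimal usage sketch (not part of the patch) of the configuration this change targets: BFLOAT16 input and weights driven through NEGEMMConvolutionLayer, with an F32 destination to match the F32 GEMM output forced above for BFLOAT16 inputs. The tensor shapes, padding, and bias-less setup are illustrative assumptions, and the example simply bails out when validate() reports that the BFLOAT16 path is unavailable.

    #include "arm_compute/core/Error.h"
    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    int main()
    {
        // Illustrative NCHW shapes: 32x32 input with 16 channels, 3x3 kernel, 32 output channels.
        Tensor src, weights, dst;
        src.allocator()->init(TensorInfo(TensorShape(32U, 32U, 16U), 1, DataType::BFLOAT16));
        weights.allocator()->init(TensorInfo(TensorShape(3U, 3U, 16U, 32U), 1, DataType::BFLOAT16));
        dst.allocator()->init(TensorInfo(TensorShape(32U, 32U, 32U), 1, DataType::F32));

        const PadStrideInfo conv_info(1, 1, 1, 1); // stride 1, pad 1: output keeps the spatial size for a 3x3 kernel

        // validate() reports whether this data-type combination is supported; on builds/CPUs
        // without BF16 kernels the new ARM_COMPUTE_RETURN_ERROR_ON_CPU_BF16_UNSUPPORTED check rejects it.
        const Status status = NEGEMMConvolutionLayer::validate(src.info(), weights.info(), nullptr, dst.info(), conv_info);
        if(status.error_code() != ErrorCode::OK)
        {
            return 0; // BFLOAT16 path not available in this build / on this CPU
        }

        NEGEMMConvolutionLayer conv;
        conv.configure(&src, &weights, nullptr, &dst, conv_info);

        src.allocator()->allocate();
        weights.allocator()->allocate();
        dst.allocator()->allocate();
        // ... fill src and weights with bfloat16 data ...

        conv.run();
        return 0;
    }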
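
A second sketch (also not part of the patch, with illustrative matrix sizes) of the GEMM-level rule enforced by the NEGEMMAssemblyDispatch::validate() checks above: BFLOAT16 operands are only accepted together with an F32 destination, and only on a build/CPU where the BF16 kernels are enabled (__ARM_FEATURE_BF16_VECTOR_ARITHMETIC or ARM_COMPUTE_FORCE_BF16).

    #include "arm_compute/core/Error.h"
    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h"

    using namespace arm_compute;

    int main()
    {
        const unsigned int M = 64, N = 128, K = 256; // illustrative GEMM sizes

        // TensorShape is (columns, rows): A is MxK, B is KxN, D is MxN.
        TensorInfo a(TensorShape(K, M), 1, DataType::BFLOAT16);
        TensorInfo b(TensorShape(N, K), 1, DataType::BFLOAT16);
        TensorInfo d_f32(TensorShape(N, M), 1, DataType::F32);
        TensorInfo d_bf16(TensorShape(N, M), 1, DataType::BFLOAT16);

        // On a BF16-capable build/CPU the first call is expected to succeed, while the second
        // fails with the new message "Only F32 output supported for BFLOAT16 input".
        const Status ok_f32   = NEGEMMAssemblyDispatch::validate(&a, &b, nullptr, &d_f32, GEMMInfo());
        const Status bad_bf16 = NEGEMMAssemblyDispatch::validate(&a, &b, nullptr, &d_bf16, GEMMInfo());

        return (ok_f32.error_code() == ErrorCode::OK && bad_bf16.error_code() != ErrorCode::OK) ? 0 : 1;
    }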