From afd38f0c617d6f89b2b4532c6c44f116617e2b6f Mon Sep 17 00:00:00 2001 From: Felix Thomasmathibalan Date: Wed, 27 Sep 2023 17:46:17 +0100 Subject: Apply clang-format on repository Code is formatted as per a revised clang format configuration file(not part of this delivery). Version 14.0.6 is used. Exclusion List: - files with .cl extension - files that are not strictly C/C++ (e.g. Android.bp, Sconscript ...) And the following directories - compute_kernel_writer/validation/ - tests/ - include/ - src/core/NEON/kernels/convolution/ - src/core/NEON/kernels/arm_gemm/ - src/core/NEON/kernels/arm_conv/ - data/ There will be a follow up for formatting of .cl files and the files under tests/ and compute_kernel_writer/validation/. Signed-off-by: Felix Thomasmathibalan Change-Id: Ib7eb1fcf4e7537b9feaefcfc15098a804a3fde0a Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/10391 Benchmark: Arm Jenkins Tested-by: Arm Jenkins Reviewed-by: Gunes Bayir --- .../kernels/NEFuseBatchNormalizationKernel.cpp | 244 ++++++++++++--------- 1 file changed, 137 insertions(+), 107 deletions(-) (limited to 'src/core/NEON/kernels/NEFuseBatchNormalizationKernel.cpp') diff --git a/src/core/NEON/kernels/NEFuseBatchNormalizationKernel.cpp b/src/core/NEON/kernels/NEFuseBatchNormalizationKernel.cpp index 51a69046a9..cbe5136fb1 100644 --- a/src/core/NEON/kernels/NEFuseBatchNormalizationKernel.cpp +++ b/src/core/NEON/kernels/NEFuseBatchNormalizationKernel.cpp @@ -22,7 +22,6 @@ * SOFTWARE. */ #include "src/core/NEON/kernels/NEFuseBatchNormalizationKernel.h" -#include "src/cpu/kernels/fuse_batch_normalization/list.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensor.h" @@ -30,12 +29,14 @@ #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" + #include "src/common/cpuinfo/CpuIsaInfo.h" -#include "src/core/CPP/Validate.h" -#include "src/core/NEON/wrapper/wrapper.h" #include "src/core/common/Registrars.h" +#include "src/core/CPP/Validate.h" #include "src/core/helpers/AutoConfiguration.h" #include "src/core/helpers/WindowHelpers.h" +#include "src/core/NEON/wrapper/wrapper.h" +#include "src/cpu/kernels/fuse_batch_normalization/list.h" #include @@ -52,8 +53,16 @@ struct FuseBatchNormalizeSelectorData }; using FBNSelectorPtr = std::add_pointer::type; -using FBNUKernelPtr = std::add_pointer::type; +using FBNUKernelPtr = std::add_pointer::type; struct FBNUKernel { @@ -62,73 +71,63 @@ struct FBNUKernel FBNUKernelPtr ukernel; }; -static const FBNUKernel available_kernels[] = -{ - { - "fused_batch_normalization_conv_NHWC_F16", - [](const FuseBatchNormalizeSelectorData & data) - { - return data.dt == DataType::F16 && data.dl == DataLayout::NHWC && data.isa.fp16 && data.fbn_type == FuseBatchNormalizationType::CONVOLUTION; - }, - REGISTER_FP16_NEON(arm_compute::cpu::fused_batch_normalization_conv_f16) - }, - { - "fused_batch_normalization_conv_NCHW_F16", - [](const FuseBatchNormalizeSelectorData & data) - { - return data.dt == DataType::F16 && data.dl == DataLayout::NCHW && data.isa.fp16 && data.fbn_type == FuseBatchNormalizationType::CONVOLUTION; - }, - REGISTER_FP16_NEON(arm_compute::cpu::fused_batch_normalization_conv_f16) - }, - { - "fused_batch_normalization_dwc_NHWC_F16", - [](const FuseBatchNormalizeSelectorData & data) - { - return data.dt == DataType::F16 && data.dl == DataLayout::NHWC && data.isa.fp16 && data.fbn_type == FuseBatchNormalizationType::DEPTHWISECONVOLUTION; - }, - 
REGISTER_FP16_NEON(arm_compute::cpu::fused_batch_normalization_dwc_nhwc_f16) - }, - { - "fused_batch_normalization_dwc_NCHW_F16", - [](const FuseBatchNormalizeSelectorData & data) - { - return data.dt == DataType::F16 && data.dl == DataLayout::NCHW && data.isa.fp16 && data.fbn_type == FuseBatchNormalizationType::DEPTHWISECONVOLUTION; - }, - REGISTER_FP16_NEON(arm_compute::cpu::fused_batch_normalization_dwc_nchw_f16) - }, - { - "fused_batch_normalization_conv_NHWC_F32", - [](const FuseBatchNormalizeSelectorData & data) - { - return data.dt == DataType::F32 && data.dl == DataLayout::NHWC && data.fbn_type == FuseBatchNormalizationType::CONVOLUTION; - }, - REGISTER_FP32_NEON(arm_compute::cpu::fused_batch_normalization_conv_f32) - }, - { - "fused_batch_normalization_conv_NCHW_F32", - [](const FuseBatchNormalizeSelectorData & data) - { - return data.dt == DataType::F32 && data.dl == DataLayout::NCHW && data.fbn_type == FuseBatchNormalizationType::CONVOLUTION; - }, - REGISTER_FP32_NEON(arm_compute::cpu::fused_batch_normalization_conv_f32) - }, - { - "fused_batch_normalization_dwc_NHWC_F32", - [](const FuseBatchNormalizeSelectorData & data) - { - return data.dt == DataType::F32 && data.dl == DataLayout::NHWC && data.fbn_type == FuseBatchNormalizationType::DEPTHWISECONVOLUTION; - }, - REGISTER_FP32_NEON(arm_compute::cpu::fused_batch_normalization_dwc_nhwc_f32) - }, - { - "fused_batch_normalization_dwc_NCHW_F32", - [](const FuseBatchNormalizeSelectorData & data) - { - return data.dt == DataType::F32 && data.dl == DataLayout::NCHW && data.fbn_type == FuseBatchNormalizationType::DEPTHWISECONVOLUTION; - }, - REGISTER_FP32_NEON(arm_compute::cpu::fused_batch_normalization_dwc_nchw_f32) - } -}; +static const FBNUKernel available_kernels[] = { + {"fused_batch_normalization_conv_NHWC_F16", + [](const FuseBatchNormalizeSelectorData &data) + { + return data.dt == DataType::F16 && data.dl == DataLayout::NHWC && data.isa.fp16 && + data.fbn_type == FuseBatchNormalizationType::CONVOLUTION; + }, + REGISTER_FP16_NEON(arm_compute::cpu::fused_batch_normalization_conv_f16)}, + {"fused_batch_normalization_conv_NCHW_F16", + [](const FuseBatchNormalizeSelectorData &data) + { + return data.dt == DataType::F16 && data.dl == DataLayout::NCHW && data.isa.fp16 && + data.fbn_type == FuseBatchNormalizationType::CONVOLUTION; + }, + REGISTER_FP16_NEON(arm_compute::cpu::fused_batch_normalization_conv_f16)}, + {"fused_batch_normalization_dwc_NHWC_F16", + [](const FuseBatchNormalizeSelectorData &data) + { + return data.dt == DataType::F16 && data.dl == DataLayout::NHWC && data.isa.fp16 && + data.fbn_type == FuseBatchNormalizationType::DEPTHWISECONVOLUTION; + }, + REGISTER_FP16_NEON(arm_compute::cpu::fused_batch_normalization_dwc_nhwc_f16)}, + {"fused_batch_normalization_dwc_NCHW_F16", + [](const FuseBatchNormalizeSelectorData &data) + { + return data.dt == DataType::F16 && data.dl == DataLayout::NCHW && data.isa.fp16 && + data.fbn_type == FuseBatchNormalizationType::DEPTHWISECONVOLUTION; + }, + REGISTER_FP16_NEON(arm_compute::cpu::fused_batch_normalization_dwc_nchw_f16)}, + {"fused_batch_normalization_conv_NHWC_F32", + [](const FuseBatchNormalizeSelectorData &data) + { + return data.dt == DataType::F32 && data.dl == DataLayout::NHWC && + data.fbn_type == FuseBatchNormalizationType::CONVOLUTION; + }, + REGISTER_FP32_NEON(arm_compute::cpu::fused_batch_normalization_conv_f32)}, + {"fused_batch_normalization_conv_NCHW_F32", + [](const FuseBatchNormalizeSelectorData &data) + { + return data.dt == DataType::F32 && data.dl == 
DataLayout::NCHW && + data.fbn_type == FuseBatchNormalizationType::CONVOLUTION; + }, + REGISTER_FP32_NEON(arm_compute::cpu::fused_batch_normalization_conv_f32)}, + {"fused_batch_normalization_dwc_NHWC_F32", + [](const FuseBatchNormalizeSelectorData &data) + { + return data.dt == DataType::F32 && data.dl == DataLayout::NHWC && + data.fbn_type == FuseBatchNormalizationType::DEPTHWISECONVOLUTION; + }, + REGISTER_FP32_NEON(arm_compute::cpu::fused_batch_normalization_dwc_nhwc_f32)}, + {"fused_batch_normalization_dwc_NCHW_F32", + [](const FuseBatchNormalizeSelectorData &data) + { + return data.dt == DataType::F32 && data.dl == DataLayout::NCHW && + data.fbn_type == FuseBatchNormalizationType::DEPTHWISECONVOLUTION; + }, + REGISTER_FP32_NEON(arm_compute::cpu::fused_batch_normalization_dwc_nchw_f32)}}; /** Micro-kernel selector * @@ -140,9 +139,9 @@ static const FBNUKernel available_kernels[] = */ const FBNUKernel *get_implementation(const FuseBatchNormalizeSelectorData &data) { - for(const auto &uk : available_kernels) + for (const auto &uk : available_kernels) { - if(uk.is_selected(data)) + if (uk.is_selected(data)) { return &uk; } @@ -150,10 +149,16 @@ const FBNUKernel *get_implementation(const FuseBatchNormalizeSelectorData &data) return nullptr; } -Status validate_arguments(const ITensorInfo *input_weights, const ITensorInfo *bn_mean, const ITensorInfo *bn_var, - const ITensorInfo *fused_weights, const ITensorInfo *fused_bias, - const ITensorInfo *input_bias, const ITensorInfo *bn_beta, const ITensorInfo *bn_gamma, - float epsilon, FuseBatchNormalizationType fbn_type) +Status validate_arguments(const ITensorInfo *input_weights, + const ITensorInfo *bn_mean, + const ITensorInfo *bn_var, + const ITensorInfo *fused_weights, + const ITensorInfo *fused_bias, + const ITensorInfo *input_bias, + const ITensorInfo *bn_beta, + const ITensorInfo *bn_gamma, + float epsilon, + FuseBatchNormalizationType fbn_type) { ARM_COMPUTE_UNUSED(epsilon); ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input_weights, bn_mean, bn_var); @@ -164,43 +169,44 @@ Status validate_arguments(const ITensorInfo *input_weights, const ITensorInfo *b ARM_COMPUTE_RETURN_ERROR_ON(input_bias == nullptr && fused_bias == nullptr); ARM_COMPUTE_RETURN_ERROR_ON(bn_mean->num_dimensions() > 1); - if(fbn_type == FuseBatchNormalizationType::CONVOLUTION) + if (fbn_type == FuseBatchNormalizationType::CONVOLUTION) { ARM_COMPUTE_RETURN_ERROR_ON(input_weights->dimension(3) != bn_mean->dimension(0)); } else { - const size_t channel_idx = get_data_layout_dimension_index(input_weights->data_layout(), DataLayoutDimension::CHANNEL); + const size_t channel_idx = + get_data_layout_dimension_index(input_weights->data_layout(), DataLayoutDimension::CHANNEL); ARM_COMPUTE_RETURN_ERROR_ON(input_weights->dimension(channel_idx) != bn_mean->dimension(0)); } // Validate bias - if(input_bias != nullptr) + if (input_bias != nullptr) { ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(bn_mean, input_bias); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input_weights, input_bias); } // Validate beta - if(bn_beta != nullptr) + if (bn_beta != nullptr) { ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(bn_mean, bn_beta); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input_weights, bn_beta); } // Validate gamma - if(bn_gamma != nullptr) + if (bn_gamma != nullptr) { ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(bn_mean, bn_gamma); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input_weights, bn_gamma); } // Validate output weights - if(fused_weights != nullptr && 
fused_weights->total_size() != 0) + if (fused_weights != nullptr && fused_weights->total_size() != 0) { ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input_weights, fused_weights); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(input_weights, fused_weights); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input_weights, fused_weights); } // Validate output bias - if(fused_bias != nullptr && fused_bias->total_size() != 0) + if (fused_bias != nullptr && fused_bias->total_size() != 0) { ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(bn_mean, fused_bias); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input_weights, fused_bias); @@ -212,15 +218,31 @@ Status validate_arguments(const ITensorInfo *input_weights, const ITensorInfo *b } // namespace NEFuseBatchNormalizationKernel::NEFuseBatchNormalizationKernel() - : _input_weights(nullptr), _input_bias(nullptr), _bn_mean(nullptr), _bn_var(nullptr), _bn_gamma(nullptr), _bn_beta(nullptr), _fused_weights(nullptr), _fused_bias(nullptr), _epsilon(), - _run_in_place_weights(false), _run_in_place_bias(false), _func(nullptr) + : _input_weights(nullptr), + _input_bias(nullptr), + _bn_mean(nullptr), + _bn_var(nullptr), + _bn_gamma(nullptr), + _bn_beta(nullptr), + _fused_weights(nullptr), + _fused_bias(nullptr), + _epsilon(), + _run_in_place_weights(false), + _run_in_place_bias(false), + _func(nullptr) { } -void NEFuseBatchNormalizationKernel::configure(const ITensor *input_weights, const ITensor *bn_mean, const ITensor *bn_var, - ITensor *fused_weights, ITensor *fused_bias, - const ITensor *input_bias, const ITensor *bn_beta, const ITensor *bn_gamma, - float epsilon, FuseBatchNormalizationType fbn_type) +void NEFuseBatchNormalizationKernel::configure(const ITensor *input_weights, + const ITensor *bn_mean, + const ITensor *bn_var, + ITensor *fused_weights, + ITensor *fused_bias, + const ITensor *input_bias, + const ITensor *bn_beta, + const ITensor *bn_gamma, + float epsilon, + FuseBatchNormalizationType fbn_type) { ARM_COMPUTE_ERROR_ON_NULLPTR(input_weights, bn_mean, bn_var); @@ -238,27 +260,27 @@ void NEFuseBatchNormalizationKernel::configure(const ITensor *input_weights, con _run_in_place_bias = (fused_bias == nullptr) || (input_bias != nullptr && fused_bias == input_bias); // Auto initialize outputs - if(_fused_weights != nullptr) + if (_fused_weights != nullptr) { // Output tensor auto initialization if not yet initialized auto_init_if_empty(*_fused_weights->info(), *_input_weights->info()->clone()); } - if(_fused_bias != nullptr) + if (_fused_bias != nullptr) { // Output tensor auto initialization if not yet initialized auto_init_if_empty(*_fused_bias->info(), *_bn_mean->info()->clone()); } // Validate arguments - ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input_weights->info(), bn_mean->info(), bn_var->info(), - (fused_weights != nullptr) ? fused_weights->info() : nullptr, - (fused_bias != nullptr) ? fused_bias->info() : nullptr, - (input_bias != nullptr) ? input_bias->info() : nullptr, - (bn_beta != nullptr) ? bn_beta->info() : nullptr, - (bn_gamma != nullptr) ? bn_gamma->info() : nullptr, - epsilon, fbn_type)); + ARM_COMPUTE_ERROR_THROW_ON(validate_arguments( + input_weights->info(), bn_mean->info(), bn_var->info(), + (fused_weights != nullptr) ? fused_weights->info() : nullptr, + (fused_bias != nullptr) ? fused_bias->info() : nullptr, (input_bias != nullptr) ? input_bias->info() : nullptr, + (bn_beta != nullptr) ? bn_beta->info() : nullptr, (bn_gamma != nullptr) ? 
bn_gamma->info() : nullptr, epsilon, + fbn_type)); - const auto *uk = get_implementation(FuseBatchNormalizeSelectorData{ input_weights->info()->data_type(), input_weights->info()->data_layout(), fbn_type, CPUInfo::get().get_isa() }); + const auto *uk = get_implementation(FuseBatchNormalizeSelectorData{ + input_weights->info()->data_type(), input_weights->info()->data_layout(), fbn_type, CPUInfo::get().get_isa()}); ARM_COMPUTE_ERROR_ON_NULLPTR(uk); ARM_COMPUTE_ERROR_ON(uk->ukernel == nullptr); _func = uk->ukernel; @@ -268,12 +290,19 @@ void NEFuseBatchNormalizationKernel::configure(const ITensor *input_weights, con INEKernel::configure(win); } -Status NEFuseBatchNormalizationKernel::validate(const ITensorInfo *input_weights, const ITensorInfo *bn_mean, const ITensorInfo *bn_var, - const ITensorInfo *fused_weights, const ITensorInfo *fused_bias, - const ITensorInfo *input_bias, const ITensorInfo *bn_beta, const ITensorInfo *bn_gamma, - float epsilon, FuseBatchNormalizationType fbn_type) +Status NEFuseBatchNormalizationKernel::validate(const ITensorInfo *input_weights, + const ITensorInfo *bn_mean, + const ITensorInfo *bn_var, + const ITensorInfo *fused_weights, + const ITensorInfo *fused_bias, + const ITensorInfo *input_bias, + const ITensorInfo *bn_beta, + const ITensorInfo *bn_gamma, + float epsilon, + FuseBatchNormalizationType fbn_type) { - ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input_weights, bn_mean, bn_var, fused_weights, fused_bias, input_bias, bn_beta, bn_gamma, epsilon, fbn_type)); + ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input_weights, bn_mean, bn_var, fused_weights, fused_bias, + input_bias, bn_beta, bn_gamma, epsilon, fbn_type)); return Status{}; } @@ -284,6 +313,7 @@ void NEFuseBatchNormalizationKernel::run(const Window &window, const ThreadInfo ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window); ARM_COMPUTE_ERROR_ON(_func == nullptr); - (*_func)(_input_weights, _input_bias, _fused_weights, _fused_bias, _bn_mean, _bn_var, _bn_beta, _bn_gamma, _epsilon, window); + (*_func)(_input_weights, _input_bias, _fused_weights, _fused_bias, _bn_mean, _bn_var, _bn_beta, _bn_gamma, _epsilon, + window); } } // namespace arm_compute -- cgit v1.2.1
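
The reformatted file above is built around a runtime micro-kernel selector: a static available_kernels table whose entries pair a name, an is_selected predicate and a function pointer, plus a get_implementation() helper that returns the first entry whose predicate matches the tensor's data type, data layout and CPU ISA. The following is a minimal, self-contained C++ sketch of that same dispatch idea, written in the brace style the patch applies. It is not the Compute Library code; every identifier in it (SelectorData, KernelEntry, kernel_f16_nhwc, kernel_f32_nhwc, the demo main) is a simplified stand-in invented for illustration.

#include <cstdio>

// Simplified stand-ins for the library's DataType, DataLayout and ISA info.
enum class DataType
{
    F16,
    F32
};
enum class DataLayout
{
    NCHW,
    NHWC
};
struct IsaInfo
{
    bool fp16;
};

// Everything the selector needs to pick a micro-kernel at configure time.
struct SelectorData
{
    DataType   dt;
    DataLayout dl;
    IsaInfo    isa;
};

// One table entry: a name (handy for logging/tests), a predicate deciding
// whether this entry can handle the given data, and the micro-kernel itself.
struct KernelEntry
{
    const char *name;
    bool (*is_selected)(const SelectorData &);
    void (*ukernel)(const SelectorData &);
};

// Hypothetical micro-kernels; the real ones operate on tensors and a window.
void kernel_f16_nhwc(const SelectorData &)
{
    std::puts("f16/NHWC micro-kernel");
}
void kernel_f32_nhwc(const SelectorData &)
{
    std::puts("f32/NHWC micro-kernel");
}

// Static table, ordered by priority: the first matching entry wins.
static const KernelEntry available_kernels[] = {
    {"example_NHWC_F16",
     [](const SelectorData &d) { return d.dt == DataType::F16 && d.dl == DataLayout::NHWC && d.isa.fp16; },
     kernel_f16_nhwc},
    {"example_NHWC_F32",
     [](const SelectorData &d) { return d.dt == DataType::F32 && d.dl == DataLayout::NHWC; },
     kernel_f32_nhwc},
};

// Same shape as get_implementation() in the patch: linear scan, first match,
// nullptr when nothing is suitable.
const KernelEntry *get_implementation(const SelectorData &data)
{
    for (const auto &uk : available_kernels)
    {
        if (uk.is_selected(data))
        {
            return &uk;
        }
    }
    return nullptr;
}

int main()
{
    const SelectorData data{DataType::F32, DataLayout::NHWC, IsaInfo{false}};
    const KernelEntry *uk = get_implementation(data);
    if (uk != nullptr)
    {
        std::printf("selected: %s\n", uk->name);
        uk->ukernel(data); // the real kernel caches this pointer in configure() and calls it from run()
    }
    return 0;
}

In the actual kernel this resolution happens once, in NEFuseBatchNormalizationKernel::configure(), which stores the selected ukernel in _func; run() then simply invokes _func over the scheduled window, as the tail of the diff shows.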