From 0cbb927ac309e332ac6e6f1ab9170f041f0138ab Mon Sep 17 00:00:00 2001 From: Michele Di Giorgio Date: Thu, 1 Mar 2018 16:56:48 +0000 Subject: COMPMID-804: Add NHWC data format support for NEON batch normalisation Change-Id: I04892e7be3f5aa58cd95917a4f90a6b4ffcf6efc Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/122897 Reviewed-by: Giorgio Arena Tested-by: Jenkins Reviewed-by: Anthony Barbier --- .../NEON/kernels/NEBatchNormalizationLayerKernel.h | 21 +++- .../kernels/NEBatchNormalizationLayerKernel.cpp | 110 ++++++++++++++++++--- tests/benchmark/CL/BatchNormalizationLayer.cpp | 78 ++++++++------- .../GLES_COMPUTE/BatchNormalizationLayer.cpp | 78 ++++++++------- tests/benchmark/NEON/BatchNormalizationLayer.cpp | 78 ++++++++------- .../fixtures/BatchNormalizationLayerFixture.h | 10 +- tests/validation/CL/BatchNormalizationLayer.cpp | 56 ++++++----- .../GLES_COMPUTE/BatchNormalizationLayer.cpp | 34 ++++--- tests/validation/NEON/BatchNormalizationLayer.cpp | 58 ++++++----- .../fixtures/BatchNormalizationLayerFixture.h | 26 +++-- .../reference/BatchNormalizationLayer.cpp | 20 ++-- 11 files changed, 372 insertions(+), 197 deletions(-) diff --git a/arm_compute/core/NEON/kernels/NEBatchNormalizationLayerKernel.h b/arm_compute/core/NEON/kernels/NEBatchNormalizationLayerKernel.h index ae6b8634b3..2d33f87dfa 100644 --- a/arm_compute/core/NEON/kernels/NEBatchNormalizationLayerKernel.h +++ b/arm_compute/core/NEON/kernels/NEBatchNormalizationLayerKernel.h @@ -119,7 +119,15 @@ private: * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()). */ template - void batch_normalization_fp16(const Window &window); + void batch_normalization_fp16_nchw(const Window &window); + /** Template function to run batch normalization on fp16 on tensors with NHWC format + * + * @tparam fused_activation Boolean that flags if its a fused activation or not + * + * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()). + */ + template + void batch_normalization_fp16_nhwc(const Window &window); /** Template function to run batch normalization on fp32 * * @tparam fused_activation Boolean that flags if its a fused activation or not @@ -128,7 +136,16 @@ private: * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()). */ template - void batch_normalization_fp32(const Window &window); + void batch_normalization_fp32_nchw(const Window &window); + /** Template function to run batch normalization on fp32 on tensors with NHWC format + * + * @tparam fused_activation Boolean that flags if its a fused activation or not + * @tparam F Activation function functor to run + * + * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()). + */ + template + void batch_normalization_fp32_nhwc(const Window &window); /** Common signature for all the batch normalization functions * * @param[in] window Region on which to execute the kernel. 
diff --git a/src/core/NEON/kernels/NEBatchNormalizationLayerKernel.cpp b/src/core/NEON/kernels/NEBatchNormalizationLayerKernel.cpp index d1bdfac2da..6be50fdb0d 100644 --- a/src/core/NEON/kernels/NEBatchNormalizationLayerKernel.cpp +++ b/src/core/NEON/kernels/NEBatchNormalizationLayerKernel.cpp @@ -58,6 +58,7 @@ validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const IT if(nullptr != output) { ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output); + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(input, output); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT(input, output); } @@ -77,7 +78,7 @@ validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const IT ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT(input, gamma); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(mean, gamma); } - ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(2) != mean->dimension(0)); + ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::CHANNEL)) != mean->dimension(0)); return Status{}; } @@ -209,9 +210,9 @@ void NEBatchNormalizationLayerKernel::batch_normalization_qs16(const Window &win } template -void NEBatchNormalizationLayerKernel::batch_normalization_fp16(const Window &window) +void NEBatchNormalizationLayerKernel::batch_normalization_fp16_nchw(const Window &window) { - static_assert(!fused_activation, "Activation is not supported for QS8"); + static_assert(!fused_activation, "Activation is not supported for FP16"); ARM_COMPUTE_UNUSED(window); #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC @@ -263,8 +264,43 @@ void NEBatchNormalizationLayerKernel::batch_normalization_fp16(const Window &win #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ } +template +void NEBatchNormalizationLayerKernel::batch_normalization_fp16_nhwc(const Window &window) +{ + static_assert(!fused_activation, "Activation is not supported for FP16"); + + ARM_COMPUTE_UNUSED(window); +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC + Iterator input(_input, window); + Iterator output(_output, window); + + const auto input_mean = reinterpret_cast(_mean->ptr_to_element(Coordinates(0, 0))); + const auto input_var = reinterpret_cast(_var->ptr_to_element(Coordinates(0, 0))); + const auto input_gamma = (_gamma != nullptr) ? reinterpret_cast(_gamma->ptr_to_element(Coordinates(0, 0))) : nullptr; + const auto input_beta = (_beta != nullptr) ? reinterpret_cast(_beta->ptr_to_element(Coordinates(0, 0))) : nullptr; + + const float16x8_t epsilon_vec = vdupq_n_f16(_epsilon); + execute_window_loop(window, [&](const Coordinates & id) + { + // Construct vectors + const float16x8_t mean_vec = vld1q_f16(input_mean + id.x()); + const float16x8_t var_vec = vld1q_f16(input_var + id.x()); + const float16x8_t gamma_vec = (input_gamma != nullptr) ? vld1q_f16(input_gamma + id.x()) : vdupq_n_f16(1.0); + const float16x8_t beta_vec = (input_beta != nullptr) ?
vld1q_f16(input_beta + id.x()) : vdupq_n_f16(0.0); + // Calculate denominator + const float16x8_t denominator = vinvsqrtq_f16(vaddq_f16(var_vec, epsilon_vec)); + + // Calculate x bar and store results + const float16x8_t numerator = vsubq_f16(vld1q_f16(reinterpret_cast(input.ptr())), mean_vec); + const float16x8_t x_bar = vmulq_f16(numerator, denominator); + vst1q_f16(reinterpret_cast(output.ptr()), vaddq_f16(beta_vec, vmulq_f16(x_bar, gamma_vec))); + }, + input, output); +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +} + template -void NEBatchNormalizationLayerKernel::batch_normalization_fp32(const Window &window) +void NEBatchNormalizationLayerKernel::batch_normalization_fp32_nchw(const Window &window) { Iterator input(_input, window); Iterator output(_output, window); @@ -324,8 +360,50 @@ void NEBatchNormalizationLayerKernel::batch_normalization_fp32(const Window &win input, output); } +template +void NEBatchNormalizationLayerKernel::batch_normalization_fp32_nhwc(const Window &window) +{ + Iterator input(_input, window); + Iterator output(_output, window); + + F activation_functor(_act_info); + + const auto input_mean = reinterpret_cast(_mean->ptr_to_element(Coordinates(0, 0))); + const auto input_var = reinterpret_cast(_var->ptr_to_element(Coordinates(0, 0))); + const auto input_gamma = (_gamma != nullptr) ? reinterpret_cast(_gamma->ptr_to_element(Coordinates(0, 0))) : nullptr; + const auto input_beta = (_beta != nullptr) ? reinterpret_cast(_beta->ptr_to_element(Coordinates(0, 0))) : nullptr; + + const float32x4_t epsilon_vec = vdupq_n_f32(_epsilon); + execute_window_loop(window, [&](const Coordinates & id) + { + // Construct vectors + const float32x4_t mean_vec = vld1q_f32(input_mean + id.x()); + const float32x4_t var_vec = vld1q_f32(input_var + id.x()); + const float32x4_t gamma_vec = (input_gamma != nullptr) ? vld1q_f32(input_gamma + id.x()) : vdupq_n_f32(1.0); + const float32x4_t beta_vec = (input_beta != nullptr) ? vld1q_f32(input_beta + id.x()) : vdupq_n_f32(0.0); + // Calculate denominator + const float32x4_t denominator = vinvsqrtq_f32(vaddq_f32(var_vec, epsilon_vec)); + + // Calculate x bar + const float32x4_t numerator = vsubq_f32(vld1q_f32(reinterpret_cast(input.ptr())), mean_vec); + const float32x4_t x_bar = vmulq_f32(numerator, denominator); + float32x4_t res = vmlaq_f32(beta_vec, x_bar, gamma_vec); + + // Perform fused activation + if(fused_activation) + { + activation_functor(res); + } + + // Store results + vst1q_f32(reinterpret_cast(output.ptr()), res); + }, + input, output); +} + void NEBatchNormalizationLayerKernel::configure_non_fused() { + const bool is_nhwc = _input->info()->data_layout() == DataLayout::NHWC; switch(_input->info()->data_type()) { case DataType::QS8: @@ -335,10 +413,11 @@ void NEBatchNormalizationLayerKernel::configure_non_fused() _func = &NEBatchNormalizationLayerKernel::batch_normalization_qs16; break; case DataType::F16: - _func = &NEBatchNormalizationLayerKernel::batch_normalization_fp16; + _func = (is_nhwc) ? &NEBatchNormalizationLayerKernel::batch_normalization_fp16_nhwc : &NEBatchNormalizationLayerKernel::batch_normalization_fp16_nchw; break; case DataType::F32: - _func = &NEBatchNormalizationLayerKernel::batch_normalization_fp32>; + _func = (is_nhwc) ?
&NEBatchNormalizationLayerKernel::batch_normalization_fp32_nhwc> : + &NEBatchNormalizationLayerKernel::batch_normalization_fp32_nchw>; break; default: ARM_COMPUTE_ERROR("Element size not supported"); @@ -348,18 +427,25 @@ void NEBatchNormalizationLayerKernel::configure_non_fused() void NEBatchNormalizationLayerKernel::configure_fused() { - // Fused Batched Normalization with activation functions : FP32 - static std::map bn_fused_map_f32 = + // NCHW Fused Batched Normalization with activation functions : FP32 + static std::map bn_fused_map_f32_nchw = + { + { ActivationLayerInfo::ActivationFunction::RELU, &NEBatchNormalizationLayerKernel::batch_normalization_fp32_nchw> }, + { ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, &NEBatchNormalizationLayerKernel::batch_normalization_fp32_nchw> }, + { ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, &NEBatchNormalizationLayerKernel::batch_normalization_fp32_nchw> } + }; + // NHWC Fused Batched Normalization with activation functions : FP32 + static std::map bn_fused_map_f32_nhwc = { - { ActivationLayerInfo::ActivationFunction::RELU, &NEBatchNormalizationLayerKernel::batch_normalization_fp32> }, - { ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, &NEBatchNormalizationLayerKernel::batch_normalization_fp32> }, - { ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, &NEBatchNormalizationLayerKernel::batch_normalization_fp32> } + { ActivationLayerInfo::ActivationFunction::RELU, &NEBatchNormalizationLayerKernel::batch_normalization_fp32_nhwc> }, + { ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, &NEBatchNormalizationLayerKernel::batch_normalization_fp32_nhwc> }, + { ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, &NEBatchNormalizationLayerKernel::batch_normalization_fp32_nhwc> } }; switch(_input->info()->data_type()) { case DataType::F32: - _func = bn_fused_map_f32[_act_info.activation()]; + _func = (_input->info()->data_layout() == DataLayout::NHWC) ? 
bn_fused_map_f32_nhwc[_act_info.activation()] : bn_fused_map_f32_nchw[_act_info.activation()]; break; default: ARM_COMPUTE_ERROR("Element size not supported"); diff --git a/tests/benchmark/CL/BatchNormalizationLayer.cpp b/tests/benchmark/CL/BatchNormalizationLayer.cpp index 3312319aac..3d11aea1e7 100644 --- a/tests/benchmark/CL/BatchNormalizationLayer.cpp +++ b/tests/benchmark/CL/BatchNormalizationLayer.cpp @@ -51,54 +51,60 @@ using CLBatchNormalizationLayerFixture = BatchNormalizationLayerFixture - void setup(TensorShape tensor_shape, TensorShape param_shape, float epsilon, bool use_gamma, bool use_beta, ActivationLayerInfo act_info, DataType data_type, int batches) + void setup(TensorShape tensor_shape, TensorShape param_shape, float epsilon, bool use_gamma, bool use_beta, ActivationLayerInfo act_info, DataType data_type, DataLayout data_layout, int batches) { // Set batched in source and destination shapes const unsigned int fixed_point_position = 4; tensor_shape.set(tensor_shape.num_dimensions(), batches); + if(data_layout == DataLayout::NHWC) + { + permute(tensor_shape, PermutationVector(2U, 0U, 1U)); + } // Create tensors - src = create_tensor(tensor_shape, data_type, 1, fixed_point_position); - dst = create_tensor(tensor_shape, data_type, 1, fixed_point_position); + src = create_tensor(tensor_shape, data_type, 1, fixed_point_position, QuantizationInfo(), data_layout); + dst = create_tensor(tensor_shape, data_type, 1, fixed_point_position, QuantizationInfo(), data_layout); mean = create_tensor(param_shape, data_type, 1, fixed_point_position); variance = create_tensor(param_shape, data_type, 1, fixed_point_position); beta = create_tensor(param_shape, data_type, 1, fixed_point_position); diff --git a/tests/validation/CL/BatchNormalizationLayer.cpp b/tests/validation/CL/BatchNormalizationLayer.cpp index 8c143060cb..6190e67dba 100644 --- a/tests/validation/CL/BatchNormalizationLayer.cpp +++ b/tests/validation/CL/BatchNormalizationLayer.cpp @@ -32,6 +32,7 @@ #include "tests/framework/Asserts.h" #include "tests/framework/Macros.h" #include "tests/framework/datasets/Datasets.h" +#include "tests/validation/Helpers.h" #include "tests/validation/Validation.h" #include "tests/validation/fixtures/BatchNormalizationLayerFixture.h" @@ -61,18 +62,25 @@ TEST_SUITE(BatchNormalizationLayer) template using CLBatchNormalizationLayerFixture = BatchNormalizationLayerValidationFixture; -DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(datasets::RandomBatchNormalizationLayerDataset(), - combine(framework::dataset::make("UseBeta", { false, true }), - framework::dataset::make("UseGamma", { false, true }))), - framework::dataset::make("DataType", { DataType::QS8, DataType::QS16, DataType::F16, DataType::F32 })), - shape0, shape1, epsilon, use_gamma, use_beta, dt) +DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(combine(datasets::RandomBatchNormalizationLayerDataset(), + combine(framework::dataset::make("UseBeta", { false, true }), + framework::dataset::make("UseGamma", { false, true }))), + framework::dataset::make("DataType", { DataType::QS8, DataType::QS16, DataType::F16, DataType::F32 })), + framework::dataset::make("DataLayout", { DataLayout::NCHW })), + shape0, shape1, epsilon, use_gamma, use_beta, dt, data_layout) { // Set fixed point position data type allowed const int fixed_point_position = (arm_compute::is_data_type_fixed_point(dt)) ? 
3 : 0; + TensorShape src_dst_shapes = shape0; + if(data_layout == DataLayout::NHWC) + { + permute(src_dst_shapes, PermutationVector(2U, 0U, 1U)); + } + // Create tensors - CLTensor src = create_tensor(shape0, dt, 1, fixed_point_position); - CLTensor dst = create_tensor(shape0, dt, 1, fixed_point_position); + CLTensor src = create_tensor(src_dst_shapes, dt, 1, fixed_point_position, QuantizationInfo(), data_layout); + CLTensor dst = create_tensor(src_dst_shapes, dt, 1, fixed_point_position, QuantizationInfo(), data_layout); CLTensor mean = create_tensor(shape1, dt, 1, fixed_point_position); CLTensor var = create_tensor(shape1, dt, 1, fixed_point_position); CLTensor beta = create_tensor(shape1, dt, 1, fixed_point_position); @@ -85,7 +93,7 @@ DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(datas norm.configure(&src, &dst, &mean, &var, beta_ptr, gamma_ptr, epsilon); // Validate valid region - const ValidRegion valid_region = shape_to_valid_region(shape0); + const ValidRegion valid_region = shape_to_valid_region(src_dst_shapes); validate(dst.info()->valid_region(), valid_region); } @@ -155,11 +163,12 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip( TEST_SUITE(Float) TEST_SUITE(FP32) -FIXTURE_DATA_TEST_CASE(Random, CLBatchNormalizationLayerFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::RandomBatchNormalizationLayerDataset(), +FIXTURE_DATA_TEST_CASE(Random, CLBatchNormalizationLayerFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::RandomBatchNormalizationLayerDataset(), combine(framework::dataset::make("UseBeta", { false, true }), framework::dataset::make("UseGamma", { false, true }))), act_infos), - framework::dataset::make("DataType", DataType::F32))) + framework::dataset::make("DataType", DataType::F32)), + framework::dataset::make("DataLayout", { DataLayout::NCHW }))) { // Validate output validate(CLAccessor(_target), _reference, tolerance_f32, 0); @@ -167,11 +176,12 @@ FIXTURE_DATA_TEST_CASE(Random, CLBatchNormalizationLayerFixture, framewor TEST_SUITE_END() TEST_SUITE(FP16) -FIXTURE_DATA_TEST_CASE(Random, CLBatchNormalizationLayerFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::RandomBatchNormalizationLayerDataset(), +FIXTURE_DATA_TEST_CASE(Random, CLBatchNormalizationLayerFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::RandomBatchNormalizationLayerDataset(), combine(framework::dataset::make("UseBeta", { false, true }), framework::dataset::make("UseGamma", { false, true }))), framework::dataset::make("ActivationInfo", ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.f))), - framework::dataset::make("DataType", DataType::F16))) + framework::dataset::make("DataType", DataType::F16)), + framework::dataset::make("DataLayout", { DataLayout::NCHW }))) { // Validate output validate(CLAccessor(_target), _reference, tolerance_f16, 0); @@ -185,11 +195,12 @@ using CLBatchNormalizationLayerFixedPointFixture = BatchNormalizationLayerValida TEST_SUITE(QS8) FIXTURE_DATA_TEST_CASE(Random, CLBatchNormalizationLayerFixedPointFixture, framework::DatasetMode::PRECOMMIT, - combine(combine(combine(combine(combine(datasets::RandomBatchNormalizationLayerDataset(), - framework::dataset::make("UseBeta", false)), - framework::dataset::make("UseGamma", false)), - framework::dataset::make("ActivationInfo", ActivationLayerInfo())), - framework::dataset::make("DataType", DataType::QS8)), + 
combine(combine(combine(combine(combine(combine(datasets::RandomBatchNormalizationLayerDataset(), + framework::dataset::make("UseBeta", false)), + framework::dataset::make("UseGamma", false)), + framework::dataset::make("ActivationInfo", ActivationLayerInfo())), + framework::dataset::make("DataType", DataType::QS8)), + framework::dataset::make("DataLayout", DataLayout::NCHW)), framework::dataset::make("FractionalBits", 1, 6))) { // Validate output @@ -199,11 +210,12 @@ TEST_SUITE_END() TEST_SUITE(QS16) FIXTURE_DATA_TEST_CASE(Random, CLBatchNormalizationLayerFixedPointFixture, framework::DatasetMode::PRECOMMIT, - combine(combine(combine(combine(combine(datasets::RandomBatchNormalizationLayerDataset(), - framework::dataset::make("UseBeta", false)), - framework::dataset::make("UseGamma", false)), - framework::dataset::make("ActivationInfo", ActivationLayerInfo())), - framework::dataset::make("DataType", DataType::QS16)), + combine(combine(combine(combine(combine(combine(datasets::RandomBatchNormalizationLayerDataset(), + framework::dataset::make("UseBeta", false)), + framework::dataset::make("UseGamma", false)), + framework::dataset::make("ActivationInfo", ActivationLayerInfo())), + framework::dataset::make("DataType", DataType::QS16)), + framework::dataset::make("DataLayout", DataLayout::NCHW)), framework::dataset::make("FractionalBits", 1, 14))) { // Validate output diff --git a/tests/validation/GLES_COMPUTE/BatchNormalizationLayer.cpp b/tests/validation/GLES_COMPUTE/BatchNormalizationLayer.cpp index 2dbb0e0fbb..d22f1e9958 100644 --- a/tests/validation/GLES_COMPUTE/BatchNormalizationLayer.cpp +++ b/tests/validation/GLES_COMPUTE/BatchNormalizationLayer.cpp @@ -32,6 +32,7 @@ #include "tests/framework/Asserts.h" #include "tests/framework/Macros.h" #include "tests/framework/datasets/Datasets.h" +#include "tests/validation/Helpers.h" #include "tests/validation/Validation.h" #include "tests/validation/fixtures/BatchNormalizationLayerFixture.h" @@ -59,18 +60,25 @@ TEST_SUITE(BatchNormalizationLayer) template using GCBatchNormalizationLayerFixture = BatchNormalizationLayerValidationFixture; -DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(datasets::RandomBatchNormalizationLayerDataset(), - combine(framework::dataset::make("UseBeta", { false, true }), - framework::dataset::make("UseGamma", { false, true }))), - framework::dataset::make("DataType", { DataType::F32 })), - shape0, shape1, epsilon, use_beta, use_gamma, dt) +DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(combine(datasets::RandomBatchNormalizationLayerDataset(), + combine(framework::dataset::make("UseBeta", { false, true }), + framework::dataset::make("UseGamma", { false, true }))), + framework::dataset::make("DataType", { DataType::F32 })), + framework::dataset::make("DataLayout", { DataLayout::NCHW })), + shape0, shape1, epsilon, use_beta, use_gamma, dt, data_layout) { // Set fixed point position data type allowed int fixed_point_position = (arm_compute::is_data_type_fixed_point(dt)) ? 
3 : 0; + TensorShape src_dst_shapes = shape0; + if(data_layout == DataLayout::NHWC) + { + permute(src_dst_shapes, PermutationVector(2U, 0U, 1U)); + } + // Create tensors - GCTensor src = create_tensor(shape0, dt, 1, fixed_point_position); - GCTensor dst = create_tensor(shape0, dt, 1, fixed_point_position); + GCTensor src = create_tensor(src_dst_shapes, dt, 1, fixed_point_position, QuantizationInfo(), data_layout); + GCTensor dst = create_tensor(src_dst_shapes, dt, 1, fixed_point_position, QuantizationInfo(), data_layout); GCTensor mean = create_tensor(shape1, dt, 1, fixed_point_position); GCTensor var = create_tensor(shape1, dt, 1, fixed_point_position); GCTensor beta = create_tensor(shape1, dt, 1, fixed_point_position); @@ -83,17 +91,18 @@ DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(datas norm.configure(&src, &dst, &mean, &var, beta_ptr, gamma_ptr, epsilon); // Validate valid region - const ValidRegion valid_region = shape_to_valid_region(shape0); + const ValidRegion valid_region = shape_to_valid_region(src_dst_shapes); validate(dst.info()->valid_region(), valid_region); } TEST_SUITE(Float) TEST_SUITE(FP16) -FIXTURE_DATA_TEST_CASE(Random, GCBatchNormalizationLayerFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::RandomBatchNormalizationLayerDataset(), +FIXTURE_DATA_TEST_CASE(Random, GCBatchNormalizationLayerFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::RandomBatchNormalizationLayerDataset(), combine(framework::dataset::make("UseBeta", { false, true }), framework::dataset::make("UseGamma", { false, true }))), act_infos), - framework::dataset::make("DataType", DataType::F16))) + framework::dataset::make("DataType", DataType::F16)), + framework::dataset::make("DataLayout", { DataLayout::NCHW }))) { // Validate output validate(GCAccessor(_target), _reference, tolerance_f16, 0); @@ -101,11 +110,12 @@ FIXTURE_DATA_TEST_CASE(Random, GCBatchNormalizationLayerFixture, framework TEST_SUITE_END() TEST_SUITE(FP32) -FIXTURE_DATA_TEST_CASE(Random, GCBatchNormalizationLayerFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::RandomBatchNormalizationLayerDataset(), +FIXTURE_DATA_TEST_CASE(Random, GCBatchNormalizationLayerFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::RandomBatchNormalizationLayerDataset(), combine(framework::dataset::make("UseBeta", { false, true }), framework::dataset::make("UseGamma", { false, true }))), act_infos), - framework::dataset::make("DataType", DataType::F32))) + framework::dataset::make("DataType", DataType::F32)), + framework::dataset::make("DataLayout", { DataLayout::NCHW }))) { // Validate output validate(GCAccessor(_target), _reference, tolerance_f, 0); diff --git a/tests/validation/NEON/BatchNormalizationLayer.cpp b/tests/validation/NEON/BatchNormalizationLayer.cpp index 7bf1f2633e..53fd0163ff 100644 --- a/tests/validation/NEON/BatchNormalizationLayer.cpp +++ b/tests/validation/NEON/BatchNormalizationLayer.cpp @@ -32,6 +32,7 @@ #include "tests/framework/Asserts.h" #include "tests/framework/Macros.h" #include "tests/framework/datasets/Datasets.h" +#include "tests/validation/Helpers.h" #include "tests/validation/Validation.h" #include "tests/validation/fixtures/BatchNormalizationLayerFixture.h" @@ -63,17 +64,24 @@ TEST_SUITE(BatchNormalizationLayer) template using NEBatchNormalizationLayerFixture = BatchNormalizationLayerValidationFixture; -DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, 
combine(combine(datasets::RandomBatchNormalizationLayerDataset(), - combine(framework::dataset::make("UseBeta", { false, true }), framework::dataset::make("UseGamma", { false, true }))), - framework::dataset::make("DataType", { DataType::QS8, DataType::QS16, DataType::F32 })), - shape0, shape1, epsilon, use_beta, use_gamma, dt) +DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(combine(datasets::RandomBatchNormalizationLayerDataset(), + combine(framework::dataset::make("UseBeta", { false, true }), framework::dataset::make("UseGamma", { false, true }))), + framework::dataset::make("DataType", { DataType::QS8, DataType::QS16, DataType::F32 })), + framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), + shape0, shape1, epsilon, use_beta, use_gamma, dt, data_layout) { // Set fixed point position data type allowed const int fixed_point_position = (arm_compute::is_data_type_fixed_point(dt)) ? 3 : 0; + TensorShape src_dst_shapes = shape0; + if(data_layout == DataLayout::NHWC) + { + permute(src_dst_shapes, PermutationVector(2U, 0U, 1U)); + } + // Create tensors - Tensor src = create_tensor(shape0, dt, 1, fixed_point_position); - Tensor dst = create_tensor(shape0, dt, 1, fixed_point_position); + Tensor src = create_tensor(src_dst_shapes, dt, 1, fixed_point_position, QuantizationInfo(), data_layout); + Tensor dst = create_tensor(src_dst_shapes, dt, 1, fixed_point_position, QuantizationInfo(), data_layout); Tensor mean = create_tensor(shape1, dt, 1, fixed_point_position); Tensor var = create_tensor(shape1, dt, 1, fixed_point_position); Tensor beta = create_tensor(shape1, dt, 1, fixed_point_position); @@ -86,7 +94,7 @@ DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(datas norm.configure(&src, &dst, &mean, &var, beta_ptr, gamma_ptr, epsilon); // Validate valid region - const ValidRegion valid_region = shape_to_valid_region(shape0); + const ValidRegion valid_region = shape_to_valid_region(src_dst_shapes); validate(dst.info()->valid_region(), valid_region); } @@ -154,11 +162,13 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip( // *INDENT-ON* TEST_SUITE(Float) -FIXTURE_DATA_TEST_CASE(Random, NEBatchNormalizationLayerFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::RandomBatchNormalizationLayerDataset(), +TEST_SUITE(FP32) +FIXTURE_DATA_TEST_CASE(Random, NEBatchNormalizationLayerFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::RandomBatchNormalizationLayerDataset(), combine(framework::dataset::make("UseBeta", { false, true }), framework::dataset::make("UseGamma", { false, true }))), act_infos), - framework::dataset::make("DataType", DataType::F32))) + framework::dataset::make("DataType", DataType::F32)), + framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) { // Validate output validate(Accessor(_target), _reference, tolerance_f32, 0); @@ -166,18 +176,20 @@ FIXTURE_DATA_TEST_CASE(Random, NEBatchNormalizationLayerFixture, framewor TEST_SUITE_END() #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -TEST_SUITE(Float16) -FIXTURE_DATA_TEST_CASE(Random, NEBatchNormalizationLayerFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::RandomBatchNormalizationLayerDataset(), +TEST_SUITE(FP16) +FIXTURE_DATA_TEST_CASE(Random, NEBatchNormalizationLayerFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::RandomBatchNormalizationLayerDataset(), 
combine(framework::dataset::make("UseBeta", { false, true }), framework::dataset::make("UseGamma", { false, true }))), framework::dataset::make("ActivationInfo", ActivationLayerInfo())), - framework::dataset::make("DataType", DataType::F16))) + framework::dataset::make("DataType", DataType::F16)), + framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) { // Validate output validate(Accessor(_target), _reference, tolerance_f16, 0); } TEST_SUITE_END() #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +TEST_SUITE_END() TEST_SUITE(Quantized) template @@ -185,11 +197,12 @@ using NEBatchNormalizationLayerFixedPointFixture = BatchNormalizationLayerValida TEST_SUITE(QS8) FIXTURE_DATA_TEST_CASE(Random, NEBatchNormalizationLayerFixedPointFixture, framework::DatasetMode::PRECOMMIT, - combine(combine(combine(combine(combine(datasets::RandomBatchNormalizationLayerDataset(), - framework::dataset::make("UseBeta", false)), - framework::dataset::make("UseGamma", false)), - framework::dataset::make("ActivationInfo", ActivationLayerInfo())), - framework::dataset::make("DataType", DataType::QS8)), + combine(combine(combine(combine(combine(combine(datasets::RandomBatchNormalizationLayerDataset(), + framework::dataset::make("UseBeta", false)), + framework::dataset::make("UseGamma", false)), + framework::dataset::make("ActivationInfo", ActivationLayerInfo())), + framework::dataset::make("DataType", DataType::QS8)), + framework::dataset::make("DataLayout", DataLayout::NCHW)), framework::dataset::make("FractionalBits", 1, 6))) { // Validate output @@ -199,11 +212,12 @@ TEST_SUITE_END() TEST_SUITE(QS16) FIXTURE_DATA_TEST_CASE(Random, NEBatchNormalizationLayerFixedPointFixture, framework::DatasetMode::PRECOMMIT, - combine(combine(combine(combine(combine(datasets::RandomBatchNormalizationLayerDataset(), - framework::dataset::make("UseBeta", false)), - framework::dataset::make("UseGamma", false)), - framework::dataset::make("ActivationInfo", ActivationLayerInfo())), - framework::dataset::make("DataType", DataType::QS16)), + combine(combine(combine(combine(combine(combine(datasets::RandomBatchNormalizationLayerDataset(), + framework::dataset::make("UseBeta", false)), + framework::dataset::make("UseGamma", false)), + framework::dataset::make("ActivationInfo", ActivationLayerInfo())), + framework::dataset::make("DataType", DataType::QS16)), + framework::dataset::make("DataLayout", DataLayout::NCHW)), framework::dataset::make("FractionalBits", 1, 14))) { // Validate output diff --git a/tests/validation/fixtures/BatchNormalizationLayerFixture.h b/tests/validation/fixtures/BatchNormalizationLayerFixture.h index 4a6ac1af7f..7e072e7023 100644 --- a/tests/validation/fixtures/BatchNormalizationLayerFixture.h +++ b/tests/validation/fixtures/BatchNormalizationLayerFixture.h @@ -45,14 +45,20 @@ class BatchNormalizationLayerValidationFixedPointFixture : public framework::Fix { public: template - void setup(TensorShape shape0, TensorShape shape1, float epsilon, bool use_beta, bool use_gamma, ActivationLayerInfo act_info, DataType dt, int fractional_bits) + void setup(TensorShape shape0, TensorShape shape1, float epsilon, bool use_beta, bool use_gamma, ActivationLayerInfo act_info, DataType dt, DataLayout data_layout, int fractional_bits) { _fractional_bits = fractional_bits; _data_type = dt; _use_beta = use_beta; _use_gamma = use_gamma; - _target = compute_target(shape0, shape1, epsilon, act_info, dt, fractional_bits); - _reference = compute_reference(shape0, shape1, epsilon, act_info, dt, 
fractional_bits); + + if(data_layout == DataLayout::NHWC) + { + permute(shape0, PermutationVector(2U, 0U, 1U)); + } + + _target = compute_target(shape0, shape1, epsilon, act_info, dt, data_layout, fractional_bits); + _reference = compute_reference(shape0, shape1, epsilon, act_info, dt, data_layout, fractional_bits); } protected: @@ -119,11 +125,11 @@ protected: } } - TensorType compute_target(const TensorShape &shape0, const TensorShape &shape1, float epsilon, ActivationLayerInfo act_info, DataType dt, int fixed_point_position) + TensorType compute_target(const TensorShape &shape0, const TensorShape &shape1, float epsilon, ActivationLayerInfo act_info, DataType dt, DataLayout data_layout, int fixed_point_position) { // Create tensors - TensorType src = create_tensor(shape0, dt, 1, fixed_point_position); - TensorType dst = create_tensor(shape0, dt, 1, fixed_point_position); + TensorType src = create_tensor(shape0, dt, 1, fixed_point_position, QuantizationInfo(), data_layout); + TensorType dst = create_tensor(shape0, dt, 1, fixed_point_position, QuantizationInfo(), data_layout); TensorType mean = create_tensor(shape1, dt, 1, fixed_point_position); TensorType var = create_tensor(shape1, dt, 1, fixed_point_position); TensorType beta = create_tensor(shape1, dt, 1, fixed_point_position); @@ -166,10 +172,10 @@ protected: return dst; } - SimpleTensor compute_reference(const TensorShape &shape0, const TensorShape &shape1, float epsilon, ActivationLayerInfo act_info, DataType dt, int fixed_point_position) + SimpleTensor compute_reference(const TensorShape &shape0, const TensorShape &shape1, float epsilon, ActivationLayerInfo act_info, DataType dt, DataLayout data_layout, int fixed_point_position) { // Create reference - SimpleTensor ref_src{ shape0, dt, 1, fixed_point_position }; + SimpleTensor ref_src{ shape0, dt, 1, fixed_point_position, QuantizationInfo(), data_layout }; SimpleTensor ref_mean{ shape1, dt, 1, fixed_point_position }; SimpleTensor ref_var{ shape1, dt, 1, fixed_point_position }; SimpleTensor ref_beta{ shape1, dt, 1, fixed_point_position }; @@ -194,9 +200,9 @@ class BatchNormalizationLayerValidationFixture : public BatchNormalizationLayerV { public: template - void setup(TensorShape shape0, TensorShape shape1, float epsilon, bool use_beta, bool use_gamma, ActivationLayerInfo act_info, DataType dt) + void setup(TensorShape shape0, TensorShape shape1, float epsilon, bool use_beta, bool use_gamma, ActivationLayerInfo act_info, DataType dt, DataLayout data_layout) { - BatchNormalizationLayerValidationFixedPointFixture::setup(shape0, shape1, epsilon, use_beta, use_gamma, act_info, dt, 0); + BatchNormalizationLayerValidationFixedPointFixture::setup(shape0, shape1, epsilon, use_beta, use_gamma, act_info, dt, data_layout, 0); } }; } // namespace validation diff --git a/tests/validation/reference/BatchNormalizationLayer.cpp b/tests/validation/reference/BatchNormalizationLayer.cpp index c8badacc79..ae309d9093 100644 --- a/tests/validation/reference/BatchNormalizationLayer.cpp +++ b/tests/validation/reference/BatchNormalizationLayer.cpp @@ -27,6 +27,7 @@ #include "tests/validation/FixedPoint.h" #include "tests/validation/Helpers.h" +#include "tests/validation/reference/Permute.h" namespace arm_compute { @@ -41,6 +42,7 @@ template ::value, int>:: SimpleTensor batch_normalization_layer(const SimpleTensor &src, const SimpleTensor &mean, const SimpleTensor &var, const SimpleTensor &beta, const SimpleTensor &gamma, float epsilon, ActivationLayerInfo act_info, int fixed_point_position) { + 
ARM_COMPUTE_ERROR_ON_MSG(src.data_layout() == DataLayout::NHWC, "Unsupported NHWC format"); ARM_COMPUTE_UNUSED(act_info); SimpleTensor result(src.shape(), src.data_type()); @@ -86,12 +88,14 @@ SimpleTensor batch_normalization_layer(const SimpleTensor &src, const Simp { ARM_COMPUTE_UNUSED(fixed_point_position); - SimpleTensor result(src.shape(), src.data_type()); + const bool is_nhwc = src.data_layout() == DataLayout::NHWC; + const SimpleTensor perm_src = (is_nhwc) ? permute(src, PermutationVector(1U, 2U, 0U)) : src; + SimpleTensor result(perm_src.shape(), perm_src.data_type()); - const auto cols = static_cast(src.shape()[0]); - const auto rows = static_cast(src.shape()[1]); - const auto depth = static_cast(src.shape()[2]); - const int upper_dims = src.shape().total_size() / (cols * rows * depth); + const auto cols = static_cast(perm_src.shape()[0]); + const auto rows = static_cast(perm_src.shape()[1]); + const auto depth = static_cast(perm_src.shape()[2]); + const int upper_dims = perm_src.shape().total_size() / (cols * rows * depth); for(int r = 0; r < upper_dims; ++r) { @@ -103,7 +107,7 @@ SimpleTensor batch_normalization_layer(const SimpleTensor &src, const Simp { const int pos = l + k * cols + i * rows * cols + r * cols * rows * depth; const float denominator = sqrt(var[i] + epsilon); - const float numerator = src[pos] - mean[i]; + const float numerator = perm_src[pos] - mean[i]; const float x_bar = numerator / denominator; result[pos] = beta[i] + x_bar * gamma[i]; } @@ -116,6 +120,10 @@ SimpleTensor batch_normalization_layer(const SimpleTensor &src, const Simp result = activation_layer(result, act_info); } + if(is_nhwc) + { + result = permute(result, PermutationVector(2U, 0U, 1U)); + } return result; } template SimpleTensor batch_normalization_layer(const SimpleTensor &src, const SimpleTensor &mean, const SimpleTensor &var, const SimpleTensor &beta, -- cgit v1.2.1
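
For reference, a minimal usage sketch (not part of the patch) of how the NHWC path introduced above could be driven through the public runtime API. The tensor shapes are illustrative, and the TensorInfo::set_data_layout() call and the NEBatchNormalizationLayer::configure() signature are assumptions about the API of this period rather than something this change adds.

#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/functions/NEBatchNormalizationLayer.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

int main()
{
    // NHWC source/destination: 2 channels over a 16x8 spatial extent (channel is dimension 0 for NHWC)
    TensorInfo data_info(TensorShape(2U, 16U, 8U), 1, DataType::F32);
    data_info.set_data_layout(DataLayout::NHWC); // assumed setter; the layout is what selects the *_nhwc kernel paths

    // Per-channel parameters are 1D and layout independent
    const TensorInfo param_info(TensorShape(2U), 1, DataType::F32);

    Tensor src, dst, mean, var, beta, gamma;
    src.allocator()->init(data_info);
    dst.allocator()->init(data_info);
    mean.allocator()->init(param_info);
    var.allocator()->init(param_info);
    beta.allocator()->init(param_info);
    gamma.allocator()->init(param_info);

    // Fused RELU is one of the activations covered by the NHWC FP32 map in configure_fused()
    NEBatchNormalizationLayer bn;
    bn.configure(&src, &dst, &mean, &var, &beta, &gamma, 0.001f,
                 ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));

    for(Tensor *t : { &src, &dst, &mean, &var, &beta, &gamma })
    {
        t->allocator()->allocate();
    }

    // ... fill src, mean, var, beta and gamma before running ...
    bn.run();
    return 0;
}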