diff options
8 files changed, 262 insertions, 111 deletions
diff --git a/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h index fe65ac1a43..84d3594426 100644 --- a/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h +++ b/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h @@ -82,6 +82,8 @@ private: bool _is_quantized; bool _is_optimized; bool _are_weights_reshaped; + bool _is_nchw; + bool _is_first_run; }; /** Basic function to execute a generic depthwise convolution. This function calls the following NEON kernels: diff --git a/src/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.cpp b/src/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.cpp index 08d8f8ce56..edda2cd9da 100644 --- a/src/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.cpp +++ b/src/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.cpp @@ -44,6 +44,7 @@ namespace Status validate_arguments(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input); + ARM_COMPUTE_RETURN_ERROR_ON(input->data_layout() == DataLayout::UNKNOWN); ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QASYMM8, DataType::QS16, DataType::F16, DataType::QS32, DataType::S32, DataType::F32); @@ -68,6 +69,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *bias, con ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, bias); } + ARM_COMPUTE_RETURN_ERROR_ON(bias->dimension(0) != input->dimension(get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::CHANNEL))); ARM_COMPUTE_RETURN_ERROR_ON(bias->num_dimensions() > 1); } else @@ -79,6 +81,8 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *bias, con if((output != nullptr) && (output->total_size() != 0)) { ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::QS8, DataType::QASYMM8, DataType::QS16, 
DataType::F32); + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output); + if(is_data_type_fixed_point(input->data_type())) { ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->data_type() == DataType::QS8 && output->data_type() != DataType::QS8, "Wrong data type for output"); @@ -101,6 +105,8 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *bias, con std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *bias, ITensorInfo *output) { + ARM_COMPUTE_ERROR_ON(input->data_layout() == DataLayout::UNKNOWN); + bool window_changed = false; unsigned int num_elems_processed_per_iteration = 16 / element_size_from_data_type(input->data_type()); @@ -138,8 +144,16 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITen } else { - AccessWindowStatic bias_access(bias, 0, 0, bias->dimension(0), bias->dimension(1)); - window_changed = update_window_and_padding(win, input_access, bias_access); + if(input->data_layout() == DataLayout::NCHW) + { + AccessWindowStatic bias_access(bias, 0, 0, bias->dimension(0), bias->dimension(1)); + window_changed = update_window_and_padding(win, input_access, bias_access); + } + else + { + AccessWindowHorizontal bias_access(bias, 0, num_elems_processed_per_iteration); + window_changed = update_window_and_padding(win, input_access, bias_access); + } } input_access.set_valid_region(win, ValidRegion(Coordinates(), input->tensor_shape())); @@ -253,6 +267,7 @@ template <typename T1, typename T2, bool in_place, bool has_bias> void output_stage(ITensor *input, const ITensor *bias, const Window &window, ITensor *output, int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift) { + ARM_COMPUTE_ERROR_ON(input->info()->data_layout() == DataLayout::UNKNOWN); ARM_COMPUTE_UNUSED(result_fixedpoint_multiplier); ARM_COMPUTE_UNUSED(result_shift); ARM_COMPUTE_UNUSED(result_offset_after_shift); @@ -303,6 +318,66 @@ void output_stage(ITensor *input, const ITensor 
*bias, const Window &window, ITe } } +template <typename T1, typename T2, bool in_place, bool has_bias> +void output_stage_nhwc(ITensor *input, const ITensor *bias, const Window &window, ITensor *output, + int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift) +{ + ARM_COMPUTE_UNUSED(result_fixedpoint_multiplier); + ARM_COMPUTE_UNUSED(result_shift); + ARM_COMPUTE_UNUSED(result_offset_after_shift); + + Window window_bias = window; + window_bias.set(Window::DimY, Window::Dimension(0, 0, 0)); + window_bias.set(Window::DimZ, Window::Dimension(0, 0, 0)); + window_bias.set(3, Window::Dimension(0, 0, 0)); + + Iterator in(input, window); + Iterator bi(bias, window_bias); + + if(in_place) // In place accumulate + { + execute_window_loop(window, [&](const Coordinates & id) + { + // Get bias and pointer to input + const auto in_ptr = reinterpret_cast<T1 *>(in.ptr()); + const auto bias_ptr = reinterpret_cast<T2 *>(bi.ptr()); + + // Accumulate bias + if(has_bias) + { + internal_vst1q(in_ptr, internal_vqaddq(internal_vld1q(in_ptr), internal_vld1q(bias_ptr))); + } + else + { + internal_vst1q(in_ptr, internal_vld1q(in_ptr)); + } + }, + in, bi); + } + else // Out of place accumulate + { + Iterator out(output, window); + execute_window_loop(window, [&](const Coordinates & id) + { + // Get bias and pointer to input + const auto in_ptr = reinterpret_cast<T1 *>(in.ptr()); + const auto out_ptr = reinterpret_cast<T2 *>(out.ptr()); + const auto bias_ptr = reinterpret_cast<T2 *>(bi.ptr()); + + // Accumulate bias + if(has_bias) + { + internal_vst1q(out_ptr, internal_vqaddq(internal_vld1q(in_ptr), internal_vld1q(bias_ptr))); + } + else + { + internal_vst1q(out_ptr, internal_vld1q(in_ptr)); + } + }, + in, bi, out); + } +} + // QASYMM8 specializations template <> void output_stage<int32_t, uint8_t, false, true>(ITensor *input, const ITensor *bias, const Window &window, ITensor *output, @@ -415,61 +490,79 @@ void 
NEDirectConvolutionLayerOutputStageKernel::configure(ITensor *input, const INEKernel::configure(win_config.second); // Set appropriate function - switch(input->info()->data_type()) + if(input->info()->data_layout() == DataLayout::NCHW) { - case DataType::QS8: + switch(input->info()->data_type()) { - if(bias == nullptr) + case DataType::QS8: { - _func = (output == nullptr) ? &output_stage<qint8_t, qint8_t, true, false> : &output_stage<qint8_t, qint8_t, false, false>; + if(bias == nullptr) + { + _func = (output == nullptr) ? &output_stage<qint8_t, qint8_t, true, false> : &output_stage<qint8_t, qint8_t, false, false>; + } + else + { + _func = (output == nullptr) ? &output_stage<qint8_t, qint8_t, true, true> : &output_stage<qint8_t, qint8_t, false, true>; + } + break; } - else + case DataType::QS16: { - _func = (output == nullptr) ? &output_stage<qint8_t, qint8_t, true, true> : &output_stage<qint8_t, qint8_t, false, true>; + if(bias != nullptr && bias->info()->data_type() == DataType::QS8) + { + _func = (output == nullptr) ? &output_stage<qint16_t, qint8_t, true, true> : &output_stage<qint16_t, qint8_t, false, true>; + } + else if(bias == nullptr) + { + _func = (output == nullptr) ? &output_stage<qint16_t, qint8_t, true, false> : &output_stage<qint16_t, qint8_t, false, false>; + } + else + { + ARM_COMPUTE_ERROR("Not implemented"); + } + break; } - break; - } - case DataType::QS16: - { - if(bias != nullptr && bias->info()->data_type() == DataType::QS8) + case DataType::QS32: { - _func = (output == nullptr) ? &output_stage<qint16_t, qint8_t, true, true> : &output_stage<qint16_t, qint8_t, false, true>; + _func = (output == nullptr) ? &output_stage<qint32_t, qint16_t, true, true> : &output_stage<qint32_t, qint16_t, false, true>; + break; } - else if(bias == nullptr) + case DataType::S32: { - _func = (output == nullptr) ? &output_stage<qint16_t, qint8_t, true, false> : &output_stage<qint16_t, qint8_t, false, false>; + _func = (bias == nullptr) ? 
&output_stage<int32_t, uint8_t, false, false> : &output_stage<int32_t, uint8_t, false, true>; + break; } - else +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC + case DataType::F16: { - ARM_COMPUTE_ERROR("Not implemented"); + _func = (output == nullptr) ? &output_stage<float16_t, float16_t, true, true> : &output_stage<float16_t, float16_t, false, true>; + break; } - break; - } - case DataType::QS32: - { - _func = (output == nullptr) ? &output_stage<qint32_t, qint16_t, true, true> : &output_stage<qint32_t, qint16_t, false, true>; - break; - } - case DataType::S32: - { - _func = (bias == nullptr) ? &output_stage<int32_t, uint8_t, false, false> : &output_stage<int32_t, uint8_t, false, true>; - break; - } -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC - case DataType::F16: - { - _func = (output == nullptr) ? &output_stage<float16_t, float16_t, true, true> : &output_stage<float16_t, float16_t, false, true>; - break; - } #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ - case DataType::F32: - { - _func = (output == nullptr) ? &output_stage<float, float, true, true> : &output_stage<float, float, false, true>; - break; + case DataType::F32: + { + _func = (output == nullptr) ? &output_stage<float, float, true, true> : &output_stage<float, float, false, true>; + break; + } + default: + { + ARM_COMPUTE_ERROR("Unsupported combination of types among the inputs."); + } } - default: + } + else + { + switch(input->info()->data_type()) { - ARM_COMPUTE_ERROR("Unsupported combination of types among the inputs."); + case DataType::F32: + { + _func = (output == nullptr) ? 
&output_stage_nhwc<float, float, true, true> : &output_stage_nhwc<float, float, false, true>; + break; + } + default: + { + ARM_COMPUTE_ERROR("Unsupported combination of types among the inputs."); + } } } } diff --git a/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp b/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp index f28ed715f6..8691fb9f76 100644 --- a/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp +++ b/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp @@ -37,7 +37,7 @@ using namespace arm_compute::misc::shape_calculator; NEDepthwiseConvolutionLayer3x3::NEDepthwiseConvolutionLayer3x3() : _dwc_kernel(), _output_stage_kernel(), _border_handler(), _permute_input(), _permute_weights(), _permute_output(), _accumulator(), _input_nhwc(), _weights_hwio(), _output_nhwc(), _has_bias(false), - _is_quantized(false), _is_optimized(false), _are_weights_reshaped(false) + _is_quantized(false), _is_optimized(false), _are_weights_reshaped(false), _is_nchw(true), _is_first_run(true) { } @@ -52,30 +52,38 @@ void NEDepthwiseConvolutionLayer3x3::configure(ITensor *input, const ITensor *we _has_bias = biases != nullptr; _is_optimized = NEDepthwiseConvolutionLayer3x3Kernel::is_optimized_execution_possible(input->info()->tensor_shape(), conv_info, - input->info()->data_type()); + input->info()->data_type(), + input->info()->data_layout()); _are_weights_reshaped = false; + _is_nchw = input->info()->data_layout() == DataLayout::NCHW; + + ARM_COMPUTE_ERROR_ON(!_is_optimized && !_is_nchw); if(_is_optimized) { - // Configure the function to transform the input tensor from NCHW -> NHWC - _permute_input.configure(input, &_input_nhwc, PermutationVector(2U, 0U, 1U)); - - // Configure the function to transform the weights tensor from IHW -> HWI - _permute_weights.configure(weights, &_weights_hwio, PermutationVector(2U, 0U, 1U)); + if(_is_nchw) + { + // Configure the function to transform the input tensor from NCHW -> NHWC + 
_permute_input.configure(input, &_input_nhwc, PermutationVector(2U, 0U, 1U)); - // Configure optimized depthwise - _dwc_kernel.configure(&_input_nhwc, &_weights_hwio, &_output_nhwc, conv_info, DataLayout::NHWC); + // Configure the function to transform the weights tensor from IHW -> HWI + _permute_weights.configure(weights, &_weights_hwio, PermutationVector(2U, 0U, 1U)); - // Configure the function to transform the convoluted output to ACL's native ordering format NCHW - _permute_output.configure(&_output_nhwc, output, PermutationVector(1U, 2U, 0U)); + // Configure optimized depthwise + _dwc_kernel.configure(&_input_nhwc, &_weights_hwio, &_output_nhwc, conv_info, DataLayout::NHWC); - // Allocate tensors - _input_nhwc.allocator()->allocate(); - _weights_hwio.allocator()->allocate(); - _output_nhwc.allocator()->allocate(); + // Configure the function to transform the convoluted output to ACL's native ordering format NCHW + _permute_output.configure(&_output_nhwc, output, PermutationVector(1U, 2U, 0U)); - // Create convolver (deferred) - _dwc_kernel.generate_convolver(); + // Allocate tensors + _input_nhwc.allocator()->allocate(); + _weights_hwio.allocator()->allocate(); + _output_nhwc.allocator()->allocate(); + } + else + { + _dwc_kernel.configure(input, weights, output, conv_info, DataLayout::NHWC); + } } else { @@ -116,8 +124,15 @@ void NEDepthwiseConvolutionLayer3x3::configure(ITensor *input, const ITensor *we void NEDepthwiseConvolutionLayer3x3::run() { + if(_is_first_run && _is_optimized) + { + _is_first_run = false; + // Create convolver (deferred) + _dwc_kernel.generate_convolver(); + } + // Permute weights in HWIO format if the optimized kernel will be executedd - if(!_are_weights_reshaped && _is_optimized) + if(!_are_weights_reshaped && _is_optimized && _is_nchw) { _are_weights_reshaped = true; _permute_weights.run(); @@ -126,8 +141,11 @@ void NEDepthwiseConvolutionLayer3x3::run() // Handle input if(_is_optimized) { - // Permute input to NHWC format 
execution - _permute_input.run(); + if(_is_nchw) + { + // Permute input to NHWC format execution + _permute_input.run(); + } } else { @@ -139,7 +157,7 @@ void NEDepthwiseConvolutionLayer3x3::run() NEScheduler::get().schedule(&_dwc_kernel, Window::DimX); // Permute output to ACL's native NCHW format in case of NHWC execution - if(_is_optimized) + if(_is_optimized && _is_nchw) { _permute_output.run(); } diff --git a/tests/validation/CL/DepthwiseConvolutionLayer.cpp b/tests/validation/CL/DepthwiseConvolutionLayer.cpp index 8ac882cc60..1779ff1aee 100644 --- a/tests/validation/CL/DepthwiseConvolutionLayer.cpp +++ b/tests/validation/CL/DepthwiseConvolutionLayer.cpp @@ -54,14 +54,17 @@ template <typename T> using CLDepthwiseConvolutionLayerFixture = DepthwiseConvolutionLayerValidationFixture<CLTensor, CLAccessor, CLDepthwiseConvolutionLayer, T>; TEST_SUITE(Generic) -FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::ALL, combine(datasets::SmallDepthwiseConvolutionLayerDataset(), framework::dataset::make("DataType", - DataType::F32))) +FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::ALL, combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset(), + framework::dataset::make("DataType", + DataType::F32)), + framework::dataset::make("DataLayout", DataLayout::NCHW))) { validate(CLAccessor(_target), _reference, tolerance_f32); } -FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(datasets::LargeDepthwiseConvolutionLayerDataset(), +FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset(), framework::dataset::make("DataType", - DataType::F32))) + DataType::F32)), + framework::dataset::make("DataLayout", DataLayout::NCHW))) { validate(CLAccessor(_target), _reference, tolerance_f32); } @@ -73,15 
+76,17 @@ using CLDepthwiseConvolutionLayerFixture3x3 = DepthwiseConvolutionLayerValidatio TEST_SUITE(Float) TEST_SUITE(F16) TEST_SUITE(W3x3) -FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerFixture3x3<half>, framework::DatasetMode::ALL, combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(), +FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerFixture3x3<half>, framework::DatasetMode::ALL, combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(), framework::dataset::make("DataType", - DataType::F16))) + DataType::F16)), + framework::dataset::make("DataLayout", DataLayout::NCHW))) { validate(CLAccessor(_target), _reference, tolerance_f16); } -FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerFixture3x3<half>, framework::DatasetMode::NIGHTLY, combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(), +FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerFixture3x3<half>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(), framework::dataset::make("DataType", - DataType::F16))) + DataType::F16)), + framework::dataset::make("DataLayout", DataLayout::NCHW))) { validate(CLAccessor(_target), _reference, tolerance_f16); } @@ -90,15 +95,17 @@ TEST_SUITE_END() TEST_SUITE(FP32) TEST_SUITE(W3x3) -FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerFixture3x3<float>, framework::DatasetMode::ALL, combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(), +FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerFixture3x3<float>, framework::DatasetMode::ALL, combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(), framework::dataset::make("DataType", - DataType::F32))) + DataType::F32)), + framework::dataset::make("DataLayout", DataLayout::NCHW))) { validate(CLAccessor(_target), _reference, tolerance_f32); } -FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerFixture3x3<float>, framework::DatasetMode::NIGHTLY, 
combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(), +FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerFixture3x3<float>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(), framework::dataset::make("DataType", - DataType::F32))) + DataType::F32)), + framework::dataset::make("DataLayout", DataLayout::NCHW))) { validate(CLAccessor(_target), _reference, tolerance_f32); } diff --git a/tests/validation/GLES_COMPUTE/DepthwiseConvolutionLayer.cpp b/tests/validation/GLES_COMPUTE/DepthwiseConvolutionLayer.cpp index cacf6962ee..2baa93e413 100644 --- a/tests/validation/GLES_COMPUTE/DepthwiseConvolutionLayer.cpp +++ b/tests/validation/GLES_COMPUTE/DepthwiseConvolutionLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -55,15 +55,17 @@ using GCDepthwiseConvolutionLayerFixture3x3 = DepthwiseConvolutionLayerValidatio TEST_SUITE(Float) TEST_SUITE(FP16) TEST_SUITE(W3x3) -FIXTURE_DATA_TEST_CASE(RunSmall, GCDepthwiseConvolutionLayerFixture3x3<half>, framework::DatasetMode::ALL, combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(), +FIXTURE_DATA_TEST_CASE(RunSmall, GCDepthwiseConvolutionLayerFixture3x3<half>, framework::DatasetMode::ALL, combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(), framework::dataset::make("DataType", - DataType::F16))) + DataType::F16)), + framework::dataset::make("DataLayout", DataLayout::NCHW))) { validate(GCAccessor(_target), _reference, tolerance_fp16, tolerance_num); } -FIXTURE_DATA_TEST_CASE(RunLarge, GCDepthwiseConvolutionLayerFixture3x3<half>, framework::DatasetMode::NIGHTLY, combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(), +FIXTURE_DATA_TEST_CASE(RunLarge, GCDepthwiseConvolutionLayerFixture3x3<half>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(), framework::dataset::make("DataType", - 
DataType::F16))) + DataType::F16)), + framework::dataset::make("DataLayout", DataLayout::NCHW))) { validate(GCAccessor(_target), _reference, tolerance_fp16, tolerance_num); } diff --git a/tests/validation/NEON/DepthwiseConvolutionLayer.cpp b/tests/validation/NEON/DepthwiseConvolutionLayer.cpp index 0cdd4c0296..49e146c084 100644 --- a/tests/validation/NEON/DepthwiseConvolutionLayer.cpp +++ b/tests/validation/NEON/DepthwiseConvolutionLayer.cpp @@ -95,15 +95,17 @@ TEST_SUITE(F32) TEST_SUITE(Generic) template <typename T> using NEDepthwiseConvolutionLayerFixture = DepthwiseConvolutionLayerValidationFixture<Tensor, Accessor, NEDepthwiseConvolutionLayer, T>; -FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallDepthwiseConvolutionLayerDataset(), +FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset(), framework::dataset::make("DataType", - DataType::F32))) + DataType::F32)), + framework::dataset::make("DataLayout", DataLayout::NCHW))) { validate(Accessor(_target), _reference, tolerance_f32); } -FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(datasets::LargeDepthwiseConvolutionLayerDataset(), +FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset(), framework::dataset::make("DataType", - DataType::F32))) + DataType::F32)), + framework::dataset::make("DataLayout", DataLayout::NCHW))) { validate(Accessor(_target), _reference, tolerance_f32); } @@ -112,21 +114,24 @@ TEST_SUITE_END() TEST_SUITE(W3x3) template <typename T> using NEDepthwiseConvolutionLayerFixture3x3 = DepthwiseConvolutionLayerValidationFixture<Tensor, Accessor, NEDepthwiseConvolutionLayer3x3, T>; 
-FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthwiseConvolutionLayerFixture3x3<float>, framework::DatasetMode::ALL, combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(), +FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthwiseConvolutionLayerFixture3x3<float>, framework::DatasetMode::ALL, combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(), framework::dataset::make("DataType", - DataType::F32))) + DataType::F32)), + framework::dataset::make("DataLayout", DataLayout::NCHW))) { validate(Accessor(_target), _reference, tolerance_f32); } -FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthwiseConvolutionLayerFixture3x3<float>, framework::DatasetMode::NIGHTLY, combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(), +FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthwiseConvolutionLayerFixture3x3<float>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(), framework::dataset::make("DataType", - DataType::F32))) + DataType::F32)), + framework::dataset::make("DataLayout", DataLayout::NCHW))) { validate(Accessor(_target), _reference, tolerance_f32); } -FIXTURE_DATA_TEST_CASE(RunOptimized, NEDepthwiseConvolutionLayerFixture3x3<float>, framework::DatasetMode::ALL, combine(datasets::OptimizedDepthwiseConvolutionLayerDataset3x3(), +FIXTURE_DATA_TEST_CASE(RunOptimized, NEDepthwiseConvolutionLayerFixture3x3<float>, framework::DatasetMode::ALL, combine(combine(datasets::OptimizedDepthwiseConvolutionLayerDataset3x3(), framework::dataset::make("DataType", - DataType::F32))) + DataType::F32)), + framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) { validate(Accessor(_target), _reference, tolerance_f32); } diff --git a/tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h b/tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h index df5436fcf7..ccdd443999 100644 --- a/tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h +++ b/tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h @@ 
-52,15 +52,22 @@ public: public: template <typename...> - void setup(TensorShape in_shape, TensorShape weights_shape, TensorShape out_shape, PadStrideInfo pad_stride_info, DataType data_type, QuantizationInfo quantization_info) + void setup(TensorShape in_shape, TensorShape weights_shape, TensorShape out_shape, PadStrideInfo pad_stride_info, DataType data_type, QuantizationInfo quantization_info, DataLayout data_layout) { _quantization_info = quantization_info; _data_type = data_type; const TensorShape biases_shape(weights_shape[2]); const DataType bias_data_type = is_data_type_quantized_asymmetric(data_type) ? DataType::S32 : data_type; - _target = compute_target(in_shape, weights_shape, biases_shape, out_shape, pad_stride_info, data_type, bias_data_type, quantization_info); - _reference = compute_reference(in_shape, weights_shape, biases_shape, out_shape, pad_stride_info, data_type, bias_data_type, quantization_info); + if(data_layout == DataLayout::NHWC) + { + permute(in_shape, PermutationVector(2U, 0U, 1U)); + permute(weights_shape, PermutationVector(2U, 0U, 1U)); + permute(out_shape, PermutationVector(2U, 0U, 1U)); + } + + _target = compute_target(in_shape, weights_shape, biases_shape, out_shape, pad_stride_info, data_type, bias_data_type, quantization_info, data_layout); + _reference = compute_reference(in_shape, weights_shape, biases_shape, out_shape, pad_stride_info, data_type, bias_data_type, quantization_info, data_layout); } protected: @@ -94,13 +101,13 @@ protected: } TensorType compute_target(const TensorShape &input_shape, const TensorShape &weights_shape, const TensorShape &biases_shape, const TensorShape &output_shape, PadStrideInfo &pad_stride_info, - const DataType data_type, const DataType bias_data_type, const QuantizationInfo quantization_info) + const DataType data_type, const DataType bias_data_type, const QuantizationInfo quantization_info, const DataLayout data_layout) { // Create tensors - TensorType src = 
create_tensor<TensorType>(input_shape, data_type, 1, 0, quantization_info); - TensorType weights = create_tensor<TensorType>(weights_shape, data_type, 1, 0, quantization_info); - TensorType biases = create_tensor<TensorType>(biases_shape, bias_data_type, 1, 0, quantization_info); - TensorType dst = create_tensor<TensorType>(output_shape, data_type, 1, 0, quantization_info); + TensorType src = create_tensor<TensorType>(input_shape, data_type, 1, 0, quantization_info, data_layout); + TensorType weights = create_tensor<TensorType>(weights_shape, data_type, 1, 0, quantization_info, data_layout); + TensorType biases = create_tensor<TensorType>(biases_shape, bias_data_type, 1, 0, quantization_info, data_layout); + TensorType dst = create_tensor<TensorType>(output_shape, data_type, 1, 0, quantization_info, data_layout); // Create Depthwise Convolution configure function FunctionType dwc; @@ -134,11 +141,11 @@ protected: } SimpleTensor<T> compute_reference(const TensorShape &in_shape, const TensorShape &weights_shape, const TensorShape &biases_shape, const TensorShape &out_shape, const PadStrideInfo &pad_stride_info, - const DataType data_type, const DataType bias_data_type, QuantizationInfo quantization_info) + const DataType data_type, const DataType bias_data_type, const QuantizationInfo quantization_info, const DataLayout data_layout) { - SimpleTensor<T> src{ in_shape, data_type, 1, 0, quantization_info }; - SimpleTensor<T> weights{ weights_shape, data_type, 1, 0, quantization_info }; - SimpleTensor<TBias> biases{ biases_shape, bias_data_type, 1, 0, quantization_info }; + SimpleTensor<T> src{ in_shape, data_type, 1, 0, quantization_info, data_layout }; + SimpleTensor<T> weights{ weights_shape, data_type, 1, 0, quantization_info, data_layout }; + SimpleTensor<TBias> biases{ biases_shape, bias_data_type, 1, 0, quantization_info, data_layout }; fill(src, 0); fill(weights, 1); @@ -158,10 +165,10 @@ class DepthwiseConvolutionLayerValidationFixture : public 
DepthwiseConvolutionLa { public: template <typename...> - void setup(TensorShape in_shape, TensorShape weights_shape, TensorShape out_shape, PadStrideInfo pad_stride_info, DataType data_type) + void setup(TensorShape in_shape, TensorShape weights_shape, TensorShape out_shape, PadStrideInfo pad_stride_info, DataType data_type, DataLayout data_layout) { DepthwiseConvolutionLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(in_shape, weights_shape, out_shape, pad_stride_info, - data_type, QuantizationInfo()); + data_type, QuantizationInfo(), data_layout); } }; @@ -173,7 +180,7 @@ public: void setup(TensorShape in_shape, TensorShape weights_shape, TensorShape out_shape, PadStrideInfo pad_stride_info, DataType data_type, QuantizationInfo quantization_info) { DepthwiseConvolutionLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(in_shape, weights_shape, out_shape, pad_stride_info, - data_type, quantization_info); + data_type, quantization_info, DataLayout::NCHW); } }; } // namespace validation diff --git a/tests/validation/reference/DepthwiseConvolutionLayer.cpp b/tests/validation/reference/DepthwiseConvolutionLayer.cpp index b2a7067709..ab61b7dd65 100644 --- a/tests/validation/reference/DepthwiseConvolutionLayer.cpp +++ b/tests/validation/reference/DepthwiseConvolutionLayer.cpp @@ -24,6 +24,7 @@ #include "DepthwiseConvolutionLayer.h" #include "ConvolutionLayer.h" +#include "Permute.h" #include "Utils.h" #include "tests/validation/FixedPoint.h" @@ -50,11 +51,8 @@ namespace reference * */ template <typename T, typename TB> -SimpleTensor<T> depthwise_convolution(const SimpleTensor<T> &src, const SimpleTensor<T> &weights, const SimpleTensor<TB> &biases, const TensorShape &dst_shape, const PadStrideInfo &conv_info) +void depthwise_convolution_nchw(const SimpleTensor<T> &src, const SimpleTensor<T> &weights, const SimpleTensor<TB> &biases, SimpleTensor<T> &dst, const PadStrideInfo &conv_info) { - // Create reference - 
SimpleTensor<T> dst{ dst_shape, src.data_type(), 1, src.fixed_point_position() }; - // Compute reference const int filter_width = weights.shape().x(); const int filter_height = weights.shape().y(); @@ -108,8 +106,6 @@ SimpleTensor<T> depthwise_convolution(const SimpleTensor<T> &src, const SimpleTe } } } - - return dst; } template <> @@ -195,6 +191,27 @@ SimpleTensor<uint8_t> depthwise_convolution(const SimpleTensor<uint8_t> &src, co return dst; } +template <typename T, typename TB> +SimpleTensor<T> depthwise_convolution(const SimpleTensor<T> &src, const SimpleTensor<T> &weights, const SimpleTensor<TB> &biases, const TensorShape &dst_shape, const PadStrideInfo &conv_info) +{ + SimpleTensor<T> dst{ dst_shape, src.data_type(), 1, src.fixed_point_position() }; + + if(src.data_layout() == DataLayout::NHWC && src.data_type() == DataType::F32) + { + SimpleTensor<T> src_nchw = reference::permute<T>(src, PermutationVector(1U, 2U, 0U)); + SimpleTensor<T> weights_nchw = reference::permute<T>(weights, PermutationVector(1U, 2U, 0U)); + SimpleTensor<T> dst_nchw = reference::permute<T>(dst, PermutationVector(1U, 2U, 0U)); + + depthwise_convolution_nchw<T, TB>(src_nchw, weights_nchw, biases, dst_nchw, conv_info); + + return reference::permute<T>(dst_nchw, PermutationVector(2U, 0U, 1U)); + } + + depthwise_convolution_nchw<T, TB>(src, weights, biases, dst, conv_info); + + return dst; +} + template SimpleTensor<float> depthwise_convolution(const SimpleTensor<float> &src, const SimpleTensor<float> &weights, const SimpleTensor<float> &biases, const TensorShape &dst_shape, const PadStrideInfo &conv_info); |