aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h2
-rw-r--r--src/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.cpp177
-rw-r--r--src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp60
-rw-r--r--tests/validation/CL/DepthwiseConvolutionLayer.cpp31
-rw-r--r--tests/validation/GLES_COMPUTE/DepthwiseConvolutionLayer.cpp12
-rw-r--r--tests/validation/NEON/DepthwiseConvolutionLayer.cpp25
-rw-r--r--tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h37
-rw-r--r--tests/validation/reference/DepthwiseConvolutionLayer.cpp29
8 files changed, 262 insertions, 111 deletions
diff --git a/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h
index fe65ac1a43..84d3594426 100644
--- a/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h
@@ -82,6 +82,8 @@ private:
bool _is_quantized;
bool _is_optimized;
bool _are_weights_reshaped;
+ bool _is_nchw;
+ bool _is_first_run;
};
/** Basic function to execute a generic depthwise convolution. This function calls the following NEON kernels:
diff --git a/src/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.cpp b/src/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.cpp
index 08d8f8ce56..edda2cd9da 100644
--- a/src/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.cpp
+++ b/src/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.cpp
@@ -44,6 +44,7 @@ namespace
Status validate_arguments(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input);
+ ARM_COMPUTE_RETURN_ERROR_ON(input->data_layout() == DataLayout::UNKNOWN);
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QASYMM8,
DataType::QS16, DataType::F16,
DataType::QS32, DataType::S32, DataType::F32);
@@ -68,6 +69,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *bias, con
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, bias);
}
+ ARM_COMPUTE_RETURN_ERROR_ON(bias->dimension(0) != input->dimension(get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::CHANNEL)));
ARM_COMPUTE_RETURN_ERROR_ON(bias->num_dimensions() > 1);
}
else
@@ -79,6 +81,8 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *bias, con
if((output != nullptr) && (output->total_size() != 0))
{
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::QS8, DataType::QASYMM8, DataType::QS16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output);
+
if(is_data_type_fixed_point(input->data_type()))
{
ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->data_type() == DataType::QS8 && output->data_type() != DataType::QS8, "Wrong data type for output");
@@ -101,6 +105,8 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *bias, con
std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *bias, ITensorInfo *output)
{
+ ARM_COMPUTE_ERROR_ON(input->data_layout() == DataLayout::UNKNOWN);
+
bool window_changed = false;
unsigned int num_elems_processed_per_iteration = 16 / element_size_from_data_type(input->data_type());
@@ -138,8 +144,16 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITen
}
else
{
- AccessWindowStatic bias_access(bias, 0, 0, bias->dimension(0), bias->dimension(1));
- window_changed = update_window_and_padding(win, input_access, bias_access);
+ if(input->data_layout() == DataLayout::NCHW)
+ {
+ AccessWindowStatic bias_access(bias, 0, 0, bias->dimension(0), bias->dimension(1));
+ window_changed = update_window_and_padding(win, input_access, bias_access);
+ }
+ else
+ {
+ AccessWindowHorizontal bias_access(bias, 0, num_elems_processed_per_iteration);
+ window_changed = update_window_and_padding(win, input_access, bias_access);
+ }
}
input_access.set_valid_region(win, ValidRegion(Coordinates(), input->tensor_shape()));
@@ -253,6 +267,7 @@ template <typename T1, typename T2, bool in_place, bool has_bias>
void output_stage(ITensor *input, const ITensor *bias, const Window &window, ITensor *output,
int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift)
{
+ ARM_COMPUTE_ERROR_ON(input->info()->data_layout() == DataLayout::UNKNOWN);
ARM_COMPUTE_UNUSED(result_fixedpoint_multiplier);
ARM_COMPUTE_UNUSED(result_shift);
ARM_COMPUTE_UNUSED(result_offset_after_shift);
@@ -303,6 +318,66 @@ void output_stage(ITensor *input, const ITensor *bias, const Window &window, ITe
}
}
+template <typename T1, typename T2, bool in_place, bool has_bias>
+void output_stage_nhwc(ITensor *input, const ITensor *bias, const Window &window, ITensor *output,
+ int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift)
+{
+ ARM_COMPUTE_UNUSED(result_fixedpoint_multiplier);
+ ARM_COMPUTE_UNUSED(result_shift);
+ ARM_COMPUTE_UNUSED(result_offset_after_shift);
+
+ Window window_bias = window;
+ window_bias.set(Window::DimY, Window::Dimension(0, 0, 0));
+ window_bias.set(Window::DimZ, Window::Dimension(0, 0, 0));
+ window_bias.set(3, Window::Dimension(0, 0, 0));
+
+ Iterator in(input, window);
+ Iterator bi(bias, window_bias);
+
+ if(in_place) // In place accumulate
+ {
+ execute_window_loop(window, [&](const Coordinates & id)
+ {
+ // Get bias and pointer to input
+ const auto in_ptr = reinterpret_cast<T1 *>(in.ptr());
+ const auto bias_ptr = reinterpret_cast<T2 *>(bi.ptr());
+
+ // Accumulate bias
+ if(has_bias)
+ {
+ internal_vst1q(in_ptr, internal_vqaddq(internal_vld1q(in_ptr), internal_vld1q(bias_ptr)));
+ }
+ else
+ {
+ internal_vst1q(in_ptr, internal_vld1q(in_ptr));
+ }
+ },
+ in, bi);
+ }
+ else // Out of place accumulate
+ {
+ Iterator out(output, window);
+ execute_window_loop(window, [&](const Coordinates & id)
+ {
+ // Get bias and pointer to input
+ const auto in_ptr = reinterpret_cast<T1 *>(in.ptr());
+ const auto out_ptr = reinterpret_cast<T2 *>(out.ptr());
+ const auto bias_ptr = reinterpret_cast<T2 *>(bi.ptr());
+
+ // Accumulate bias
+ if(has_bias)
+ {
+ internal_vst1q(out_ptr, internal_vqaddq(internal_vld1q(in_ptr), internal_vld1q(bias_ptr)));
+ }
+ else
+ {
+ internal_vst1q(out_ptr, internal_vld1q(in_ptr));
+ }
+ },
+ in, bi);
+ }
+}
+
// QASYMM8 specializations
template <>
void output_stage<int32_t, uint8_t, false, true>(ITensor *input, const ITensor *bias, const Window &window, ITensor *output,
@@ -415,61 +490,79 @@ void NEDirectConvolutionLayerOutputStageKernel::configure(ITensor *input, const
INEKernel::configure(win_config.second);
// Set appropriate function
- switch(input->info()->data_type())
+ if(input->info()->data_layout() == DataLayout::NCHW)
{
- case DataType::QS8:
+ switch(input->info()->data_type())
{
- if(bias == nullptr)
+ case DataType::QS8:
{
- _func = (output == nullptr) ? &output_stage<qint8_t, qint8_t, true, false> : &output_stage<qint8_t, qint8_t, false, false>;
+ if(bias == nullptr)
+ {
+ _func = (output == nullptr) ? &output_stage<qint8_t, qint8_t, true, false> : &output_stage<qint8_t, qint8_t, false, false>;
+ }
+ else
+ {
+ _func = (output == nullptr) ? &output_stage<qint8_t, qint8_t, true, true> : &output_stage<qint8_t, qint8_t, false, true>;
+ }
+ break;
}
- else
+ case DataType::QS16:
{
- _func = (output == nullptr) ? &output_stage<qint8_t, qint8_t, true, true> : &output_stage<qint8_t, qint8_t, false, true>;
+ if(bias != nullptr && bias->info()->data_type() == DataType::QS8)
+ {
+ _func = (output == nullptr) ? &output_stage<qint16_t, qint8_t, true, true> : &output_stage<qint16_t, qint8_t, false, true>;
+ }
+ else if(bias == nullptr)
+ {
+ _func = (output == nullptr) ? &output_stage<qint16_t, qint8_t, true, false> : &output_stage<qint16_t, qint8_t, false, false>;
+ }
+ else
+ {
+ ARM_COMPUTE_ERROR("Not implemented");
+ }
+ break;
}
- break;
- }
- case DataType::QS16:
- {
- if(bias != nullptr && bias->info()->data_type() == DataType::QS8)
+ case DataType::QS32:
{
- _func = (output == nullptr) ? &output_stage<qint16_t, qint8_t, true, true> : &output_stage<qint16_t, qint8_t, false, true>;
+ _func = (output == nullptr) ? &output_stage<qint32_t, qint16_t, true, true> : &output_stage<qint32_t, qint16_t, false, true>;
+ break;
}
- else if(bias == nullptr)
+ case DataType::S32:
{
- _func = (output == nullptr) ? &output_stage<qint16_t, qint8_t, true, false> : &output_stage<qint16_t, qint8_t, false, false>;
+ _func = (bias == nullptr) ? &output_stage<int32_t, uint8_t, false, false> : &output_stage<int32_t, uint8_t, false, true>;
+ break;
}
- else
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+ case DataType::F16:
{
- ARM_COMPUTE_ERROR("Not implemented");
+ _func = (output == nullptr) ? &output_stage<float16_t, float16_t, true, true> : &output_stage<float16_t, float16_t, false, true>;
+ break;
}
- break;
- }
- case DataType::QS32:
- {
- _func = (output == nullptr) ? &output_stage<qint32_t, qint16_t, true, true> : &output_stage<qint32_t, qint16_t, false, true>;
- break;
- }
- case DataType::S32:
- {
- _func = (bias == nullptr) ? &output_stage<int32_t, uint8_t, false, false> : &output_stage<int32_t, uint8_t, false, true>;
- break;
- }
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
- case DataType::F16:
- {
- _func = (output == nullptr) ? &output_stage<float16_t, float16_t, true, true> : &output_stage<float16_t, float16_t, false, true>;
- break;
- }
#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
- case DataType::F32:
- {
- _func = (output == nullptr) ? &output_stage<float, float, true, true> : &output_stage<float, float, false, true>;
- break;
+ case DataType::F32:
+ {
+ _func = (output == nullptr) ? &output_stage<float, float, true, true> : &output_stage<float, float, false, true>;
+ break;
+ }
+ default:
+ {
+ ARM_COMPUTE_ERROR("Unsupported combination of types among the inputs.");
+ }
}
- default:
+ }
+ else
+ {
+ switch(input->info()->data_type())
{
- ARM_COMPUTE_ERROR("Unsupported combination of types among the inputs.");
+ case DataType::F32:
+ {
+ _func = (output == nullptr) ? &output_stage_nhwc<float, float, true, true> : &output_stage_nhwc<float, float, false, true>;
+ break;
+ }
+ default:
+ {
+ ARM_COMPUTE_ERROR("Unsupported combination of types among the inputs.");
+ }
}
}
}
diff --git a/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp b/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp
index f28ed715f6..8691fb9f76 100644
--- a/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp
+++ b/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp
@@ -37,7 +37,7 @@ using namespace arm_compute::misc::shape_calculator;
NEDepthwiseConvolutionLayer3x3::NEDepthwiseConvolutionLayer3x3()
: _dwc_kernel(), _output_stage_kernel(), _border_handler(), _permute_input(), _permute_weights(), _permute_output(), _accumulator(), _input_nhwc(), _weights_hwio(), _output_nhwc(), _has_bias(false),
- _is_quantized(false), _is_optimized(false), _are_weights_reshaped(false)
+ _is_quantized(false), _is_optimized(false), _are_weights_reshaped(false), _is_nchw(true), _is_first_run(true)
{
}
@@ -52,30 +52,38 @@ void NEDepthwiseConvolutionLayer3x3::configure(ITensor *input, const ITensor *we
_has_bias = biases != nullptr;
_is_optimized = NEDepthwiseConvolutionLayer3x3Kernel::is_optimized_execution_possible(input->info()->tensor_shape(),
conv_info,
- input->info()->data_type());
+ input->info()->data_type(),
+ input->info()->data_layout());
_are_weights_reshaped = false;
+ _is_nchw = input->info()->data_layout() == DataLayout::NCHW;
+
+ ARM_COMPUTE_ERROR_ON(!_is_optimized && !_is_nchw);
if(_is_optimized)
{
- // Configure the function to transform the input tensor from NCHW -> NHWC
- _permute_input.configure(input, &_input_nhwc, PermutationVector(2U, 0U, 1U));
-
- // Configure the function to transform the weights tensor from IHW -> HWI
- _permute_weights.configure(weights, &_weights_hwio, PermutationVector(2U, 0U, 1U));
+ if(_is_nchw)
+ {
+ // Configure the function to transform the input tensor from NCHW -> NHWC
+ _permute_input.configure(input, &_input_nhwc, PermutationVector(2U, 0U, 1U));
- // Configure optimized depthwise
- _dwc_kernel.configure(&_input_nhwc, &_weights_hwio, &_output_nhwc, conv_info, DataLayout::NHWC);
+ // Configure the function to transform the weights tensor from IHW -> HWI
+ _permute_weights.configure(weights, &_weights_hwio, PermutationVector(2U, 0U, 1U));
- // Configure the function to transform the convoluted output to ACL's native ordering format NCHW
- _permute_output.configure(&_output_nhwc, output, PermutationVector(1U, 2U, 0U));
+ // Configure optimized depthwise
+ _dwc_kernel.configure(&_input_nhwc, &_weights_hwio, &_output_nhwc, conv_info, DataLayout::NHWC);
- // Allocate tensors
- _input_nhwc.allocator()->allocate();
- _weights_hwio.allocator()->allocate();
- _output_nhwc.allocator()->allocate();
+ // Configure the function to transform the convoluted output to ACL's native ordering format NCHW
+ _permute_output.configure(&_output_nhwc, output, PermutationVector(1U, 2U, 0U));
- // Create convolver (deferred)
- _dwc_kernel.generate_convolver();
+ // Allocate tensors
+ _input_nhwc.allocator()->allocate();
+ _weights_hwio.allocator()->allocate();
+ _output_nhwc.allocator()->allocate();
+ }
+ else
+ {
+ _dwc_kernel.configure(input, weights, output, conv_info, DataLayout::NHWC);
+ }
}
else
{
@@ -116,8 +124,15 @@ void NEDepthwiseConvolutionLayer3x3::configure(ITensor *input, const ITensor *we
void NEDepthwiseConvolutionLayer3x3::run()
{
+ if(_is_first_run && _is_optimized)
+ {
+ _is_first_run = false;
+ // Create convolver (deferred)
+ _dwc_kernel.generate_convolver();
+ }
+
// Permute weights in HWIO format if the optimized kernel will be executed
- if(!_are_weights_reshaped && _is_optimized)
+ if(!_are_weights_reshaped && _is_optimized && _is_nchw)
{
_are_weights_reshaped = true;
_permute_weights.run();
@@ -126,8 +141,11 @@ void NEDepthwiseConvolutionLayer3x3::run()
// Handle input
if(_is_optimized)
{
- // Permute input to NHWC format execution
- _permute_input.run();
+ if(_is_nchw)
+ {
+ // Permute input to NHWC format execution
+ _permute_input.run();
+ }
}
else
{
@@ -139,7 +157,7 @@ void NEDepthwiseConvolutionLayer3x3::run()
NEScheduler::get().schedule(&_dwc_kernel, Window::DimX);
// Permute output to ACL's native NCHW format in case of NHWC execution
- if(_is_optimized)
+ if(_is_optimized && _is_nchw)
{
_permute_output.run();
}
diff --git a/tests/validation/CL/DepthwiseConvolutionLayer.cpp b/tests/validation/CL/DepthwiseConvolutionLayer.cpp
index 8ac882cc60..1779ff1aee 100644
--- a/tests/validation/CL/DepthwiseConvolutionLayer.cpp
+++ b/tests/validation/CL/DepthwiseConvolutionLayer.cpp
@@ -54,14 +54,17 @@ template <typename T>
using CLDepthwiseConvolutionLayerFixture = DepthwiseConvolutionLayerValidationFixture<CLTensor, CLAccessor, CLDepthwiseConvolutionLayer, T>;
TEST_SUITE(Generic)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::ALL, combine(datasets::SmallDepthwiseConvolutionLayerDataset(), framework::dataset::make("DataType",
- DataType::F32)))
+FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::ALL, combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset(),
+ framework::dataset::make("DataType",
+ DataType::F32)),
+ framework::dataset::make("DataLayout", DataLayout::NCHW)))
{
validate(CLAccessor(_target), _reference, tolerance_f32);
}
-FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(datasets::LargeDepthwiseConvolutionLayerDataset(),
+FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset(),
framework::dataset::make("DataType",
- DataType::F32)))
+ DataType::F32)),
+ framework::dataset::make("DataLayout", DataLayout::NCHW)))
{
validate(CLAccessor(_target), _reference, tolerance_f32);
}
@@ -73,15 +76,17 @@ using CLDepthwiseConvolutionLayerFixture3x3 = DepthwiseConvolutionLayerValidatio
TEST_SUITE(Float)
TEST_SUITE(F16)
TEST_SUITE(W3x3)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerFixture3x3<half>, framework::DatasetMode::ALL, combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(),
+FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerFixture3x3<half>, framework::DatasetMode::ALL, combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(),
framework::dataset::make("DataType",
- DataType::F16)))
+ DataType::F16)),
+ framework::dataset::make("DataLayout", DataLayout::NCHW)))
{
validate(CLAccessor(_target), _reference, tolerance_f16);
}
-FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerFixture3x3<half>, framework::DatasetMode::NIGHTLY, combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(),
+FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerFixture3x3<half>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(),
framework::dataset::make("DataType",
- DataType::F16)))
+ DataType::F16)),
+ framework::dataset::make("DataLayout", DataLayout::NCHW)))
{
validate(CLAccessor(_target), _reference, tolerance_f16);
}
@@ -90,15 +95,17 @@ TEST_SUITE_END()
TEST_SUITE(FP32)
TEST_SUITE(W3x3)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerFixture3x3<float>, framework::DatasetMode::ALL, combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(),
+FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerFixture3x3<float>, framework::DatasetMode::ALL, combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(),
framework::dataset::make("DataType",
- DataType::F32)))
+ DataType::F32)),
+ framework::dataset::make("DataLayout", DataLayout::NCHW)))
{
validate(CLAccessor(_target), _reference, tolerance_f32);
}
-FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerFixture3x3<float>, framework::DatasetMode::NIGHTLY, combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(),
+FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerFixture3x3<float>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(),
framework::dataset::make("DataType",
- DataType::F32)))
+ DataType::F32)),
+ framework::dataset::make("DataLayout", DataLayout::NCHW)))
{
validate(CLAccessor(_target), _reference, tolerance_f32);
}
diff --git a/tests/validation/GLES_COMPUTE/DepthwiseConvolutionLayer.cpp b/tests/validation/GLES_COMPUTE/DepthwiseConvolutionLayer.cpp
index cacf6962ee..2baa93e413 100644
--- a/tests/validation/GLES_COMPUTE/DepthwiseConvolutionLayer.cpp
+++ b/tests/validation/GLES_COMPUTE/DepthwiseConvolutionLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -55,15 +55,17 @@ using GCDepthwiseConvolutionLayerFixture3x3 = DepthwiseConvolutionLayerValidatio
TEST_SUITE(Float)
TEST_SUITE(FP16)
TEST_SUITE(W3x3)
-FIXTURE_DATA_TEST_CASE(RunSmall, GCDepthwiseConvolutionLayerFixture3x3<half>, framework::DatasetMode::ALL, combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(),
+FIXTURE_DATA_TEST_CASE(RunSmall, GCDepthwiseConvolutionLayerFixture3x3<half>, framework::DatasetMode::ALL, combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(),
framework::dataset::make("DataType",
- DataType::F16)))
+ DataType::F16)),
+ framework::dataset::make("DataLayout", DataLayout::NCHW)))
{
validate(GCAccessor(_target), _reference, tolerance_fp16, tolerance_num);
}
-FIXTURE_DATA_TEST_CASE(RunLarge, GCDepthwiseConvolutionLayerFixture3x3<half>, framework::DatasetMode::NIGHTLY, combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(),
+FIXTURE_DATA_TEST_CASE(RunLarge, GCDepthwiseConvolutionLayerFixture3x3<half>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(),
framework::dataset::make("DataType",
- DataType::F16)))
+ DataType::F16)),
+ framework::dataset::make("DataLayout", DataLayout::NCHW)))
{
validate(GCAccessor(_target), _reference, tolerance_fp16, tolerance_num);
}
diff --git a/tests/validation/NEON/DepthwiseConvolutionLayer.cpp b/tests/validation/NEON/DepthwiseConvolutionLayer.cpp
index 0cdd4c0296..49e146c084 100644
--- a/tests/validation/NEON/DepthwiseConvolutionLayer.cpp
+++ b/tests/validation/NEON/DepthwiseConvolutionLayer.cpp
@@ -95,15 +95,17 @@ TEST_SUITE(F32)
TEST_SUITE(Generic)
template <typename T>
using NEDepthwiseConvolutionLayerFixture = DepthwiseConvolutionLayerValidationFixture<Tensor, Accessor, NEDepthwiseConvolutionLayer, T>;
-FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallDepthwiseConvolutionLayerDataset(),
+FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset(),
framework::dataset::make("DataType",
- DataType::F32)))
+ DataType::F32)),
+ framework::dataset::make("DataLayout", DataLayout::NCHW)))
{
validate(Accessor(_target), _reference, tolerance_f32);
}
-FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(datasets::LargeDepthwiseConvolutionLayerDataset(),
+FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset(),
framework::dataset::make("DataType",
- DataType::F32)))
+ DataType::F32)),
+ framework::dataset::make("DataLayout", DataLayout::NCHW)))
{
validate(Accessor(_target), _reference, tolerance_f32);
}
@@ -112,21 +114,24 @@ TEST_SUITE_END()
TEST_SUITE(W3x3)
template <typename T>
using NEDepthwiseConvolutionLayerFixture3x3 = DepthwiseConvolutionLayerValidationFixture<Tensor, Accessor, NEDepthwiseConvolutionLayer3x3, T>;
-FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthwiseConvolutionLayerFixture3x3<float>, framework::DatasetMode::ALL, combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(),
+FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthwiseConvolutionLayerFixture3x3<float>, framework::DatasetMode::ALL, combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(),
framework::dataset::make("DataType",
- DataType::F32)))
+ DataType::F32)),
+ framework::dataset::make("DataLayout", DataLayout::NCHW)))
{
validate(Accessor(_target), _reference, tolerance_f32);
}
-FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthwiseConvolutionLayerFixture3x3<float>, framework::DatasetMode::NIGHTLY, combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(),
+FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthwiseConvolutionLayerFixture3x3<float>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(),
framework::dataset::make("DataType",
- DataType::F32)))
+ DataType::F32)),
+ framework::dataset::make("DataLayout", DataLayout::NCHW)))
{
validate(Accessor(_target), _reference, tolerance_f32);
}
-FIXTURE_DATA_TEST_CASE(RunOptimized, NEDepthwiseConvolutionLayerFixture3x3<float>, framework::DatasetMode::ALL, combine(datasets::OptimizedDepthwiseConvolutionLayerDataset3x3(),
+FIXTURE_DATA_TEST_CASE(RunOptimized, NEDepthwiseConvolutionLayerFixture3x3<float>, framework::DatasetMode::ALL, combine(combine(datasets::OptimizedDepthwiseConvolutionLayerDataset3x3(),
framework::dataset::make("DataType",
- DataType::F32)))
+ DataType::F32)),
+ framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
{
validate(Accessor(_target), _reference, tolerance_f32);
}
diff --git a/tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h b/tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h
index df5436fcf7..ccdd443999 100644
--- a/tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h
+++ b/tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h
@@ -52,15 +52,22 @@ public:
public:
template <typename...>
- void setup(TensorShape in_shape, TensorShape weights_shape, TensorShape out_shape, PadStrideInfo pad_stride_info, DataType data_type, QuantizationInfo quantization_info)
+ void setup(TensorShape in_shape, TensorShape weights_shape, TensorShape out_shape, PadStrideInfo pad_stride_info, DataType data_type, QuantizationInfo quantization_info, DataLayout data_layout)
{
_quantization_info = quantization_info;
_data_type = data_type;
const TensorShape biases_shape(weights_shape[2]);
const DataType bias_data_type = is_data_type_quantized_asymmetric(data_type) ? DataType::S32 : data_type;
- _target = compute_target(in_shape, weights_shape, biases_shape, out_shape, pad_stride_info, data_type, bias_data_type, quantization_info);
- _reference = compute_reference(in_shape, weights_shape, biases_shape, out_shape, pad_stride_info, data_type, bias_data_type, quantization_info);
+ if(data_layout == DataLayout::NHWC)
+ {
+ permute(in_shape, PermutationVector(2U, 0U, 1U));
+ permute(weights_shape, PermutationVector(2U, 0U, 1U));
+ permute(out_shape, PermutationVector(2U, 0U, 1U));
+ }
+
+ _target = compute_target(in_shape, weights_shape, biases_shape, out_shape, pad_stride_info, data_type, bias_data_type, quantization_info, data_layout);
+ _reference = compute_reference(in_shape, weights_shape, biases_shape, out_shape, pad_stride_info, data_type, bias_data_type, quantization_info, data_layout);
}
protected:
@@ -94,13 +101,13 @@ protected:
}
TensorType compute_target(const TensorShape &input_shape, const TensorShape &weights_shape, const TensorShape &biases_shape, const TensorShape &output_shape, PadStrideInfo &pad_stride_info,
- const DataType data_type, const DataType bias_data_type, const QuantizationInfo quantization_info)
+ const DataType data_type, const DataType bias_data_type, const QuantizationInfo quantization_info, const DataLayout data_layout)
{
// Create tensors
- TensorType src = create_tensor<TensorType>(input_shape, data_type, 1, 0, quantization_info);
- TensorType weights = create_tensor<TensorType>(weights_shape, data_type, 1, 0, quantization_info);
- TensorType biases = create_tensor<TensorType>(biases_shape, bias_data_type, 1, 0, quantization_info);
- TensorType dst = create_tensor<TensorType>(output_shape, data_type, 1, 0, quantization_info);
+ TensorType src = create_tensor<TensorType>(input_shape, data_type, 1, 0, quantization_info, data_layout);
+ TensorType weights = create_tensor<TensorType>(weights_shape, data_type, 1, 0, quantization_info, data_layout);
+ TensorType biases = create_tensor<TensorType>(biases_shape, bias_data_type, 1, 0, quantization_info, data_layout);
+ TensorType dst = create_tensor<TensorType>(output_shape, data_type, 1, 0, quantization_info, data_layout);
// Create Depthwise Convolution configure function
FunctionType dwc;
@@ -134,11 +141,11 @@ protected:
}
SimpleTensor<T> compute_reference(const TensorShape &in_shape, const TensorShape &weights_shape, const TensorShape &biases_shape, const TensorShape &out_shape, const PadStrideInfo &pad_stride_info,
- const DataType data_type, const DataType bias_data_type, QuantizationInfo quantization_info)
+ const DataType data_type, const DataType bias_data_type, const QuantizationInfo quantization_info, const DataLayout data_layout)
{
- SimpleTensor<T> src{ in_shape, data_type, 1, 0, quantization_info };
- SimpleTensor<T> weights{ weights_shape, data_type, 1, 0, quantization_info };
- SimpleTensor<TBias> biases{ biases_shape, bias_data_type, 1, 0, quantization_info };
+ SimpleTensor<T> src{ in_shape, data_type, 1, 0, quantization_info, data_layout };
+ SimpleTensor<T> weights{ weights_shape, data_type, 1, 0, quantization_info, data_layout };
+ SimpleTensor<TBias> biases{ biases_shape, bias_data_type, 1, 0, quantization_info, data_layout };
fill(src, 0);
fill(weights, 1);
@@ -158,10 +165,10 @@ class DepthwiseConvolutionLayerValidationFixture : public DepthwiseConvolutionLa
{
public:
template <typename...>
- void setup(TensorShape in_shape, TensorShape weights_shape, TensorShape out_shape, PadStrideInfo pad_stride_info, DataType data_type)
+ void setup(TensorShape in_shape, TensorShape weights_shape, TensorShape out_shape, PadStrideInfo pad_stride_info, DataType data_type, DataLayout data_layout)
{
DepthwiseConvolutionLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(in_shape, weights_shape, out_shape, pad_stride_info,
- data_type, QuantizationInfo());
+ data_type, QuantizationInfo(), data_layout);
}
};
@@ -173,7 +180,7 @@ public:
void setup(TensorShape in_shape, TensorShape weights_shape, TensorShape out_shape, PadStrideInfo pad_stride_info, DataType data_type, QuantizationInfo quantization_info)
{
DepthwiseConvolutionLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(in_shape, weights_shape, out_shape, pad_stride_info,
- data_type, quantization_info);
+ data_type, quantization_info, DataLayout::NCHW);
}
};
} // namespace validation
diff --git a/tests/validation/reference/DepthwiseConvolutionLayer.cpp b/tests/validation/reference/DepthwiseConvolutionLayer.cpp
index b2a7067709..ab61b7dd65 100644
--- a/tests/validation/reference/DepthwiseConvolutionLayer.cpp
+++ b/tests/validation/reference/DepthwiseConvolutionLayer.cpp
@@ -24,6 +24,7 @@
#include "DepthwiseConvolutionLayer.h"
#include "ConvolutionLayer.h"
+#include "Permute.h"
#include "Utils.h"
#include "tests/validation/FixedPoint.h"
@@ -50,11 +51,8 @@ namespace reference
*
*/
template <typename T, typename TB>
-SimpleTensor<T> depthwise_convolution(const SimpleTensor<T> &src, const SimpleTensor<T> &weights, const SimpleTensor<TB> &biases, const TensorShape &dst_shape, const PadStrideInfo &conv_info)
+void depthwise_convolution_nchw(const SimpleTensor<T> &src, const SimpleTensor<T> &weights, const SimpleTensor<TB> &biases, SimpleTensor<T> &dst, const PadStrideInfo &conv_info)
{
- // Create reference
- SimpleTensor<T> dst{ dst_shape, src.data_type(), 1, src.fixed_point_position() };
-
// Compute reference
const int filter_width = weights.shape().x();
const int filter_height = weights.shape().y();
@@ -108,8 +106,6 @@ SimpleTensor<T> depthwise_convolution(const SimpleTensor<T> &src, const SimpleTe
}
}
}
-
- return dst;
}
template <>
@@ -195,6 +191,27 @@ SimpleTensor<uint8_t> depthwise_convolution(const SimpleTensor<uint8_t> &src, co
return dst;
}
+template <typename T, typename TB>
+SimpleTensor<T> depthwise_convolution(const SimpleTensor<T> &src, const SimpleTensor<T> &weights, const SimpleTensor<TB> &biases, const TensorShape &dst_shape, const PadStrideInfo &conv_info)
+{
+ SimpleTensor<T> dst{ dst_shape, src.data_type(), 1, src.fixed_point_position() };
+
+ if(src.data_layout() == DataLayout::NHWC && src.data_type() == DataType::F32)
+ {
+ SimpleTensor<T> src_nchw = reference::permute<T>(src, PermutationVector(1U, 2U, 0U));
+ SimpleTensor<T> weights_nchw = reference::permute<T>(weights, PermutationVector(1U, 2U, 0U));
+ SimpleTensor<T> dst_nchw = reference::permute<T>(dst, PermutationVector(1U, 2U, 0U));
+
+ depthwise_convolution_nchw<T, TB>(src_nchw, weights_nchw, biases, dst_nchw, conv_info);
+
+ return reference::permute<T>(dst_nchw, PermutationVector(2U, 0U, 1U));
+ }
+
+ depthwise_convolution_nchw<T, TB>(src, weights, biases, dst, conv_info);
+
+ return dst;
+}
+
template SimpleTensor<float> depthwise_convolution(const SimpleTensor<float> &src, const SimpleTensor<float> &weights, const SimpleTensor<float> &biases, const TensorShape &dst_shape,
const PadStrideInfo &conv_info);