author     Georgios Pinitas <georgios.pinitas@arm.com>  2019-07-10 19:49:11 +0100
committer  Georgios Pinitas <georgios.pinitas@arm.com>  2019-07-12 11:07:25 +0000
commit     4c7585178385241f87288b7903d760d4b4822c6e (patch)
tree       529b6c605d29b3c1fea256558acfa3dbdc5e650e
parent     98b8511fd2e6fe850344ee2c69a93475bced811d (diff)
download   ComputeLibrary-4c7585178385241f87288b7903d760d4b4822c6e.tar.gz
COMPMID-2421: Add 5x5 DepthwiseConv2d in the optimized path for NEON
Change-Id: Idecc8e7ce4404af83f0f99ed993be8f961f7b9af
Signed-off-by: Georgios Pinitas <georgios.pinitas@arm.com>
Reviewed-on: https://review.mlplatform.org/c/1527
Reviewed-by: Michalis Spyrou <michalis.spyrou@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
-rw-r--r--  src/core/NEON/kernels/convolution/depthwise/depthwise_3x3_3x3_2x2_fp16_fp16.cpp | 31
-rw-r--r--  src/core/NEON/kernels/convolution/depthwise/depthwise_dilated.cpp | 4
-rw-r--r--  src/core/NEON/kernels/convolution/depthwise/depthwise_dilated_qa8_qa8.cpp | 2
-rw-r--r--  src/core/NEON/kernels/convolution/depthwise/depthwise_fp16.cpp (renamed from src/core/NEON/kernels/convolution/depthwise/depthwise_3x3_3x3_1x1_fp16_fp16.cpp) | 3
-rw-r--r--  src/core/NEON/kernels/convolution/depthwise/depthwise_fp32.cpp (renamed from src/core/NEON/kernels/convolution/depthwise/depthwise_4x4_3x3_2x2_fp32_fp32.cpp) | 2
-rw-r--r--  src/core/NEON/kernels/convolution/depthwise/depthwise_pack_parameters.cpp | 2
-rw-r--r--  src/core/NEON/kernels/convolution/depthwise/depthwise_qa8_qa8.cpp | 2
-rw-r--r--  src/core/Utils.cpp | 32
-rw-r--r--  src/runtime/NEON/functions/assembly/NEDepthwiseConvolutionAssemblyDispatch.cpp | 210
-rw-r--r--  tests/datasets/DepthwiseConvolutionLayerDataset.h | 31
-rw-r--r--  tests/validation/NEON/DepthwiseConvolutionLayer.cpp | 196
11 files changed, 336 insertions(+), 179 deletions(-)
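
As a quick illustration of what this change enables (a minimal sketch, not part of the patch): a 5x5 depthwise convolution can now be routed through the optimized NEON path via NEDepthwiseConvolutionLayerOptimized. The include path and configure() signature below are assumptions based on the library around this release; the shapes mirror the new 5x5 test dataset added further down.

// Minimal usage sketch (assumed API): 5x5 depthwise convolution on the optimized NEON path.
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

int main()
{
    Tensor src, weights, bias, dst;
    // NCHW: 7x7 spatial, 16 channels; 5x5 kernel, depth multiplier 1.
    src.allocator()->init(TensorInfo(TensorShape(7U, 7U, 16U), 1, DataType::F32));
    weights.allocator()->init(TensorInfo(TensorShape(5U, 5U, 16U), 1, DataType::F32));
    bias.allocator()->init(TensorInfo(TensorShape(16U), 1, DataType::F32));
    dst.allocator()->init(TensorInfo(TensorShape(7U, 7U, 16U), 1, DataType::F32));

    // SAME padding for a 5x5 kernel at stride 1 is 2 on each side.
    NEDepthwiseConvolutionLayerOptimized conv;
    conv.configure(&src, &weights, &bias, &dst, PadStrideInfo(1, 1, 2, 2), 1 /* depth_multiplier */);

    for(auto *t : { &src, &weights, &bias, &dst })
    {
        t->allocator()->allocate();
    }
    // ... fill src/weights/bias ...
    conv.run();
    return 0;
}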
diff --git a/src/core/NEON/kernels/convolution/depthwise/depthwise_3x3_3x3_2x2_fp16_fp16.cpp b/src/core/NEON/kernels/convolution/depthwise/depthwise_3x3_3x3_2x2_fp16_fp16.cpp
deleted file mode 100644
index 83486920e1..0000000000
--- a/src/core/NEON/kernels/convolution/depthwise/depthwise_3x3_3x3_2x2_fp16_fp16.cpp
+++ /dev/null
@@ -1,31 +0,0 @@
-/*
- * Copyright (c) 2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "impl_fp16_fp16.hpp"
-
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-namespace depthwise
-{
-template class DepthwiseConvolution<3, 3, 3, 3, 2, 2, float16_t, float16_t, float16_t>;
-} // namespace depthwise
-#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
diff --git a/src/core/NEON/kernels/convolution/depthwise/depthwise_dilated.cpp b/src/core/NEON/kernels/convolution/depthwise/depthwise_dilated.cpp
index c4f398e199..648105593c 100644
--- a/src/core/NEON/kernels/convolution/depthwise/depthwise_dilated.cpp
+++ b/src/core/NEON/kernels/convolution/depthwise/depthwise_dilated.cpp
@@ -30,9 +30,13 @@ template class depthwise::DilatedDepthwiseConvolution<3, 3, 3, 3, 1, 1, float, f
template class depthwise::DilatedDepthwiseConvolution<3, 3, 3, 3, 2, 2, float, float, float>;
template class depthwise::DilatedDepthwiseConvolution<4, 4, 3, 3, 1, 1, float, float, float>;
template class depthwise::DilatedDepthwiseConvolution<4, 4, 3, 3, 2, 2, float, float, float>;
+template class depthwise::DilatedDepthwiseConvolution<4, 4, 5, 5, 1, 1, float, float, float>;
+template class depthwise::DilatedDepthwiseConvolution<3, 3, 5, 5, 2, 2, float, float, float>;
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
template class depthwise::DilatedDepthwiseConvolution<3, 3, 3, 3, 1, 1, float16_t, float16_t, float16_t>;
template class depthwise::DilatedDepthwiseConvolution<3, 3, 3, 3, 2, 2, float16_t, float16_t, float16_t>;
+template class depthwise::DilatedDepthwiseConvolution<3, 3, 5, 5, 1, 1, float16_t, float16_t, float16_t>;
+template class depthwise::DilatedDepthwiseConvolution<3, 3, 5, 5, 2, 2, float16_t, float16_t, float16_t>;
#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
diff --git a/src/core/NEON/kernels/convolution/depthwise/depthwise_dilated_qa8_qa8.cpp b/src/core/NEON/kernels/convolution/depthwise/depthwise_dilated_qa8_qa8.cpp
index 879e06158d..3d6777b9cd 100644
--- a/src/core/NEON/kernels/convolution/depthwise/depthwise_dilated_qa8_qa8.cpp
+++ b/src/core/NEON/kernels/convolution/depthwise/depthwise_dilated_qa8_qa8.cpp
@@ -140,3 +140,5 @@ QAsymm8DilatedDepthwiseConvolution<OutputTileRows, OutputTileCols, KernelRows,
template class depthwise::QAsymm8DilatedDepthwiseConvolution<2, 2, 3, 3, 1, 1>;
template class depthwise::QAsymm8DilatedDepthwiseConvolution<2, 2, 3, 3, 2, 2>;
+template class depthwise::QAsymm8DilatedDepthwiseConvolution<2, 2, 5, 5, 1, 1>;
+template class depthwise::QAsymm8DilatedDepthwiseConvolution<2, 2, 5, 5, 2, 2>;
diff --git a/src/core/NEON/kernels/convolution/depthwise/depthwise_3x3_3x3_1x1_fp16_fp16.cpp b/src/core/NEON/kernels/convolution/depthwise/depthwise_fp16.cpp
index 23a99a8c62..ffe7844b70 100644
--- a/src/core/NEON/kernels/convolution/depthwise/depthwise_3x3_3x3_1x1_fp16_fp16.cpp
+++ b/src/core/NEON/kernels/convolution/depthwise/depthwise_fp16.cpp
@@ -27,5 +27,8 @@
namespace depthwise
{
template class DepthwiseConvolution<3, 3, 3, 3, 1, 1, float16_t, float16_t, float16_t>;
+template class DepthwiseConvolution<3, 3, 3, 3, 2, 2, float16_t, float16_t, float16_t>;
+template class DepthwiseConvolution<3, 3, 5, 5, 1, 1, float16_t, float16_t, float16_t>;
+template class DepthwiseConvolution<3, 3, 5, 5, 2, 2, float16_t, float16_t, float16_t>;
} // namespace depthwise
#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
diff --git a/src/core/NEON/kernels/convolution/depthwise/depthwise_4x4_3x3_2x2_fp32_fp32.cpp b/src/core/NEON/kernels/convolution/depthwise/depthwise_fp32.cpp
index a04609d041..331e15868d 100644
--- a/src/core/NEON/kernels/convolution/depthwise/depthwise_4x4_3x3_2x2_fp32_fp32.cpp
+++ b/src/core/NEON/kernels/convolution/depthwise/depthwise_fp32.cpp
@@ -26,4 +26,6 @@
namespace depthwise
{
template class DepthwiseConvolution<4, 4, 3, 3, 2, 2, float, float, float>;
+template class DepthwiseConvolution<4, 4, 5, 5, 1, 1, float, float, float>;
+template class DepthwiseConvolution<3, 3, 5, 5, 2, 2, float, float, float>;
} // namespace depthwise
diff --git a/src/core/NEON/kernels/convolution/depthwise/depthwise_pack_parameters.cpp b/src/core/NEON/kernels/convolution/depthwise/depthwise_pack_parameters.cpp
index f86f1bad73..21ea350661 100644
--- a/src/core/NEON/kernels/convolution/depthwise/depthwise_pack_parameters.cpp
+++ b/src/core/NEON/kernels/convolution/depthwise/depthwise_pack_parameters.cpp
@@ -97,4 +97,6 @@ void PackParameters<KernelRows, KernelColumns, WeightSize, BiasSize>::execute(
template struct PackParameters<3, 3, 2ul, 2ul>;
template struct PackParameters<3, 3, 4ul, 4ul>;
+template struct PackParameters<5, 5, 2ul, 2ul>;
+template struct PackParameters<5, 5, 4ul, 4ul>;
} // namespace
diff --git a/src/core/NEON/kernels/convolution/depthwise/depthwise_qa8_qa8.cpp b/src/core/NEON/kernels/convolution/depthwise/depthwise_qa8_qa8.cpp
index 1989f87ef6..f683c6e01f 100644
--- a/src/core/NEON/kernels/convolution/depthwise/depthwise_qa8_qa8.cpp
+++ b/src/core/NEON/kernels/convolution/depthwise/depthwise_qa8_qa8.cpp
@@ -27,4 +27,6 @@ namespace depthwise
{
template class QAsymm8DepthwiseConvolution<2, 2, 3, 3, 1, 1>;
template class QAsymm8DepthwiseConvolution<2, 2, 3, 3, 2, 2>;
+template class QAsymm8DepthwiseConvolution<2, 2, 5, 5, 1, 1>;
+template class QAsymm8DepthwiseConvolution<2, 2, 5, 5, 2, 2>;
} // namespace depthwise
diff --git a/src/core/Utils.cpp b/src/core/Utils.cpp
index 499a6c8b29..5d32750f0d 100644
--- a/src/core/Utils.cpp
+++ b/src/core/Utils.cpp
@@ -333,17 +333,27 @@ std::string arm_compute::lower_string(const std::string &val)
PadStrideInfo arm_compute::calculate_same_pad(TensorShape input_shape, TensorShape weights_shape, PadStrideInfo conv_info, DataLayout data_layout, const Size2D &dilation)
{
- const unsigned int width_idx = arm_compute::get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
- const unsigned int height_idx = arm_compute::get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
- const auto &strides = conv_info.stride();
- const int out_width = std::ceil(float(input_shape[width_idx]) / float(strides.first));
- const int out_height = std::ceil(float(input_shape[height_idx]) / float(strides.second));
- const int pad_width = (out_width - 1) * strides.first + (weights_shape[width_idx] + (dilation.x() - 1) * (weights_shape[width_idx] - 1) - input_shape[width_idx]);
- const int pad_height = (out_height - 1) * strides.second + (weights_shape[height_idx] + (dilation.y() - 1) * (weights_shape[height_idx] - 1) - input_shape[height_idx]);
- const int same_pad_left = pad_width / 2;
- const int same_pad_top = pad_height / 2;
- const int same_pad_right = pad_width - same_pad_left;
- const int same_pad_bottom = pad_height - same_pad_top;
+ const unsigned int width_idx = arm_compute::get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
+ const unsigned int height_idx = arm_compute::get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
+ const auto &strides = conv_info.stride();
+
+ // Calculate output dimensions
+ const int out_width = (input_shape[width_idx] + strides.first - 1) / strides.first;
+ const int out_height = (input_shape[height_idx] + strides.second - 1) / strides.second;
+
+ // Calculate effective weights sizes
+ const int real_weight_width = (weights_shape[width_idx] - 1) * dilation.x() + 1;
+ const int real_weight_height = (weights_shape[height_idx] - 1) * dilation.y() + 1;
+
+ // Calculate total pad
+ const int pad_width = (out_width - 1) * strides.first + real_weight_width - input_shape[width_idx];
+ const int pad_height = (out_height - 1) * strides.second + real_weight_height - input_shape[height_idx];
+
+ // Calculate individual paddings
+ const int same_pad_left = pad_width / 2;
+ const int same_pad_top = pad_height / 2;
+ const int same_pad_right = pad_width - same_pad_left;
+ const int same_pad_bottom = pad_height - same_pad_top;
return { static_cast<unsigned int>(strides.first),
static_cast<unsigned int>(strides.second),
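
As a standalone check of the padding arithmetic introduced above (a sketch only; the function name is illustrative, not library API), the per-dimension SAME-pad computation reduces to:

// One-dimensional version of the SAME-pad computation above (illustrative).
#include <utility>

std::pair<int, int> same_pad_1d(int in_size, int kernel_size, int stride, int dilation)
{
    const int out_size    = (in_size + stride - 1) / stride;     // ceil(in_size / stride)
    const int real_kernel = (kernel_size - 1) * dilation + 1;    // effective (dilated) kernel size
    const int total_pad   = (out_size - 1) * stride + real_kernel - in_size;
    const int pad_before  = total_pad / 2;                       // left or top
    return { pad_before, total_pad - pad_before };               // { before, after }
}
// For example, same_pad_1d(7, 5, 1, 2) and same_pad_1d(9, 5, 2, 2) both give {4, 4},
// matching the padded dilated 5x5 entries in the test datasets further down.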
diff --git a/src/runtime/NEON/functions/assembly/NEDepthwiseConvolutionAssemblyDispatch.cpp b/src/runtime/NEON/functions/assembly/NEDepthwiseConvolutionAssemblyDispatch.cpp
index b28aaa715f..92ad93e4a7 100644
--- a/src/runtime/NEON/functions/assembly/NEDepthwiseConvolutionAssemblyDispatch.cpp
+++ b/src/runtime/NEON/functions/assembly/NEDepthwiseConvolutionAssemblyDispatch.cpp
@@ -36,10 +36,136 @@
#include "arm_compute/runtime/NEON/NEScheduler.h"
+#include <set>
+
namespace arm_compute
{
namespace
{
+std::unique_ptr<depthwise::IDepthwiseConvolution> get_qasymm8_convolver(int kernel_size, int stride_x,
+ int n_batches, int in_rows, int in_cols, int n_channels,
+ int dilation_factor, neon_convolution_kernels::ActivationFunction activation,
+ const qasymm8::QAsymm8Params &wqinfo, const qasymm8::QAsymm8Params &iqinfo, const qasymm8::QAsymm8Params &oqinfo,
+ const qasymm8::QAsymm8RescaleParams &rescale_params,
+ int padding_top, int padding_left, int padding_bottom, int padding_right)
+{
+ switch(kernel_size)
+ {
+ case 3:
+ {
+ switch(stride_x)
+ {
+ case 1:
+ return arm_compute::support::cpp14::make_unique<depthwise::QAsymm8DilatedDepthwiseConvolution<2, 2, 3, 3, 1, 1>>(
+ n_batches, in_rows, in_cols, n_channels, dilation_factor, activation, wqinfo, iqinfo, oqinfo, rescale_params, padding_top, padding_left, padding_bottom, padding_right);
+ case 2:
+ return arm_compute::support::cpp14::make_unique<depthwise::QAsymm8DilatedDepthwiseConvolution<2, 2, 3, 3, 2, 2>>(
+ n_batches, in_rows, in_cols, n_channels, dilation_factor, activation, wqinfo, iqinfo, oqinfo, rescale_params, padding_top, padding_left, padding_bottom, padding_right);
+ default:
+ return nullptr;
+ }
+ }
+ case 5:
+ {
+ switch(stride_x)
+ {
+ case 1:
+ return arm_compute::support::cpp14::make_unique<depthwise::QAsymm8DilatedDepthwiseConvolution<2, 2, 5, 5, 1, 1>>(
+ n_batches, in_rows, in_cols, n_channels, dilation_factor, activation, wqinfo, iqinfo, oqinfo, rescale_params, padding_top, padding_left, padding_bottom, padding_right);
+ case 2:
+ return arm_compute::support::cpp14::make_unique<depthwise::QAsymm8DilatedDepthwiseConvolution<2, 2, 5, 5, 2, 2>>(
+ n_batches, in_rows, in_cols, n_channels, dilation_factor, activation, wqinfo, iqinfo, oqinfo, rescale_params, padding_top, padding_left, padding_bottom, padding_right);
+ default:
+ return nullptr;
+ }
+ }
+ default:
+ return nullptr;
+ }
+}
+
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+std::unique_ptr<depthwise::IDepthwiseConvolution> get_fp16_convolver(int kernel_size, int stride_x,
+ int n_batches, int in_rows, int in_cols, int n_channels,
+ int dilation_factor, neon_convolution_kernels::ActivationFunction activation,
+ int padding_top, int padding_left, int padding_bottom, int padding_right)
+{
+ switch(kernel_size)
+ {
+ case 3:
+ {
+ switch(stride_x)
+ {
+ case 1:
+ return arm_compute::support::cpp14::make_unique<depthwise::DilatedDepthwiseConvolution<3, 3, 3, 3, 1, 1, float16_t, float16_t, float16_t>>(
+ n_batches, in_rows, in_cols, n_channels, dilation_factor, activation, padding_top, padding_left, padding_bottom, padding_right);
+ case 2:
+ return arm_compute::support::cpp14::make_unique<depthwise::DilatedDepthwiseConvolution<3, 3, 3, 3, 2, 2, float16_t, float16_t, float16_t>>(
+ n_batches, in_rows, in_cols, n_channels, dilation_factor, activation, padding_top, padding_left, padding_bottom, padding_right);
+ default:
+ return nullptr;
+ }
+ }
+ case 5:
+ {
+ switch(stride_x)
+ {
+ case 1:
+ return arm_compute::support::cpp14::make_unique<depthwise::DilatedDepthwiseConvolution<3, 3, 5, 5, 1, 1, float16_t, float16_t, float16_t>>(
+ n_batches, in_rows, in_cols, n_channels, dilation_factor, activation, padding_top, padding_left, padding_bottom, padding_right);
+ case 2:
+ return arm_compute::support::cpp14::make_unique<depthwise::DilatedDepthwiseConvolution<3, 3, 5, 5, 2, 2, float16_t, float16_t, float16_t>>(
+ n_batches, in_rows, in_cols, n_channels, dilation_factor, activation, padding_top, padding_left, padding_bottom, padding_right);
+ default:
+ return nullptr;
+ }
+ }
+ default:
+ return nullptr;
+ }
+}
+#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+
+std::unique_ptr<depthwise::IDepthwiseConvolution> get_fp32_convolver(int kernel_size, int stride_x,
+ int n_batches, int in_rows, int in_cols, int n_channels,
+ int dilation_factor, neon_convolution_kernels::ActivationFunction activation,
+ int padding_top, int padding_left, int padding_bottom, int padding_right)
+{
+ switch(kernel_size)
+ {
+ case 3:
+ {
+ switch(stride_x)
+ {
+ case 1:
+ return arm_compute::support::cpp14::make_unique<depthwise::DilatedDepthwiseConvolution<4, 4, 3, 3, 1, 1, float, float, float>>(
+ n_batches, in_rows, in_cols, n_channels, dilation_factor, activation, padding_top, padding_left, padding_bottom, padding_right);
+ case 2:
+ return arm_compute::support::cpp14::make_unique<depthwise::DilatedDepthwiseConvolution<3, 3, 3, 3, 2, 2, float, float, float>>(
+ n_batches, in_rows, in_cols, n_channels, dilation_factor, activation, padding_top, padding_left, padding_bottom, padding_right);
+ default:
+ return nullptr;
+ }
+ }
+ case 5:
+ {
+ switch(stride_x)
+ {
+ case 1:
+ return arm_compute::support::cpp14::make_unique<depthwise::DilatedDepthwiseConvolution<4, 4, 5, 5, 1, 1, float, float, float>>(
+ n_batches, in_rows, in_cols, n_channels, dilation_factor, activation, padding_top, padding_left, padding_bottom, padding_right);
+ case 2:
+ return arm_compute::support::cpp14::make_unique<depthwise::DilatedDepthwiseConvolution<3, 3, 5, 5, 2, 2, float, float, float>>(
+ n_batches, in_rows, in_cols, n_channels, dilation_factor, activation, padding_top, padding_left, padding_bottom, padding_right);
+ default:
+ return nullptr;
+ }
+ }
+ default:
+ return nullptr;
+ }
+}
+
std::unique_ptr<depthwise::IDepthwiseConvolution> create_convolver(const ITensor *input,
const ITensor *weights,
ITensor *output,
@@ -61,7 +187,8 @@ std::unique_ptr<depthwise::IDepthwiseConvolution> create_convolver(const ITensor
const int padding_bottom = conv_info.pad_bottom();
const int padding_right = conv_info.pad_right();
- const unsigned int stride_x = conv_info.stride().first;
+ const unsigned int stride_x = conv_info.stride().first;
+ const unsigned int kernel_size = weights->info()->tensor_shape().y();
// Map activation function
neon_convolution_kernels::ActivationFunction activation = neon_convolution_kernels::ActivationFunction::None;
@@ -96,18 +223,8 @@ std::unique_ptr<depthwise::IDepthwiseConvolution> create_convolver(const ITensor
quantization::calculate_quantized_multiplier_less_than_one(fmultipler, &qmultiplier, &qshift);
qasymm8::QAsymm8RescaleParams rescale_params(qshift, qmultiplier, fmultipler);
- // Create convolver
- switch(stride_x)
- {
- case 1:
- return arm_compute::support::cpp14::make_unique<depthwise::QAsymm8DilatedDepthwiseConvolution<2, 2, 3, 3, 1, 1>>(
- n_batches, in_rows, in_cols, n_channels, dilation_factor, activation, wqinfo, iqinfo, oqinfo, rescale_params, padding_top, padding_left, padding_bottom, padding_right);
- case 2:
- return arm_compute::support::cpp14::make_unique<depthwise::QAsymm8DilatedDepthwiseConvolution<2, 2, 3, 3, 2, 2>>(
- n_batches, in_rows, in_cols, n_channels, dilation_factor, activation, wqinfo, iqinfo, oqinfo, rescale_params, padding_top, padding_left, padding_bottom, padding_right);
- default:
- return nullptr;
- }
+ return get_qasymm8_convolver(kernel_size, stride_x, n_batches, in_rows, in_cols, n_channels, dilation_factor, activation,
+ wqinfo, iqinfo, oqinfo, rescale_params, padding_top, padding_left, padding_bottom, padding_right);
}
else
{
@@ -117,34 +234,12 @@ std::unique_ptr<depthwise::IDepthwiseConvolution> create_convolver(const ITensor
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
case DataType::F16:
{
- switch(stride_x)
- {
- case 1:
- return arm_compute::support::cpp14::make_unique<depthwise::DilatedDepthwiseConvolution<3, 3, 3, 3, 1, 1, float16_t, float16_t, float16_t>>(
- n_batches, in_rows, in_cols, n_channels, dilation_factor, activation, padding_top, padding_left, padding_bottom, padding_right);
- case 2:
- return arm_compute::support::cpp14::make_unique<depthwise::DilatedDepthwiseConvolution<3, 3, 3, 3, 2, 2, float16_t, float16_t, float16_t>>(
- n_batches, in_rows, in_cols, n_channels, dilation_factor, activation, padding_top, padding_left, padding_bottom, padding_right);
- default:
- return nullptr;
- }
- break;
+ return get_fp16_convolver(kernel_size, stride_x, n_batches, in_rows, in_cols, n_channels, dilation_factor, activation, padding_top, padding_left, padding_bottom, padding_right);
}
#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
case DataType::F32:
{
- switch(stride_x)
- {
- case 1:
- return arm_compute::support::cpp14::make_unique<depthwise::DilatedDepthwiseConvolution<4, 4, 3, 3, 1, 1, float, float, float>>(
- n_batches, in_rows, in_cols, n_channels, dilation_factor, activation, padding_top, padding_left, padding_bottom, padding_right);
- case 2:
- return arm_compute::support::cpp14::make_unique<depthwise::DilatedDepthwiseConvolution<3, 3, 3, 3, 2, 2, float, float, float>>(
- n_batches, in_rows, in_cols, n_channels, dilation_factor, activation, padding_top, padding_left, padding_bottom, padding_right);
- default:
- return nullptr;
- }
- break;
+ return get_fp32_convolver(kernel_size, stride_x, n_batches, in_rows, in_cols, n_channels, dilation_factor, activation, padding_top, padding_left, padding_bottom, padding_right);
}
default:
return nullptr;
@@ -236,15 +331,10 @@ Status NEDepthwiseConvolutionAssemblyDispatch::validate(const ITensorInfo
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(input, weights);
- const auto strides = conv_info.stride();
- const DataLayout data_layout = input->data_layout();
- unsigned int width_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
- unsigned int height_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
- ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(width_idx) != 3 || weights->dimension(height_idx) != 3);
- ARM_COMPUTE_RETURN_ERROR_ON(!((strides.first == strides.second) && ((strides.first == 1) || (strides.first == 2))));
- ARM_COMPUTE_RETURN_ERROR_ON(depth_multiplier != 1);
- ARM_COMPUTE_RETURN_ERROR_ON(dilation.x() != dilation.y());
+ // Validate convolver
+ ARM_COMPUTE_RETURN_ERROR_ON(!is_optimized_supported(input, weights, conv_info, depth_multiplier, dilation));
+ // Validate activation
const bool is_relu = arm_compute::utils::info_helpers::is_relu(act_info);
const bool is_relu6 = arm_compute::utils::info_helpers::is_relu6(act_info);
ARM_COMPUTE_RETURN_ERROR_ON(act_info.enabled() && !(is_relu || is_relu6));
@@ -252,7 +342,7 @@ Status NEDepthwiseConvolutionAssemblyDispatch::validate(const ITensorInfo
// Check bias
if(bias != nullptr)
{
- unsigned int channel_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL);
+ unsigned int channel_idx = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::CHANNEL);
ARM_COMPUTE_RETURN_ERROR_ON(bias->num_dimensions() > 1);
ARM_COMPUTE_RETURN_ERROR_ON(bias->dimension(0) != weights->dimension(channel_idx));
}
@@ -291,24 +381,28 @@ bool NEDepthwiseConvolutionAssemblyDispatch::is_optimized_supported(const ITenso
bool is_data_type_valid = is_data_type_float(data_type) || is_data_type_quantized_asymmetric(data_type);
// Check weighs size
- const unsigned int width_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
- const unsigned int height_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
- bool weights_supported = (weights->dimension(width_idx) == 3) && (weights->dimension(height_idx) == 3);
+ std::set<unsigned int> supported_kernel_sizes = { 3, 5 };
+ const unsigned int width_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
+ const unsigned int height_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
+ const unsigned int kernel_w = weights->dimension(width_idx);
+ const unsigned int kernel_h = weights->dimension(height_idx);
+ bool weights_supported = (kernel_w == kernel_h) && (supported_kernel_sizes.count(kernel_w) != 0);
// Check for supported strides
const auto &strides = conv_info.stride();
bool supported_strides = (strides.first == strides.second) && ((strides.first == 1) || (strides.first == 2));
// Check for supported padding
- const auto pad_top = conv_info.pad_top();
- const auto pad_right = conv_info.pad_right();
- const auto pad_bottom = conv_info.pad_bottom();
- const auto pad_left = conv_info.pad_left();
- PadStrideInfo same_pad = calculate_same_pad(in_shape, TensorShape(3U, 3U), conv_info);
- bool is_same_padding = (pad_top == same_pad.pad_top()) && (pad_right == same_pad.pad_right()) && (pad_bottom == same_pad.pad_bottom()) && (pad_left == same_pad.pad_left());
- bool is_valid_padding = (pad_top == 0) && (pad_right == 0) && (pad_bottom == 0) && (pad_left == 0);
- bool supported_padding = is_same_padding || is_valid_padding;
- bool is_dilation_supported = (dilation.x() == dilation.y()) || (dilation == Size2D(1U, 1U));
+ const auto pad_top = conv_info.pad_top();
+ const auto pad_right = conv_info.pad_right();
+ const auto pad_bottom = conv_info.pad_bottom();
+ const auto pad_left = conv_info.pad_left();
+ PadStrideInfo same_pad = calculate_same_pad(in_shape, TensorShape(kernel_w, kernel_h), conv_info, DataLayout::NCHW, dilation);
+ bool is_same_padding = (pad_top == same_pad.pad_top()) && (pad_right == same_pad.pad_right()) && (pad_bottom == same_pad.pad_bottom()) && (pad_left == same_pad.pad_left());
+ bool is_valid_padding = (pad_top == 0) && (pad_right == 0) && (pad_bottom == 0) && (pad_left == 0);
+ bool supported_padding = is_same_padding || is_valid_padding;
+ // TODO(COMPMID-2464): Enable once dilated conv with stride 2 is supported
+ bool is_dilation_supported = (dilation == Size2D(1U, 1U)) || ((dilation.x() == dilation.y()) && strides.first == 1);
return is_data_type_valid && weights_supported && supported_strides && supported_padding && (depth_multiplier == 1) && is_dilation_supported;
}
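
Condensed, the shape constraints that is_optimized_supported encodes after this change look roughly as follows (an illustrative sketch; the real routine additionally checks data type, depth multiplier and padding):

// Illustrative summary of the kernel/stride/dilation constraints above.
bool optimized_depthwise_shape_supported(unsigned int kernel_w, unsigned int kernel_h,
                                         unsigned int stride_x, unsigned int stride_y,
                                         unsigned int dilation_x, unsigned int dilation_y)
{
    const bool kernel_ok = (kernel_w == kernel_h) && (kernel_w == 3 || kernel_w == 5);
    const bool stride_ok = (stride_x == stride_y) && (stride_x == 1 || stride_x == 2);
    // COMPMID-2464: dilation > 1 is only taken with stride 1 for now.
    const bool dilation_ok = (dilation_x == 1 && dilation_y == 1) ||
                             ((dilation_x == dilation_y) && (stride_x == 1));
    return kernel_ok && stride_ok && dilation_ok;
}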
diff --git a/tests/datasets/DepthwiseConvolutionLayerDataset.h b/tests/datasets/DepthwiseConvolutionLayerDataset.h
index 440cb88ac2..d0617275c0 100644
--- a/tests/datasets/DepthwiseConvolutionLayerDataset.h
+++ b/tests/datasets/DepthwiseConvolutionLayerDataset.h
@@ -211,11 +211,15 @@ public:
{
// Stride 1
add_config(TensorShape(7U, 7U, 16U), Size2D(3U, 3U), PadStrideInfo(1, 1, 0, 0, DimensionRoundingType::CEIL));
+ add_config(TensorShape(7U, 7U, 16U), Size2D(3U, 3U), PadStrideInfo(1, 1, 0, 0, DimensionRoundingType::CEIL), Size2D(2U, 2U));
add_config(TensorShape(7U, 7U, 16U), Size2D(3U, 3U), PadStrideInfo(1, 1, 1, 1, DimensionRoundingType::CEIL));
+ add_config(TensorShape(7U, 7U, 16U), Size2D(3U, 3U), PadStrideInfo(1, 1, 2, 2, DimensionRoundingType::CEIL), Size2D(2U, 2U));
// Stride 2
- add_config(TensorShape(7U, 7U, 32U), Size2D(3U, 3U), PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL));
- add_config(TensorShape(7U, 7U, 32U), Size2D(3U, 3U), PadStrideInfo(2, 2, 1, 1, 1, 1, DimensionRoundingType::CEIL));
- add_config(TensorShape(9U, 9U, 32U), Size2D(3U, 3U), PadStrideInfo(2, 2, 1, 1, 1, 1, DimensionRoundingType::CEIL), Size2D(2U, 2U));
+ add_config(TensorShape(9U, 9U, 32U), Size2D(3U, 3U), PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL));
+ add_config(TensorShape(9U, 9U, 32U), Size2D(3U, 3U), PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL), Size2D(2U, 2U));
+ add_config(TensorShape(9U, 9U, 32U), Size2D(3U, 3U), PadStrideInfo(2, 2, 1, 1, DimensionRoundingType::CEIL));
+ // TODO(COMPMID-2464): Enable once dilated conv with stride 2 is supported
+ // add_config(TensorShape(9U, 9U, 1U), Size2D(3U, 3U), PadStrideInfo(2, 2, 2, 2, DimensionRoundingType::CEIL), Size2D(2U, 2U));
}
};
/** Dataset containing optimized, 3x3 depthwise convolution shapes. */
@@ -239,6 +243,27 @@ public:
add_config(TensorShape(64U, 64U, 128U), Size2D(3U, 3U), PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::CEIL));
}
};
+
+/** Dataset containing optimized, 5x5 depthwise convolution shapes. */
+class SmallOptimizedDepthwiseConvolutionLayerDataset5x5 final : public DepthwiseConvolutionLayerDataset
+{
+public:
+ SmallOptimizedDepthwiseConvolutionLayerDataset5x5()
+ {
+ // Stride 1
+ add_config(TensorShape(7U, 7U, 16U), Size2D(5U, 5U), PadStrideInfo(1, 1, 0, 0, DimensionRoundingType::CEIL));
+ add_config(TensorShape(11U, 11U, 16U), Size2D(5U, 5U), PadStrideInfo(1, 1, 0, 0, DimensionRoundingType::CEIL), Size2D(2U, 2U));
+ add_config(TensorShape(7U, 7U, 16U), Size2D(5U, 5U), PadStrideInfo(1, 1, 2, 2, DimensionRoundingType::CEIL));
+ add_config(TensorShape(7U, 7U, 16U), Size2D(5U, 5U), PadStrideInfo(1, 1, 4, 4, DimensionRoundingType::CEIL), Size2D(2U, 2U));
+ // Stride 2
+ add_config(TensorShape(9U, 9U, 32U), Size2D(5U, 5U), PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL));
+ // TODO(COMPMID-2464): Enable once dilated conv with stride 2 is supported
+ // add_config(TensorShape(9U, 9U, 32U), Size2D(5U, 5U), PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL), Size2D(2U, 2U));
+ add_config(TensorShape(9U, 9U, 32U), Size2D(5U, 5U), PadStrideInfo(2, 2, 2, 2, 2, 2, DimensionRoundingType::CEIL));
+ // TODO(COMPMID-2464): Enable once dilated conv with stride 2 is supported
+ // add_config(TensorShape(9U, 9U, 32U), Size2D(5U, 5U), PadStrideInfo(2, 2, 4, 4, 4, 4, DimensionRoundingType::CEIL), Size2D(2U, 2U));
+ }
+};
} // namespace datasets
} // namespace test
} // namespace arm_compute
diff --git a/tests/validation/NEON/DepthwiseConvolutionLayer.cpp b/tests/validation/NEON/DepthwiseConvolutionLayer.cpp
index 2ffe540fbc..ec9eb107aa 100644
--- a/tests/validation/NEON/DepthwiseConvolutionLayer.cpp
+++ b/tests/validation/NEON/DepthwiseConvolutionLayer.cpp
@@ -294,38 +294,41 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthwiseConvolutionLayerFixture<float>, fram
TEST_SUITE_END() // Dilation
TEST_SUITE_END() // Generic
-TEST_SUITE(W3x3)
template <typename T>
-using NEDepthwiseConvolutionLayerFixture3x3 = DepthwiseConvolutionLayerValidationFixture<Tensor, Accessor, NEDepthwiseConvolutionLayerOptimized, T>;
-FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthwiseConvolutionLayerFixture3x3<float>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(),
- depth_multipliers),
- framework::dataset::make("DataType",
- DataType::F32)),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
- ActivationFunctionsDataset))
+using NEDepthwiseConvolutionLayerFixtureOptimized = DepthwiseConvolutionLayerValidationFixture<Tensor, Accessor, NEDepthwiseConvolutionLayerOptimized, T>;
+
+TEST_SUITE(W3x3)
+FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthwiseConvolutionLayerFixtureOptimized<float>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(),
+ depth_multipliers),
+ framework::dataset::make("DataType",
+ DataType::F32)),
+ framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+ ActivationFunctionsDataset))
{
validate(Accessor(_target), _reference, tolerance_f32);
}
-FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthwiseConvolutionLayerFixture3x3<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(),
- depth_multipliers),
- framework::dataset::make("DataType",
- DataType::F32)),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
- ActivationFunctionsDataset))
+FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthwiseConvolutionLayerFixtureOptimized<float>, framework::DatasetMode::NIGHTLY,
+ combine(combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(),
+ depth_multipliers),
+ framework::dataset::make("DataType",
+ DataType::F32)),
+ framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+ ActivationFunctionsDataset))
{
validate(Accessor(_target), _reference, tolerance_f32);
}
TEST_SUITE(Dilation)
-FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthwiseConvolutionLayerFixture3x3<float>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset3x3(),
- depth_multipliers),
- framework::dataset::make("DataType",
- DataType::F32)),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
- ActivationFunctionsDataset))
+FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthwiseConvolutionLayerFixtureOptimized<float>, framework::DatasetMode::ALL,
+ combine(combine(combine(combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset3x3(),
+ depth_multipliers),
+ framework::dataset::make("DataType",
+ DataType::F32)),
+ framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+ ActivationFunctionsDataset))
{
validate(Accessor(_target), _reference, tolerance_f32);
}
-FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthwiseConvolutionLayerFixture3x3<float>, framework::DatasetMode::NIGHTLY,
+FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthwiseConvolutionLayerFixtureOptimized<float>, framework::DatasetMode::NIGHTLY,
combine(combine(combine(combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset3x3(),
depth_multipliers),
framework::dataset::make("DataType",
@@ -337,8 +340,10 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthwiseConvolutionLayerFixture3x3<float>, f
}
TEST_SUITE_END() // Dilation
+TEST_SUITE_END() // W3x3
-FIXTURE_DATA_TEST_CASE(RunOptimizedSmall, NEDepthwiseConvolutionLayerFixture3x3<float>, framework::DatasetMode::PRECOMMIT,
+TEST_SUITE(Optimized)
+FIXTURE_DATA_TEST_CASE(RunSmall3x3, NEDepthwiseConvolutionLayerFixtureOptimized<float>, framework::DatasetMode::PRECOMMIT,
combine(combine(combine(combine(datasets::SmallOptimizedDepthwiseConvolutionLayerDataset3x3(),
framework::dataset::make("DepthMultiplier", 1)),
framework::dataset::make("DataType",
@@ -348,7 +353,17 @@ FIXTURE_DATA_TEST_CASE(RunOptimizedSmall, NEDepthwiseConvolutionLayerFixture3x3<
{
validate(Accessor(_target), _reference, tolerance_f32);
}
-FIXTURE_DATA_TEST_CASE(RunOptimizedLarge, NEDepthwiseConvolutionLayerFixture3x3<float>, framework::DatasetMode::NIGHTLY,
+FIXTURE_DATA_TEST_CASE(RunSmall5x5, NEDepthwiseConvolutionLayerFixtureOptimized<float>, framework::DatasetMode::PRECOMMIT,
+ combine(combine(combine(combine(datasets::SmallOptimizedDepthwiseConvolutionLayerDataset5x5(),
+ framework::dataset::make("DepthMultiplier", 1)),
+ framework::dataset::make("DataType",
+ DataType::F32)),
+ framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+ ActivationFunctionsDataset))
+{
+ validate(Accessor(_target), _reference, tolerance_f32);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge3x3, NEDepthwiseConvolutionLayerFixtureOptimized<float>, framework::DatasetMode::NIGHTLY,
combine(combine(combine(combine(datasets::LargeOptimizedDepthwiseConvolutionLayerDataset3x3(),
framework::dataset::make("DepthMultiplier", 1)),
framework::dataset::make("DataType",
@@ -358,7 +373,7 @@ FIXTURE_DATA_TEST_CASE(RunOptimizedLarge, NEDepthwiseConvolutionLayerFixture3x3<
{
validate(Accessor(_target), _reference, tolerance_f32);
}
-TEST_SUITE_END() // W3x3
+TEST_SUITE_END() // Optimized
TEST_SUITE_END() // F32
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
@@ -407,40 +422,42 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthwiseConvolutionLayerFixture<half>, frame
TEST_SUITE_END() // Dilation
TEST_SUITE_END() // Generic
-TEST_SUITE(W3x3)
template <typename T>
-using NEDepthwiseConvolutionLayerFixture3x3 = DepthwiseConvolutionLayerValidationFixture<Tensor, Accessor, NEDepthwiseConvolutionLayerOptimized, T>;
-FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthwiseConvolutionLayerFixture3x3<half>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(),
- depth_multipliers),
- framework::dataset::make("DataType",
- DataType::F16)),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
- ActivationFunctionsDataset))
+using NEDepthwiseConvolutionLayerFixtureOptimized = DepthwiseConvolutionLayerValidationFixture<Tensor, Accessor, NEDepthwiseConvolutionLayerOptimized, T>;
+TEST_SUITE(W3x3)
+FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthwiseConvolutionLayerFixtureOptimized<half>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(),
+ depth_multipliers),
+ framework::dataset::make("DataType",
+ DataType::F16)),
+ framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+ ActivationFunctionsDataset))
{
validate(Accessor(_target), _reference, tolerance_f16);
}
-FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthwiseConvolutionLayerFixture3x3<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(),
- depth_multipliers),
- framework::dataset::make("DataType",
- DataType::F16)),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
- ActivationFunctionsDataset))
+FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthwiseConvolutionLayerFixtureOptimized<half>, framework::DatasetMode::NIGHTLY,
+ combine(combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(),
+ depth_multipliers),
+ framework::dataset::make("DataType",
+ DataType::F16)),
+ framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+ ActivationFunctionsDataset))
{
validate(Accessor(_target), _reference, tolerance_f16);
}
TEST_SUITE(Dilation)
-FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthwiseConvolutionLayerFixture3x3<half>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset3x3(),
- depth_multipliers),
- framework::dataset::make("DataType",
- DataType::F16)),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
- ActivationFunctionsDataset))
+FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthwiseConvolutionLayerFixtureOptimized<half>, framework::DatasetMode::ALL,
+ combine(combine(combine(combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset3x3(),
+ depth_multipliers),
+ framework::dataset::make("DataType",
+ DataType::F16)),
+ framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+ ActivationFunctionsDataset))
{
validate(Accessor(_target), _reference, tolerance_f16);
}
-FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthwiseConvolutionLayerFixture3x3<half>, framework::DatasetMode::NIGHTLY,
+FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthwiseConvolutionLayerFixtureOptimized<half>, framework::DatasetMode::NIGHTLY,
combine(combine(combine(combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset3x3(),
depth_multipliers),
framework::dataset::make("DataType",
@@ -452,8 +469,10 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthwiseConvolutionLayerFixture3x3<half>, fr
}
TEST_SUITE_END() // Dilation
+TEST_SUITE_END() // W3x3
-FIXTURE_DATA_TEST_CASE(RunOptimizedSmall, NEDepthwiseConvolutionLayerFixture3x3<half>, framework::DatasetMode::PRECOMMIT,
+TEST_SUITE(Optimized)
+FIXTURE_DATA_TEST_CASE(RunSmallW3x3, NEDepthwiseConvolutionLayerFixtureOptimized<half>, framework::DatasetMode::PRECOMMIT,
combine(combine(combine(combine(datasets::SmallOptimizedDepthwiseConvolutionLayerDataset3x3(),
framework::dataset::make("DepthMultiplier", 1)),
framework::dataset::make("DataType",
@@ -463,7 +482,17 @@ FIXTURE_DATA_TEST_CASE(RunOptimizedSmall, NEDepthwiseConvolutionLayerFixture3x3<
{
validate(Accessor(_target), _reference, tolerance_f16);
}
-FIXTURE_DATA_TEST_CASE(RunOptimizedLarge, NEDepthwiseConvolutionLayerFixture3x3<half>, framework::DatasetMode::NIGHTLY,
+FIXTURE_DATA_TEST_CASE(RunSmallW5x5, NEDepthwiseConvolutionLayerFixtureOptimized<half>, framework::DatasetMode::PRECOMMIT,
+ combine(combine(combine(combine(datasets::SmallOptimizedDepthwiseConvolutionLayerDataset5x5(),
+ framework::dataset::make("DepthMultiplier", 1)),
+ framework::dataset::make("DataType",
+ DataType::F16)),
+ framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+ ActivationFunctionsDataset))
+{
+ validate(Accessor(_target), _reference, tolerance_f16);
+}
+FIXTURE_DATA_TEST_CASE(RunLargeW3x3, NEDepthwiseConvolutionLayerFixtureOptimized<half>, framework::DatasetMode::NIGHTLY,
combine(combine(combine(combine(datasets::LargeOptimizedDepthwiseConvolutionLayerDataset3x3(),
framework::dataset::make("DepthMultiplier", 1)),
framework::dataset::make("DataType",
@@ -473,14 +502,14 @@ FIXTURE_DATA_TEST_CASE(RunOptimizedLarge, NEDepthwiseConvolutionLayerFixture3x3<
{
validate(Accessor(_target), _reference, tolerance_f16);
}
-TEST_SUITE_END() // W3x3
+TEST_SUITE_END() // Optimized
TEST_SUITE_END() // FP16
#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
TEST_SUITE_END() // Float
template <typename T>
-using NEDepthwiseConvolutionLayerQuantizedFixture3x3 = DepthwiseConvolutionLayerValidationQuantizedFixture<Tensor, Accessor, NEDepthwiseConvolutionLayerOptimized, T>;
+using NEDepthwiseConvolutionLayerQuantizedFixtureOptimized = DepthwiseConvolutionLayerValidationQuantizedFixture<Tensor, Accessor, NEDepthwiseConvolutionLayerOptimized, T>;
template <typename T>
using NEDepthwiseConvolutionLayerQuantizedFixture = DepthwiseConvolutionLayerValidationQuantizedFixture<Tensor, Accessor, NEDepthwiseConvolutionLayer, T>;
@@ -522,10 +551,10 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthwiseConvolutionLayerQuantizedFixture<uin
{
validate(Accessor(_target), _reference, tolerance_qasymm8);
}
-TEST_SUITE_END() //Dilation
+TEST_SUITE_END() // Dilation
TEST_SUITE_END() // Generic
TEST_SUITE(W3x3)
-FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthwiseConvolutionLayerQuantizedFixture3x3<uint8_t>, framework::DatasetMode::PRECOMMIT,
+FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthwiseConvolutionLayerQuantizedFixtureOptimized<uint8_t>, framework::DatasetMode::PRECOMMIT,
combine(combine(combine(combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(), depth_multipliers),
framework::dataset::make("DataType", DataType::QASYMM8)),
framework::dataset::make("SrcQuantizationInfo", { QuantizationInfo(0.5f, 10) })),
@@ -535,11 +564,10 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthwiseConvolutionLayerQuantizedFixture3x3<
{
validate(Accessor(_target), _reference, tolerance_qasymm8);
}
-FIXTURE_DATA_TEST_CASE(RunOptimizedSmall, NEDepthwiseConvolutionLayerQuantizedFixture3x3<uint8_t>, framework::DatasetMode::PRECOMMIT,
- combine(combine(combine(combine(combine(combine(datasets::SmallOptimizedDepthwiseConvolutionLayerDataset3x3(),
- framework::dataset::make("DepthMultiplier", 1)),
- framework::dataset::make("DataType",
- DataType::QASYMM8)),
+FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthwiseConvolutionLayerQuantizedFixtureOptimized<uint8_t>, framework::DatasetMode::NIGHTLY,
+ combine(combine(combine(combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(),
+ depth_multipliers),
+ framework::dataset::make("DataType", DataType::QASYMM8)),
framework::dataset::make("SrcQuantizationInfo", { QuantizationInfo(0.5f, 10) })),
framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.5f, 10) })),
framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
@@ -547,20 +575,21 @@ FIXTURE_DATA_TEST_CASE(RunOptimizedSmall, NEDepthwiseConvolutionLayerQuantizedFi
{
validate(Accessor(_target), _reference, tolerance_qasymm8);
}
-FIXTURE_DATA_TEST_CASE(RunOptimizedLarge, NEDepthwiseConvolutionLayerQuantizedFixture3x3<uint8_t>, framework::DatasetMode::NIGHTLY,
- combine(combine(combine(combine(combine(combine(datasets::LargeOptimizedDepthwiseConvolutionLayerDataset3x3(),
- framework::dataset::make("DepthMultiplier", 1)),
- framework::dataset::make("DataType",
- DataType::QASYMM8)),
+
+TEST_SUITE(Dilation)
+
+FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthwiseConvolutionLayerQuantizedFixtureOptimized<uint8_t>, framework::DatasetMode::PRECOMMIT,
+ combine(combine(combine(combine(combine(combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset3x3(), depth_multipliers),
+ framework::dataset::make("DataType", DataType::QASYMM8)),
framework::dataset::make("SrcQuantizationInfo", { QuantizationInfo(0.5f, 10) })),
- framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.5f, 10) })),
+ framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.7f, 10) })),
framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
ActivationFunctionsDataset))
{
validate(Accessor(_target), _reference, tolerance_qasymm8);
}
-FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthwiseConvolutionLayerQuantizedFixture3x3<uint8_t>, framework::DatasetMode::NIGHTLY,
- combine(combine(combine(combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(),
+FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthwiseConvolutionLayerQuantizedFixtureOptimized<uint8_t>, framework::DatasetMode::NIGHTLY,
+ combine(combine(combine(combine(combine(combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset3x3(),
depth_multipliers),
framework::dataset::make("DataType", DataType::QASYMM8)),
framework::dataset::make("SrcQuantizationInfo", { QuantizationInfo(0.5f, 10) })),
@@ -570,23 +599,27 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthwiseConvolutionLayerQuantizedFixture3x3<
{
validate(Accessor(_target), _reference, tolerance_qasymm8);
}
+TEST_SUITE_END() // Dilation
+TEST_SUITE_END() // W3x3
-TEST_SUITE(Dilation)
-
-FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthwiseConvolutionLayerQuantizedFixture3x3<uint8_t>, framework::DatasetMode::PRECOMMIT,
- combine(combine(combine(combine(combine(combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset3x3(), depth_multipliers),
- framework::dataset::make("DataType", DataType::QASYMM8)),
+TEST_SUITE(Optimized)
+FIXTURE_DATA_TEST_CASE(RunSmall3x3, NEDepthwiseConvolutionLayerQuantizedFixtureOptimized<uint8_t>, framework::DatasetMode::PRECOMMIT,
+ combine(combine(combine(combine(combine(combine(datasets::SmallOptimizedDepthwiseConvolutionLayerDataset3x3(),
+ framework::dataset::make("DepthMultiplier", 1)),
+ framework::dataset::make("DataType",
+ DataType::QASYMM8)),
framework::dataset::make("SrcQuantizationInfo", { QuantizationInfo(0.5f, 10) })),
- framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.7f, 10) })),
+ framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.5f, 10) })),
framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
ActivationFunctionsDataset))
{
validate(Accessor(_target), _reference, tolerance_qasymm8);
}
-FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthwiseConvolutionLayerQuantizedFixture3x3<uint8_t>, framework::DatasetMode::NIGHTLY,
- combine(combine(combine(combine(combine(combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset3x3(),
- depth_multipliers),
- framework::dataset::make("DataType", DataType::QASYMM8)),
+FIXTURE_DATA_TEST_CASE(RunSmall5x5, NEDepthwiseConvolutionLayerQuantizedFixtureOptimized<uint8_t>, framework::DatasetMode::PRECOMMIT,
+ combine(combine(combine(combine(combine(combine(datasets::SmallOptimizedDepthwiseConvolutionLayerDataset5x5(),
+ framework::dataset::make("DepthMultiplier", 1)),
+ framework::dataset::make("DataType",
+ DataType::QASYMM8)),
framework::dataset::make("SrcQuantizationInfo", { QuantizationInfo(0.5f, 10) })),
framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.5f, 10) })),
framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
@@ -594,8 +627,19 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthwiseConvolutionLayerQuantizedFixture3x3<
{
validate(Accessor(_target), _reference, tolerance_qasymm8);
}
-TEST_SUITE_END() // Dilation
-TEST_SUITE_END() // W3x3
+FIXTURE_DATA_TEST_CASE(RunLarge3x3, NEDepthwiseConvolutionLayerQuantizedFixtureOptimized<uint8_t>, framework::DatasetMode::NIGHTLY,
+ combine(combine(combine(combine(combine(combine(datasets::LargeOptimizedDepthwiseConvolutionLayerDataset3x3(),
+ framework::dataset::make("DepthMultiplier", 1)),
+ framework::dataset::make("DataType",
+ DataType::QASYMM8)),
+ framework::dataset::make("SrcQuantizationInfo", { QuantizationInfo(0.5f, 10) })),
+ framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.5f, 10) })),
+ framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+ ActivationFunctionsDataset))
+{
+ validate(Accessor(_target), _reference, tolerance_qasymm8);
+}
+TEST_SUITE_END() // Optimized
TEST_SUITE_END() // QASYMM8
TEST_SUITE_END() // Quantized