From 4c7585178385241f87288b7903d760d4b4822c6e Mon Sep 17 00:00:00 2001 From: Georgios Pinitas Date: Wed, 10 Jul 2019 19:49:11 +0100 Subject: COMPMID-2421: Add 5x5 DepthwiseConv2d in the optimized path for NEON Change-Id: Idecc8e7ce4404af83f0f99ed993be8f961f7b9af Signed-off-by: Georgios Pinitas Reviewed-on: https://review.mlplatform.org/c/1527 Reviewed-by: Michalis Spyrou Comments-Addressed: Arm Jenkins Tested-by: Arm Jenkins --- .../depthwise/depthwise_3x3_3x3_1x1_fp16_fp16.cpp | 31 --- .../depthwise/depthwise_3x3_3x3_2x2_fp16_fp16.cpp | 31 --- .../depthwise/depthwise_4x4_3x3_2x2_fp32_fp32.cpp | 29 --- .../convolution/depthwise/depthwise_dilated.cpp | 4 + .../depthwise/depthwise_dilated_qa8_qa8.cpp | 2 + .../convolution/depthwise/depthwise_fp16.cpp | 34 ++++ .../convolution/depthwise/depthwise_fp32.cpp | 31 +++ .../depthwise/depthwise_pack_parameters.cpp | 2 + .../convolution/depthwise/depthwise_qa8_qa8.cpp | 2 + src/core/Utils.cpp | 32 ++-- .../NEDepthwiseConvolutionAssemblyDispatch.cpp | 210 +++++++++++++++------ tests/datasets/DepthwiseConvolutionLayerDataset.h | 31 ++- .../validation/NEON/DepthwiseConvolutionLayer.cpp | 196 +++++++++++-------- 13 files changed, 396 insertions(+), 239 deletions(-) delete mode 100644 src/core/NEON/kernels/convolution/depthwise/depthwise_3x3_3x3_1x1_fp16_fp16.cpp delete mode 100644 src/core/NEON/kernels/convolution/depthwise/depthwise_3x3_3x3_2x2_fp16_fp16.cpp delete mode 100644 src/core/NEON/kernels/convolution/depthwise/depthwise_4x4_3x3_2x2_fp32_fp32.cpp create mode 100644 src/core/NEON/kernels/convolution/depthwise/depthwise_fp16.cpp create mode 100644 src/core/NEON/kernels/convolution/depthwise/depthwise_fp32.cpp diff --git a/src/core/NEON/kernels/convolution/depthwise/depthwise_3x3_3x3_1x1_fp16_fp16.cpp b/src/core/NEON/kernels/convolution/depthwise/depthwise_3x3_3x3_1x1_fp16_fp16.cpp deleted file mode 100644 index 23a99a8c62..0000000000 --- a/src/core/NEON/kernels/convolution/depthwise/depthwise_3x3_3x3_1x1_fp16_fp16.cpp +++ /dev/null @@ -1,31 +0,0 @@ -/* - * Copyright (c) 2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "impl_fp16_fp16.hpp" - -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -namespace depthwise -{ -template class DepthwiseConvolution<3, 3, 3, 3, 1, 1, float16_t, float16_t, float16_t>; -} // namespace depthwise -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC diff --git a/src/core/NEON/kernels/convolution/depthwise/depthwise_3x3_3x3_2x2_fp16_fp16.cpp b/src/core/NEON/kernels/convolution/depthwise/depthwise_3x3_3x3_2x2_fp16_fp16.cpp deleted file mode 100644 index 83486920e1..0000000000 --- a/src/core/NEON/kernels/convolution/depthwise/depthwise_3x3_3x3_2x2_fp16_fp16.cpp +++ /dev/null @@ -1,31 +0,0 @@ -/* - * Copyright (c) 2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "impl_fp16_fp16.hpp" - -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -namespace depthwise -{ -template class DepthwiseConvolution<3, 3, 3, 3, 2, 2, float16_t, float16_t, float16_t>; -} // namespace depthwise -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC diff --git a/src/core/NEON/kernels/convolution/depthwise/depthwise_4x4_3x3_2x2_fp32_fp32.cpp b/src/core/NEON/kernels/convolution/depthwise/depthwise_4x4_3x3_2x2_fp32_fp32.cpp deleted file mode 100644 index a04609d041..0000000000 --- a/src/core/NEON/kernels/convolution/depthwise/depthwise_4x4_3x3_2x2_fp32_fp32.cpp +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Copyright (c) 2018-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "impl_fp32_fp32.hpp" - -namespace depthwise -{ -template class DepthwiseConvolution<4, 4, 3, 3, 2, 2, float, float, float>; -} // namespace depthwise diff --git a/src/core/NEON/kernels/convolution/depthwise/depthwise_dilated.cpp b/src/core/NEON/kernels/convolution/depthwise/depthwise_dilated.cpp index c4f398e199..648105593c 100644 --- a/src/core/NEON/kernels/convolution/depthwise/depthwise_dilated.cpp +++ b/src/core/NEON/kernels/convolution/depthwise/depthwise_dilated.cpp @@ -30,9 +30,13 @@ template class depthwise::DilatedDepthwiseConvolution<3, 3, 3, 3, 1, 1, float, f template class depthwise::DilatedDepthwiseConvolution<3, 3, 3, 3, 2, 2, float, float, float>; template class depthwise::DilatedDepthwiseConvolution<4, 4, 3, 3, 1, 1, float, float, float>; template class depthwise::DilatedDepthwiseConvolution<4, 4, 3, 3, 2, 2, float, float, float>; +template class depthwise::DilatedDepthwiseConvolution<4, 4, 5, 5, 1, 1, float, float, float>; +template class depthwise::DilatedDepthwiseConvolution<3, 3, 5, 5, 2, 2, float, float, float>; #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC template class depthwise::DilatedDepthwiseConvolution<3, 3, 3, 3, 1, 1, float16_t, float16_t, float16_t>; template class depthwise::DilatedDepthwiseConvolution<3, 3, 3, 3, 2, 2, float16_t, float16_t, float16_t>; +template class depthwise::DilatedDepthwiseConvolution<3, 3, 5, 5, 1, 1, float16_t, float16_t, float16_t>; +template class depthwise::DilatedDepthwiseConvolution<3, 3, 5, 5, 2, 2, float16_t, float16_t, float16_t>; #endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC diff --git a/src/core/NEON/kernels/convolution/depthwise/depthwise_dilated_qa8_qa8.cpp b/src/core/NEON/kernels/convolution/depthwise/depthwise_dilated_qa8_qa8.cpp index 879e06158d..3d6777b9cd 100644 --- a/src/core/NEON/kernels/convolution/depthwise/depthwise_dilated_qa8_qa8.cpp +++ b/src/core/NEON/kernels/convolution/depthwise/depthwise_dilated_qa8_qa8.cpp @@ -140,3 +140,5 @@ QAsymm8DilatedDepthwiseConvolution; template class depthwise::QAsymm8DilatedDepthwiseConvolution<2, 2, 3, 3, 2, 2>; +template class depthwise::QAsymm8DilatedDepthwiseConvolution<2, 2, 5, 5, 1, 1>; +template class depthwise::QAsymm8DilatedDepthwiseConvolution<2, 2, 5, 5, 2, 2>; diff --git a/src/core/NEON/kernels/convolution/depthwise/depthwise_fp16.cpp b/src/core/NEON/kernels/convolution/depthwise/depthwise_fp16.cpp new file mode 100644 index 0000000000..ffe7844b70 --- /dev/null +++ b/src/core/NEON/kernels/convolution/depthwise/depthwise_fp16.cpp @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2019 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "impl_fp16_fp16.hpp" + +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +namespace depthwise +{ +template class DepthwiseConvolution<3, 3, 3, 3, 1, 1, float16_t, float16_t, float16_t>; +template class DepthwiseConvolution<3, 3, 3, 3, 2, 2, float16_t, float16_t, float16_t>; +template class DepthwiseConvolution<3, 3, 5, 5, 1, 1, float16_t, float16_t, float16_t>; +template class DepthwiseConvolution<3, 3, 5, 5, 2, 2, float16_t, float16_t, float16_t>; +} // namespace depthwise +#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC diff --git a/src/core/NEON/kernels/convolution/depthwise/depthwise_fp32.cpp b/src/core/NEON/kernels/convolution/depthwise/depthwise_fp32.cpp new file mode 100644 index 0000000000..331e15868d --- /dev/null +++ b/src/core/NEON/kernels/convolution/depthwise/depthwise_fp32.cpp @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2018-2019 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "impl_fp32_fp32.hpp" + +namespace depthwise +{ +template class DepthwiseConvolution<4, 4, 3, 3, 2, 2, float, float, float>; +template class DepthwiseConvolution<4, 4, 5, 5, 1, 1, float, float, float>; +template class DepthwiseConvolution<3, 3, 5, 5, 2, 2, float, float, float>; +} // namespace depthwise diff --git a/src/core/NEON/kernels/convolution/depthwise/depthwise_pack_parameters.cpp b/src/core/NEON/kernels/convolution/depthwise/depthwise_pack_parameters.cpp index f86f1bad73..21ea350661 100644 --- a/src/core/NEON/kernels/convolution/depthwise/depthwise_pack_parameters.cpp +++ b/src/core/NEON/kernels/convolution/depthwise/depthwise_pack_parameters.cpp @@ -97,4 +97,6 @@ void PackParameters::execute( template struct PackParameters<3, 3, 2ul, 2ul>; template struct PackParameters<3, 3, 4ul, 4ul>; +template struct PackParameters<5, 5, 2ul, 2ul>; +template struct PackParameters<5, 5, 4ul, 4ul>; } // namespace diff --git a/src/core/NEON/kernels/convolution/depthwise/depthwise_qa8_qa8.cpp b/src/core/NEON/kernels/convolution/depthwise/depthwise_qa8_qa8.cpp index 1989f87ef6..f683c6e01f 100644 --- a/src/core/NEON/kernels/convolution/depthwise/depthwise_qa8_qa8.cpp +++ b/src/core/NEON/kernels/convolution/depthwise/depthwise_qa8_qa8.cpp @@ -27,4 +27,6 @@ namespace depthwise { template class QAsymm8DepthwiseConvolution<2, 2, 3, 3, 1, 1>; template class QAsymm8DepthwiseConvolution<2, 2, 3, 3, 2, 2>; +template class QAsymm8DepthwiseConvolution<2, 2, 5, 5, 1, 1>; +template class QAsymm8DepthwiseConvolution<2, 2, 5, 5, 2, 2>; } // namespace depthwise diff --git a/src/core/Utils.cpp b/src/core/Utils.cpp index 499a6c8b29..5d32750f0d 100644 --- a/src/core/Utils.cpp +++ b/src/core/Utils.cpp @@ -333,17 +333,27 @@ std::string arm_compute::lower_string(const std::string &val) PadStrideInfo arm_compute::calculate_same_pad(TensorShape input_shape, TensorShape weights_shape, PadStrideInfo conv_info, DataLayout data_layout, const Size2D &dilation) { - const unsigned int width_idx = arm_compute::get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); - const unsigned int height_idx = arm_compute::get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); - const auto &strides = conv_info.stride(); - const int out_width = std::ceil(float(input_shape[width_idx]) / float(strides.first)); - const int out_height = std::ceil(float(input_shape[height_idx]) / float(strides.second)); - const int pad_width = (out_width - 1) * strides.first + (weights_shape[width_idx] + (dilation.x() - 1) * (weights_shape[width_idx] - 1) - input_shape[width_idx]); - const int pad_height = (out_height - 1) * strides.second + (weights_shape[height_idx] + (dilation.y() - 1) * (weights_shape[height_idx] - 1) - input_shape[height_idx]); - const int same_pad_left = pad_width / 2; - const int same_pad_top = pad_height / 2; - const int same_pad_right = pad_width - same_pad_left; - const int same_pad_bottom = pad_height - same_pad_top; + const unsigned int width_idx = arm_compute::get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); + const unsigned int height_idx = arm_compute::get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); + const auto &strides = conv_info.stride(); + + // Calculate output dimensions + const int out_width = (input_shape[width_idx] + strides.first - 1) / strides.first; + const int out_height = (input_shape[height_idx] + strides.second - 1) / strides.second; + + // Calculate effective weights sizes + const int real_weight_width = (weights_shape[width_idx] - 1) * dilation.x() + 1; + const int real_weight_height = (weights_shape[height_idx] - 1) * dilation.y() + 1; + + // Calculate total pad + const int pad_width = (out_width - 1) * strides.first + real_weight_width - input_shape[width_idx]; + const int pad_height = (out_height - 1) * strides.second + real_weight_height - input_shape[height_idx]; + + // Calculate individual paddings + const int same_pad_left = pad_width / 2; + const int same_pad_top = pad_height / 2; + const int same_pad_right = pad_width - same_pad_left; + const int same_pad_bottom = pad_height - same_pad_top; return { static_cast(strides.first), static_cast(strides.second), diff --git a/src/runtime/NEON/functions/assembly/NEDepthwiseConvolutionAssemblyDispatch.cpp b/src/runtime/NEON/functions/assembly/NEDepthwiseConvolutionAssemblyDispatch.cpp index b28aaa715f..92ad93e4a7 100644 --- a/src/runtime/NEON/functions/assembly/NEDepthwiseConvolutionAssemblyDispatch.cpp +++ b/src/runtime/NEON/functions/assembly/NEDepthwiseConvolutionAssemblyDispatch.cpp @@ -36,10 +36,136 @@ #include "arm_compute/runtime/NEON/NEScheduler.h" +#include + namespace arm_compute { namespace { +std::unique_ptr get_qasymm8_convolver(int kernel_size, int stride_x, + int n_batches, int in_rows, int in_cols, int n_channels, + int dilation_factor, neon_convolution_kernels::ActivationFunction activation, + const qasymm8::QAsymm8Params &wqinfo, const qasymm8::QAsymm8Params &iqinfo, const qasymm8::QAsymm8Params &oqinfo, + const qasymm8::QAsymm8RescaleParams &rescale_params, + int padding_top, int padding_left, int padding_bottom, int padding_right) +{ + switch(kernel_size) + { + case 3: + { + switch(stride_x) + { + case 1: + return arm_compute::support::cpp14::make_unique>( + n_batches, in_rows, in_cols, n_channels, dilation_factor, activation, wqinfo, iqinfo, oqinfo, rescale_params, padding_top, padding_left, padding_bottom, padding_right); + case 2: + return arm_compute::support::cpp14::make_unique>( + n_batches, in_rows, in_cols, n_channels, dilation_factor, activation, wqinfo, iqinfo, oqinfo, rescale_params, padding_top, padding_left, padding_bottom, padding_right); + default: + return nullptr; + } + } + case 5: + { + switch(stride_x) + { + case 1: + return arm_compute::support::cpp14::make_unique>( + n_batches, in_rows, in_cols, n_channels, dilation_factor, activation, wqinfo, iqinfo, oqinfo, rescale_params, padding_top, padding_left, padding_bottom, padding_right); + case 2: + return arm_compute::support::cpp14::make_unique>( + n_batches, in_rows, in_cols, n_channels, dilation_factor, activation, wqinfo, iqinfo, oqinfo, rescale_params, padding_top, padding_left, padding_bottom, padding_right); + default: + return nullptr; + } + } + default: + return nullptr; + } +} + +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +std::unique_ptr get_fp16_convolver(int kernel_size, int stride_x, + int n_batches, int in_rows, int in_cols, int n_channels, + int dilation_factor, neon_convolution_kernels::ActivationFunction activation, + int padding_top, int padding_left, int padding_bottom, int padding_right) +{ + switch(kernel_size) + { + case 3: + { + switch(stride_x) + { + case 1: + return arm_compute::support::cpp14::make_unique>( + n_batches, in_rows, in_cols, n_channels, dilation_factor, activation, padding_top, padding_left, padding_bottom, padding_right); + case 2: + return arm_compute::support::cpp14::make_unique>( + n_batches, in_rows, in_cols, n_channels, dilation_factor, activation, padding_top, padding_left, padding_bottom, padding_right); + default: + return nullptr; + } + } + case 5: + { + switch(stride_x) + { + case 1: + return arm_compute::support::cpp14::make_unique>( + n_batches, in_rows, in_cols, n_channels, dilation_factor, activation, padding_top, padding_left, padding_bottom, padding_right); + case 2: + return arm_compute::support::cpp14::make_unique>( + n_batches, in_rows, in_cols, n_channels, dilation_factor, activation, padding_top, padding_left, padding_bottom, padding_right); + default: + return nullptr; + } + } + default: + return nullptr; + } +} +#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC + +std::unique_ptr get_fp32_convolver(int kernel_size, int stride_x, + int n_batches, int in_rows, int in_cols, int n_channels, + int dilation_factor, neon_convolution_kernels::ActivationFunction activation, + int padding_top, int padding_left, int padding_bottom, int padding_right) +{ + switch(kernel_size) + { + case 3: + { + switch(stride_x) + { + case 1: + return arm_compute::support::cpp14::make_unique>( + n_batches, in_rows, in_cols, n_channels, dilation_factor, activation, padding_top, padding_left, padding_bottom, padding_right); + case 2: + return arm_compute::support::cpp14::make_unique>( + n_batches, in_rows, in_cols, n_channels, dilation_factor, activation, padding_top, padding_left, padding_bottom, padding_right); + default: + return nullptr; + } + } + case 5: + { + switch(stride_x) + { + case 1: + return arm_compute::support::cpp14::make_unique>( + n_batches, in_rows, in_cols, n_channels, dilation_factor, activation, padding_top, padding_left, padding_bottom, padding_right); + case 2: + return arm_compute::support::cpp14::make_unique>( + n_batches, in_rows, in_cols, n_channels, dilation_factor, activation, padding_top, padding_left, padding_bottom, padding_right); + default: + return nullptr; + } + } + default: + return nullptr; + } +} + std::unique_ptr create_convolver(const ITensor *input, const ITensor *weights, ITensor *output, @@ -61,7 +187,8 @@ std::unique_ptr create_convolver(const ITensor const int padding_bottom = conv_info.pad_bottom(); const int padding_right = conv_info.pad_right(); - const unsigned int stride_x = conv_info.stride().first; + const unsigned int stride_x = conv_info.stride().first; + const unsigned int kernel_size = weights->info()->tensor_shape().y(); // Map activation function neon_convolution_kernels::ActivationFunction activation = neon_convolution_kernels::ActivationFunction::None; @@ -96,18 +223,8 @@ std::unique_ptr create_convolver(const ITensor quantization::calculate_quantized_multiplier_less_than_one(fmultipler, &qmultiplier, &qshift); qasymm8::QAsymm8RescaleParams rescale_params(qshift, qmultiplier, fmultipler); - // Create convolver - switch(stride_x) - { - case 1: - return arm_compute::support::cpp14::make_unique>( - n_batches, in_rows, in_cols, n_channels, dilation_factor, activation, wqinfo, iqinfo, oqinfo, rescale_params, padding_top, padding_left, padding_bottom, padding_right); - case 2: - return arm_compute::support::cpp14::make_unique>( - n_batches, in_rows, in_cols, n_channels, dilation_factor, activation, wqinfo, iqinfo, oqinfo, rescale_params, padding_top, padding_left, padding_bottom, padding_right); - default: - return nullptr; - } + return get_qasymm8_convolver(kernel_size, stride_x, n_batches, in_rows, in_cols, n_channels, dilation_factor, activation, + wqinfo, iqinfo, oqinfo, rescale_params, padding_top, padding_left, padding_bottom, padding_right); } else { @@ -117,34 +234,12 @@ std::unique_ptr create_convolver(const ITensor #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC case DataType::F16: { - switch(stride_x) - { - case 1: - return arm_compute::support::cpp14::make_unique>( - n_batches, in_rows, in_cols, n_channels, dilation_factor, activation, padding_top, padding_left, padding_bottom, padding_right); - case 2: - return arm_compute::support::cpp14::make_unique>( - n_batches, in_rows, in_cols, n_channels, dilation_factor, activation, padding_top, padding_left, padding_bottom, padding_right); - default: - return nullptr; - } - break; + return get_fp16_convolver(kernel_size, stride_x, n_batches, in_rows, in_cols, n_channels, dilation_factor, activation, padding_top, padding_left, padding_bottom, padding_right); } #endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC case DataType::F32: { - switch(stride_x) - { - case 1: - return arm_compute::support::cpp14::make_unique>( - n_batches, in_rows, in_cols, n_channels, dilation_factor, activation, padding_top, padding_left, padding_bottom, padding_right); - case 2: - return arm_compute::support::cpp14::make_unique>( - n_batches, in_rows, in_cols, n_channels, dilation_factor, activation, padding_top, padding_left, padding_bottom, padding_right); - default: - return nullptr; - } - break; + return get_fp32_convolver(kernel_size, stride_x, n_batches, in_rows, in_cols, n_channels, dilation_factor, activation, padding_top, padding_left, padding_bottom, padding_right); } default: return nullptr; @@ -236,15 +331,10 @@ Status NEDepthwiseConvolutionAssemblyDispatch::validate(const ITensorInfo ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(input, weights); - const auto strides = conv_info.stride(); - const DataLayout data_layout = input->data_layout(); - unsigned int width_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); - unsigned int height_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); - ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(width_idx) != 3 || weights->dimension(height_idx) != 3); - ARM_COMPUTE_RETURN_ERROR_ON(!((strides.first == strides.second) && ((strides.first == 1) || (strides.first == 2)))); - ARM_COMPUTE_RETURN_ERROR_ON(depth_multiplier != 1); - ARM_COMPUTE_RETURN_ERROR_ON(dilation.x() != dilation.y()); + // Validate convolver + ARM_COMPUTE_RETURN_ERROR_ON(!is_optimized_supported(input, weights, conv_info, depth_multiplier, dilation)); + // Validate activation const bool is_relu = arm_compute::utils::info_helpers::is_relu(act_info); const bool is_relu6 = arm_compute::utils::info_helpers::is_relu6(act_info); ARM_COMPUTE_RETURN_ERROR_ON(act_info.enabled() && !(is_relu || is_relu6)); @@ -252,7 +342,7 @@ Status NEDepthwiseConvolutionAssemblyDispatch::validate(const ITensorInfo // Check bias if(bias != nullptr) { - unsigned int channel_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL); + unsigned int channel_idx = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::CHANNEL); ARM_COMPUTE_RETURN_ERROR_ON(bias->num_dimensions() > 1); ARM_COMPUTE_RETURN_ERROR_ON(bias->dimension(0) != weights->dimension(channel_idx)); } @@ -291,24 +381,28 @@ bool NEDepthwiseConvolutionAssemblyDispatch::is_optimized_supported(const ITenso bool is_data_type_valid = is_data_type_float(data_type) || is_data_type_quantized_asymmetric(data_type); // Check weighs size - const unsigned int width_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); - const unsigned int height_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); - bool weights_supported = (weights->dimension(width_idx) == 3) && (weights->dimension(height_idx) == 3); + std::set supported_kernel_sizes = { 3, 5 }; + const unsigned int width_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); + const unsigned int height_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); + const unsigned int kernel_w = weights->dimension(width_idx); + const unsigned int kernel_h = weights->dimension(height_idx); + bool weights_supported = (kernel_w == kernel_h) && (supported_kernel_sizes.count(kernel_w) != 0); // Check for supported strides const auto &strides = conv_info.stride(); bool supported_strides = (strides.first == strides.second) && ((strides.first == 1) || (strides.first == 2)); // Check for supported padding - const auto pad_top = conv_info.pad_top(); - const auto pad_right = conv_info.pad_right(); - const auto pad_bottom = conv_info.pad_bottom(); - const auto pad_left = conv_info.pad_left(); - PadStrideInfo same_pad = calculate_same_pad(in_shape, TensorShape(3U, 3U), conv_info); - bool is_same_padding = (pad_top == same_pad.pad_top()) && (pad_right == same_pad.pad_right()) && (pad_bottom == same_pad.pad_bottom()) && (pad_left == same_pad.pad_left()); - bool is_valid_padding = (pad_top == 0) && (pad_right == 0) && (pad_bottom == 0) && (pad_left == 0); - bool supported_padding = is_same_padding || is_valid_padding; - bool is_dilation_supported = (dilation.x() == dilation.y()) || (dilation == Size2D(1U, 1U)); + const auto pad_top = conv_info.pad_top(); + const auto pad_right = conv_info.pad_right(); + const auto pad_bottom = conv_info.pad_bottom(); + const auto pad_left = conv_info.pad_left(); + PadStrideInfo same_pad = calculate_same_pad(in_shape, TensorShape(kernel_w, kernel_h), conv_info, DataLayout::NCHW, dilation); + bool is_same_padding = (pad_top == same_pad.pad_top()) && (pad_right == same_pad.pad_right()) && (pad_bottom == same_pad.pad_bottom()) && (pad_left == same_pad.pad_left()); + bool is_valid_padding = (pad_top == 0) && (pad_right == 0) && (pad_bottom == 0) && (pad_left == 0); + bool supported_padding = is_same_padding || is_valid_padding; + // TODO(COMPMID-2464): Enable once dilated conv with stride 2 is supported + bool is_dilation_supported = (dilation == Size2D(1U, 1U)) || ((dilation.x() == dilation.y()) && strides.first == 1); return is_data_type_valid && weights_supported && supported_strides && supported_padding && (depth_multiplier == 1) && is_dilation_supported; } diff --git a/tests/datasets/DepthwiseConvolutionLayerDataset.h b/tests/datasets/DepthwiseConvolutionLayerDataset.h index 440cb88ac2..d0617275c0 100644 --- a/tests/datasets/DepthwiseConvolutionLayerDataset.h +++ b/tests/datasets/DepthwiseConvolutionLayerDataset.h @@ -211,11 +211,15 @@ public: { // Stride 1 add_config(TensorShape(7U, 7U, 16U), Size2D(3U, 3U), PadStrideInfo(1, 1, 0, 0, DimensionRoundingType::CEIL)); + add_config(TensorShape(7U, 7U, 16U), Size2D(3U, 3U), PadStrideInfo(1, 1, 0, 0, DimensionRoundingType::CEIL), Size2D(2U, 2U)); add_config(TensorShape(7U, 7U, 16U), Size2D(3U, 3U), PadStrideInfo(1, 1, 1, 1, DimensionRoundingType::CEIL)); + add_config(TensorShape(7U, 7U, 16U), Size2D(3U, 3U), PadStrideInfo(1, 1, 2, 2, DimensionRoundingType::CEIL), Size2D(2U, 2U)); // Stride 2 - add_config(TensorShape(7U, 7U, 32U), Size2D(3U, 3U), PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL)); - add_config(TensorShape(7U, 7U, 32U), Size2D(3U, 3U), PadStrideInfo(2, 2, 1, 1, 1, 1, DimensionRoundingType::CEIL)); - add_config(TensorShape(9U, 9U, 32U), Size2D(3U, 3U), PadStrideInfo(2, 2, 1, 1, 1, 1, DimensionRoundingType::CEIL), Size2D(2U, 2U)); + add_config(TensorShape(9U, 9U, 32U), Size2D(3U, 3U), PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL)); + add_config(TensorShape(9U, 9U, 32U), Size2D(3U, 3U), PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL), Size2D(2U, 2U)); + add_config(TensorShape(9U, 9U, 32U), Size2D(3U, 3U), PadStrideInfo(2, 2, 1, 1, DimensionRoundingType::CEIL)); + // TODO(COMPMID-2464): Enable once dilated conv with stride 2 is supported + // add_config(TensorShape(9U, 9U, 1U), Size2D(3U, 3U), PadStrideInfo(2, 2, 2, 2, DimensionRoundingType::CEIL), Size2D(2U, 2U)); } }; /** Dataset containing optimized, 3x3 depthwise convolution shapes. */ @@ -239,6 +243,27 @@ public: add_config(TensorShape(64U, 64U, 128U), Size2D(3U, 3U), PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::CEIL)); } }; + +/** Dataset containing optimized, 5x5 depthwise convolution shapes. */ +class SmallOptimizedDepthwiseConvolutionLayerDataset5x5 final : public DepthwiseConvolutionLayerDataset +{ +public: + SmallOptimizedDepthwiseConvolutionLayerDataset5x5() + { + // Stride 1 + add_config(TensorShape(7U, 7U, 16U), Size2D(5U, 5U), PadStrideInfo(1, 1, 0, 0, DimensionRoundingType::CEIL)); + add_config(TensorShape(11U, 11U, 16U), Size2D(5U, 5U), PadStrideInfo(1, 1, 0, 0, DimensionRoundingType::CEIL), Size2D(2U, 2U)); + add_config(TensorShape(7U, 7U, 16U), Size2D(5U, 5U), PadStrideInfo(1, 1, 2, 2, DimensionRoundingType::CEIL)); + add_config(TensorShape(7U, 7U, 16U), Size2D(5U, 5U), PadStrideInfo(1, 1, 4, 4, DimensionRoundingType::CEIL), Size2D(2U, 2U)); + // Stride 2 + add_config(TensorShape(9U, 9U, 32U), Size2D(5U, 5U), PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL)); + // TODO(COMPMID-2464): Enable once dilated conv with stride 2 is supported + // add_config(TensorShape(9U, 9U, 32U), Size2D(5U, 5U), PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL), Size2D(2U, 2U)); + add_config(TensorShape(9U, 9U, 32U), Size2D(5U, 5U), PadStrideInfo(2, 2, 2, 2, 2, 2, DimensionRoundingType::CEIL)); + // TODO(COMPMID-2464): Enable once dilated conv with stride 2 is supported + // add_config(TensorShape(9U, 9U, 32U), Size2D(5U, 5U), PadStrideInfo(2, 2, 4, 4, 4, 4, DimensionRoundingType::CEIL), Size2D(2U, 2U)); + } +}; } // namespace datasets } // namespace test } // namespace arm_compute diff --git a/tests/validation/NEON/DepthwiseConvolutionLayer.cpp b/tests/validation/NEON/DepthwiseConvolutionLayer.cpp index 2ffe540fbc..ec9eb107aa 100644 --- a/tests/validation/NEON/DepthwiseConvolutionLayer.cpp +++ b/tests/validation/NEON/DepthwiseConvolutionLayer.cpp @@ -294,38 +294,41 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthwiseConvolutionLayerFixture, fram TEST_SUITE_END() // Dilation TEST_SUITE_END() // Generic -TEST_SUITE(W3x3) template -using NEDepthwiseConvolutionLayerFixture3x3 = DepthwiseConvolutionLayerValidationFixture; -FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthwiseConvolutionLayerFixture3x3, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(), - depth_multipliers), - framework::dataset::make("DataType", - DataType::F32)), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), - ActivationFunctionsDataset)) +using NEDepthwiseConvolutionLayerFixtureOptimized = DepthwiseConvolutionLayerValidationFixture; + +TEST_SUITE(W3x3) +FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthwiseConvolutionLayerFixtureOptimized, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(), + depth_multipliers), + framework::dataset::make("DataType", + DataType::F32)), + framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), + ActivationFunctionsDataset)) { validate(Accessor(_target), _reference, tolerance_f32); } -FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthwiseConvolutionLayerFixture3x3, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(), - depth_multipliers), - framework::dataset::make("DataType", - DataType::F32)), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), - ActivationFunctionsDataset)) +FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthwiseConvolutionLayerFixtureOptimized, framework::DatasetMode::NIGHTLY, + combine(combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(), + depth_multipliers), + framework::dataset::make("DataType", + DataType::F32)), + framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), + ActivationFunctionsDataset)) { validate(Accessor(_target), _reference, tolerance_f32); } TEST_SUITE(Dilation) -FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthwiseConvolutionLayerFixture3x3, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset3x3(), - depth_multipliers), - framework::dataset::make("DataType", - DataType::F32)), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), - ActivationFunctionsDataset)) +FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthwiseConvolutionLayerFixtureOptimized, framework::DatasetMode::ALL, + combine(combine(combine(combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset3x3(), + depth_multipliers), + framework::dataset::make("DataType", + DataType::F32)), + framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), + ActivationFunctionsDataset)) { validate(Accessor(_target), _reference, tolerance_f32); } -FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthwiseConvolutionLayerFixture3x3, framework::DatasetMode::NIGHTLY, +FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthwiseConvolutionLayerFixtureOptimized, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset3x3(), depth_multipliers), framework::dataset::make("DataType", @@ -337,8 +340,10 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthwiseConvolutionLayerFixture3x3, f } TEST_SUITE_END() // Dilation +TEST_SUITE_END() // W3x3 -FIXTURE_DATA_TEST_CASE(RunOptimizedSmall, NEDepthwiseConvolutionLayerFixture3x3, framework::DatasetMode::PRECOMMIT, +TEST_SUITE(Optimized) +FIXTURE_DATA_TEST_CASE(RunSmall3x3, NEDepthwiseConvolutionLayerFixtureOptimized, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallOptimizedDepthwiseConvolutionLayerDataset3x3(), framework::dataset::make("DepthMultiplier", 1)), framework::dataset::make("DataType", @@ -348,7 +353,17 @@ FIXTURE_DATA_TEST_CASE(RunOptimizedSmall, NEDepthwiseConvolutionLayerFixture3x3< { validate(Accessor(_target), _reference, tolerance_f32); } -FIXTURE_DATA_TEST_CASE(RunOptimizedLarge, NEDepthwiseConvolutionLayerFixture3x3, framework::DatasetMode::NIGHTLY, +FIXTURE_DATA_TEST_CASE(RunSmall5x5, NEDepthwiseConvolutionLayerFixtureOptimized, framework::DatasetMode::PRECOMMIT, + combine(combine(combine(combine(datasets::SmallOptimizedDepthwiseConvolutionLayerDataset5x5(), + framework::dataset::make("DepthMultiplier", 1)), + framework::dataset::make("DataType", + DataType::F32)), + framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), + ActivationFunctionsDataset)) +{ + validate(Accessor(_target), _reference, tolerance_f32); +} +FIXTURE_DATA_TEST_CASE(RunLarge3x3, NEDepthwiseConvolutionLayerFixtureOptimized, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeOptimizedDepthwiseConvolutionLayerDataset3x3(), framework::dataset::make("DepthMultiplier", 1)), framework::dataset::make("DataType", @@ -358,7 +373,7 @@ FIXTURE_DATA_TEST_CASE(RunOptimizedLarge, NEDepthwiseConvolutionLayerFixture3x3< { validate(Accessor(_target), _reference, tolerance_f32); } -TEST_SUITE_END() // W3x3 +TEST_SUITE_END() // Optimized TEST_SUITE_END() // F32 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC @@ -407,40 +422,42 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthwiseConvolutionLayerFixture, frame TEST_SUITE_END() // Dilation TEST_SUITE_END() // Generic -TEST_SUITE(W3x3) template -using NEDepthwiseConvolutionLayerFixture3x3 = DepthwiseConvolutionLayerValidationFixture; -FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthwiseConvolutionLayerFixture3x3, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(), - depth_multipliers), - framework::dataset::make("DataType", - DataType::F16)), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), - ActivationFunctionsDataset)) +using NEDepthwiseConvolutionLayerFixtureOptimized = DepthwiseConvolutionLayerValidationFixture; +TEST_SUITE(W3x3) +FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthwiseConvolutionLayerFixtureOptimized, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(), + depth_multipliers), + framework::dataset::make("DataType", + DataType::F16)), + framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), + ActivationFunctionsDataset)) { validate(Accessor(_target), _reference, tolerance_f16); } -FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthwiseConvolutionLayerFixture3x3, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(), - depth_multipliers), - framework::dataset::make("DataType", - DataType::F16)), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), - ActivationFunctionsDataset)) +FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthwiseConvolutionLayerFixtureOptimized, framework::DatasetMode::NIGHTLY, + combine(combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(), + depth_multipliers), + framework::dataset::make("DataType", + DataType::F16)), + framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), + ActivationFunctionsDataset)) { validate(Accessor(_target), _reference, tolerance_f16); } TEST_SUITE(Dilation) -FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthwiseConvolutionLayerFixture3x3, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset3x3(), - depth_multipliers), - framework::dataset::make("DataType", - DataType::F16)), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), - ActivationFunctionsDataset)) +FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthwiseConvolutionLayerFixtureOptimized, framework::DatasetMode::ALL, + combine(combine(combine(combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset3x3(), + depth_multipliers), + framework::dataset::make("DataType", + DataType::F16)), + framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), + ActivationFunctionsDataset)) { validate(Accessor(_target), _reference, tolerance_f16); } -FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthwiseConvolutionLayerFixture3x3, framework::DatasetMode::NIGHTLY, +FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthwiseConvolutionLayerFixtureOptimized, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset3x3(), depth_multipliers), framework::dataset::make("DataType", @@ -452,8 +469,10 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthwiseConvolutionLayerFixture3x3, fr } TEST_SUITE_END() // Dilation +TEST_SUITE_END() // W3x3 -FIXTURE_DATA_TEST_CASE(RunOptimizedSmall, NEDepthwiseConvolutionLayerFixture3x3, framework::DatasetMode::PRECOMMIT, +TEST_SUITE(Optimized) +FIXTURE_DATA_TEST_CASE(RunSmallW3x3, NEDepthwiseConvolutionLayerFixtureOptimized, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallOptimizedDepthwiseConvolutionLayerDataset3x3(), framework::dataset::make("DepthMultiplier", 1)), framework::dataset::make("DataType", @@ -463,7 +482,17 @@ FIXTURE_DATA_TEST_CASE(RunOptimizedSmall, NEDepthwiseConvolutionLayerFixture3x3< { validate(Accessor(_target), _reference, tolerance_f16); } -FIXTURE_DATA_TEST_CASE(RunOptimizedLarge, NEDepthwiseConvolutionLayerFixture3x3, framework::DatasetMode::NIGHTLY, +FIXTURE_DATA_TEST_CASE(RunSmallW5x5, NEDepthwiseConvolutionLayerFixtureOptimized, framework::DatasetMode::PRECOMMIT, + combine(combine(combine(combine(datasets::SmallOptimizedDepthwiseConvolutionLayerDataset5x5(), + framework::dataset::make("DepthMultiplier", 1)), + framework::dataset::make("DataType", + DataType::F16)), + framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), + ActivationFunctionsDataset)) +{ + validate(Accessor(_target), _reference, tolerance_f16); +} +FIXTURE_DATA_TEST_CASE(RunLargeW3x3, NEDepthwiseConvolutionLayerFixtureOptimized, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeOptimizedDepthwiseConvolutionLayerDataset3x3(), framework::dataset::make("DepthMultiplier", 1)), framework::dataset::make("DataType", @@ -473,14 +502,14 @@ FIXTURE_DATA_TEST_CASE(RunOptimizedLarge, NEDepthwiseConvolutionLayerFixture3x3< { validate(Accessor(_target), _reference, tolerance_f16); } -TEST_SUITE_END() // W3x3 +TEST_SUITE_END() // Optimized TEST_SUITE_END() // FP16 #endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC TEST_SUITE_END() // Float template -using NEDepthwiseConvolutionLayerQuantizedFixture3x3 = DepthwiseConvolutionLayerValidationQuantizedFixture; +using NEDepthwiseConvolutionLayerQuantizedFixtureOptimized = DepthwiseConvolutionLayerValidationQuantizedFixture; template using NEDepthwiseConvolutionLayerQuantizedFixture = DepthwiseConvolutionLayerValidationQuantizedFixture; @@ -522,10 +551,10 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthwiseConvolutionLayerQuantizedFixture, framework::DatasetMode::PRECOMMIT, +FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthwiseConvolutionLayerQuantizedFixtureOptimized, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(), depth_multipliers), framework::dataset::make("DataType", DataType::QASYMM8)), framework::dataset::make("SrcQuantizationInfo", { QuantizationInfo(0.5f, 10) })), @@ -535,11 +564,10 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthwiseConvolutionLayerQuantizedFixture3x3< { validate(Accessor(_target), _reference, tolerance_qasymm8); } -FIXTURE_DATA_TEST_CASE(RunOptimizedSmall, NEDepthwiseConvolutionLayerQuantizedFixture3x3, framework::DatasetMode::PRECOMMIT, - combine(combine(combine(combine(combine(combine(datasets::SmallOptimizedDepthwiseConvolutionLayerDataset3x3(), - framework::dataset::make("DepthMultiplier", 1)), - framework::dataset::make("DataType", - DataType::QASYMM8)), +FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthwiseConvolutionLayerQuantizedFixtureOptimized, framework::DatasetMode::NIGHTLY, + combine(combine(combine(combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(), + depth_multipliers), + framework::dataset::make("DataType", DataType::QASYMM8)), framework::dataset::make("SrcQuantizationInfo", { QuantizationInfo(0.5f, 10) })), framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.5f, 10) })), framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), @@ -547,20 +575,21 @@ FIXTURE_DATA_TEST_CASE(RunOptimizedSmall, NEDepthwiseConvolutionLayerQuantizedFi { validate(Accessor(_target), _reference, tolerance_qasymm8); } -FIXTURE_DATA_TEST_CASE(RunOptimizedLarge, NEDepthwiseConvolutionLayerQuantizedFixture3x3, framework::DatasetMode::NIGHTLY, - combine(combine(combine(combine(combine(combine(datasets::LargeOptimizedDepthwiseConvolutionLayerDataset3x3(), - framework::dataset::make("DepthMultiplier", 1)), - framework::dataset::make("DataType", - DataType::QASYMM8)), + +TEST_SUITE(Dilation) + +FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthwiseConvolutionLayerQuantizedFixtureOptimized, framework::DatasetMode::PRECOMMIT, + combine(combine(combine(combine(combine(combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset3x3(), depth_multipliers), + framework::dataset::make("DataType", DataType::QASYMM8)), framework::dataset::make("SrcQuantizationInfo", { QuantizationInfo(0.5f, 10) })), - framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.5f, 10) })), + framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.7f, 10) })), framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), ActivationFunctionsDataset)) { validate(Accessor(_target), _reference, tolerance_qasymm8); } -FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthwiseConvolutionLayerQuantizedFixture3x3, framework::DatasetMode::NIGHTLY, - combine(combine(combine(combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(), +FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthwiseConvolutionLayerQuantizedFixtureOptimized, framework::DatasetMode::NIGHTLY, + combine(combine(combine(combine(combine(combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset3x3(), depth_multipliers), framework::dataset::make("DataType", DataType::QASYMM8)), framework::dataset::make("SrcQuantizationInfo", { QuantizationInfo(0.5f, 10) })), @@ -570,23 +599,27 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthwiseConvolutionLayerQuantizedFixture3x3< { validate(Accessor(_target), _reference, tolerance_qasymm8); } +TEST_SUITE_END() // Dilation +TEST_SUITE_END() // W3x3 -TEST_SUITE(Dilation) - -FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthwiseConvolutionLayerQuantizedFixture3x3, framework::DatasetMode::PRECOMMIT, - combine(combine(combine(combine(combine(combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset3x3(), depth_multipliers), - framework::dataset::make("DataType", DataType::QASYMM8)), +TEST_SUITE(Optimized) +FIXTURE_DATA_TEST_CASE(RunSmall3x3, NEDepthwiseConvolutionLayerQuantizedFixtureOptimized, framework::DatasetMode::PRECOMMIT, + combine(combine(combine(combine(combine(combine(datasets::SmallOptimizedDepthwiseConvolutionLayerDataset3x3(), + framework::dataset::make("DepthMultiplier", 1)), + framework::dataset::make("DataType", + DataType::QASYMM8)), framework::dataset::make("SrcQuantizationInfo", { QuantizationInfo(0.5f, 10) })), - framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.7f, 10) })), + framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.5f, 10) })), framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), ActivationFunctionsDataset)) { validate(Accessor(_target), _reference, tolerance_qasymm8); } -FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthwiseConvolutionLayerQuantizedFixture3x3, framework::DatasetMode::NIGHTLY, - combine(combine(combine(combine(combine(combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset3x3(), - depth_multipliers), - framework::dataset::make("DataType", DataType::QASYMM8)), +FIXTURE_DATA_TEST_CASE(RunSmall5x5, NEDepthwiseConvolutionLayerQuantizedFixtureOptimized, framework::DatasetMode::PRECOMMIT, + combine(combine(combine(combine(combine(combine(datasets::SmallOptimizedDepthwiseConvolutionLayerDataset5x5(), + framework::dataset::make("DepthMultiplier", 1)), + framework::dataset::make("DataType", + DataType::QASYMM8)), framework::dataset::make("SrcQuantizationInfo", { QuantizationInfo(0.5f, 10) })), framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.5f, 10) })), framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), @@ -594,8 +627,19 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthwiseConvolutionLayerQuantizedFixture3x3< { validate(Accessor(_target), _reference, tolerance_qasymm8); } -TEST_SUITE_END() // Dilation -TEST_SUITE_END() // W3x3 +FIXTURE_DATA_TEST_CASE(RunLarge3x3, NEDepthwiseConvolutionLayerQuantizedFixtureOptimized, framework::DatasetMode::NIGHTLY, + combine(combine(combine(combine(combine(combine(datasets::LargeOptimizedDepthwiseConvolutionLayerDataset3x3(), + framework::dataset::make("DepthMultiplier", 1)), + framework::dataset::make("DataType", + DataType::QASYMM8)), + framework::dataset::make("SrcQuantizationInfo", { QuantizationInfo(0.5f, 10) })), + framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.5f, 10) })), + framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), + ActivationFunctionsDataset)) +{ + validate(Accessor(_target), _reference, tolerance_qasymm8); +} +TEST_SUITE_END() // Optimized TEST_SUITE_END() // QASYMM8 TEST_SUITE_END() // Quantized -- cgit v1.2.1