about | summary | refs | log | tree | commit | diff
path: root/src/core
diff options
context:
space:
mode:
author: Georgios Pinitas <georgios.pinitas@arm.com> 2019-07-10 19:49:11 +0100
committer: Georgios Pinitas <georgios.pinitas@arm.com> 2019-07-12 11:07:25 +0000
commit: 4c7585178385241f87288b7903d760d4b4822c6e (patch)
tree: 529b6c605d29b3c1fea256558acfa3dbdc5e650e /src/core
parent: 98b8511fd2e6fe850344ee2c69a93475bced811d (diff)
download: ComputeLibrary-4c7585178385241f87288b7903d760d4b4822c6e.tar.gz
COMPMID-2421: Add 5x5 DepthwiseConv2d in the optimized path for NEON
Change-Id: Idecc8e7ce4404af83f0f99ed993be8f961f7b9af
Signed-off-by: Georgios Pinitas <georgios.pinitas@arm.com>
Reviewed-on: https://review.mlplatform.org/c/1527
Reviewed-by: Michalis Spyrou <michalis.spyrou@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core')
-rw-r--r-- src/core/NEON/kernels/convolution/depthwise/depthwise_3x3_3x3_2x2_fp16_fp16.cpp | 31
-rw-r--r-- src/core/NEON/kernels/convolution/depthwise/depthwise_dilated.cpp | 4
-rw-r--r-- src/core/NEON/kernels/convolution/depthwise/depthwise_dilated_qa8_qa8.cpp | 2
-rw-r--r-- src/core/NEON/kernels/convolution/depthwise/depthwise_fp16.cpp (renamed from src/core/NEON/kernels/convolution/depthwise/depthwise_3x3_3x3_1x1_fp16_fp16.cpp) | 3
-rw-r--r-- src/core/NEON/kernels/convolution/depthwise/depthwise_fp32.cpp (renamed from src/core/NEON/kernels/convolution/depthwise/depthwise_4x4_3x3_2x2_fp32_fp32.cpp) | 2
-rw-r--r-- src/core/NEON/kernels/convolution/depthwise/depthwise_pack_parameters.cpp | 2
-rw-r--r-- src/core/NEON/kernels/convolution/depthwise/depthwise_qa8_qa8.cpp | 2
-rw-r--r-- src/core/Utils.cpp | 32
8 files changed, 36 insertions, 42 deletions
diff --git a/src/core/NEON/kernels/convolution/depthwise/depthwise_3x3_3x3_2x2_fp16_fp16.cpp b/src/core/NEON/kernels/convolution/depthwise/depthwise_3x3_3x3_2x2_fp16_fp16.cpp
deleted file mode 100644
index 83486920e1..0000000000
--- a/src/core/NEON/kernels/convolution/depthwise/depthwise_3x3_3x3_2x2_fp16_fp16.cpp
+++ /dev/null
@@ -1,31 +0,0 @@
-/*
- * Copyright (c) 2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "impl_fp16_fp16.hpp"
-
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-namespace depthwise
-{
-template class DepthwiseConvolution<3, 3, 3, 3, 2, 2, float16_t, float16_t, float16_t>;
-} // namespace depthwise
-#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
diff --git a/src/core/NEON/kernels/convolution/depthwise/depthwise_dilated.cpp b/src/core/NEON/kernels/convolution/depthwise/depthwise_dilated.cpp
index c4f398e199..648105593c 100644
--- a/src/core/NEON/kernels/convolution/depthwise/depthwise_dilated.cpp
+++ b/src/core/NEON/kernels/convolution/depthwise/depthwise_dilated.cpp
@@ -30,9 +30,13 @@ template class depthwise::DilatedDepthwiseConvolution<3, 3, 3, 3, 1, 1, float, f
template class depthwise::DilatedDepthwiseConvolution<3, 3, 3, 3, 2, 2, float, float, float>;
template class depthwise::DilatedDepthwiseConvolution<4, 4, 3, 3, 1, 1, float, float, float>;
template class depthwise::DilatedDepthwiseConvolution<4, 4, 3, 3, 2, 2, float, float, float>;
+template class depthwise::DilatedDepthwiseConvolution<4, 4, 5, 5, 1, 1, float, float, float>;
+template class depthwise::DilatedDepthwiseConvolution<3, 3, 5, 5, 2, 2, float, float, float>;
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
template class depthwise::DilatedDepthwiseConvolution<3, 3, 3, 3, 1, 1, float16_t, float16_t, float16_t>;
template class depthwise::DilatedDepthwiseConvolution<3, 3, 3, 3, 2, 2, float16_t, float16_t, float16_t>;
+template class depthwise::DilatedDepthwiseConvolution<3, 3, 5, 5, 1, 1, float16_t, float16_t, float16_t>;
+template class depthwise::DilatedDepthwiseConvolution<3, 3, 5, 5, 2, 2, float16_t, float16_t, float16_t>;
#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
diff --git a/src/core/NEON/kernels/convolution/depthwise/depthwise_dilated_qa8_qa8.cpp b/src/core/NEON/kernels/convolution/depthwise/depthwise_dilated_qa8_qa8.cpp
index 879e06158d..3d6777b9cd 100644
--- a/src/core/NEON/kernels/convolution/depthwise/depthwise_dilated_qa8_qa8.cpp
+++ b/src/core/NEON/kernels/convolution/depthwise/depthwise_dilated_qa8_qa8.cpp
@@ -140,3 +140,5 @@ QAsymm8DilatedDepthwiseConvolution<OutputTileRows, OutputTileCols, KernelRows,
template class depthwise::QAsymm8DilatedDepthwiseConvolution<2, 2, 3, 3, 1, 1>;
template class depthwise::QAsymm8DilatedDepthwiseConvolution<2, 2, 3, 3, 2, 2>;
+template class depthwise::QAsymm8DilatedDepthwiseConvolution<2, 2, 5, 5, 1, 1>;
+template class depthwise::QAsymm8DilatedDepthwiseConvolution<2, 2, 5, 5, 2, 2>;
diff --git a/src/core/NEON/kernels/convolution/depthwise/depthwise_3x3_3x3_1x1_fp16_fp16.cpp b/src/core/NEON/kernels/convolution/depthwise/depthwise_fp16.cpp
index 23a99a8c62..ffe7844b70 100644
--- a/src/core/NEON/kernels/convolution/depthwise/depthwise_3x3_3x3_1x1_fp16_fp16.cpp
+++ b/src/core/NEON/kernels/convolution/depthwise/depthwise_fp16.cpp
@@ -27,5 +27,8 @@
namespace depthwise
{
template class DepthwiseConvolution<3, 3, 3, 3, 1, 1, float16_t, float16_t, float16_t>;
+template class DepthwiseConvolution<3, 3, 3, 3, 2, 2, float16_t, float16_t, float16_t>;
+template class DepthwiseConvolution<3, 3, 5, 5, 1, 1, float16_t, float16_t, float16_t>;
+template class DepthwiseConvolution<3, 3, 5, 5, 2, 2, float16_t, float16_t, float16_t>;
} // namespace depthwise
#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
diff --git a/src/core/NEON/kernels/convolution/depthwise/depthwise_4x4_3x3_2x2_fp32_fp32.cpp b/src/core/NEON/kernels/convolution/depthwise/depthwise_fp32.cpp
index a04609d041..331e15868d 100644
--- a/src/core/NEON/kernels/convolution/depthwise/depthwise_4x4_3x3_2x2_fp32_fp32.cpp
+++ b/src/core/NEON/kernels/convolution/depthwise/depthwise_fp32.cpp
@@ -26,4 +26,6 @@
namespace depthwise
{
template class DepthwiseConvolution<4, 4, 3, 3, 2, 2, float, float, float>;
+template class DepthwiseConvolution<4, 4, 5, 5, 1, 1, float, float, float>;
+template class DepthwiseConvolution<3, 3, 5, 5, 2, 2, float, float, float>;
} // namespace depthwise
diff --git a/src/core/NEON/kernels/convolution/depthwise/depthwise_pack_parameters.cpp b/src/core/NEON/kernels/convolution/depthwise/depthwise_pack_parameters.cpp
index f86f1bad73..21ea350661 100644
--- a/src/core/NEON/kernels/convolution/depthwise/depthwise_pack_parameters.cpp
+++ b/src/core/NEON/kernels/convolution/depthwise/depthwise_pack_parameters.cpp
@@ -97,4 +97,6 @@ void PackParameters<KernelRows, KernelColumns, WeightSize, BiasSize>::execute(
template struct PackParameters<3, 3, 2ul, 2ul>;
template struct PackParameters<3, 3, 4ul, 4ul>;
+template struct PackParameters<5, 5, 2ul, 2ul>;
+template struct PackParameters<5, 5, 4ul, 4ul>;
} // namespace
diff --git a/src/core/NEON/kernels/convolution/depthwise/depthwise_qa8_qa8.cpp b/src/core/NEON/kernels/convolution/depthwise/depthwise_qa8_qa8.cpp
index 1989f87ef6..f683c6e01f 100644
--- a/src/core/NEON/kernels/convolution/depthwise/depthwise_qa8_qa8.cpp
+++ b/src/core/NEON/kernels/convolution/depthwise/depthwise_qa8_qa8.cpp
@@ -27,4 +27,6 @@ namespace depthwise
{
template class QAsymm8DepthwiseConvolution<2, 2, 3, 3, 1, 1>;
template class QAsymm8DepthwiseConvolution<2, 2, 3, 3, 2, 2>;
+template class QAsymm8DepthwiseConvolution<2, 2, 5, 5, 1, 1>;
+template class QAsymm8DepthwiseConvolution<2, 2, 5, 5, 2, 2>;
} // namespace depthwise
diff --git a/src/core/Utils.cpp b/src/core/Utils.cpp
index 499a6c8b29..5d32750f0d 100644
--- a/src/core/Utils.cpp
+++ b/src/core/Utils.cpp
@@ -333,17 +333,27 @@ std::string arm_compute::lower_string(const std::string &val)
PadStrideInfo arm_compute::calculate_same_pad(TensorShape input_shape, TensorShape weights_shape, PadStrideInfo conv_info, DataLayout data_layout, const Size2D &dilation)
{
- const unsigned int width_idx = arm_compute::get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
- const unsigned int height_idx = arm_compute::get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
- const auto &strides = conv_info.stride();
- const int out_width = std::ceil(float(input_shape[width_idx]) / float(strides.first));
- const int out_height = std::ceil(float(input_shape[height_idx]) / float(strides.second));
- const int pad_width = (out_width - 1) * strides.first + (weights_shape[width_idx] + (dilation.x() - 1) * (weights_shape[width_idx] - 1) - input_shape[width_idx]);
- const int pad_height = (out_height - 1) * strides.second + (weights_shape[height_idx] + (dilation.y() - 1) * (weights_shape[height_idx] - 1) - input_shape[height_idx]);
- const int same_pad_left = pad_width / 2;
- const int same_pad_top = pad_height / 2;
- const int same_pad_right = pad_width - same_pad_left;
- const int same_pad_bottom = pad_height - same_pad_top;
+ const unsigned int width_idx = arm_compute::get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
+ const unsigned int height_idx = arm_compute::get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
+ const auto &strides = conv_info.stride();
+
+ // Calculate output dimensions
+ const int out_width = (input_shape[width_idx] + strides.first - 1) / strides.first;
+ const int out_height = (input_shape[height_idx] + strides.second - 1) / strides.second;
+
+ // Calculate effective weights sizes
+ const int real_weight_width = (weights_shape[width_idx] - 1) * dilation.x() + 1;
+ const int real_weight_height = (weights_shape[height_idx] - 1) * dilation.y() + 1;
+
+ // Calculate total pad
+ const int pad_width = (out_width - 1) * strides.first + real_weight_width - input_shape[width_idx];
+ const int pad_height = (out_height - 1) * strides.second + real_weight_height - input_shape[height_idx];
+
+ // Calculate individual paddings
+ const int same_pad_left = pad_width / 2;
+ const int same_pad_top = pad_height / 2;
+ const int same_pad_right = pad_width - same_pad_left;
+ const int same_pad_bottom = pad_height - same_pad_top;
return { static_cast<unsigned int>(strides.first),
static_cast<unsigned int>(strides.second),