aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPablo Tello <pablo.tello@arm.com>2018-02-19 16:46:03 +0000
committerAnthony Barbier <anthony.barbier@arm.com>2018-11-02 16:47:18 +0000
commitd267b05aaaec9b462a8c988c7b5fcebd5776c72f (patch)
treea069b62f1294a07c6190d0e043ecdba6f7a2a27c
parent1fbb8120aa4f5ea2e73393421611ffea803a05b4 (diff)
downloadComputeLibrary-d267b05aaaec9b462a8c988c7b5fcebd5776c72f.tar.gz
COMPMID-784: Fixed SAME padding in WinogradLayer
There were mismatches when using kernel size 5 and padding = SAME Change-Id: Id834e96ebcf665616f99c995b48e302dcff8dc48 Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/121144 Tested-by: Jenkins <bsgcomp@arm.com> Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
-rw-r--r--arm_compute/core/NEON/kernels/convolution/common/utils.hpp14
-rw-r--r--src/core/NEON/kernels/convolution/winograd/transforms/input_2x2_5x5_fp32.cpp156
-rw-r--r--tests/datasets/SmallConvolutionLayerDataset.h1
3 files changed, 87 insertions, 84 deletions
diff --git a/arm_compute/core/NEON/kernels/convolution/common/utils.hpp b/arm_compute/core/NEON/kernels/convolution/common/utils.hpp
index d8b9c3b7d3..a22809fb58 100644
--- a/arm_compute/core/NEON/kernels/convolution/common/utils.hpp
+++ b/arm_compute/core/NEON/kernels/convolution/common/utils.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -25,13 +25,15 @@
#pragma once
double TimeInUs(void);
-void PrintMatrix(const float* const m, const int M, const int N, const int row_stride);
+void PrintMatrix(const float *const m, const int M, const int N, const int row_stride);
-inline int iceildiv(const int a, const int b) {
- return (a + b - 1) / b;
+inline int iceildiv(const int a, const int b)
+{
+ return (a + b - 1) / b;
}
template <typename T>
-inline T roundup(const T a, const T b) {
- return a + b - (a % b);
+inline T roundup(const T a, const T b)
+{
+ return b * iceildiv(a, b);
}
diff --git a/src/core/NEON/kernels/convolution/winograd/transforms/input_2x2_5x5_fp32.cpp b/src/core/NEON/kernels/convolution/winograd/transforms/input_2x2_5x5_fp32.cpp
index d9ebe8b7cd..fd30b6118e 100644
--- a/src/core/NEON/kernels/convolution/winograd/transforms/input_2x2_5x5_fp32.cpp
+++ b/src/core/NEON/kernels/convolution/winograd/transforms/input_2x2_5x5_fp32.cpp
@@ -21,7 +21,6 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-
#include "arm_compute/core/NEON/kernels/convolution/winograd/transforms/input.hpp"
#include "arm_compute/core/NEON/kernels/convolution/winograd/winograd_gemm.hpp"
#include "arm_compute/core/NEON/kernels/convolution/common/arm.hpp"
@@ -35,6 +34,7 @@ template <>
template <>
int Transform::ops_performed(const Tensor4DShape &input_shape)
{
+ (void) input_shape;
return 0; // TODO
}
@@ -45,8 +45,8 @@ int Transform::ops_performed(const Tensor4DShape &input_shape)
* different padding combinations which may be required. These padding
* constraints are the space:
*
-* Padding top in {0, 1}
-* Padding left in {0, 1}
+* Padding top in {0, 2}
+* Padding left in {0, 2}
* Padding bottom in {0, 1, 2, 3, 4}
* Padding right in {0, 1, 2, 3, 4}
*/
@@ -340,115 +340,115 @@ const Transform::TileFn Transform::tile_fns[2][2][max_pad_bottom][max_pad_right]
},
{
{
- Transform::template process_tile<0, 1, 0, 0>, // Left
- Transform::template process_tile<0, 1, 0, 1>,
- Transform::template process_tile<0, 1, 0, 2>,
- Transform::template process_tile<0, 1, 0, 3>,
- Transform::template process_tile<0, 1, 0, 4>,
+ Transform::template process_tile<0, 2, 0, 0>, // Left
+ Transform::template process_tile<0, 2, 0, 1>,
+ Transform::template process_tile<0, 2, 0, 2>,
+ Transform::template process_tile<0, 2, 0, 3>,
+ Transform::template process_tile<0, 2, 0, 4>,
},
{
- Transform::template process_tile<0, 1, 1, 0>, // Bottom left
- Transform::template process_tile<0, 1, 1, 1>,
- Transform::template process_tile<0, 1, 1, 2>,
- Transform::template process_tile<0, 1, 1, 3>,
- Transform::template process_tile<0, 1, 1, 4>,
+ Transform::template process_tile<0, 2, 1, 0>, // Bottom left
+ Transform::template process_tile<0, 2, 1, 1>,
+ Transform::template process_tile<0, 2, 1, 2>,
+ Transform::template process_tile<0, 2, 1, 3>,
+ Transform::template process_tile<0, 2, 1, 4>,
},
{
- Transform::template process_tile<0, 1, 2, 0>, // " "
- Transform::template process_tile<0, 1, 2, 1>,
- Transform::template process_tile<0, 1, 2, 2>,
- Transform::template process_tile<0, 1, 2, 3>,
- Transform::template process_tile<0, 1, 2, 4>,
+ Transform::template process_tile<0, 2, 2, 0>, // " "
+ Transform::template process_tile<0, 2, 2, 1>,
+ Transform::template process_tile<0, 2, 2, 2>,
+ Transform::template process_tile<0, 2, 2, 3>,
+ Transform::template process_tile<0, 2, 2, 4>,
},
{
- Transform::template process_tile<0, 1, 3, 0>, // " "
- Transform::template process_tile<0, 1, 3, 1>,
- Transform::template process_tile<0, 1, 3, 2>,
- Transform::template process_tile<0, 1, 3, 3>,
- Transform::template process_tile<0, 1, 3, 4>,
+ Transform::template process_tile<0, 2, 3, 0>, // " "
+ Transform::template process_tile<0, 2, 3, 1>,
+ Transform::template process_tile<0, 2, 3, 2>,
+ Transform::template process_tile<0, 2, 3, 3>,
+ Transform::template process_tile<0, 2, 3, 4>,
},
{
- Transform::template process_tile<0, 1, 4, 0>, // " "
- Transform::template process_tile<0, 1, 4, 1>,
- Transform::template process_tile<0, 1, 4, 2>,
- Transform::template process_tile<0, 1, 4, 3>,
- Transform::template process_tile<0, 1, 4, 4>,
+ Transform::template process_tile<0, 2, 4, 0>, // " "
+ Transform::template process_tile<0, 2, 4, 1>,
+ Transform::template process_tile<0, 2, 4, 2>,
+ Transform::template process_tile<0, 2, 4, 3>,
+ Transform::template process_tile<0, 2, 4, 4>,
}
}
},
{
{
{
- Transform::template process_tile<1, 0, 0, 0>, // Top
- Transform::template process_tile<1, 0, 0, 1>, // Top right
- Transform::template process_tile<1, 0, 0, 2>, // " "
- Transform::template process_tile<1, 0, 0, 3>, // " "
- Transform::template process_tile<1, 0, 0, 4>, // " "
+ Transform::template process_tile<2, 0, 0, 0>, // Top
+ Transform::template process_tile<2, 0, 0, 1>, // Top right
+ Transform::template process_tile<2, 0, 0, 2>, // " "
+ Transform::template process_tile<2, 0, 0, 3>, // " "
+ Transform::template process_tile<2, 0, 0, 4>, // " "
},
{
- Transform::template process_tile<1, 0, 1, 0>,
- Transform::template process_tile<1, 0, 1, 1>,
- Transform::template process_tile<1, 0, 1, 2>,
- Transform::template process_tile<1, 0, 1, 3>,
- Transform::template process_tile<1, 0, 1, 4>,
+ Transform::template process_tile<2, 0, 1, 0>,
+ Transform::template process_tile<2, 0, 1, 1>,
+ Transform::template process_tile<2, 0, 1, 2>,
+ Transform::template process_tile<2, 0, 1, 3>,
+ Transform::template process_tile<2, 0, 1, 4>,
},
{
- Transform::template process_tile<1, 0, 2, 0>,
- Transform::template process_tile<1, 0, 2, 1>,
- Transform::template process_tile<1, 0, 2, 2>,
- Transform::template process_tile<1, 0, 2, 3>,
- Transform::template process_tile<1, 0, 2, 4>,
+ Transform::template process_tile<2, 0, 2, 0>,
+ Transform::template process_tile<2, 0, 2, 1>,
+ Transform::template process_tile<2, 0, 2, 2>,
+ Transform::template process_tile<2, 0, 2, 3>,
+ Transform::template process_tile<2, 0, 2, 4>,
},
{
- Transform::template process_tile<1, 0, 3, 0>,
- Transform::template process_tile<1, 0, 3, 1>,
- Transform::template process_tile<1, 0, 3, 2>,
- Transform::template process_tile<1, 0, 3, 3>,
- Transform::template process_tile<1, 0, 3, 4>,
+ Transform::template process_tile<2, 0, 3, 0>,
+ Transform::template process_tile<2, 0, 3, 1>,
+ Transform::template process_tile<2, 0, 3, 2>,
+ Transform::template process_tile<2, 0, 3, 3>,
+ Transform::template process_tile<2, 0, 3, 4>,
},
{
- Transform::template process_tile<1, 0, 4, 0>,
- Transform::template process_tile<1, 0, 4, 1>,
- Transform::template process_tile<1, 0, 4, 2>,
- Transform::template process_tile<1, 0, 4, 3>,
- Transform::template process_tile<1, 0, 4, 4>,
+ Transform::template process_tile<2, 0, 4, 0>,
+ Transform::template process_tile<2, 0, 4, 1>,
+ Transform::template process_tile<2, 0, 4, 2>,
+ Transform::template process_tile<2, 0, 4, 3>,
+ Transform::template process_tile<2, 0, 4, 4>,
},
},
{
{
- Transform::template process_tile<1, 1, 0, 0>, // Top left
- Transform::template process_tile<1, 1, 0, 1>,
- Transform::template process_tile<1, 1, 0, 2>,
- Transform::template process_tile<1, 1, 0, 3>,
- Transform::template process_tile<1, 1, 0, 4>,
+ Transform::template process_tile<2, 2, 0, 0>, // Top left
+ Transform::template process_tile<2, 2, 0, 1>,
+ Transform::template process_tile<2, 2, 0, 2>,
+ Transform::template process_tile<2, 2, 0, 3>,
+ Transform::template process_tile<2, 2, 0, 4>,
},
{
- Transform::template process_tile<1, 1, 1, 0>,
- Transform::template process_tile<1, 1, 1, 1>,
- Transform::template process_tile<1, 1, 1, 2>,
- Transform::template process_tile<1, 1, 1, 3>,
- Transform::template process_tile<1, 1, 1, 4>,
+ Transform::template process_tile<2, 2, 1, 0>,
+ Transform::template process_tile<2, 2, 1, 1>,
+ Transform::template process_tile<2, 2, 1, 2>,
+ Transform::template process_tile<2, 2, 1, 3>,
+ Transform::template process_tile<2, 2, 1, 4>,
},
{
- Transform::template process_tile<1, 1, 2, 0>,
- Transform::template process_tile<1, 1, 2, 1>,
- Transform::template process_tile<1, 1, 2, 2>,
- Transform::template process_tile<1, 1, 2, 3>,
- Transform::template process_tile<1, 1, 2, 4>,
+ Transform::template process_tile<2, 2, 2, 0>,
+ Transform::template process_tile<2, 2, 2, 1>,
+ Transform::template process_tile<2, 2, 2, 2>,
+ Transform::template process_tile<2, 2, 2, 3>,
+ Transform::template process_tile<2, 2, 2, 4>,
},
{
- Transform::template process_tile<1, 1, 3, 0>,
- Transform::template process_tile<1, 1, 3, 1>,
- Transform::template process_tile<1, 1, 3, 2>,
- Transform::template process_tile<1, 1, 3, 3>,
- Transform::template process_tile<1, 1, 3, 4>,
+ Transform::template process_tile<2, 2, 3, 0>,
+ Transform::template process_tile<2, 2, 3, 1>,
+ Transform::template process_tile<2, 2, 3, 2>,
+ Transform::template process_tile<2, 2, 3, 3>,
+ Transform::template process_tile<2, 2, 3, 4>,
},
{
- Transform::template process_tile<1, 1, 4, 0>,
- Transform::template process_tile<1, 1, 4, 1>,
- Transform::template process_tile<1, 1, 4, 2>,
- Transform::template process_tile<1, 1, 4, 3>,
- Transform::template process_tile<1, 1, 4, 4>,
+ Transform::template process_tile<2, 2, 4, 0>,
+ Transform::template process_tile<2, 2, 4, 1>,
+ Transform::template process_tile<2, 2, 4, 2>,
+ Transform::template process_tile<2, 2, 4, 3>,
+ Transform::template process_tile<2, 2, 4, 4>,
}
}
}
diff --git a/tests/datasets/SmallConvolutionLayerDataset.h b/tests/datasets/SmallConvolutionLayerDataset.h
index d88efbd21b..adb61de8e2 100644
--- a/tests/datasets/SmallConvolutionLayerDataset.h
+++ b/tests/datasets/SmallConvolutionLayerDataset.h
@@ -51,6 +51,7 @@ public:
// Kernel size 5
add_config(TensorShape(8U, 8U, 2U), TensorShape(5U, 5U, 2U, 1U), TensorShape(1U), TensorShape(4U, 4U, 1U), PadStrideInfo(1, 1, 0, 0));
+ add_config(TensorShape(8U, 8U, 2U), TensorShape(5U, 5U, 2U), TensorShape(1U), TensorShape(8U, 8U, 1U), PadStrideInfo(1, 1, 2, 2));
}
};