diff options
3 files changed, 87 insertions, 84 deletions
diff --git a/arm_compute/core/NEON/kernels/convolution/common/utils.hpp b/arm_compute/core/NEON/kernels/convolution/common/utils.hpp index d8b9c3b7d3..a22809fb58 100644 --- a/arm_compute/core/NEON/kernels/convolution/common/utils.hpp +++ b/arm_compute/core/NEON/kernels/convolution/common/utils.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -25,13 +25,15 @@ #pragma once double TimeInUs(void); -void PrintMatrix(const float* const m, const int M, const int N, const int row_stride); +void PrintMatrix(const float *const m, const int M, const int N, const int row_stride); -inline int iceildiv(const int a, const int b) { - return (a + b - 1) / b; +inline int iceildiv(const int a, const int b) +{ + return (a + b - 1) / b; } template <typename T> -inline T roundup(const T a, const T b) { - return a + b - (a % b); +inline T roundup(const T a, const T b) +{ + return b * iceildiv(a, b); } diff --git a/src/core/NEON/kernels/convolution/winograd/transforms/input_2x2_5x5_fp32.cpp b/src/core/NEON/kernels/convolution/winograd/transforms/input_2x2_5x5_fp32.cpp index d9ebe8b7cd..fd30b6118e 100644 --- a/src/core/NEON/kernels/convolution/winograd/transforms/input_2x2_5x5_fp32.cpp +++ b/src/core/NEON/kernels/convolution/winograd/transforms/input_2x2_5x5_fp32.cpp @@ -21,7 +21,6 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ - #include "arm_compute/core/NEON/kernels/convolution/winograd/transforms/input.hpp" #include "arm_compute/core/NEON/kernels/convolution/winograd/winograd_gemm.hpp" #include "arm_compute/core/NEON/kernels/convolution/common/arm.hpp" @@ -35,6 +34,7 @@ template <> template <> int Transform::ops_performed(const Tensor4DShape &input_shape) { + (void) input_shape; return 0; // TODO } @@ -45,8 +45,8 @@ int Transform::ops_performed(const Tensor4DShape &input_shape) * different padding combinations which may be required. These padding * constraints are the space: * -* Padding top in {0, 1} -* Padding left in {0, 1} +* Padding top in {0, 2} +* Padding left in {0, 2} * Padding bottom in {0, 1, 2, 3, 4} * Padding right in {0, 1, 2, 3, 4} */ @@ -340,115 +340,115 @@ const Transform::TileFn Transform::tile_fns[2][2][max_pad_bottom][max_pad_right] }, { { - Transform::template process_tile<0, 1, 0, 0>, // Left - Transform::template process_tile<0, 1, 0, 1>, - Transform::template process_tile<0, 1, 0, 2>, - Transform::template process_tile<0, 1, 0, 3>, - Transform::template process_tile<0, 1, 0, 4>, + Transform::template process_tile<0, 2, 0, 0>, // Left + Transform::template process_tile<0, 2, 0, 1>, + Transform::template process_tile<0, 2, 0, 2>, + Transform::template process_tile<0, 2, 0, 3>, + Transform::template process_tile<0, 2, 0, 4>, }, { - Transform::template process_tile<0, 1, 1, 0>, // Bottom left - Transform::template process_tile<0, 1, 1, 1>, - Transform::template process_tile<0, 1, 1, 2>, - Transform::template process_tile<0, 1, 1, 3>, - Transform::template process_tile<0, 1, 1, 4>, + Transform::template process_tile<0, 2, 1, 0>, // Bottom left + Transform::template process_tile<0, 2, 1, 1>, + Transform::template process_tile<0, 2, 1, 2>, + Transform::template process_tile<0, 2, 1, 3>, + Transform::template process_tile<0, 2, 1, 4>, }, { - Transform::template process_tile<0, 1, 2, 0>, // " " - Transform::template process_tile<0, 1, 2, 1>, - Transform::template process_tile<0, 1, 2, 2>, - Transform::template process_tile<0, 1, 2, 3>, - Transform::template process_tile<0, 1, 2, 4>, + Transform::template process_tile<0, 2, 2, 0>, // " " + Transform::template process_tile<0, 2, 2, 1>, + Transform::template process_tile<0, 2, 2, 2>, + Transform::template process_tile<0, 2, 2, 3>, + Transform::template process_tile<0, 2, 2, 4>, }, { - Transform::template process_tile<0, 1, 3, 0>, // " " - Transform::template process_tile<0, 1, 3, 1>, - Transform::template process_tile<0, 1, 3, 2>, - Transform::template process_tile<0, 1, 3, 3>, - Transform::template process_tile<0, 1, 3, 4>, + Transform::template process_tile<0, 2, 3, 0>, // " " + Transform::template process_tile<0, 2, 3, 1>, + Transform::template process_tile<0, 2, 3, 2>, + Transform::template process_tile<0, 2, 3, 3>, + Transform::template process_tile<0, 2, 3, 4>, }, { - Transform::template process_tile<0, 1, 4, 0>, // " " - Transform::template process_tile<0, 1, 4, 1>, - Transform::template process_tile<0, 1, 4, 2>, - Transform::template process_tile<0, 1, 4, 3>, - Transform::template process_tile<0, 1, 4, 4>, + Transform::template process_tile<0, 2, 4, 0>, // " " + Transform::template process_tile<0, 2, 4, 1>, + Transform::template process_tile<0, 2, 4, 2>, + Transform::template process_tile<0, 2, 4, 3>, + Transform::template process_tile<0, 2, 4, 4>, } } }, { { { - Transform::template process_tile<1, 0, 0, 0>, // Top - Transform::template process_tile<1, 0, 0, 1>, // Top right - Transform::template process_tile<1, 0, 0, 2>, // " " - Transform::template process_tile<1, 0, 0, 3>, // " " - Transform::template process_tile<1, 0, 0, 4>, // " " + Transform::template process_tile<2, 0, 0, 0>, // Top + Transform::template process_tile<2, 0, 0, 1>, // Top right + Transform::template process_tile<2, 0, 0, 2>, // " " + Transform::template process_tile<2, 0, 0, 3>, // " " + Transform::template process_tile<2, 0, 0, 4>, // " " }, { - Transform::template process_tile<1, 0, 1, 0>, - Transform::template process_tile<1, 0, 1, 1>, - Transform::template process_tile<1, 0, 1, 2>, - Transform::template process_tile<1, 0, 1, 3>, - Transform::template process_tile<1, 0, 1, 4>, + Transform::template process_tile<2, 0, 1, 0>, + Transform::template process_tile<2, 0, 1, 1>, + Transform::template process_tile<2, 0, 1, 2>, + Transform::template process_tile<2, 0, 1, 3>, + Transform::template process_tile<2, 0, 1, 4>, }, { - Transform::template process_tile<1, 0, 2, 0>, - Transform::template process_tile<1, 0, 2, 1>, - Transform::template process_tile<1, 0, 2, 2>, - Transform::template process_tile<1, 0, 2, 3>, - Transform::template process_tile<1, 0, 2, 4>, + Transform::template process_tile<2, 0, 2, 0>, + Transform::template process_tile<2, 0, 2, 1>, + Transform::template process_tile<2, 0, 2, 2>, + Transform::template process_tile<2, 0, 2, 3>, + Transform::template process_tile<2, 0, 2, 4>, }, { - Transform::template process_tile<1, 0, 3, 0>, - Transform::template process_tile<1, 0, 3, 1>, - Transform::template process_tile<1, 0, 3, 2>, - Transform::template process_tile<1, 0, 3, 3>, - Transform::template process_tile<1, 0, 3, 4>, + Transform::template process_tile<2, 0, 3, 0>, + Transform::template process_tile<2, 0, 3, 1>, + Transform::template process_tile<2, 0, 3, 2>, + Transform::template process_tile<2, 0, 3, 3>, + Transform::template process_tile<2, 0, 3, 4>, }, { - Transform::template process_tile<1, 0, 4, 0>, - Transform::template process_tile<1, 0, 4, 1>, - Transform::template process_tile<1, 0, 4, 2>, - Transform::template process_tile<1, 0, 4, 3>, - Transform::template process_tile<1, 0, 4, 4>, + Transform::template process_tile<2, 0, 4, 0>, + Transform::template process_tile<2, 0, 4, 1>, + Transform::template process_tile<2, 0, 4, 2>, + Transform::template process_tile<2, 0, 4, 3>, + Transform::template process_tile<2, 0, 4, 4>, }, }, { { - Transform::template process_tile<1, 1, 0, 0>, // Top left - Transform::template process_tile<1, 1, 0, 1>, - Transform::template process_tile<1, 1, 0, 2>, - Transform::template process_tile<1, 1, 0, 3>, - Transform::template process_tile<1, 1, 0, 4>, + Transform::template process_tile<2, 2, 0, 0>, // Top left + Transform::template process_tile<2, 2, 0, 1>, + Transform::template process_tile<2, 2, 0, 2>, + Transform::template process_tile<2, 2, 0, 3>, + Transform::template process_tile<2, 2, 0, 4>, }, { - Transform::template process_tile<1, 1, 1, 0>, - Transform::template process_tile<1, 1, 1, 1>, - Transform::template process_tile<1, 1, 1, 2>, - Transform::template process_tile<1, 1, 1, 3>, - Transform::template process_tile<1, 1, 1, 4>, + Transform::template process_tile<2, 2, 1, 0>, + Transform::template process_tile<2, 2, 1, 1>, + Transform::template process_tile<2, 2, 1, 2>, + Transform::template process_tile<2, 2, 1, 3>, + Transform::template process_tile<2, 2, 1, 4>, }, { - Transform::template process_tile<1, 1, 2, 0>, - Transform::template process_tile<1, 1, 2, 1>, - Transform::template process_tile<1, 1, 2, 2>, - Transform::template process_tile<1, 1, 2, 3>, - Transform::template process_tile<1, 1, 2, 4>, + Transform::template process_tile<2, 2, 2, 0>, + Transform::template process_tile<2, 2, 2, 1>, + Transform::template process_tile<2, 2, 2, 2>, + Transform::template process_tile<2, 2, 2, 3>, + Transform::template process_tile<2, 2, 2, 4>, }, { - Transform::template process_tile<1, 1, 3, 0>, - Transform::template process_tile<1, 1, 3, 1>, - Transform::template process_tile<1, 1, 3, 2>, - Transform::template process_tile<1, 1, 3, 3>, - Transform::template process_tile<1, 1, 3, 4>, + Transform::template process_tile<2, 2, 3, 0>, + Transform::template process_tile<2, 2, 3, 1>, + Transform::template process_tile<2, 2, 3, 2>, + Transform::template process_tile<2, 2, 3, 3>, + Transform::template process_tile<2, 2, 3, 4>, }, { - Transform::template process_tile<1, 1, 4, 0>, - Transform::template process_tile<1, 1, 4, 1>, - Transform::template process_tile<1, 1, 4, 2>, - Transform::template process_tile<1, 1, 4, 3>, - Transform::template process_tile<1, 1, 4, 4>, + Transform::template process_tile<2, 2, 4, 0>, + Transform::template process_tile<2, 2, 4, 1>, + Transform::template process_tile<2, 2, 4, 2>, + Transform::template process_tile<2, 2, 4, 3>, + Transform::template process_tile<2, 2, 4, 4>, } } } diff --git a/tests/datasets/SmallConvolutionLayerDataset.h b/tests/datasets/SmallConvolutionLayerDataset.h index d88efbd21b..adb61de8e2 100644 --- a/tests/datasets/SmallConvolutionLayerDataset.h +++ b/tests/datasets/SmallConvolutionLayerDataset.h @@ -51,6 +51,7 @@ public: // Kernel size 5 add_config(TensorShape(8U, 8U, 2U), TensorShape(5U, 5U, 2U, 1U), TensorShape(1U), TensorShape(4U, 4U, 1U), PadStrideInfo(1, 1, 0, 0)); + add_config(TensorShape(8U, 8U, 2U), TensorShape(5U, 5U, 2U), TensorShape(1U), TensorShape(8U, 8U, 1U), PadStrideInfo(1, 1, 2, 2)); } }; |