COMPMID-1550: Winograd integrate RSH changes.

Refactors the transforms to make use of partial specialization.

Change-Id: Idff68d22817a00a7ee9eef5351a5a9fd33147540
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/146635
Tested-by: Jenkins <bsgcomp@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
author: Pablo Tello <pablo.tello@arm.com> 2018-09-03 11:40:33 +0100
committer: Anthony Barbier <anthony.barbier@arm.com> 2018-11-02 16:54:54 +0000
commit: 72686fa6ee0f04d458ed2274b4d34917628ef14d (patch)
tree: 7b897efdc535ef7cea8826d36fae951a3c53438e /src/core/NEON/kernels/convolution/winograd/transforms/input_2x2_3x3_fp32.cpp
parent: 0d2b48c4a2cc82fd3312635a97117553ea4ee735 (diff)
download: ComputeLibrary-72686fa6ee0f04d458ed2274b4d34917628ef14d.tar.gz
1 file changed, 2 insertions, 19 deletions
diff --git a/src/core/NEON/kernels/convolution/winograd/transforms/input_2x2_3x3_fp32.cpp b/src/core/NEON/kernels/convolution/winograd/transforms/input_2x2_3x3_fp32.cpp
index 97b2695d69..a9d5d52d15 100644
--- a/src/core/NEON/kernels/convolution/winograd/transforms/input_2x2_3x3_fp32.cpp
+++ b/src/core/NEON/kernels/convolution/winograd/transforms/input_2x2_3x3_fp32.cpp
@@ -29,22 +29,7 @@
 namespace winograd
 {
 
-using Transform = WinogradGEMM<2, 2, 3, 3>::InputTransform<float>;
-
-/******************************************************************************
- * Cost methods for the input transform.
- * =====================================
- */
-template <>
-template <>
-int Transform::ops_performed(const Tensor4DShape &input_shape)
-{
-  // NOTE: Cost in FLOPs rather than instructions or uops.
-  const int tile_M = iceildiv(input_shape.n_rows, inner_tile_rows);
-  const int tile_N = iceildiv(input_shape.n_cols, inner_tile_cols);
-  return 16 * 16 * tile_M * tile_N * input_shape.n_channels;
-}
-/*****************************************************************************/
+using Transform = InputTransformImpl<3, 3, 4, 4, float>;
 
 /*****************************************************************************
 * F(2x2, 3x3) implies the use of a 4x4 input tile. Such tiles can require a
@@ -100,7 +85,6 @@ int Transform::ops_performed(const Tensor4DShape &input_shape)
 *     Padding right in {0, 1, 2}
 */
 template <>
-template <>
 template <int pad_top, int pad_left, int pad_bottom, int pad_right>
 void Transform::process_tile(
   int n_channels,
@@ -328,7 +312,6 @@ void Transform::process_tile(
 }
 
 template <>
-template <>
 const Transform::TileFn Transform::tile_fns[n_pad_top][n_pad_left][n_pad_bottom][n_pad_right] =
 {
   {
@@ -405,5 +388,5 @@ const Transform::TileFn Transform::tile_fns[n_pad_top][n_pad_left][n_pad_bottom]
   }
 };
 
-template struct WinogradGEMM<2, 2, 3, 3>::InputTransform<float>;
+template class InputTransform<3, 3, 4, 4, float>;
 }  // namespace winograd
author	Pablo Tello <pablo.tello@arm.com>	2018-09-03 11:40:33 +0100
committer	Anthony Barbier <anthony.barbier@arm.com>	2018-11-02 16:54:54 +0000
commit	72686fa6ee0f04d458ed2274b4d34917628ef14d (patch)
tree	7b897efdc535ef7cea8826d36fae951a3c53438e /src/core/NEON/kernels/convolution/winograd/transforms/input_2x2_3x3_fp32.cpp
parent	0d2b48c4a2cc82fd3312635a97117553ea4ee735 (diff)
download	ComputeLibrary-72686fa6ee0f04d458ed2274b4d34917628ef14d.tar.gz