From 8951933e5dd7be8d922affea3cc23a48a05b694d Mon Sep 17 00:00:00 2001
From: Pablo Tello <pablo.tello@arm.com>
Date: Fri, 17 Nov 2017 11:52:36 +0000
Subject: COMPMID-687: Winograd layer.

Change-Id: Ica682d08e851491bf4a26b8d17908c014844055e
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/110990
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Tested-by: BSG Visual Compute Jenkins server to access repositories on http://mpd-gerrit.cambridge.arm.com <bsgcomp@arm.com>
---
 arm_compute/core/NEON/kernels/winograd/shims.hpp | 319 +++++++++++++++++++++++
 1 file changed, 319 insertions(+)
 create mode 100644 arm_compute/core/NEON/kernels/winograd/shims.hpp

(limited to 'arm_compute/core/NEON/kernels/winograd/shims.hpp')
diff --git a/arm_compute/core/NEON/kernels/winograd/shims.hpp b/arm_compute/core/NEON/kernels/winograd/shims.hpp
new file mode 100644
index 0000000000..249e5757f0
--- /dev/null
+++ b/arm_compute/core/NEON/kernels/winograd/shims.hpp
@@ -0,0 +1,319 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#pragma once
+
+/** Re-order a weight tensor from [Output feature map x Input feature map x
+ *  Height x Width] format to [Height x Width x Input feature map x Output
+ *  feature map] format.
+ */
+template <typename T>
+inline void ofm_ifm_h_w_to_h_w_ifm_ofm(
+  const T* const in,  // Input in [Output x Input x Height x Width] form
+  T* const out,       // Output in [Height x Width x Input x Output] form
+  const int n_output_feature_maps,
+  const int n_input_feature_maps,
+  const int n_rows,
+  const int n_cols,
+  int in_output_feature_map_stride=0,
+  int in_input_feature_map_stride=0,
+  int in_row_stride=0,
+  int out_row_stride=0,
+  int out_col_stride=0,
+  int out_input_feature_map_stride=0
+);
+
+/** Re-order a weight tensor from [Height x Width x Input feature map x Output
+ *  feature map] format to [Output feature map x Input feature map x Height x
+ *  Width] format.
+ */
+template <typename T>
+inline void h_w_ifm_ofm_to_ofm_ifm_h_w(
+  const T* const in,  // Input in [Height x Width x Input x Output] form
+  T* const out,       // Output in [Output x Input x Height x Width] form
+  const int n_rows,
+  const int n_cols,
+  const int n_input_feature_maps,
+  const int n_output_feature_maps,
+  int in_row_stride=0,
+  int in_col_stride=0,
+  int in_input_feature_map_stride=0,
+  int out_output_feature_map_stride=0,
+  int out_input_feature_map_stride=0,
+  int out_row_stride=0
+);
+
+
+/* Re-order a tensor from NCHW format to NHWC.
+ */
+template <typename T>
+inline void nchw_to_nhwc(
+  const T* const in,
+  T* const out,
+  const int n_batches,
+  const int n_channels,
+  const int n_rows,
+  const int n_cols,
+  int in_batch_stride=0,
+  int in_channel_stride=0,
+  int in_row_stride=0,
+  int out_batch_stride=0,
+  int out_row_stride=0,
+  int out_col_stride=0
+)
+{
+  // Fill in the stride values
+  in_row_stride = (in_row_stride) ? in_row_stride : n_cols;
+  in_channel_stride = (in_channel_stride) ? in_channel_stride
+                                          : n_rows * in_row_stride;
+  in_batch_stride = (in_batch_stride) ? in_batch_stride
+                                      : n_channels * in_channel_stride;
+
+  out_col_stride = (out_col_stride) ? out_col_stride : n_channels;
+  out_row_stride = (out_row_stride) ? out_row_stride : n_cols * out_col_stride;
+  out_batch_stride = (out_batch_stride) ? out_batch_stride
+                                        : n_rows * out_row_stride;
+
+  // Perform the re-ordering
+  for (int n = 0; n < n_batches; n++)
+  {
+    const T* const in_batch = in + n*in_batch_stride;
+    T* const out_batch = out + n*out_batch_stride;
+
+    for (int i = 0; i < n_rows; i++)
+    {
+      const T* const in_row = in_batch + i*in_row_stride;
+      T* const out_row = out_batch + i*out_row_stride;
+
+      for (int j = 0; j < n_cols; j++)
+      {
+        const T* const in_col = in_row + j;
+        T* const out_col = out_row + j*out_col_stride;
+
+        for (int c = 0; c < n_channels; c++)
+        {
+          const T* const in_channel = in_col + c*in_channel_stride;
+          out_col[c] = *(in_channel);
+        }
+      }
+    }
+  }
+}
+
+/* Re-order a tensor from NHWC format to NCHW.
+ */
+template <typename T>
+inline void nhwc_to_nchw(
+  const T* const in,  // Input data in NHWC form
+  T* const out,       // Output data in NCHW form
+  const int n_batches,
+  const int n_rows,
+  const int n_cols,
+  const int n_channels,
+  int in_batch_stride=0,
+  int in_row_stride=0,
+  int in_col_stride=0,
+  int out_batch_stride=0,
+  int out_channel_stride=0,
+  int out_row_stride=0
+)
+{
+  // Fill in stride values
+  in_col_stride = (in_col_stride) ? in_col_stride : n_channels;
+  in_row_stride = (in_row_stride) ? in_row_stride : n_cols * in_col_stride;
+  in_batch_stride = (in_batch_stride) ? in_batch_stride
+                                      : n_rows * in_row_stride;
+
+  out_row_stride = (out_row_stride) ? out_row_stride : n_cols;
+  out_channel_stride = (out_channel_stride) ? out_channel_stride
+                                            : n_rows * out_row_stride;
+  out_batch_stride = (out_batch_stride) ? out_batch_stride
+                                        : n_channels * out_channel_stride;
+
+  // Perform the re-ordering
+  // For every batch
+  for (int n = 0; n < n_batches; n++)
+  {
+    const T* const in_batch = in + n*in_batch_stride;
+    T* const out_batch = out + n*out_batch_stride;
+
+    // For every row
+    for (int i = 0; i < n_rows; i++)
+    {
+      const T* const in_i = in_batch + i*in_row_stride;
+      T* const out_i = out_batch + i*out_row_stride;
+
+      // For every column
+      for (int j = 0; j < n_cols; j++)
+      {
+        const T* const in_j = in_i + j*in_col_stride;
+        T* const out_j = out_i + j;
+
+        // For every channel
+        for (int c = 0; c < n_channels; c++)
+        {
+          const T* const in_channel = in_j + c;
+          T* const out_channel = out_j + c*out_channel_stride;
+          *(out_channel) = *(in_channel);
+        }
+      }
+    }
+  }
+}
+
+
+/*****************************************************************************/
+/* Generic weight re-order implementation.
+ */
+template <typename T>
+inline void ofm_ifm_h_w_to_h_w_ifm_ofm(
+  const T* const in,  // Input in [Output x Input x Height x Width] form
+  T* const out,       // Output in [Height x Width x Input x Output] form
+  const int n_output_feature_maps,
+  const int n_input_feature_maps,
+  const int n_rows,
+  const int n_cols,
+  int in_output_feature_map_stride,
+  int in_input_feature_map_stride,
+  int in_row_stride,
+  int out_row_stride,
+  int out_col_stride,
+  int out_input_feature_map_stride
+)
+{
+  // Fill in stride values
+  in_row_stride = (in_row_stride)
+    ? in_row_stride
+    : n_cols;
+  in_input_feature_map_stride = (in_input_feature_map_stride)
+    ? in_input_feature_map_stride
+    : n_rows * in_row_stride;
+  in_output_feature_map_stride = (in_output_feature_map_stride)
+    ? in_output_feature_map_stride
+    : n_input_feature_maps * in_input_feature_map_stride;
+
+  out_input_feature_map_stride = (out_input_feature_map_stride)
+    ? out_input_feature_map_stride
+    : n_output_feature_maps;
+  out_col_stride = (out_col_stride)
+    ? out_col_stride
+    : n_input_feature_maps * out_input_feature_map_stride;
+  out_row_stride = (out_row_stride)
+    ? out_row_stride
+    : n_cols * out_col_stride;
+
+  // Perform the re-ordering
+  for (int i = 0; i < n_rows; i++)
+  {
+    const T* const in_row = in + i * in_row_stride;
+    T* out_row = out + i * out_row_stride;
+
+    for (int j = 0; j < n_cols; j++)
+    {
+      const T* const in_col = in_row + j;
+      T* const out_col = out_row + j * out_col_stride;
+
+      for (int ifm = 0; ifm < n_input_feature_maps; ifm++)
+      {
+        const T* const in_ifm = in_col + ifm * in_input_feature_map_stride;
+        T* const out_ifm = out_col + ifm * out_input_feature_map_stride;
+
+        for (int ofm = 0; ofm < n_output_feature_maps; ofm++)
+        {
+          const T* const in_ofm = in_ifm + ofm * in_output_feature_map_stride;
+          T* const out_ofm = out_ifm + ofm;
+          *(out_ofm) = *(in_ofm);
+        }
+      }
+    }
+  }
+}
+
+/*****************************************************************************/
+/* Generic weight re-order implementation.
+ */
+template <typename T>
+inline void h_w_ifm_ofm_to_ofm_ifm_h_w(
+  const T* const in,  // Input in [Height x Width x Input x Output] form
+  T* const out,       // Output in [Output x Input x Height x Width] form
+  const int n_rows,
+  const int n_cols,
+  const int n_input_feature_maps,
+  const int n_output_feature_maps,
+  int in_row_stride,
+  int in_col_stride,
+  int in_input_feature_map_stride,
+  int out_output_feature_map_stride,
+  int out_input_feature_map_stride,
+  int out_row_stride
+)
+{
+  // Fill in the stride values
+  in_input_feature_map_stride = (in_input_feature_map_stride)
+    ? in_input_feature_map_stride
+    : n_output_feature_maps;
+  in_col_stride = (in_col_stride)
+    ? in_col_stride
+    : n_input_feature_maps * in_input_feature_map_stride;
+  in_row_stride = (in_row_stride)
+    ? in_row_stride
+    : n_cols * in_col_stride;
+
+  out_row_stride = (out_row_stride)
+    ? out_row_stride
+    : n_cols;
+  out_input_feature_map_stride = (out_input_feature_map_stride)
+    ? out_input_feature_map_stride
+    : n_rows * out_row_stride;
+  out_output_feature_map_stride = (out_output_feature_map_stride)
+    ? out_output_feature_map_stride
+    : n_input_feature_maps * out_input_feature_map_stride;
+
+  // Perform the re-ordering
+  for (int i = 0; i < n_rows; i++)
+  {
+    const T* const in_row = in + i * in_row_stride;
+    T* const out_row = out + i * out_row_stride;
+
+    for (int j = 0; j < n_cols; j++)
+    {
+      const T* const in_col = in_row + j * in_col_stride;
+      T* const out_col = out_row + j;
+
+      for (int ifm = 0; ifm < n_input_feature_maps; ifm++)
+      {
+        const T* const in_ifm = in_col + ifm * in_input_feature_map_stride;
+        T* const out_ifm = out_col + ifm * out_input_feature_map_stride;
+
+        for (int ofm = 0; ofm < n_output_feature_maps; ofm++)
+        {
+          const T* const in_ofm = in_ifm + ofm;
+          T* const out_ofm = out_ifm + ofm * out_output_feature_map_stride;
+          *(out_ofm) = *(in_ofm);
+        }
+      }
+    }
+  }
+}
+
-- 
cgit v1.2.1