From 195b0ba457d0020e1f54fb0c0378040e1c75d510 Mon Sep 17 00:00:00 2001 From: Georgios Pinitas Date: Thu, 2 Aug 2018 17:18:51 +0100 Subject: MLCE-36: FC tranpose weights Change-Id: I3b8a6c00e61ba6da459ca5fc7275393f9d073aed Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/142533 Reviewed-by: Anthony Barbier Tested-by: Jenkins --- arm_compute/core/Types.h | 11 +++++++++++ arm_compute/graph/frontend/Types.h | 1 + arm_compute/graph/nodes/FullyConnectedLayerNode.h | 2 ++ src/graph/GraphBuilder.cpp | 2 +- src/graph/nodes/FullyConnectedLayer.cpp | 7 +++++++ src/runtime/CL/functions/CLFullyConnectedLayer.cpp | 2 +- src/runtime/NEON/functions/NEFullyConnectedLayer.cpp | 2 +- 7 files changed, 24 insertions(+), 3 deletions(-) diff --git a/arm_compute/core/Types.h b/arm_compute/core/Types.h index 81d652dd7d..d9109e4565 100644 --- a/arm_compute/core/Types.h +++ b/arm_compute/core/Types.h @@ -701,6 +701,17 @@ struct FullyConnectedLayerInfo weights_trained_layout = layout; return *this; } + /** Sets the transpose weights flag + * + * @param[in] should_transpose_weights Boolean flag indicating if weights should be transposed + * + * @return Updated object + */ + FullyConnectedLayerInfo &set_transpose_weights(bool should_transpose_weights) + { + transpose_weights = should_transpose_weights; + return *this; + } }; /** Pooling Layer Information class */ diff --git a/arm_compute/graph/frontend/Types.h b/arm_compute/graph/frontend/Types.h index 8f6312f318..ebbf7101ac 100644 --- a/arm_compute/graph/frontend/Types.h +++ b/arm_compute/graph/frontend/Types.h @@ -39,6 +39,7 @@ using graph::TensorShape; using graph::PermutationVector; using graph::ActivationLayerInfo; +using graph::FullyConnectedLayerInfo; using graph::NormalizationLayerInfo; using graph::NormType; using graph::PadStrideInfo; diff --git a/arm_compute/graph/nodes/FullyConnectedLayerNode.h b/arm_compute/graph/nodes/FullyConnectedLayerNode.h index 1bff6006c8..33f9b1eefe 100644 --- 
a/arm_compute/graph/nodes/FullyConnectedLayerNode.h +++ b/arm_compute/graph/nodes/FullyConnectedLayerNode.h @@ -49,12 +49,14 @@ public: * * @param[in] input_descriptor Input descriptor * @param[in] num_outputs Number of output neurons + * @param[in] fc_info (Optional) Additional information about the fully connected layer * @param[in] weights_quant_info (Optional) Weights quantization info * * @return Weights descriptor */ static TensorDescriptor compute_weights_descriptor(const TensorDescriptor &input_descriptor, unsigned int num_outputs, + FullyConnectedLayerInfo fc_info = FullyConnectedLayerInfo(), QuantizationInfo weights_quant_info = QuantizationInfo()); /** Computes fully connected layer output descriptor * diff --git a/src/graph/GraphBuilder.cpp b/src/graph/GraphBuilder.cpp index 996af1dc90..7b09ec98f5 100644 --- a/src/graph/GraphBuilder.cpp +++ b/src/graph/GraphBuilder.cpp @@ -436,7 +436,7 @@ NodeID GraphBuilder::add_fully_connected_layer(Graph &g, NodeParams params, Node const TensorDescriptor input_tensor_desc = get_tensor_descriptor(g, g.node(input.node_id)->outputs()[0]); // Create weights node - TensorDescriptor w_desc = FullyConnectedLayerNode::compute_weights_descriptor(input_tensor_desc, num_outputs, weights_quant_info); + TensorDescriptor w_desc = FullyConnectedLayerNode::compute_weights_descriptor(input_tensor_desc, num_outputs, fc_info, weights_quant_info); NodeID w_nid = add_const_node_with_name(g, params, "Weights", w_desc, std::move(weights_accessor)); // Create bias nodes diff --git a/src/graph/nodes/FullyConnectedLayer.cpp b/src/graph/nodes/FullyConnectedLayer.cpp index f41e1a3c22..6ea0292505 100644 --- a/src/graph/nodes/FullyConnectedLayer.cpp +++ b/src/graph/nodes/FullyConnectedLayer.cpp @@ -40,6 +40,7 @@ FullyConnectedLayerNode::FullyConnectedLayerNode(unsigned int num_outputs, Quant TensorDescriptor FullyConnectedLayerNode::compute_weights_descriptor(const TensorDescriptor &input_descriptor, unsigned int num_outputs, + 
FullyConnectedLayerInfo fc_info, QuantizationInfo weights_quant_info) { unsigned int num_weights = 1; @@ -57,6 +58,12 @@ TensorDescriptor FullyConnectedLayerNode::compute_weights_descriptor(const Tenso TensorDescriptor weights_descriptor = input_descriptor; weights_descriptor.shape = TensorShape(num_weights, num_outputs); + // If weights are transposed, use transposed shape + if(!fc_info.transpose_weights) + { + weights_descriptor.shape = TensorShape(num_outputs, num_weights); + } + // Set quantization info if present if(!weights_quant_info.empty()) { diff --git a/src/runtime/CL/functions/CLFullyConnectedLayer.cpp b/src/runtime/CL/functions/CLFullyConnectedLayer.cpp index ccd7813fbc..6fd78a3fc9 100644 --- a/src/runtime/CL/functions/CLFullyConnectedLayer.cpp +++ b/src/runtime/CL/functions/CLFullyConnectedLayer.cpp @@ -256,7 +256,7 @@ Status CLFullyConnectedLayer::validate(const ITensorInfo *input, const ITensorIn const ITensorInfo &im2col_input = TensorInfo(input->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(compute_im2col_fc_shape(input)).set_data_layout(DataLayout::NCHW)); const ITensorInfo &reshaped_weights = TensorInfo(weights->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(compute_transposed_shape(*weights))); - const ITensorInfo &converted_weights = TensorInfo(reshaped_weights.clone()->set_is_resizable(true).reset_padding()); + const ITensorInfo &converted_weights = weights_reshaped ?
TensorInfo(weights->clone()->set_is_resizable(true).reset_padding()) : TensorInfo(*reshaped_weights.clone()); const ITensorInfo &gemmlowp_output = TensorInfo(output->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::S32)); // Configure accumulate biases kernel for non quantized asymmetric types diff --git a/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp b/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp index 34cabb5c2e..25b8adc431 100644 --- a/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp +++ b/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp @@ -251,7 +251,7 @@ Status NEFullyConnectedLayer::validate(const ITensorInfo *input, const ITensorIn const ITensorInfo &im2col_input = TensorInfo(input->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(compute_im2col_fc_shape(input))); const ITensorInfo &reshaped_weights = TensorInfo(weights->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(compute_transposed_shape(*weights))); - const ITensorInfo &converted_weights = TensorInfo(reshaped_weights.clone()->set_is_resizable(true).reset_padding()); + const ITensorInfo &converted_weights = weights_reshaped ? TensorInfo(weights->clone()->set_is_resizable(true).reset_padding()) : TensorInfo(*reshaped_weights.clone()); const ITensorInfo &gemmlowp_output = TensorInfo(output->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::S32)); // Configure accumulate biases kernel for non quantized asymmetric types -- cgit v1.2.1