MLCE-36: FC tranpose weights

Change-Id: I3b8a6c00e61ba6da459ca5fc7275393f9d073aed Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/142533 Reviewed-by: Anthony Barbier <anthony.barbier@arm.com> Tested-by: Jenkins <bsgcomp@arm.com>
author: Georgios Pinitas <georgios.pinitas@arm.com> 2018-08-02 17:18:51 +0100
committer: Anthony Barbier <anthony.barbier@arm.com> 2018-11-02 16:54:54 +0000
commit: 195b0ba457d0020e1f54fb0c0378040e1c75d510 (patch)
tree: 220244e5390a1c0636616c109b4254b85d574458 /src
parent: 57c48243af8fd4b503fe629166531299c1b083d3 (diff)
download: ComputeLibrary-195b0ba457d0020e1f54fb0c0378040e1c75d510.tar.gz
4 files changed, 10 insertions, 3 deletions
diff --git a/src/graph/GraphBuilder.cpp b/src/graph/GraphBuilder.cpp
index 996af1dc90..7b09ec98f5 100644
--- a/src/graph/GraphBuilder.cpp
+++ b/src/graph/GraphBuilder.cpp
@@ -436,7 +436,7 @@ NodeID GraphBuilder::add_fully_connected_layer(Graph &g, NodeParams params, Node
     const TensorDescriptor input_tensor_desc = get_tensor_descriptor(g, g.node(input.node_id)->outputs()[0]);
 
     // Create weights node
-    TensorDescriptor w_desc = FullyConnectedLayerNode::compute_weights_descriptor(input_tensor_desc, num_outputs, weights_quant_info);
+    TensorDescriptor w_desc = FullyConnectedLayerNode::compute_weights_descriptor(input_tensor_desc, num_outputs, fc_info, weights_quant_info);
     NodeID           w_nid  = add_const_node_with_name(g, params, "Weights", w_desc, std::move(weights_accessor));
 
     // Create bias nodes
diff --git a/src/graph/nodes/FullyConnectedLayer.cpp b/src/graph/nodes/FullyConnectedLayer.cpp
index f41e1a3c22..6ea0292505 100644
--- a/src/graph/nodes/FullyConnectedLayer.cpp
+++ b/src/graph/nodes/FullyConnectedLayer.cpp
@@ -40,6 +40,7 @@ FullyConnectedLayerNode::FullyConnectedLayerNode(unsigned int num_outputs, Quant
 
 TensorDescriptor FullyConnectedLayerNode::compute_weights_descriptor(const TensorDescriptor &input_descriptor,
                                                                      unsigned int            num_outputs,
+                                                                     FullyConnectedLayerInfo fc_info,
                                                                      QuantizationInfo        weights_quant_info)
 {
     unsigned int num_weights    = 1;
@@ -57,6 +58,12 @@ TensorDescriptor FullyConnectedLayerNode::compute_weights_descriptor(const Tenso
     TensorDescriptor weights_descriptor = input_descriptor;
     weights_descriptor.shape            = TensorShape(num_weights, num_outputs);
 
+    // If weights are tranposed, use tranposed shape
+    if(!fc_info.transpose_weights)
+    {
+        weights_descriptor.shape = TensorShape(num_outputs, num_weights);
+    }
+
     // Set quantization info if present
     if(!weights_quant_info.empty())
     {
diff --git a/src/runtime/CL/functions/CLFullyConnectedLayer.cpp b/src/runtime/CL/functions/CLFullyConnectedLayer.cpp
index ccd7813fbc..6fd78a3fc9 100644
--- a/src/runtime/CL/functions/CLFullyConnectedLayer.cpp
+++ b/src/runtime/CL/functions/CLFullyConnectedLayer.cpp
@@ -256,7 +256,7 @@ Status CLFullyConnectedLayer::validate(const ITensorInfo *input, const ITensorIn
 
     const ITensorInfo &im2col_input      = TensorInfo(input->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(compute_im2col_fc_shape(input)).set_data_layout(DataLayout::NCHW));
     const ITensorInfo &reshaped_weights  = TensorInfo(weights->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(compute_transposed_shape(*weights)));
-    const ITensorInfo &converted_weights = TensorInfo(reshaped_weights.clone()->set_is_resizable(true).reset_padding());
+    const ITensorInfo &converted_weights = weights_reshaped ? TensorInfo(weights->clone()->set_is_resizable(true).reset_padding()) : TensorInfo(*reshaped_weights.clone());
     const ITensorInfo &gemmlowp_output   = TensorInfo(output->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::S32));
 
     // Configure accumulate biases kernel for non quantized asymmetric types
diff --git a/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp b/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp
index 34cabb5c2e..25b8adc431 100644
--- a/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp
+++ b/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp
@@ -251,7 +251,7 @@ Status NEFullyConnectedLayer::validate(const ITensorInfo *input, const ITensorIn
 
     const ITensorInfo &im2col_input      = TensorInfo(input->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(compute_im2col_fc_shape(input)));
     const ITensorInfo &reshaped_weights  = TensorInfo(weights->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(compute_transposed_shape(*weights)));
-    const ITensorInfo &converted_weights = TensorInfo(reshaped_weights.clone()->set_is_resizable(true).reset_padding());
+    const ITensorInfo &converted_weights = weights_reshaped ? TensorInfo(weights->clone()->set_is_resizable(true).reset_padding()) : TensorInfo(*reshaped_weights.clone());
     const ITensorInfo &gemmlowp_output   = TensorInfo(output->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::S32));
 
     // Configure accumulate biases kernel for non quantized asymmetric types
author	Georgios Pinitas <georgios.pinitas@arm.com>	2018-08-02 17:18:51 +0100
committer	Anthony Barbier <anthony.barbier@arm.com>	2018-11-02 16:54:54 +0000
commit	195b0ba457d0020e1f54fb0c0378040e1c75d510 (patch)
tree	220244e5390a1c0636616c109b4254b85d574458 /src
parent	57c48243af8fd4b503fe629166531299c1b083d3 (diff)
download	ComputeLibrary-195b0ba457d0020e1f54fb0c0378040e1c75d510.tar.gz