aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorGeorgios Pinitas <georgios.pinitas@arm.com>2018-08-02 17:18:51 +0100
committerAnthony Barbier <anthony.barbier@arm.com>2018-11-02 16:54:54 +0000
commit195b0ba457d0020e1f54fb0c0378040e1c75d510 (patch)
tree220244e5390a1c0636616c109b4254b85d574458 /src
parent57c48243af8fd4b503fe629166531299c1b083d3 (diff)
downloadComputeLibrary-195b0ba457d0020e1f54fb0c0378040e1c75d510.tar.gz
MLCE-36: FC tranpose weights
Change-Id: I3b8a6c00e61ba6da459ca5fc7275393f9d073aed Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/142533 Reviewed-by: Anthony Barbier <anthony.barbier@arm.com> Tested-by: Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src')
-rw-r--r--src/graph/GraphBuilder.cpp2
-rw-r--r--src/graph/nodes/FullyConnectedLayer.cpp7
-rw-r--r--src/runtime/CL/functions/CLFullyConnectedLayer.cpp2
-rw-r--r--src/runtime/NEON/functions/NEFullyConnectedLayer.cpp2
4 files changed, 10 insertions, 3 deletions
diff --git a/src/graph/GraphBuilder.cpp b/src/graph/GraphBuilder.cpp
index 996af1dc90..7b09ec98f5 100644
--- a/src/graph/GraphBuilder.cpp
+++ b/src/graph/GraphBuilder.cpp
@@ -436,7 +436,7 @@ NodeID GraphBuilder::add_fully_connected_layer(Graph &g, NodeParams params, Node
const TensorDescriptor input_tensor_desc = get_tensor_descriptor(g, g.node(input.node_id)->outputs()[0]);
// Create weights node
- TensorDescriptor w_desc = FullyConnectedLayerNode::compute_weights_descriptor(input_tensor_desc, num_outputs, weights_quant_info);
+ TensorDescriptor w_desc = FullyConnectedLayerNode::compute_weights_descriptor(input_tensor_desc, num_outputs, fc_info, weights_quant_info);
NodeID w_nid = add_const_node_with_name(g, params, "Weights", w_desc, std::move(weights_accessor));
// Create bias nodes
diff --git a/src/graph/nodes/FullyConnectedLayer.cpp b/src/graph/nodes/FullyConnectedLayer.cpp
index f41e1a3c22..6ea0292505 100644
--- a/src/graph/nodes/FullyConnectedLayer.cpp
+++ b/src/graph/nodes/FullyConnectedLayer.cpp
@@ -40,6 +40,7 @@ FullyConnectedLayerNode::FullyConnectedLayerNode(unsigned int num_outputs, Quant
TensorDescriptor FullyConnectedLayerNode::compute_weights_descriptor(const TensorDescriptor &input_descriptor,
unsigned int num_outputs,
+ FullyConnectedLayerInfo fc_info,
QuantizationInfo weights_quant_info)
{
unsigned int num_weights = 1;
@@ -57,6 +58,12 @@ TensorDescriptor FullyConnectedLayerNode::compute_weights_descriptor(const Tenso
TensorDescriptor weights_descriptor = input_descriptor;
weights_descriptor.shape = TensorShape(num_weights, num_outputs);
+ // If weights are tranposed, use tranposed shape
+ if(!fc_info.transpose_weights)
+ {
+ weights_descriptor.shape = TensorShape(num_outputs, num_weights);
+ }
+
// Set quantization info if present
if(!weights_quant_info.empty())
{
diff --git a/src/runtime/CL/functions/CLFullyConnectedLayer.cpp b/src/runtime/CL/functions/CLFullyConnectedLayer.cpp
index ccd7813fbc..6fd78a3fc9 100644
--- a/src/runtime/CL/functions/CLFullyConnectedLayer.cpp
+++ b/src/runtime/CL/functions/CLFullyConnectedLayer.cpp
@@ -256,7 +256,7 @@ Status CLFullyConnectedLayer::validate(const ITensorInfo *input, const ITensorIn
const ITensorInfo &im2col_input = TensorInfo(input->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(compute_im2col_fc_shape(input)).set_data_layout(DataLayout::NCHW));
const ITensorInfo &reshaped_weights = TensorInfo(weights->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(compute_transposed_shape(*weights)));
- const ITensorInfo &converted_weights = TensorInfo(reshaped_weights.clone()->set_is_resizable(true).reset_padding());
+ const ITensorInfo &converted_weights = weights_reshaped ? TensorInfo(weights->clone()->set_is_resizable(true).reset_padding()) : TensorInfo(*reshaped_weights.clone());
const ITensorInfo &gemmlowp_output = TensorInfo(output->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::S32));
// Configure accumulate biases kernel for non quantized asymmetric types
diff --git a/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp b/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp
index 34cabb5c2e..25b8adc431 100644
--- a/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp
+++ b/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp
@@ -251,7 +251,7 @@ Status NEFullyConnectedLayer::validate(const ITensorInfo *input, const ITensorIn
const ITensorInfo &im2col_input = TensorInfo(input->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(compute_im2col_fc_shape(input)));
const ITensorInfo &reshaped_weights = TensorInfo(weights->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(compute_transposed_shape(*weights)));
- const ITensorInfo &converted_weights = TensorInfo(reshaped_weights.clone()->set_is_resizable(true).reset_padding());
+ const ITensorInfo &converted_weights = weights_reshaped ? TensorInfo(weights->clone()->set_is_resizable(true).reset_padding()) : TensorInfo(*reshaped_weights.clone());
const ITensorInfo &gemmlowp_output = TensorInfo(output->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::S32));
// Configure accumulate biases kernel for non quantized asymmetric types