author     Georgios Pinitas <georgios.pinitas@arm.com>  2018-08-02 17:18:51 +0100
committer  Anthony Barbier <anthony.barbier@arm.com>    2018-11-02 16:54:54 +0000
commit     195b0ba457d0020e1f54fb0c0378040e1c75d510 (patch)
tree       220244e5390a1c0636616c109b4254b85d574458
parent     57c48243af8fd4b503fe629166531299c1b083d3 (diff)
download   ComputeLibrary-195b0ba457d0020e1f54fb0c0378040e1c75d510.tar.gz
MLCE-36: FC transpose weights
Change-Id: I3b8a6c00e61ba6da459ca5fc7275393f9d073aed
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/142533
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Tested-by: Jenkins <bsgcomp@arm.com>
-rw-r--r--  arm_compute/core/Types.h                               11
-rw-r--r--  arm_compute/graph/frontend/Types.h                      1
-rw-r--r--  arm_compute/graph/nodes/FullyConnectedLayerNode.h       2
-rw-r--r--  src/graph/GraphBuilder.cpp                              2
-rw-r--r--  src/graph/nodes/FullyConnectedLayer.cpp                 7
-rw-r--r--  src/runtime/CL/functions/CLFullyConnectedLayer.cpp      2
-rw-r--r--  src/runtime/NEON/functions/NEFullyConnectedLayer.cpp    2
7 files changed, 24 insertions, 3 deletions
diff --git a/arm_compute/core/Types.h b/arm_compute/core/Types.h
index 81d652dd7d..d9109e4565 100644
--- a/arm_compute/core/Types.h
+++ b/arm_compute/core/Types.h
@@ -701,6 +701,17 @@ struct FullyConnectedLayerInfo
weights_trained_layout = layout;
return *this;
}
+ /** Sets the transpose weights flag
+ *
+ * @param[in] should_transpose_weights Boolean flag indicating if weights should be transposed
+ *
+ * @return Updated object
+ */
+ FullyConnectedLayerInfo &set_transpose_weights(bool should_transpose_weights)
+ {
+ transpose_weights = should_transpose_weights;
+ return *this;
+ }
};
/** Pooling Layer Information class */
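
For reference, a minimal usage sketch (not part of the patch) of the setter added above; the surrounding configure/validate call is assumed and omitted:

    // Hypothetical caller code; only set_transpose_weights() comes from this patch.
    arm_compute::FullyConnectedLayerInfo fc_info;
    fc_info.set_transpose_weights(false); // tell the layer the weights are supplied already transposed
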
diff --git a/arm_compute/graph/frontend/Types.h b/arm_compute/graph/frontend/Types.h
index 8f6312f318..ebbf7101ac 100644
--- a/arm_compute/graph/frontend/Types.h
+++ b/arm_compute/graph/frontend/Types.h
@@ -39,6 +39,7 @@ using graph::TensorShape;
using graph::PermutationVector;
using graph::ActivationLayerInfo;
+using graph::FullyConnectedLayerInfo;
using graph::NormalizationLayerInfo;
using graph::NormType;
using graph::PadStrideInfo;
diff --git a/arm_compute/graph/nodes/FullyConnectedLayerNode.h b/arm_compute/graph/nodes/FullyConnectedLayerNode.h
index 1bff6006c8..33f9b1eefe 100644
--- a/arm_compute/graph/nodes/FullyConnectedLayerNode.h
+++ b/arm_compute/graph/nodes/FullyConnectedLayerNode.h
@@ -49,12 +49,14 @@ public:
*
* @param[in] input_descriptor Input descriptor
* @param[in] num_outputs Number of output neurons
+ * @param[in] fc_info (Optional) Additional information about the fully connected layer
* @param[in] weights_quant_info (Optional) Weights quantization info
*
* @return Weights descriptor
*/
static TensorDescriptor compute_weights_descriptor(const TensorDescriptor &input_descriptor,
unsigned int num_outputs,
+ FullyConnectedLayerInfo fc_info = FullyConnectedLayerInfo(),
QuantizationInfo weights_quant_info = QuantizationInfo());
/** Computes fully connected layer output descriptor
*
diff --git a/src/graph/GraphBuilder.cpp b/src/graph/GraphBuilder.cpp
index 996af1dc90..7b09ec98f5 100644
--- a/src/graph/GraphBuilder.cpp
+++ b/src/graph/GraphBuilder.cpp
@@ -436,7 +436,7 @@ NodeID GraphBuilder::add_fully_connected_layer(Graph &g, NodeParams params, Node
const TensorDescriptor input_tensor_desc = get_tensor_descriptor(g, g.node(input.node_id)->outputs()[0]);
// Create weights node
- TensorDescriptor w_desc = FullyConnectedLayerNode::compute_weights_descriptor(input_tensor_desc, num_outputs, weights_quant_info);
+ TensorDescriptor w_desc = FullyConnectedLayerNode::compute_weights_descriptor(input_tensor_desc, num_outputs, fc_info, weights_quant_info);
NodeID w_nid = add_const_node_with_name(g, params, "Weights", w_desc, std::move(weights_accessor));
// Create bias nodes
diff --git a/src/graph/nodes/FullyConnectedLayer.cpp b/src/graph/nodes/FullyConnectedLayer.cpp
index f41e1a3c22..6ea0292505 100644
--- a/src/graph/nodes/FullyConnectedLayer.cpp
+++ b/src/graph/nodes/FullyConnectedLayer.cpp
@@ -40,6 +40,7 @@ FullyConnectedLayerNode::FullyConnectedLayerNode(unsigned int num_outputs, Quant
TensorDescriptor FullyConnectedLayerNode::compute_weights_descriptor(const TensorDescriptor &input_descriptor,
unsigned int num_outputs,
+ FullyConnectedLayerInfo fc_info,
QuantizationInfo weights_quant_info)
{
unsigned int num_weights = 1;
@@ -57,6 +58,12 @@ TensorDescriptor FullyConnectedLayerNode::compute_weights_descriptor(const Tenso
TensorDescriptor weights_descriptor = input_descriptor;
weights_descriptor.shape = TensorShape(num_weights, num_outputs);
+ // If the weights are not to be transposed by the function, they are expected to arrive already transposed; use the transposed shape
+ if(!fc_info.transpose_weights)
+ {
+ weights_descriptor.shape = TensorShape(num_outputs, num_weights);
+ }
+
// Set quantization info if present
if(!weights_quant_info.empty())
{
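
As a concrete illustration of the shape selection above (the sizes are hypothetical, not taken from the patch):

    // Suppose the flattened input has 2048 elements and the layer has 1000 outputs.
    // Default (fc_info.transpose_weights == true): the function transposes the
    // weights itself, so the descriptor keeps the trained layout:
    //   weights_descriptor.shape == TensorShape(2048, 1000)
    // Caller opts out via set_transpose_weights(false), i.e. the weights are
    // supplied already transposed:
    //   weights_descriptor.shape == TensorShape(1000, 2048)
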
diff --git a/src/runtime/CL/functions/CLFullyConnectedLayer.cpp b/src/runtime/CL/functions/CLFullyConnectedLayer.cpp
index ccd7813fbc..6fd78a3fc9 100644
--- a/src/runtime/CL/functions/CLFullyConnectedLayer.cpp
+++ b/src/runtime/CL/functions/CLFullyConnectedLayer.cpp
@@ -256,7 +256,7 @@ Status CLFullyConnectedLayer::validate(const ITensorInfo *input, const ITensorIn
const ITensorInfo &im2col_input = TensorInfo(input->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(compute_im2col_fc_shape(input)).set_data_layout(DataLayout::NCHW));
const ITensorInfo &reshaped_weights = TensorInfo(weights->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(compute_transposed_shape(*weights)));
- const ITensorInfo &converted_weights = TensorInfo(reshaped_weights.clone()->set_is_resizable(true).reset_padding());
+ const ITensorInfo &converted_weights = weights_reshaped ? TensorInfo(weights->clone()->set_is_resizable(true).reset_padding()) : TensorInfo(*reshaped_weights.clone());
const ITensorInfo &gemmlowp_output = TensorInfo(output->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::S32));
// Configure accumulate biases kernel for non quantized asymmetric types
diff --git a/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp b/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp
index 34cabb5c2e..25b8adc431 100644
--- a/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp
+++ b/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp
@@ -251,7 +251,7 @@ Status NEFullyConnectedLayer::validate(const ITensorInfo *input, const ITensorIn
const ITensorInfo &im2col_input = TensorInfo(input->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(compute_im2col_fc_shape(input)));
const ITensorInfo &reshaped_weights = TensorInfo(weights->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(compute_transposed_shape(*weights)));
- const ITensorInfo &converted_weights = TensorInfo(reshaped_weights.clone()->set_is_resizable(true).reset_padding());
+ const ITensorInfo &converted_weights = weights_reshaped ? TensorInfo(weights->clone()->set_is_resizable(true).reset_padding()) : TensorInfo(*reshaped_weights.clone());
const ITensorInfo &gemmlowp_output = TensorInfo(output->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::S32));
// Configure accumulate biases kernel for non quantized asymmetric types
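
A brief note on the identical CL and NEON validate() changes above; the names follow the code already shown, and the reading of weights_reshaped is an interpretation of the patch rather than something spelled out in the diff:

    // weights_reshaped == true : the weights are presumably already
    //                            reshaped/transposed by the caller, so the
    //                            converted-weights check starts from the
    //                            original *weights info.
    // weights_reshaped == false: the function reshapes the weights internally,
    //                            so the check starts from reshaped_weights, as
    //                            it did before this patch.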