From 195b0ba457d0020e1f54fb0c0378040e1c75d510 Mon Sep 17 00:00:00 2001 From: Georgios Pinitas Date: Thu, 2 Aug 2018 17:18:51 +0100 Subject: MLCE-36: FC tranpose weights Change-Id: I3b8a6c00e61ba6da459ca5fc7275393f9d073aed Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/142533 Reviewed-by: Anthony Barbier Tested-by: Jenkins --- arm_compute/core/Types.h | 11 +++++++++++ arm_compute/graph/frontend/Types.h | 1 + arm_compute/graph/nodes/FullyConnectedLayerNode.h | 2 ++ src/graph/GraphBuilder.cpp | 2 +- src/graph/nodes/FullyConnectedLayer.cpp | 7 +++++++ src/runtime/CL/functions/CLFullyConnectedLayer.cpp | 2 +- src/runtime/NEON/functions/NEFullyConnectedLayer.cpp | 2 +- 7 files changed, 24 insertions(+), 3 deletions(-) diff --git a/arm_compute/core/Types.h b/arm_compute/core/Types.h index 81d652dd7d..d9109e4565 100644 --- a/arm_compute/core/Types.h +++ b/arm_compute/core/Types.h @@ -701,6 +701,17 @@ struct FullyConnectedLayerInfo weights_trained_layout = layout; return *this; } + /** Sets the transpose weights flag + * + * @param[in] should_transpose_weights Boolean flag indicating if weights should be transposed + * + * @return Updated object + */ + FullyConnectedLayerInfo &set_transpose_weights(bool should_transpose_weights) + { + transpose_weights = should_transpose_weights; + return *this; + } }; /** Pooling Layer Information class */ diff --git a/arm_compute/graph/frontend/Types.h b/arm_compute/graph/frontend/Types.h index 8f6312f318..ebbf7101ac 100644 --- a/arm_compute/graph/frontend/Types.h +++ b/arm_compute/graph/frontend/Types.h @@ -39,6 +39,7 @@ using graph::TensorShape; using graph::PermutationVector; using graph::ActivationLayerInfo; +using graph::FullyConnectedLayerInfo; using graph::NormalizationLayerInfo; using graph::NormType; using graph::PadStrideInfo; diff --git a/arm_compute/graph/nodes/FullyConnectedLayerNode.h b/arm_compute/graph/nodes/FullyConnectedLayerNode.h index 1bff6006c8..33f9b1eefe 100644 --- 
a/arm_compute/graph/nodes/FullyConnectedLayerNode.h +++ b/arm_compute/graph/nodes/FullyConnectedLayerNode.h @@ -49,12 +49,14 @@ public: * * @param[in] input_descriptor Input descriptor * @param[in] num_outputs Number of output neurons + * @param[in] fc_info (Optional) Additional information about the fully connected layer * @param[in] weights_quant_info (Optional) Weights quantization info * * @return Weights descriptor */ static TensorDescriptor compute_weights_descriptor(const TensorDescriptor &input_descriptor, unsigned int num_outputs, + FullyConnectedLayerInfo fc_info = FullyConnectedLayerInfo(), QuantizationInfo weights_quant_info = QuantizationInfo()); /** Computes fully connected layer output descriptor * diff --git a/src/graph/GraphBuilder.cpp b/src/graph/GraphBuilder.cpp index 996af1dc90..7b09ec98f5 100644 --- a/src/graph/GraphBuilder.cpp +++ b/src/graph/GraphBuilder.cpp @@ -436,7 +436,7 @@ NodeID GraphBuilder::add_fully_connected_layer(Graph &g, NodeParams params, Node const TensorDescriptor input_tensor_desc = get_tensor_descriptor(g, g.node(input.node_id)->outputs()[0]); // Create weights node - TensorDescriptor w_desc = FullyConnectedLayerNode::compute_weights_descriptor(input_tensor_desc, num_outputs, weights_quant_info); + TensorDescriptor w_desc = FullyConnectedLayerNode::compute_weights_descriptor(input_tensor_desc, num_outputs, fc_info, weights_quant_info); NodeID w_nid = add_const_node_with_name(g, params, "Weights", w_desc, std::move(weights_accessor)); // Create bias nodes diff --git a/src/graph/nodes/FullyConnectedLayer.cpp b/src/graph/nodes/FullyConnectedLayer.cpp index f41e1a3c22..6ea0292505 100644 --- a/src/graph/nodes/FullyConnectedLayer.cpp +++ b/src/graph/nodes/FullyConnectedLayer.cpp @@ -40,6 +40,7 @@ FullyConnectedLayerNode::FullyConnectedLayerNode(unsigned int num_outputs, Quant TensorDescriptor FullyConnectedLayerNode::compute_weights_descriptor(const TensorDescriptor &input_descriptor, unsigned int num_outputs, + 
FullyConnectedLayerInfo fc_info, QuantizationInfo weights_quant_info) { unsigned int num_weights = 1; @@ -57,6 +58,12 @@ TensorDescriptor FullyConnectedLayerNode::compute_weights_descriptor(const Tenso TensorDescriptor weights_descriptor = input_descriptor; weights_descriptor.shape = TensorShape(num_weights, num_outputs); + // If weights are transposed, use transposed shape + if(!fc_info.transpose_weights) + { + weights_descriptor.shape = TensorShape(num_outputs, num_weights); + } + // Set quantization info if present if(!weights_quant_info.empty()) { diff --git a/src/runtime/CL/functions/CLFullyConnectedLayer.cpp b/src/runtime/CL/functions/CLFullyConnectedLayer.cpp index ccd7813fbc..6fd78a3fc9 100644 --- a/src/runtime/CL/functions/CLFullyConnectedLayer.cpp +++ b/src/runtime/CL/functions/CLFullyConnectedLayer.cpp @@ -256,7 +256,7 @@ Status CLFullyConnectedLayer::validate(const ITensorInfo *input, const ITensorIn const ITensorInfo &im2col_input = TensorInfo(input->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(compute_im2col_fc_shape(input)).set_data_layout(DataLayout::NCHW)); const ITensorInfo &reshaped_weights = TensorInfo(weights->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(compute_transposed_shape(*weights))); - const ITensorInfo &converted_weights = TensorInfo(reshaped_weights.clone()->set_is_resizable(true).reset_padding()); + const ITensorInfo &converted_weights = weights_reshaped ?
TensorInfo(weights->clone()->set_is_resizable(true).reset_padding()) : TensorInfo(*reshaped_weights.clone()); const ITensorInfo &gemmlowp_output = TensorInfo(output->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::S32)); // Configure accumulate biases kernel for non quantized asymmetric types diff --git a/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp b/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp index 34cabb5c2e..25b8adc431 100644 --- a/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp +++ b/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp @@ -251,7 +251,7 @@ Status NEFullyConnectedLayer::validate(const ITensorInfo *input, const ITensorIn const ITensorInfo &im2col_input = TensorInfo(input->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(compute_im2col_fc_shape(input))); const ITensorInfo &reshaped_weights = TensorInfo(weights->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(compute_transposed_shape(*weights))); - const ITensorInfo &converted_weights = TensorInfo(reshaped_weights.clone()->set_is_resizable(true).reset_padding()); + const ITensorInfo &converted_weights = weights_reshaped ? TensorInfo(weights->clone()->set_is_resizable(true).reset_padding()) : TensorInfo(*reshaped_weights.clone()); const ITensorInfo &gemmlowp_output = TensorInfo(output->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::S32)); // Configure accumulate biases kernel for non quantized asymmetric types -- cgit v1.2.1