author    Georgios Pinitas <georgios.pinitas@arm.com>    2019-10-28 14:16:31 +0000
committer Georgios Pinitas <georgios.pinitas@arm.com>    2019-10-30 16:56:48 +0000
commit    44bfc3fe8dacfc4297702ca88323ea675a7c52e2 (patch)
tree      f7cc198a16af56b1ff2432972360c31dfbe13c3f
parent    2697fd8fa42425f7bfdd60dd486d4c2132b06523 (diff)
COMPMID-1671: Allow fp mixed precision in CLFCLayer.
Adds the ability to request accumulation in float instead of half to avoid
any accuracy-related issues.

Change-Id: I97de27fa36853834cd9eb69c0077e1cb1e6dd5ec
Signed-off-by: Georgios Pinitas <georgios.pinitas@arm.com>
Reviewed-on: https://review.mlplatform.org/c/2173
Reviewed-by: Manuel Bottini <manuel.bottini@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Giorgio Arena <giorgio.arena@arm.com>
-rw-r--r--  arm_compute/core/Types.h                                   |  1
-rw-r--r--  arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h   |  6
-rw-r--r--  src/runtime/CL/functions/CLFullyConnectedLayer.cpp         | 58
3 files changed, 33 insertions(+), 32 deletions(-)
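
For context, a minimal sketch of how a caller can opt into the wider
accumulators this patch adds. The tensor shapes (128 inputs, 64 outputs,
no batching) are illustrative and not taken from the patch; only the
fc_info.fp_mixed_precision line exercises the new flag.

// Sketch: FP16 fully connected layer with F32 accumulation requested.
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h"

using namespace arm_compute;

int main()
{
    CLScheduler::get().default_init();

    // Illustrative FP16 tensors: 128 inputs, 64 outputs.
    CLTensor src, weights, bias, dst;
    src.allocator()->init(TensorInfo(TensorShape(128U), 1, DataType::F16));
    weights.allocator()->init(TensorInfo(TensorShape(128U, 64U), 1, DataType::F16));
    bias.allocator()->init(TensorInfo(TensorShape(64U), 1, DataType::F16));
    dst.allocator()->init(TensorInfo(TensorShape(64U), 1, DataType::F16));

    // The flag introduced by this patch: accumulate in F32 instead of F16.
    FullyConnectedLayerInfo fc_info;
    fc_info.fp_mixed_precision = true;

    CLFullyConnectedLayer fc;
    fc.configure(&src, &weights, &bias, &dst, fc_info);

    src.allocator()->allocate();
    weights.allocator()->allocate();
    bias.allocator()->allocate();
    dst.allocator()->allocate();

    fc.run();
    CLScheduler::get().sync();
    return 0;
}
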
diff --git a/arm_compute/core/Types.h b/arm_compute/core/Types.h
index 0a25277b57..f4955ed457 100644
--- a/arm_compute/core/Types.h
+++ b/arm_compute/core/Types.h
@@ -805,6 +805,7 @@ struct FullyConnectedLayerInfo
bool transpose_weights{ true }; /**< Transpose weights if true. */
bool are_weights_reshaped{ false }; /**< Reshape the weights tensor if false. */
bool retain_internal_weights{ false }; /**< Retain internal reshaped weights. */
+ bool fp_mixed_precision{ false }; /**< Use wider accumulators (32 bit instead of 16 for FP16) to improve accuracy. */
/** Sets the weights trained data layout
*
diff --git a/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h b/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h
index 7f872532e4..f284359663 100644
--- a/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h
+++ b/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h
@@ -174,9 +174,9 @@ public:
void prepare() override;
private:
- void configure_fc_fc(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *bias, ICLTensor *output, bool retain_internal_weights);
- void configure_conv_fc(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *bias, ICLTensor *output, bool retain_internal_weights);
- void configure_mm(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *bias, ICLTensor *output, bool retain_internal_weights);
+ void configure_fc_fc(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *bias, ICLTensor *output, const FullyConnectedLayerInfo &fc_info);
+ void configure_conv_fc(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *bias, ICLTensor *output, const FullyConnectedLayerInfo &fc_info);
+ void configure_mm(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *bias, ICLTensor *output, const FullyConnectedLayerInfo &fc_info);
MemoryGroup _memory_group;
IWeightsManager *_weights_manager;
diff --git a/src/runtime/CL/functions/CLFullyConnectedLayer.cpp b/src/runtime/CL/functions/CLFullyConnectedLayer.cpp
index 6ad4cd5468..5bcf38d1c4 100644
--- a/src/runtime/CL/functions/CLFullyConnectedLayer.cpp
+++ b/src/runtime/CL/functions/CLFullyConnectedLayer.cpp
@@ -73,21 +73,21 @@ Status construct_gemmlowp_output_stage(const ITensorInfo &input, const ITensorIn
return Status{};
}
-Status validate_mm(const ITensorInfo &input, const ITensorInfo &weights, const ITensorInfo *bias, const ITensorInfo &output)
+Status validate_mm(const ITensorInfo &input, const ITensorInfo &weights, const ITensorInfo *bias, const ITensorInfo &output, const FullyConnectedLayerInfo &fc_info)
{
GEMMLowpOutputStageInfo gemmlowp_output_stage;
ARM_COMPUTE_RETURN_ON_ERROR(construct_gemmlowp_output_stage(input, weights, output, gemmlowp_output_stage));
- const GEMMInfo &gemm_info = GEMMInfo(false, // is_a_reshaped
- false, // is_b_reshaped
- true, // reshape_b_only_on_first_run
- 0, // depth_output_gemm3d
- false, // reinterpret_input_as_3d
- false, // retain_internal_weights
- gemmlowp_output_stage, // gemmlowp_output_stage
- false, // fp_mixed_precision
- true, // broadcast_bias
- ActivationLayerInfo()); // activation_info
+ const GEMMInfo &gemm_info = GEMMInfo(false, // is_a_reshaped
+ false, // is_b_reshaped
+ true, // reshape_b_only_on_first_run
+ 0, // depth_output_gemm3d
+ false, // reinterpret_input_as_3d
+ fc_info.retain_internal_weights, // retain_internal_weights
+ gemmlowp_output_stage, // gemmlowp_output_stage
+ fc_info.fp_mixed_precision, // fp_mixed_precision
+ true, // broadcast_bias
+ ActivationLayerInfo()); // activation_info
if(is_data_type_quantized_asymmetric(input.data_type()))
{
@@ -133,21 +133,21 @@ CLFullyConnectedLayer::CLFullyConnectedLayer(std::shared_ptr<IMemoryManager> mem
_are_weights_reshaped(true), _is_fc_after_conv(true), _is_quantized(false), _is_prepared(false), _original_weights(nullptr)
{
}
-void CLFullyConnectedLayer::configure_mm(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *bias, ICLTensor *output, bool retain_internal_weights)
+void CLFullyConnectedLayer::configure_mm(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *bias, ICLTensor *output, const FullyConnectedLayerInfo &fc_info)
{
GEMMLowpOutputStageInfo gemmlowp_output_stage;
construct_gemmlowp_output_stage(*input->info(), *weights->info(), *output->info(), gemmlowp_output_stage);
- const GEMMInfo &gemm_info = GEMMInfo(false, // is_a_reshaped
- false, // is_b_reshaped
- true, // reshape_b_only_on_first_run
- 0, // depth_output_gemm3d
- false, // reinterpret_input_as_3d
- retain_internal_weights, // retain_internal_weights
- gemmlowp_output_stage, // gemmlowp_output_stage
- false, // fp_mixed_precision
- true, // broadcast_bias
- ActivationLayerInfo()); // activation_info
+ const GEMMInfo &gemm_info = GEMMInfo(false, // is_a_reshaped
+ false, // is_b_reshaped
+ true, // reshape_b_only_on_first_run
+ 0, // depth_output_gemm3d
+ false, // reinterpret_input_as_3d
+ fc_info.retain_internal_weights, // retain_internal_weights
+ gemmlowp_output_stage, // gemmlowp_output_stage
+ fc_info.fp_mixed_precision, // fp_mixed_precision
+ true, // broadcast_bias
+ ActivationLayerInfo()); // activation_info
if(_is_quantized)
{
@@ -173,7 +173,7 @@ void CLFullyConnectedLayer::configure_mm(const ICLTensor *input, const ICLTensor
}
}
-void CLFullyConnectedLayer::configure_conv_fc(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *bias, ICLTensor *output, bool retain_internal_weights)
+void CLFullyConnectedLayer::configure_conv_fc(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *bias, ICLTensor *output, const FullyConnectedLayerInfo &fc_info)
{
ARM_COMPUTE_ERROR_ON((weights->info()->dimension(1) != (input->info()->dimension(0) * input->info()->dimension(1) * input->info()->dimension(2))));
@@ -188,18 +188,18 @@ void CLFullyConnectedLayer::configure_conv_fc(const ICLTensor *input, const ICLT
_flatten_layer.configure(input, &_flatten_output);
// Configure matrix multiply kernel
- configure_mm(&_flatten_output, weights, bias, output, retain_internal_weights);
+ configure_mm(&_flatten_output, weights, bias, output, fc_info);
// Allocate the output tensor for flatten once all the configure methods have been called
_flatten_output.allocator()->allocate();
}
-void CLFullyConnectedLayer::configure_fc_fc(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *bias, ICLTensor *output, bool retain_internal_weights)
+void CLFullyConnectedLayer::configure_fc_fc(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *bias, ICLTensor *output, const FullyConnectedLayerInfo &fc_info)
{
ARM_COMPUTE_ERROR_ON(input->info()->dimension(0) != weights->info()->dimension(1));
// Configure matrix multiply kernel
- configure_mm(input, weights, bias, output, retain_internal_weights);
+ configure_mm(input, weights, bias, output, fc_info);
}
void CLFullyConnectedLayer::configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output,
@@ -289,12 +289,12 @@ void CLFullyConnectedLayer::configure(const ICLTensor *input, const ICLTensor *w
if(_is_fc_after_conv)
{
// Fully Connected layer after a Convolution Layer without batches
- configure_conv_fc(input, weights_to_use, biases, output, fc_info.retain_internal_weights);
+ configure_conv_fc(input, weights_to_use, biases, output, fc_info);
}
else
{
// Fully Connected layer after a Fully Connected Layer without batches
- configure_fc_fc(input, weights_to_use, biases, output, fc_info.retain_internal_weights);
+ configure_fc_fc(input, weights_to_use, biases, output, fc_info);
}
}
@@ -368,7 +368,7 @@ Status CLFullyConnectedLayer::validate(const ITensorInfo *input, const ITensorIn
}
// Validate matrix multiply kernel
- ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(*input_to_use, *weights_to_use, biases, *output));
+ ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(*input_to_use, *weights_to_use, biases, *output, fc_info));
return Status{};
}
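
As a usage note, since validate_mm() now receives the whole fc_info, the
public static CLFullyConnectedLayer::validate() also checks the new flag.
A minimal sketch of gating on it before configuring; shapes are again
illustrative and the helper name is hypothetical:

// Sketch: ask the backend whether a mixed-precision FC configuration is
// supported before committing to it.
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h"

using namespace arm_compute;

bool mixed_precision_fc_supported()
{
    const TensorInfo src(TensorShape(128U), 1, DataType::F16);
    const TensorInfo weights(TensorShape(128U, 64U), 1, DataType::F16);
    const TensorInfo bias(TensorShape(64U), 1, DataType::F16);
    const TensorInfo dst(TensorShape(64U), 1, DataType::F16);

    FullyConnectedLayerInfo fc_info;
    fc_info.fp_mixed_precision = true;

    // validate() returns a Status that converts to true on success.
    return bool(CLFullyConnectedLayer::validate(&src, &weights, &bias, &dst, fc_info));
}
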