author    Georgios Pinitas <georgios.pinitas@arm.com>  2018-10-16 19:10:46 +0100
committer Anthony Barbier <anthony.barbier@arm.com>    2018-11-02 16:55:45 +0000
commit    08346e9b9a7dadd2f0765aea64e656902d843e8a (patch)
tree      6caaebd05a3b78a4a1e8bfb077aa02d75bd4cca0
parent    8aaf93e8c12ce93d3d0082d4f4b70376f15536da (diff)
download  ComputeLibrary-08346e9b9a7dadd2f0765aea64e656902d843e8a.tar.gz
COMPMID-1451: Fuse RELU, LU_BOUNDED_RELU with requantization in NEGEMMConvolutionLayer.

Change-Id: Iea5f2c5bcac8051c4c7655a6eabb2c43772eb31f
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/154104
Tested-by: bsgcomp <bsgcomp@arm.com>
Reviewed-by: Michele DiGiorgio <michele.digiorgio@arm.com>
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
-rw-r--r--  arm_compute/core/Types.h                               |  4
-rw-r--r--  arm_compute/graph/backends/FunctionHelpers.h           | 18
-rw-r--r--  arm_compute/graph/nodes/BatchNormalizationLayerNode.h  |  3
-rw-r--r--  arm_compute/graph/nodes/ConvolutionLayerNode.h         | 24
-rw-r--r--  src/graph/backends/GLES/GCFunctionsFactory.cpp         | 10
-rw-r--r--  src/graph/backends/NEON/NEFunctionFactory.cpp          | 14
-rw-r--r--  src/graph/mutators/NodeFusionMutator.cpp               | 30
-rw-r--r--  src/graph/nodes/BatchNormalizationLayerNode.cpp        |  2
-rw-r--r--  src/graph/nodes/ConvolutionLayerNode.cpp               | 14
-rw-r--r--  src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp  | 92
10 files changed, 147 insertions, 64 deletions
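
The core idea of the patch: instead of running a separate activation kernel after the quantized GEMM, the activation's real-valued bounds are converted into QASYMM8 space and applied as the clamping range of the requantization (output stage). A worked sketch of that conversion, using hypothetical quantization parameters (scale = 0.1f, offset = 128) rather than values taken from this patch; it mirrors what sqcvt_qasymm8_f32() does in the code below:

    #include <algorithm>
    #include <cmath>
    #include <cstdint>

    // quantize(x) = round(x / scale) + offset, saturated to [0, 255]
    uint8_t quantize(float x, float scale, int offset)
    {
        const int q = static_cast<int>(std::lround(x / scale)) + offset;
        return static_cast<uint8_t>(std::min(std::max(q, 0), 255));
    }

    // RELU clamps to [0, +inf):          min = quantize(0.f) = 128, max = 255
    // LU_BOUNDED_RELU(a = 6.f, b = 0.f) clamps to [0, 6]:
    //                                    min = quantize(0.f) = 128, max = quantize(6.f) = 188
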
diff --git a/arm_compute/core/Types.h b/arm_compute/core/Types.h
index c0350bc7a4..8a8cd509fa 100644
--- a/arm_compute/core/Types.h
+++ b/arm_compute/core/Types.h
@@ -148,7 +148,7 @@ struct QuantizationInfo
*
* @return True if the given quantization info is the same.
*/
- bool operator==(const QuantizationInfo &other)
+ bool operator==(const QuantizationInfo &other) const
{
return scale == other.scale && offset == other.offset;
}
@@ -159,7 +159,7 @@ struct QuantizationInfo
*
* @return True if the given quantization info is not the same.
*/
- bool operator!=(const QuantizationInfo &other)
+ bool operator!=(const QuantizationInfo &other) const
{
return !(*this == other);
}
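
Const-qualifying the two comparison operators is what lets them be called on const QuantizationInfo references, which the new fusion code relies on. A minimal sketch of the call pattern that the unqualified signatures would have rejected:

    // Would not compile before this patch: operator== was non-const,
    // so it could not be invoked through a const reference.
    bool same_quantization(const QuantizationInfo &a, const QuantizationInfo &b)
    {
        return a == b;
    }
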
diff --git a/arm_compute/graph/backends/FunctionHelpers.h b/arm_compute/graph/backends/FunctionHelpers.h
index 56f4173fa0..a1cadcbf4c 100644
--- a/arm_compute/graph/backends/FunctionHelpers.h
+++ b/arm_compute/graph/backends/FunctionHelpers.h
@@ -266,10 +266,11 @@ std::unique_ptr<IFunction> create_convolution_layer(ConvolutionLayerNode &node,
biases->info()->set_data_type(DataType::S32);
}
- const PadStrideInfo conv_info = node.convolution_info();
- const unsigned int num_groups = node.num_groups();
- const ConvolutionMethod conv_algorithm = node.convolution_method();
- const bool fast_math = node.fast_math_hint() == FastMathHint::Enabled;
+ const PadStrideInfo conv_info = node.convolution_info();
+ const unsigned int num_groups = node.num_groups();
+ const ConvolutionMethod conv_algorithm = node.convolution_method();
+ const bool fast_math = node.fast_math_hint() == FastMathHint::Enabled;
+ const ActivationLayerInfo fused_act = node.fused_activation();
// Create and configure function (we assume that functions have been validated before creation)
std::shared_ptr<IMemoryManager> mm = get_memory_manager(ctx, TargetInfo::TargetType);
@@ -281,28 +282,28 @@ std::unique_ptr<IFunction> create_convolution_layer(ConvolutionLayerNode &node,
ARM_COMPUTE_ERROR_ON_MSG(num_groups != 1, "WinogradConvolutionLayer does not support grouping!");
std::tie(func, func_name) = create_named_memory_managed_function<typename ConvolutionLayerFunctions::WinogradConvolutionLayer>(
std::string("WinogradConvolutionLayer"), mm,
- input, weights, biases, output, conv_info, ActivationLayerInfo(), fast_math);
+ input, weights, biases, output, conv_info, fused_act, fast_math);
}
else if(conv_algorithm == ConvolutionMethod::Direct)
{
ARM_COMPUTE_ERROR_ON_MSG(num_groups != 1, "DirectConvolutionLayer does not support grouping!");
std::tie(func, func_name) = create_named_function<typename ConvolutionLayerFunctions::DirectConvolutionLayer>(
std::string("DirectConvolutionLayer"),
- input, weights, biases, output, conv_info);
+ input, weights, biases, output, conv_info, fused_act);
}
else if(conv_algorithm == ConvolutionMethod::GEMM)
{
std::tie(func, func_name) = create_named_memory_managed_function<typename ConvolutionLayerFunctions::GEMMConvolutionLayer>(
std::string("GEMMConvolutionLayer"), mm,
input, weights, biases, output, conv_info,
- WeightsInfo(), Size2D(1U, 1U), ActivationLayerInfo(), num_groups);
+ WeightsInfo(), Size2D(1U, 1U), fused_act, num_groups);
}
else
{
std::tie(func, func_name) = create_named_memory_managed_function<typename ConvolutionLayerFunctions::GenericConvolutionLayer>(
std::string("GenericConvolutionLayer"), mm,
input, weights, biases, output, conv_info,
- WeightsInfo(), Size2D(1U, 1U), ActivationLayerInfo(), fast_math, num_groups);
+ WeightsInfo(), Size2D(1U, 1U), fused_act, fast_math, num_groups);
}
// Log info
@@ -321,6 +322,7 @@ std::unique_ptr<IFunction> create_convolution_layer(ConvolutionLayerNode &node,
<< " Input shape: " << input->info()->tensor_shape()
<< " Weights shape: " << weights->info()->tensor_shape()
<< " Output shape: " << output->info()->tensor_shape()
+ << (fused_act.enabled() ? " " + to_string(fused_act.activation()) : "")
<< std::endl);
return func;
}
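
Since a default-constructed ActivationLayerInfo reports enabled() == false, passing node.fused_activation() through is a strict generalization of the old ActivationLayerInfo() argument: graphs where the mutator fused nothing behave exactly as before. A minimal sketch of that invariant:

    ActivationLayerInfo none; // what an unfused node hands to the backend
    ActivationLayerInfo relu(ActivationLayerInfo::ActivationFunction::RELU);
    // none.enabled() == false -> backend applies no activation
    // relu.enabled() == true  -> backend folds the activation into the layer
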
diff --git a/arm_compute/graph/nodes/BatchNormalizationLayerNode.h b/arm_compute/graph/nodes/BatchNormalizationLayerNode.h
index a364d1c5ae..b2284782bd 100644
--- a/arm_compute/graph/nodes/BatchNormalizationLayerNode.h
+++ b/arm_compute/graph/nodes/BatchNormalizationLayerNode.h
@@ -62,6 +62,9 @@ public:
TensorDescriptor configure_output(size_t idx) const override;
void accept(INodeVisitor &v) override;
+public:
+ static constexpr NodeType node_type = NodeType::BatchNormalizationLayer;
+
private:
float _epsilon;
ActivationLayerInfo _fused_activation;
diff --git a/arm_compute/graph/nodes/ConvolutionLayerNode.h b/arm_compute/graph/nodes/ConvolutionLayerNode.h
index 0698ac1360..eecb927ab2 100644
--- a/arm_compute/graph/nodes/ConvolutionLayerNode.h
+++ b/arm_compute/graph/nodes/ConvolutionLayerNode.h
@@ -80,6 +80,16 @@ public:
* @return Number of groups in convolution
*/
unsigned int num_groups() const;
+ /** Returns fused activation
+ *
+ * @return Fused activation
+ */
+ ActivationLayerInfo fused_activation() const;
+ /** Sets fused activation
+ *
+ * @param[in] fused_activation Fused activation to set
+ */
+ void set_fused_activation(ActivationLayerInfo fused_activation);
/** Computes convolution output descriptor
*
* @param[in] input_descriptor Input descriptor
@@ -98,12 +108,16 @@ public:
TensorDescriptor configure_output(size_t idx) const override;
void accept(INodeVisitor &v) override;
+public:
+ static constexpr NodeType node_type = NodeType::ConvolutionLayer;
+
private:
- PadStrideInfo _info;
- unsigned int _num_groups;
- ConvolutionMethod _method;
- FastMathHint _fast_math_hint;
- QuantizationInfo _out_quant_info;
+ PadStrideInfo _info;
+ unsigned int _num_groups;
+ ConvolutionMethod _method;
+ FastMathHint _fast_math_hint;
+ QuantizationInfo _out_quant_info;
+ ActivationLayerInfo _fused_activation;
};
} // namespace graph
} // namespace arm_compute
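
The static node_type member is what makes the templated mutator below possible: each fusable node class advertises its NodeType at compile time, so a single function template can match any of them. A minimal sketch of the pattern, with a hypothetical helper:

    template <typename N>
    bool is_fusion_candidate(const INode &node)
    {
        // N::node_type is a compile-time constant per instantiation,
        // e.g. NodeType::ConvolutionLayer for ConvolutionLayerNode.
        return node.type() == N::node_type && node.output_edges().size() == 1;
    }
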
diff --git a/src/graph/backends/GLES/GCFunctionsFactory.cpp b/src/graph/backends/GLES/GCFunctionsFactory.cpp
index 6268583938..02a05679a3 100644
--- a/src/graph/backends/GLES/GCFunctionsFactory.cpp
+++ b/src/graph/backends/GLES/GCFunctionsFactory.cpp
@@ -120,8 +120,9 @@ std::unique_ptr<IFunction> create_convolution_layer<GCConvolutionLayerFunctions,
biases->info()->set_data_type(DataType::S32);
}
- const PadStrideInfo conv_info = node.convolution_info();
- const ConvolutionMethod conv_algorithm = node.convolution_method();
+ const PadStrideInfo conv_info = node.convolution_info();
+ const ConvolutionMethod conv_algorithm = node.convolution_method();
+ const ActivationLayerInfo fused_act = node.fused_activation();
// Create and configure function (we assume that functions have been validated before creation)
std::shared_ptr<IMemoryManager> mm = get_memory_manager(ctx, GCTargetInfo::TargetType);
@@ -132,13 +133,13 @@ std::unique_ptr<IFunction> create_convolution_layer<GCConvolutionLayerFunctions,
{
std::tie(func, func_name) = create_named_function<GCConvolutionLayerFunctions::DirectConvolutionLayer>(
std::string("DirectConvolutionLayer"),
- input, weights, biases, output, conv_info);
+ input, weights, biases, output, conv_info, fused_act);
}
else
{
std::tie(func, func_name) = create_named_memory_managed_function<GCConvolutionLayerFunctions::GenericConvolutionLayer>(
std::string("ConvolutionLayer"), mm,
- input, weights, biases, output, conv_info);
+ input, weights, biases, output, conv_info, WeightsInfo(), Size2D(1U, 1U), fused_act);
}
// Log info
@@ -149,6 +150,7 @@ std::unique_ptr<IFunction> create_convolution_layer<GCConvolutionLayerFunctions,
<< " Input shape: " << input->info()->tensor_shape()
<< " Weights shape: " << weights->info()->tensor_shape()
<< " Output shape: " << output->info()->tensor_shape()
+ << (fused_act.enabled() ? " " + to_string(fused_act.activation()) : "")
<< std::endl);
return func;
}
diff --git a/src/graph/backends/NEON/NEFunctionFactory.cpp b/src/graph/backends/NEON/NEFunctionFactory.cpp
index 286c890088..e967c1be61 100644
--- a/src/graph/backends/NEON/NEFunctionFactory.cpp
+++ b/src/graph/backends/NEON/NEFunctionFactory.cpp
@@ -97,8 +97,9 @@ std::unique_ptr<IFunction> create_convolution_layer<NEConvolutionLayerFunctions,
biases->info()->set_data_type(DataType::S32);
}
- const PadStrideInfo conv_info = node.convolution_info();
- const ConvolutionMethod conv_algorithm = node.convolution_method();
+ const PadStrideInfo conv_info = node.convolution_info();
+ const ConvolutionMethod conv_algorithm = node.convolution_method();
+ const ActivationLayerInfo fused_act = node.fused_activation();
// Create and configure function (we assume that functions have been validated before creation)
std::shared_ptr<IMemoryManager> mm = get_memory_manager(ctx, Target::NEON);
@@ -107,22 +108,22 @@ std::unique_ptr<IFunction> create_convolution_layer<NEConvolutionLayerFunctions,
if(conv_algorithm == ConvolutionMethod::Direct)
{
std::tie(func, func_name) = create_named_memory_managed_function<NEDirectConvolutionLayer>(
- std::string("DirectConvolutionLayer"), mm, input, weights, biases, output, conv_info);
+ std::string("DirectConvolutionLayer"), mm, input, weights, biases, output, conv_info, fused_act);
}
else if(conv_algorithm == ConvolutionMethod::GEMM)
{
std::tie(func, func_name) = create_named_memory_managed_function<NEGEMMConvolutionLayer>(
- std::string("GEMMConvolutionLayer"), mm, input, weights, biases, output, conv_info);
+ std::string("GEMMConvolutionLayer"), mm, input, weights, biases, output, conv_info, WeightsInfo(), Size2D(1, 1), fused_act);
}
else if(conv_algorithm == ConvolutionMethod::Winograd)
{
std::tie(func, func_name) = create_named_memory_managed_function<NEWinogradConvolutionLayer>(
- std::string("WinogradConvolutionLayer"), mm, input, weights, biases, output, conv_info);
+ std::string("WinogradConvolutionLayer"), mm, input, weights, biases, output, conv_info, fused_act);
}
else
{
std::tie(func, func_name) = create_named_memory_managed_function<NEConvolutionLayer>(
- std::string("ConvolutionLayer"), mm, input, weights, biases, output, conv_info);
+ std::string("ConvolutionLayer"), mm, input, weights, biases, output, conv_info, WeightsInfo(), Size2D(1, 1), fused_act);
}
// Log info
@@ -140,6 +141,7 @@ std::unique_ptr<IFunction> create_convolution_layer<NEConvolutionLayerFunctions,
<< " Input shape: " << input->info()->tensor_shape()
<< " Weights shape: " << weights->info()->tensor_shape()
<< " Output shape: " << output->info()->tensor_shape()
+ << (fused_act.enabled() ? " " + to_string(fused_act.activation()) : "")
<< std::endl);
return func;
}
diff --git a/src/graph/mutators/NodeFusionMutator.cpp b/src/graph/mutators/NodeFusionMutator.cpp
index 82bfe25a3e..7e66ce0757 100644
--- a/src/graph/mutators/NodeFusionMutator.cpp
+++ b/src/graph/mutators/NodeFusionMutator.cpp
@@ -38,26 +38,24 @@ namespace graph
{
namespace detail
{
-void fuse_batch_norm_with_activation(Graph &g)
+template <typename N>
+void fuse_node_with_activation(Graph &g, const std::set<Activation> &supported_fused_activations)
{
- // Supported activations when fusing
- const std::set<Activation> supported_fused_activations = { Activation::RELU, Activation::BOUNDED_RELU, Activation::LU_BOUNDED_RELU };
-
// Not interested in the order of nodes
for(auto &node : g.nodes())
{
// Check if the node is of the requested type and is not a branching node
- if(node && node->type() == NodeType::BatchNormalizationLayer && node->output_edges().size() == 1)
+ if(node && node->type() == N::node_type && node->output_edges().size() == 1)
{
auto output_edge_id = *node->output_edges().begin();
auto output_edge = g.edge(output_edge_id);
// Check if following node is an activation layer node
if((output_edge != nullptr) && (output_edge->consumer() != nullptr) && (output_edge->consumer()->type() == NodeType::ActivationLayer))
{
- auto *bn_node = arm_compute::utils::cast::polymorphic_downcast<BatchNormalizationLayerNode *>(output_edge->producer());
+ auto *n_node = arm_compute::utils::cast::polymorphic_downcast<N *>(output_edge->producer());
auto *act_node = arm_compute::utils::cast::polymorphic_downcast<ActivationLayerNode *>(output_edge->consumer());
- ARM_COMPUTE_ERROR_ON(act_node->output(0) == nullptr || bn_node->output(0) == nullptr);
+ ARM_COMPUTE_ERROR_ON(act_node->output(0) == nullptr || n_node->output(0) == nullptr);
// Check if activation is supported for fusion
if(supported_fused_activations.count(act_node->activation_info().activation()) == 0)
@@ -65,17 +63,17 @@ void fuse_batch_norm_with_activation(Graph &g)
continue;
}
- ARM_COMPUTE_LOG_GRAPH_VERBOSE("Fusing Batch Normalization node with ID : " << output_edge->producer_id()
+ ARM_COMPUTE_LOG_GRAPH_VERBOSE("Fusing node with ID : " << output_edge->producer_id()
<< " with Activation Layer node with ID : " << output_edge->consumer_id() << std::endl);
// Prevent fusion if the node has an output accessor
- if(bn_node->output(0)->accessor() == nullptr)
+ if(n_node->output(0)->accessor() == nullptr)
{
// Get driving nodes of activation node
std::vector<NodeIdxPair> act_driving_nodes = get_driving_nodes(*act_node);
// Set fused activation on the node
- bn_node->set_fused_activation(act_node->activation_info());
+ n_node->set_fused_activation(act_node->activation_info());
// Extract activation node accessor if any
auto act_node_accessor = act_node->output(0)->extract_accessor();
@@ -86,15 +84,15 @@ void fuse_batch_norm_with_activation(Graph &g)
// Update the node's outputs
for(auto &driving_node : act_driving_nodes)
{
- g.add_connection(bn_node->id(), 0, driving_node.node_id, driving_node.index);
+ g.add_connection(n_node->id(), 0, driving_node.node_id, driving_node.index);
}
// Update accessor of the node
- bn_node->output(0)->set_accessor(std::move(act_node_accessor));
+ n_node->output(0)->set_accessor(std::move(act_node_accessor));
}
else
{
- ARM_COMPUTE_LOG_GRAPH_VERBOSE("Prevented fusion as batch normalization node has an output accessor\n");
+ ARM_COMPUTE_LOG_GRAPH_VERBOSE("Prevented fusion of node with activation due to the presence of an output accessor\n");
}
}
}
@@ -109,7 +107,11 @@ const char *NodeFusionMutator::name()
void NodeFusionMutator::mutate(Graph &g)
{
- detail::fuse_batch_norm_with_activation(g);
+ // Supported activations when fusing
+ const std::set<Activation> supported_fused_activations = { Activation::RELU, Activation::BOUNDED_RELU, Activation::LU_BOUNDED_RELU };
+
+ detail::fuse_node_with_activation<BatchNormalizationLayerNode>(g, supported_fused_activations);
+ detail::fuse_node_with_activation<ConvolutionLayerNode>(g, supported_fused_activations);
}
} // namespace graph
} // namespace arm_compute
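
With the template in place, supporting a further node type becomes a one-line change, provided the class exposes the static node_type constant and a set_fused_activation() member. A hedged sketch, using a hypothetical DepthwiseConvolutionLayerNode as the example:

    // Hypothetical extension; DepthwiseConvolutionLayerNode would first need
    // the same node_type / set_fused_activation() additions made above.
    detail::fuse_node_with_activation<DepthwiseConvolutionLayerNode>(g, supported_fused_activations);
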
diff --git a/src/graph/nodes/BatchNormalizationLayerNode.cpp b/src/graph/nodes/BatchNormalizationLayerNode.cpp
index 3ae11fc24d..3d392bda1b 100644
--- a/src/graph/nodes/BatchNormalizationLayerNode.cpp
+++ b/src/graph/nodes/BatchNormalizationLayerNode.cpp
@@ -78,7 +78,7 @@ TensorDescriptor BatchNormalizationLayerNode::configure_output(size_t idx) const
NodeType BatchNormalizationLayerNode::type() const
{
- return NodeType::BatchNormalizationLayer;
+ return BatchNormalizationLayerNode::node_type;
}
void BatchNormalizationLayerNode::accept(INodeVisitor &v)
diff --git a/src/graph/nodes/ConvolutionLayerNode.cpp b/src/graph/nodes/ConvolutionLayerNode.cpp
index e9cb0396eb..15c7ff68f8 100644
--- a/src/graph/nodes/ConvolutionLayerNode.cpp
+++ b/src/graph/nodes/ConvolutionLayerNode.cpp
@@ -37,7 +37,7 @@ ConvolutionLayerNode::ConvolutionLayerNode(PadStrideInfo info,
ConvolutionMethod method,
FastMathHint fast_math_hint,
QuantizationInfo out_quant_info)
- : _info(std::move(info)), _num_groups(num_groups), _method(method), _fast_math_hint(fast_math_hint), _out_quant_info(out_quant_info)
+ : _info(std::move(info)), _num_groups(num_groups), _method(method), _fast_math_hint(fast_math_hint), _out_quant_info(out_quant_info), _fused_activation()
{
_input_edges.resize(3, EmptyEdgeID);
_outputs.resize(1, NullTensorID);
@@ -73,6 +73,16 @@ unsigned int ConvolutionLayerNode::num_groups() const
return _num_groups;
}
+ActivationLayerInfo ConvolutionLayerNode::fused_activation() const
+{
+ return _fused_activation;
+}
+
+void ConvolutionLayerNode::set_fused_activation(ActivationLayerInfo fused_activation)
+{
+ _fused_activation = fused_activation;
+}
+
TensorDescriptor ConvolutionLayerNode::compute_output_descriptor(const TensorDescriptor &input_descriptor,
const TensorDescriptor &weights_descriptor,
const PadStrideInfo &info)
@@ -126,7 +136,7 @@ TensorDescriptor ConvolutionLayerNode::configure_output(size_t idx) const
NodeType ConvolutionLayerNode::type() const
{
- return NodeType::ConvolutionLayer;
+ return ConvolutionLayerNode::node_type;
}
void ConvolutionLayerNode::accept(INodeVisitor &v)
diff --git a/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp b/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp
index 55b70ff193..fb6d4a1847 100644
--- a/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp
+++ b/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp
@@ -32,6 +32,7 @@
#include "support/ToolchainSupport.h"
#include <cmath>
+#include <set>
#include <tuple>
using namespace arm_compute;
@@ -190,13 +191,14 @@ void NEGEMMConvolutionLayer::configure(const ITensor *input, const ITensor *weig
const unsigned int kernel_width = weights->info()->dimension(idx_width);
const unsigned int kernel_height = weights->info()->dimension(idx_height);
- _is_prepared = weights_info.retain_internal_weights();
- _original_weights = weights;
- _is_quantized = is_data_type_quantized_asymmetric(input->info()->data_type());
- _data_layout = data_layout;
- _skip_im2col = (data_layout == DataLayout::NHWC && kernel_width == 1 && kernel_height == 1 && conv_info.stride().first == 1 && conv_info.stride().second == 1);
- _skip_col2im = data_layout == DataLayout::NHWC;
- _append_bias = (biases != nullptr) && (!_is_quantized);
+ _is_prepared = weights_info.retain_internal_weights();
+ _original_weights = weights;
+ _is_quantized = is_data_type_quantized_asymmetric(input->info()->data_type());
+ _data_layout = data_layout;
+ _skip_im2col = (data_layout == DataLayout::NHWC && kernel_width == 1 && kernel_height == 1 && conv_info.stride().first == 1 && conv_info.stride().second == 1);
+ _skip_col2im = data_layout == DataLayout::NHWC;
+ _append_bias = (biases != nullptr) && (!_is_quantized);
+ _is_activationlayer_enabled = act_info.enabled();
const ITensor *gemm_input_to_use = input;
ITensor *gemm_output_to_use = output;
@@ -285,9 +287,10 @@ void NEGEMMConvolutionLayer::configure(const ITensor *input, const ITensor *weig
if(_is_quantized)
{
const bool skip_reshape = data_layout == DataLayout::NHWC;
- const QuantizationInfo output_quant_info = (output->info()->total_size() == 0) ? input->info()->quantization_info() : output->info()->quantization_info();
+ const QuantizationInfo input_quant_info = input->info()->quantization_info();
+ const QuantizationInfo output_quant_info = (output->info()->total_size() == 0) ? input_quant_info : output->info()->quantization_info();
- float multiplier = input->info()->quantization_info().scale * weights->info()->quantization_info().scale / output_quant_info.scale;
+ float multiplier = input_quant_info.scale * weights->info()->quantization_info().scale / output_quant_info.scale;
int output_multiplier, output_shift;
quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift);
@@ -297,7 +300,29 @@ void NEGEMMConvolutionLayer::configure(const ITensor *input, const ITensor *weig
gemm_output_staged_to_use = &_tmp_output;
}
- _gemmlowp_output_stage.configure(gemm_output_to_use, biases, gemm_output_staged_to_use, output_multiplier, output_shift, output_quant_info.offset, 0, 0, skip_reshape ? conv_h : 1);
+ // Merge activation with output stage
+ uint8_t min = 0;
+ uint8_t max = 0;
+ const std::set<ActivationLayerInfo::ActivationFunction> supported_acts = { ActivationLayerInfo::ActivationFunction::RELU,
+ ActivationLayerInfo::ActivationFunction::BOUNDED_RELU,
+ ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU
+ };
+ if(_is_activationlayer_enabled && supported_acts.count(act_info.activation()) != 0)
+ {
+ min = sqcvt_qasymm8_f32(act_info.b(), input_quant_info.scale, input_quant_info.offset);
+ max = sqcvt_qasymm8_f32(act_info.a(), input_quant_info.scale, input_quant_info.offset);
+ if(act_info.activation() != ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU)
+ {
+ min = sqcvt_qasymm8_f32(0.f, input_quant_info.scale, input_quant_info.offset);
+ }
+ if(act_info.activation() == ActivationLayerInfo::ActivationFunction::RELU)
+ {
+ max = 255;
+ }
+ _is_activationlayer_enabled = false;
+ }
+
+ _gemmlowp_output_stage.configure(gemm_output_to_use, biases, gemm_output_staged_to_use, output_multiplier, output_shift, output_quant_info.offset, min, max, skip_reshape ? conv_h : 1);
}
if(!_skip_col2im && _data_layout == DataLayout::NCHW)
@@ -319,9 +344,7 @@ void NEGEMMConvolutionLayer::configure(const ITensor *input, const ITensor *weig
ARM_COMPUTE_ERROR_ON_MSG((output->info()->dimension(idx_width) != conv_w) || (output->info()->dimension(idx_height) != conv_h),
"Output shape does not match the expected one");
- //Configure Activation Layer
- _is_activationlayer_enabled = act_info.enabled();
-
+ // Configure Activation Layer
if(_is_activationlayer_enabled)
{
_activationlayer_function.configure(output, nullptr, act_info);
@@ -356,10 +379,11 @@ Status NEGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorI
const ITensorInfo *gemm_output_staged_to_use = output;
const ITensorInfo *weights_to_use = weights;
- const bool is_quantized = is_data_type_quantized_asymmetric(data_type);
- const bool append_bias = (biases != nullptr) && (!is_quantized);
- bool skip_im2col = (data_layout == DataLayout::NHWC && kernel_width == 1 && kernel_height == 1 && conv_info.stride().first == 1 && conv_info.stride().second == 1);
- bool skip_col2im = data_layout == DataLayout::NHWC;
+ const bool is_quantized = is_data_type_quantized_asymmetric(data_type);
+ const bool append_bias = (biases != nullptr) && (!is_quantized);
+ bool skip_im2col = (data_layout == DataLayout::NHWC && kernel_width == 1 && kernel_height == 1 && conv_info.stride().first == 1 && conv_info.stride().second == 1);
+ bool skip_col2im = data_layout == DataLayout::NHWC;
+ bool is_activation_enabled = act_info.enabled();
// Get convolved dimensions
unsigned int conv_w = 0;
@@ -457,9 +481,11 @@ Status NEGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorI
if(is_quantized)
{
- const bool skip_reshape = data_layout == DataLayout::NHWC;
- const float multiplier = input->quantization_info().scale * weights_to_use->quantization_info().scale / output->quantization_info().scale;
- int output_multiplier, output_shift;
+ const bool skip_reshape = data_layout == DataLayout::NHWC;
+ const QuantizationInfo input_quant_info = input->quantization_info();
+ const QuantizationInfo output_quant_info = (output->total_size() == 0) ? input_quant_info : output->quantization_info();
+ const float multiplier = input_quant_info.scale * weights_to_use->quantization_info().scale / output_quant_info.scale;
+ int output_multiplier, output_shift;
quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift);
if(!skip_reshape)
@@ -469,8 +495,30 @@ Status NEGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorI
gemm_output_staged_to_use = &tmp_info;
}
+ // Merge activation with output stage
+ uint8_t min = 0;
+ uint8_t max = 0;
+ const std::set<ActivationLayerInfo::ActivationFunction> supported_acts = { ActivationLayerInfo::ActivationFunction::RELU,
+ ActivationLayerInfo::ActivationFunction::BOUNDED_RELU,
+ ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU
+ };
+ if(is_activation_enabled && supported_acts.count(act_info.activation()) != 0)
+ {
+ min = sqcvt_qasymm8_f32(act_info.b(), input_quant_info.scale, input_quant_info.offset);
+ max = sqcvt_qasymm8_f32(act_info.a(), input_quant_info.scale, input_quant_info.offset);
+ if(act_info.activation() != ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU)
+ {
+ min = sqcvt_qasymm8_f32(0.f, input_quant_info.scale, input_quant_info.offset);
+ }
+ if(act_info.activation() == ActivationLayerInfo::ActivationFunction::RELU)
+ {
+ max = 255;
+ }
+ is_activation_enabled = false;
+ }
+
// Validate output stage for quantized case
- NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint::validate(gemm_output_to_use, biases, gemm_output_staged_to_use, 0, 0, skip_reshape ? conv_h : 1);
+ NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint::validate(gemm_output_to_use, biases, gemm_output_staged_to_use, min, max, skip_reshape ? conv_h : 1);
}
// Validate Col2Im/ReshapeLayer
@@ -482,7 +530,7 @@ Status NEGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorI
}
//Validate Activation Layer
- if(act_info.enabled())
+ if(is_activation_enabled)
{
ARM_COMPUTE_RETURN_ON_ERROR(NEActivationLayer::validate(output, nullptr, act_info));
}
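
End to end, a quantized GEMM convolution configured with a ReLU now dispatches no separate activation kernel: the clamp bounds land in the GEMMLowp output stage and _is_activationlayer_enabled is reset to false. A minimal usage sketch, assuming QASYMM8 input/weights/output tensors and S32 biases that have already been allocated:

    NEGEMMConvolutionLayer conv;
    conv.configure(&input, &weights, &biases, &output,
                   PadStrideInfo(1, 1, 0, 0),
                   WeightsInfo(), Size2D(1U, 1U),
                   ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));
    conv.run(); // ReLU is applied by the clamped requantization, not by an extra kernel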