 arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h |   4 +
 examples/graph_lenet.cpp                                    |   1 -
 examples/graph_vgg16.cpp                                    |   1 -
 examples/graph_vgg19.cpp                                    |   1 -
 src/runtime/NEON/functions/NEFullyConnectedLayer.cpp        | 128 +++++++++++------
 5 files changed, 93 insertions(+), 42 deletions(-)
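
In short: NEFullyConnectedLayer gains an NEConvertFullyConnectedWeights stage, so weights trained in one data layout (fc_info.weights_trained_layout) are permuted to match the runtime input layout whenever the layer follows a convolution. With NEON now handling the conversion itself, the NHWC guard is dropped from the LeNet/VGG16/VGG19 graph examples. Below is a minimal usage sketch of the new path; the tensor shapes, names and surrounding setup are illustrative assumptions, not taken from the patch:

#include "arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

int main()
{
    // Hypothetical 7x7x64 feature map entering the FC layer, held NHWC at
    // runtime (ACL shape order for NHWC: channels, width, height).
    Tensor src, weights, bias, dst;
    src.allocator()->init(TensorInfo(TensorShape(64U, 7U, 7U), 1, DataType::F32));
    src.info()->set_data_layout(DataLayout::NHWC);

    // 7 * 7 * 64 = 3136 flattened inputs feeding 1000 units.
    weights.allocator()->init(TensorInfo(TensorShape(3136U, 1000U), 1, DataType::F32));
    bias.allocator()->init(TensorInfo(TensorShape(1000U), 1, DataType::F32));
    dst.allocator()->init(TensorInfo(TensorShape(1000U), 1, DataType::F32));

    // The weights come from an NCHW-trained model: because the input's data
    // layout (NHWC) differs from weights_trained_layout, the layer now
    // configures the NEConvertFullyConnectedWeights stage internally.
    FullyConnectedLayerInfo fc_info{};
    fc_info.weights_trained_layout = DataLayout::NCHW;

    NEFullyConnectedLayer fc;
    fc.configure(&src, &weights, &bias, &dst, fc_info);

    src.allocator()->allocate();
    weights.allocator()->allocate();
    bias.allocator()->allocate();
    dst.allocator()->allocate();
    // ... fill src, weights and bias with real data here ...

    fc.run(); // first run triggers prepare(): reshape, convert, release
    return 0;
}

The reshape and conversion happen once, in prepare(); intermediate weight tensors are freed as soon as they are no longer used, as the prepare() hunk at the end of this patch shows.
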
diff --git a/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h b/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h
index 92ca17a3a4..fe0f2f03f7 100644
--- a/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h
@@ -30,6 +30,7 @@
#include "arm_compute/core/NEON/kernels/NEIm2ColKernel.h"
#include "arm_compute/core/NEON/kernels/NETransposeKernel.h"
#include "arm_compute/runtime/MemoryGroup.h"
+#include "arm_compute/runtime/NEON/functions/NEConvertFullyConnectedWeights.h"
#include "arm_compute/runtime/NEON/functions/NEGEMM.h"
#include "arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h"
#include "arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h"
@@ -129,6 +130,7 @@ private:
MemoryGroup _memory_group;
NEIm2ColKernel _im2col_kernel;
+ NEConvertFullyConnectedWeights _convert_weights;
NEFullyConnectedLayerReshapeWeights _reshape_weights_function;
NEGEMM _mm_gemm;
NEGEMMLowpMatrixMultiplyCore _mm_gemmlowp;
@@ -136,8 +138,10 @@ private:
NEGEMMMatrixAccumulateBiasesKernel _accumulate_biases_kernel;
Tensor _im2col_output;
Tensor _gemmlowp_output;
+ Tensor _converted_weights_output;
Tensor _reshape_weights_output;
const ITensor *_original_weights;
+ bool _are_weights_converted;
bool _are_weights_reshaped;
bool _is_fc_after_conv;
bool _accumulate_biases;
diff --git a/examples/graph_lenet.cpp b/examples/graph_lenet.cpp
index f3aa266c50..c658d1b361 100644
--- a/examples/graph_lenet.cpp
+++ b/examples/graph_lenet.cpp
@@ -60,7 +60,6 @@ public:
// Checks
ARM_COMPUTE_EXIT_ON_MSG(arm_compute::is_data_type_quantized_asymmetric(common_params.data_type), "Unsupported data type!");
- ARM_COMPUTE_EXIT_ON_MSG(common_params.data_layout == DataLayout::NHWC && common_params.target != Target::CL, "Unsupported data layout!");
// Print parameter values
std::cout << common_params << std::endl;
diff --git a/examples/graph_vgg16.cpp b/examples/graph_vgg16.cpp
index e23ea65dd7..808ce08ce7 100644
--- a/examples/graph_vgg16.cpp
+++ b/examples/graph_vgg16.cpp
@@ -60,7 +60,6 @@ public:
// Checks
ARM_COMPUTE_EXIT_ON_MSG(arm_compute::is_data_type_quantized_asymmetric(common_params.data_type), "Unsupported data type!");
- ARM_COMPUTE_EXIT_ON_MSG(common_params.data_layout == DataLayout::NHWC && common_params.target != Target::CL, "Unsupported data layout!");
// Print parameter values
std::cout << common_params << std::endl;
diff --git a/examples/graph_vgg19.cpp b/examples/graph_vgg19.cpp
index 6cb6b1fae2..96a1d8a9d4 100644
--- a/examples/graph_vgg19.cpp
+++ b/examples/graph_vgg19.cpp
@@ -59,7 +59,6 @@ public:
// Checks
ARM_COMPUTE_EXIT_ON_MSG(arm_compute::is_data_type_quantized_asymmetric(common_params.data_type), "Unsupported data type!");
- ARM_COMPUTE_EXIT_ON_MSG(common_params.data_layout == DataLayout::NHWC && common_params.target != Target::CL, "Unsupported data layout!");
// Print parameter values
std::cout << common_params << std::endl;
diff --git a/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp b/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp
index 9d3cb31c9a..34cabb5c2e 100644
--- a/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp
+++ b/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp
@@ -74,8 +74,9 @@ Status NEFullyConnectedLayerReshapeWeights::validate(const ITensorInfo *input, c
}
NEFullyConnectedLayer::NEFullyConnectedLayer(std::shared_ptr<IMemoryManager> memory_manager)
- : _memory_group(std::move(memory_manager)), _im2col_kernel(), _reshape_weights_function(), _mm_gemm(), _mm_gemmlowp(), _gemmlowp_output_stage(), _accumulate_biases_kernel(), _im2col_output(),
- _gemmlowp_output(), _reshape_weights_output(), _original_weights(nullptr), _are_weights_reshaped(false), _is_fc_after_conv(false), _accumulate_biases(false), _is_quantized(false), _is_prepared(false)
+ : _memory_group(std::move(memory_manager)), _im2col_kernel(), _convert_weights(), _reshape_weights_function(), _mm_gemm(), _mm_gemmlowp(), _gemmlowp_output_stage(), _accumulate_biases_kernel(),
+ _im2col_output(), _gemmlowp_output(), _converted_weights_output(), _reshape_weights_output(), _original_weights(nullptr), _are_weights_converted(true), _are_weights_reshaped(false),
+ _is_fc_after_conv(false), _accumulate_biases(false), _is_quantized(false), _is_prepared(false)
{
}
@@ -146,11 +147,12 @@ void NEFullyConnectedLayer::configure(const ITensor *input, const ITensor *weigh
output->info(),
fc_info));
- _are_weights_reshaped = fc_info.transpose_weights ? fc_info.are_weights_reshaped : true;
- _is_fc_after_conv = true;
- _accumulate_biases = false;
- _is_quantized = is_data_type_quantized_asymmetric(input->info()->data_type());
- _original_weights = weights;
+ _are_weights_converted = true;
+ _are_weights_reshaped = fc_info.transpose_weights ? fc_info.are_weights_reshaped : true;
+ _is_fc_after_conv = true;
+ _accumulate_biases = false;
+ _is_quantized = is_data_type_quantized_asymmetric(input->info()->data_type());
+ _original_weights = weights;
// Configure gemmlowp output
if(_is_quantized)
@@ -175,17 +177,8 @@ void NEFullyConnectedLayer::configure(const ITensor *input, const ITensor *weigh
const ITensor *weights_to_use = weights;
- if(!_are_weights_reshaped)
- {
- weights_to_use = &_reshape_weights_output;
-
- // Reshape the weights
- _reshape_weights_function.configure(weights, &_reshape_weights_output);
- }
-
// Check if we have a fully connected layer with batches
const bool is_batched_fc_layer = output->info()->dimension(1) > 1;
-
if(is_batched_fc_layer)
{
_is_fc_after_conv = (TensorShape::num_max_dimensions >= 4) && (std::equal(input->info()->tensor_shape().cbegin() + 3,
@@ -197,6 +190,27 @@ void NEFullyConnectedLayer::configure(const ITensor *input, const ITensor *weigh
_is_fc_after_conv = input->info()->num_dimensions() > 1;
}
+ // Reshape weights if needed
+ if(!_are_weights_reshaped)
+ {
+ // Reshape the weights
+ _reshape_weights_function.configure(weights, &_reshape_weights_output);
+ weights_to_use = &_reshape_weights_output;
+ }
+
+ // Convert weights if needed
+ if(_is_fc_after_conv && (input->info()->data_layout() != fc_info.weights_trained_layout))
+ {
+ // Convert weights
+ _convert_weights.configure(weights_to_use,
+ &_converted_weights_output,
+ input->info()->tensor_shape(),
+ fc_info.weights_trained_layout);
+
+ weights_to_use = &_converted_weights_output;
+ _are_weights_converted = false;
+ }
+
ITensor *tmp_output = (_is_quantized) ? &_gemmlowp_output : output;
if(_is_fc_after_conv)
{
@@ -235,9 +249,10 @@ Status NEFullyConnectedLayer::validate(const ITensorInfo *input, const ITensorIn
bool is_fc_after_conv = true;
bool is_quantized = is_data_type_quantized_asymmetric(input->data_type());
- const ITensorInfo &im2col_input = TensorInfo(input->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(compute_im2col_fc_shape(input)));
- const ITensorInfo &reshaped_weights = TensorInfo(weights->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(compute_transposed_shape(*weights)));
- const ITensorInfo &gemmlowp_output = TensorInfo(output->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::S32));
+ const ITensorInfo &im2col_input = TensorInfo(input->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(compute_im2col_fc_shape(input)));
+ const ITensorInfo &reshaped_weights = TensorInfo(weights->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(compute_transposed_shape(*weights)));
+ const ITensorInfo &converted_weights = TensorInfo(reshaped_weights.clone()->set_is_resizable(true).reset_padding());
+ const ITensorInfo &gemmlowp_output = TensorInfo(output->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::S32));
// Configure accumulate biases kernel for non quantized asymmetric types
if(biases != nullptr && !is_quantized)
@@ -256,13 +271,6 @@ Status NEFullyConnectedLayer::validate(const ITensorInfo *input, const ITensorIn
const ITensorInfo *weights_to_use = weights;
const ITensorInfo *tmp_output = (is_quantized) ? &gemmlowp_output : output;
- if(!weights_reshaped)
- {
- // Validate reshape weights kernel
- ARM_COMPUTE_RETURN_ON_ERROR(NEFullyConnectedLayerReshapeWeights::validate(weights, &reshaped_weights));
- weights_to_use = &reshaped_weights;
- }
-
// Check if we have a fully connected layer with batches
const bool is_batched_fc_layer = output->dimension(1) > 1;
@@ -277,6 +285,23 @@ Status NEFullyConnectedLayer::validate(const ITensorInfo *input, const ITensorIn
is_fc_after_conv = input->num_dimensions() > 1;
}
+ if(!weights_reshaped)
+ {
+ // Validate reshape weights kernel
+ ARM_COMPUTE_RETURN_ON_ERROR(NEFullyConnectedLayerReshapeWeights::validate(weights, &reshaped_weights));
+ weights_to_use = &reshaped_weights;
+ }
+
+ if(is_fc_after_conv && (input->data_layout() != fc_info.weights_trained_layout))
+ {
+ // Validate convert weights kernel
+ ARM_COMPUTE_RETURN_ON_ERROR(NEConvertFullyConnectedWeights::validate(weights_to_use,
+ &converted_weights,
+ input->tensor_shape(),
+ fc_info.weights_trained_layout));
+ weights_to_use = &converted_weights;
+ }
+
if(is_fc_after_conv)
{
// Fully Connected layer after a Convolution Layer without batches
@@ -345,29 +370,54 @@ void NEFullyConnectedLayer::prepare()
{
if(!_is_prepared)
{
+ ARM_COMPUTE_ERROR_ON(!_original_weights->is_used());
+
+ auto release_unused = [](Tensor * w)
+ {
+ if(!w->is_used())
+ {
+ w->allocator()->free();
+ }
+ };
+
+ // Pointer to current weights
+ const ITensor *cur_weights = _original_weights;
+
// Reshape of the weights (happens only once)
if(!_are_weights_reshaped)
{
- ARM_COMPUTE_ERROR_ON(!_original_weights->is_used());
-
// Run reshape weights kernel and mark weights as unused
_reshape_weights_output.allocator()->allocate();
_reshape_weights_function.run();
- _original_weights->mark_as_unused();
-
- // Prepare GEMM prepare and release unused weights
- if(!_is_quantized)
- {
- _mm_gemm.prepare();
- if(!_reshape_weights_output.is_used())
- {
- _reshape_weights_output.allocator()->free();
- }
- }
+ cur_weights->mark_as_unused();
+ cur_weights = &_reshape_weights_output;
_are_weights_reshaped = true;
}
+ // Convert weights if needed (happens only once)
+ if(!_are_weights_converted)
+ {
+ _converted_weights_output.allocator()->allocate();
+ _convert_weights.run();
+
+ cur_weights->mark_as_unused();
+ _are_weights_converted = true;
+ }
+
+ // Release reshaped weights if unused
+ release_unused(&_reshape_weights_output);
+
+ // Prepare GEMM and release unused weights
+ if(!_is_quantized)
+ {
+ _mm_gemm.prepare();
+ }
+
+ // Release reshaped/converted weights if GEMM preparation left them unused
+ release_unused(&_reshape_weights_output);
+ release_unused(&_converted_weights_output);
+
_is_prepared = true;
}
}
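
For reference, the conversion stage can also be exercised on its own; the configure() call below mirrors the one the layer makes in the configure() hunk above. This is a sketch under stated assumptions: the tensor names and shapes are invented, and the weights are assumed to already be in the transposed form that NEFullyConnectedLayerReshapeWeights produces.

#include "arm_compute/runtime/NEON/functions/NEConvertFullyConnectedWeights.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

int main()
{
    // Assumed: FC weights for a 7x7x64 input (3136 values per unit, 1000
    // units), already transposed as the layer's reshape step would leave them.
    Tensor trained_weights, converted_weights;
    trained_weights.allocator()->init(TensorInfo(TensorShape(1000U, 3136U), 1, DataType::F32));
    converted_weights.allocator()->init(TensorInfo(TensorShape(1000U, 3136U), 1, DataType::F32));

    NEConvertFullyConnectedWeights convert;
    convert.configure(&trained_weights, &converted_weights,
                      TensorShape(64U, 7U, 7U), // unflattened shape of the FC input
                      DataLayout::NCHW);        // layout the weights were trained in

    trained_weights.allocator()->allocate();
    converted_weights.allocator()->allocate();
    // ... import the trained weight values, then:
    convert.run();
    return 0;
}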