diff options
Diffstat (limited to 'src/runtime')
-rw-r--r-- | src/runtime/CL/functions/CLFullyConnectedLayer.cpp | 157 | ||||
-rw-r--r-- | src/runtime/CL/functions/CLLSTMLayer.cpp | 20 | ||||
-rw-r--r-- | src/runtime/CL/functions/CLRNNLayer.cpp | 4 | ||||
-rw-r--r-- | src/runtime/GLES_COMPUTE/functions/GCFullyConnectedLayer.cpp | 8 | ||||
-rw-r--r-- | src/runtime/NEON/functions/NEFullyConnectedLayer.cpp | 21 | ||||
-rw-r--r-- | src/runtime/NEON/functions/NERNNLayer.cpp | 4 |
6 files changed, 134 insertions, 80 deletions
diff --git a/src/runtime/CL/functions/CLFullyConnectedLayer.cpp b/src/runtime/CL/functions/CLFullyConnectedLayer.cpp index 273ef96a03..ccd7813fbc 100644 --- a/src/runtime/CL/functions/CLFullyConnectedLayer.cpp +++ b/src/runtime/CL/functions/CLFullyConnectedLayer.cpp @@ -73,8 +73,9 @@ Status CLFullyConnectedLayerReshapeWeights::validate(const ITensorInfo *input, c } CLFullyConnectedLayer::CLFullyConnectedLayer(std::shared_ptr<IMemoryManager> memory_manager) - : _memory_group(memory_manager), _im2col_kernel(), _reshape_weights_kernel(), _mm_gemm(memory_manager), _mm_gemmlowp(memory_manager), _gemmlowp_output_stage(), _accumulate_biases_kernel(), - _im2col_output(), _gemmlowp_output(), _reshape_weights_output(), _are_weights_reshaped(true), _is_fc_after_conv(true), _accumulate_biases(false), _is_quantized(false), _original_weights(nullptr) + : _memory_group(memory_manager), _im2col_kernel(), _convert_weights(), _reshape_weights_kernel(), _mm_gemm(memory_manager), _mm_gemmlowp(memory_manager), _gemmlowp_output_stage(), + _accumulate_biases_kernel(), _im2col_output(), _gemmlowp_output(), _converted_weights_output(), _reshape_weights_output(), _are_weights_converted(true), _are_weights_reshaped(true), + _is_fc_after_conv(true), _accumulate_biases(false), _is_quantized(false), _is_prepared(false), _original_weights(nullptr) { } @@ -112,7 +113,7 @@ void CLFullyConnectedLayer::configure_conv_fc(const ICLTensor *input, const ICLT // Initialize output tensor for im2col TensorShape shape_im2col = compute_im2col_fc_shape(input->info()); - _im2col_output.allocator()->init(input->info()->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(shape_im2col)); + _im2col_output.allocator()->init(input->info()->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(shape_im2col).set_data_layout(DataLayout::NCHW)); // Configure im2col kernel _memory_group.manage(&_im2col_output); @@ -134,8 +135,8 @@ void CLFullyConnectedLayer::configure_fc_fc(const ICLTensor *input, const ICLTen configure_mm(input, weights, output); } -void CLFullyConnectedLayer::configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, bool transpose_weights, bool are_weights_reshaped, - bool retain_internal_weights) +void CLFullyConnectedLayer::configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, + FullyConnectedLayerInfo fc_info) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output); @@ -144,15 +145,15 @@ void CLFullyConnectedLayer::configure(const ICLTensor *input, const ICLTensor *w weights->info(), biases != nullptr ? biases->info() : nullptr, output->info(), - transpose_weights, - are_weights_reshaped, - retain_internal_weights)); + fc_info)); - _are_weights_reshaped = transpose_weights ? are_weights_reshaped : true; - _is_fc_after_conv = true; - _accumulate_biases = false; - _is_quantized = is_data_type_quantized_asymmetric(input->info()->data_type()); - _original_weights = weights; + _are_weights_converted = true; + _are_weights_reshaped = fc_info.transpose_weights ? fc_info.are_weights_reshaped : true; + _is_fc_after_conv = true; + _accumulate_biases = false; + _is_quantized = is_data_type_quantized_asymmetric(input->info()->data_type()); + _is_prepared = false; + _original_weights = weights; // Configure gemmlowp output if(_is_quantized) @@ -172,25 +173,16 @@ void CLFullyConnectedLayer::configure(const ICLTensor *input, const ICLTensor *w _accumulate_biases_kernel.configure(output, biases); } + const ICLTensor *weights_to_use = weights; + // With the Fully Connected layer we can have 4 different cases: // 1) Convolution layer -> Fully Connected layer without batches // 2) Fully Connected layer -> Fully Connected layer without batches // 3) Convolution layer -> Fully Connected layer with batches // 4) Fully Connected layer -> Fully Connected layer with batches - const ICLTensor *weights_to_use = weights; - - if(!_are_weights_reshaped) - { - weights_to_use = &_reshape_weights_output; - - // Reshape the weights - _reshape_weights_kernel.configure(weights, &_reshape_weights_output); - } - // Check if we have a fully connected layer with batches const bool is_batched_fc_layer = output->info()->dimension(1) > 1; - if(is_batched_fc_layer) { _is_fc_after_conv = (TensorShape::num_max_dimensions >= 4) && (std::equal(input->info()->tensor_shape().cbegin() + 3, @@ -202,6 +194,28 @@ void CLFullyConnectedLayer::configure(const ICLTensor *input, const ICLTensor *w _is_fc_after_conv = input->info()->num_dimensions() > 1; } + // Reshape weights if needed + if(!_are_weights_reshaped) + { + // Reshape the weights + _reshape_weights_kernel.configure(weights, &_reshape_weights_output); + weights_to_use = &_reshape_weights_output; + } + + // Convert weights if needed + if(_is_fc_after_conv && (input->info()->data_layout() != fc_info.weights_trained_layout)) + { + // Convert weights + _convert_weights.configure(weights_to_use, + &_converted_weights_output, + input->info()->tensor_shape(), + fc_info.weights_trained_layout); + + weights_to_use = &_converted_weights_output; + _are_weights_converted = false; + } + + // Configure fc core ICLTensor *tmp_output = (_is_quantized) ? &_gemmlowp_output : output; if(_is_fc_after_conv) { @@ -224,26 +238,26 @@ void CLFullyConnectedLayer::configure(const ICLTensor *input, const ICLTensor *w _gemmlowp_output.allocator()->allocate(); } - _are_weights_reshaped = _are_weights_reshaped || retain_internal_weights; + _are_weights_reshaped = _are_weights_reshaped || fc_info.retain_internal_weights; } -Status CLFullyConnectedLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, bool transpose_weights, bool are_weights_reshaped, - bool retain_internal_weights) +Status CLFullyConnectedLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, + FullyConnectedLayerInfo fc_info) { - ARM_COMPUTE_UNUSED(retain_internal_weights); ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output); ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16, DataType::F32); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights, output); ARM_COMPUTE_RETURN_ERROR_ON(weights->num_dimensions() > 2); - bool weights_reshaped = transpose_weights ? are_weights_reshaped : true; + bool weights_reshaped = fc_info.transpose_weights ? fc_info.are_weights_reshaped : true; bool is_fc_after_conv = true; bool is_quantized = is_data_type_quantized_asymmetric(input->data_type()); const GPUTarget gpu_target = CLScheduler::get().target(); - const ITensorInfo &im2col_input = TensorInfo(input->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(compute_im2col_fc_shape(input))); - const ITensorInfo &reshaped_weights = TensorInfo(weights->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(compute_transposed_shape(*weights))); - const ITensorInfo &gemmlowp_output = TensorInfo(output->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::S32)); + const ITensorInfo &im2col_input = TensorInfo(input->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(compute_im2col_fc_shape(input)).set_data_layout(DataLayout::NCHW)); + const ITensorInfo &reshaped_weights = TensorInfo(weights->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(compute_transposed_shape(*weights))); + const ITensorInfo &converted_weights = TensorInfo(reshaped_weights.clone()->set_is_resizable(true).reset_padding()); + const ITensorInfo &gemmlowp_output = TensorInfo(output->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::S32)); // Configure accumulate biases kernel for non quantized asymmetric types if(biases != nullptr && !is_quantized) @@ -262,16 +276,8 @@ Status CLFullyConnectedLayer::validate(const ITensorInfo *input, const ITensorIn const ITensorInfo *weights_to_use = weights; const ITensorInfo *tmp_output = (is_quantized) ? &gemmlowp_output : output; - if(!weights_reshaped) - { - // Validate reshape weights kernel - ARM_COMPUTE_RETURN_ON_ERROR(CLFullyConnectedLayerReshapeWeights::validate(weights, &reshaped_weights)); - weights_to_use = &reshaped_weights; - } - // Check if we have a fully connected layer with batches const bool is_batched_fc_layer = output->dimension(1) > 1; - if(is_batched_fc_layer) { is_fc_after_conv = (TensorShape::num_max_dimensions >= 4) && (std::equal(input->tensor_shape().cbegin() + 3, @@ -283,6 +289,23 @@ Status CLFullyConnectedLayer::validate(const ITensorInfo *input, const ITensorIn is_fc_after_conv = input->num_dimensions() > 1; } + if(!weights_reshaped) + { + // Validate reshape weights kernel + ARM_COMPUTE_RETURN_ON_ERROR(CLFullyConnectedLayerReshapeWeights::validate(weights, &reshaped_weights)); + weights_to_use = &reshaped_weights; + } + + if(is_fc_after_conv && (input->data_layout() != fc_info.weights_trained_layout)) + { + // Validate convert weights kernel + ARM_COMPUTE_RETURN_ON_ERROR(CLConvertFullyConnectedWeights::validate(weights_to_use, + &converted_weights, + input->tensor_shape(), + fc_info.weights_trained_layout)); + weights_to_use = &converted_weights; + } + if(is_fc_after_conv) { // Fully Connected layer after a Convolution Layer without batches @@ -349,27 +372,57 @@ void CLFullyConnectedLayer::run() void CLFullyConnectedLayer::prepare() { - // Reshape of the weights (happens only once) - if(!_are_weights_reshaped) + if(!_is_prepared) { ARM_COMPUTE_ERROR_ON(!_original_weights->is_used()); - // Run reshape weights kernel and mark weights as unused - _reshape_weights_output.allocator()->allocate(); - _reshape_weights_kernel.run(); - _original_weights->mark_as_unused(); + auto release_unused = [](CLTensor * w) + { + if(!w->is_used()) + { + CLScheduler::get().queue().finish(); + w->allocator()->free(); + } + }; + + // Pointer to current weights + const ICLTensor *cur_weights = _original_weights; + + // Reshape of the weights if needed (happens only once) + if(!_are_weights_reshaped) + { + // Run reshape weights kernel and mark weights as unused + _reshape_weights_output.allocator()->allocate(); + _reshape_weights_kernel.run(); + + cur_weights->mark_as_unused(); + cur_weights = &_reshape_weights_output; + _are_weights_reshaped = true; + } + + // Convert weights if needed (happens only once) + if(!_are_weights_converted) + { + _converted_weights_output.allocator()->allocate(); + _convert_weights.run(); + + cur_weights->mark_as_unused(); + _are_weights_converted = true; + } + + // Release reshaped weights if unused + release_unused(&_reshape_weights_output); // Prepare GEMM prepare and release unused weights if(!_is_quantized) { _mm_gemm.prepare(); - if(!_reshape_weights_output.is_used()) - { - _reshape_weights_output.allocator()->free(); - } } - CLScheduler::get().queue().finish(); - _are_weights_reshaped = true; + // Release converted weights if unused + release_unused(&_reshape_weights_output); + release_unused(&_converted_weights_output); + + _is_prepared = true; } } diff --git a/src/runtime/CL/functions/CLLSTMLayer.cpp b/src/runtime/CL/functions/CLLSTMLayer.cpp index 872325175d..d384400ed3 100644 --- a/src/runtime/CL/functions/CLLSTMLayer.cpp +++ b/src/runtime/CL/functions/CLLSTMLayer.cpp @@ -90,7 +90,7 @@ void CLLSTMLayer::configure(const ICLTensor *input, const ICLTensor *input_to_fo // Configure block that calculates the forget gate // forget_gate = Activation(input * input_to_forget_weights + output_state * recurrent_to_forget_weights + PixelWiseMul(cell_state, cell_to_forget_weights) + forget_gate_bias) _memory_group.manage(&_forget_gate_out1); - _fully_connected_forget_gate.configure(input, input_to_forget_weights, forget_gate_bias, &_forget_gate_out1, true, false); + _fully_connected_forget_gate.configure(input, input_to_forget_weights, forget_gate_bias, &_forget_gate_out1); _memory_group.manage(&_forget_gate_out2); _transpose_forget_gate.configure(recurrent_to_forget_weights, &_forget_gate_out2); _memory_group.manage(&_forget_gate_out3); @@ -142,7 +142,7 @@ void CLLSTMLayer::configure(const ICLTensor *input, const ICLTensor *input_to_fo _input_gate_out5.allocator()->init(TensorInfo(cell_state_shape, 1, input->info()->data_type())); _memory_group.manage(&_input_gate_out1); - _fully_connected_input_gate.configure(input, lstm_params.input_to_input_weights(), lstm_params.input_gate_bias(), &_input_gate_out1, true, false); + _fully_connected_input_gate.configure(input, lstm_params.input_to_input_weights(), lstm_params.input_gate_bias(), &_input_gate_out1); _memory_group.manage(&_input_gate_out2); _transpose_input_gate.configure(lstm_params.recurrent_to_input_weights(), &_input_gate_out2); _memory_group.manage(&_input_gate_out3); @@ -169,7 +169,7 @@ void CLLSTMLayer::configure(const ICLTensor *input, const ICLTensor *input_to_fo // Configure block that calculates the cell state // cell_state = Clip((PixelwiseMul(input_gate, Activation(input * input_to_cell_weights + output_state * recurrent_to_cell_weights + cell_bias)) + PixelwiseMul(forget_gate, cell_state)), cell_threshold) _memory_group.manage(&_cell_state_out1); - _fully_connected_cell_state.configure(input, input_to_cell_weights, cell_bias, &_cell_state_out1, true, false); + _fully_connected_cell_state.configure(input, input_to_cell_weights, cell_bias, &_cell_state_out1); _memory_group.manage(&_cell_state_out2); _transpose_cell_state.configure(recurrent_to_cell_weights, &_cell_state_out2); _memory_group.manage(&_cell_state_out3); @@ -204,7 +204,7 @@ void CLLSTMLayer::configure(const ICLTensor *input, const ICLTensor *input_to_fo // Configure block that calculates the output // output_state = Activation(input * input_to_output_weights + output_state * recurrent_to_output_weights + PixelWiseMul(cell_state, cell_to_output_weights) + output_gate_bias) _memory_group.manage(&_output1); - _fully_connected_output.configure(input, input_to_output_weights, output_gate_bias, &_output1, true, false); + _fully_connected_output.configure(input, input_to_output_weights, output_gate_bias, &_output1); _memory_group.manage(&_output2); _transpose_output.configure(recurrent_to_output_weights, &_output2); _memory_group.manage(&_output3); @@ -255,7 +255,7 @@ void CLLSTMLayer::configure(const ICLTensor *input, const ICLTensor *input_to_fo _has_projection_weights = true; _output_projection1.allocator()->init(TensorInfo(cell_state_shape, 1, input->info()->data_type())); _memory_group.manage(&_output_projection1); - _fully_connected_output_state.configure(output_state, lstm_params.projection_weights(), lstm_params.projection_bias(), &_output_projection1, true, false); + _fully_connected_output_state.configure(output_state, lstm_params.projection_weights(), lstm_params.projection_bias(), &_output_projection1); // Perform clipping if(projection_threshold != 0.f) { @@ -326,7 +326,7 @@ Status CLLSTMLayer::validate(const ITensorInfo *input, const ITensorInfo *input_ const TensorInfo num_units_transposed_info = TensorInfo(num_units_transposed_shape, 1, input->data_type()); // Validate forget gate - ARM_COMPUTE_RETURN_ON_ERROR(CLFullyConnectedLayer::validate(input, input_to_forget_weights, forget_gate_bias, cell_state, true, false)); + ARM_COMPUTE_RETURN_ON_ERROR(CLFullyConnectedLayer::validate(input, input_to_forget_weights, forget_gate_bias, cell_state)); ARM_COMPUTE_RETURN_ON_ERROR(CLGEMM::validate(output_state, &units_out_transposed_info, nullptr, cell_state, 1.f, 0.f, GEMMInfo())); ARM_COMPUTE_RETURN_ON_ERROR(CLArithmeticAdditionKernel::validate(cell_state, cell_state, cell_state, ConvertPolicy::SATURATE)); if(lstm_params.has_peephole_opt()) @@ -344,7 +344,7 @@ Status CLLSTMLayer::validate(const ITensorInfo *input, const ITensorInfo *input_ ARM_COMPUTE_RETURN_ERROR_ON(lstm_params.recurrent_to_input_weights()->num_dimensions() > 2); ARM_COMPUTE_RETURN_ERROR_ON(lstm_params.cell_to_input_weights()->num_dimensions() > 1); ARM_COMPUTE_RETURN_ERROR_ON(lstm_params.input_gate_bias()->num_dimensions() > 1); - ARM_COMPUTE_RETURN_ON_ERROR(CLFullyConnectedLayer::validate(input, lstm_params.input_to_input_weights(), lstm_params.input_gate_bias(), cell_state, true, false)); + ARM_COMPUTE_RETURN_ON_ERROR(CLFullyConnectedLayer::validate(input, lstm_params.input_to_input_weights(), lstm_params.input_gate_bias(), cell_state)); ARM_COMPUTE_RETURN_ON_ERROR(CLGEMM::validate(cell_state, &num_units_transposed_info, nullptr, &gemmv_shape_info, 1.f, 0.f, GEMMInfo())); ARM_COMPUTE_RETURN_ON_ERROR(CLArithmeticAddition::validate(cell_state, &gemmv_shape_info, cell_state, ConvertPolicy::SATURATE)); ARM_COMPUTE_RETURN_ON_ERROR(CLActivationLayerKernel::validate(cell_state, nullptr, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC))); @@ -355,7 +355,7 @@ Status CLLSTMLayer::validate(const ITensorInfo *input, const ITensorInfo *input_ } // Validate cell state - ARM_COMPUTE_RETURN_ON_ERROR(CLFullyConnectedLayer::validate(input, input_to_cell_weights, cell_bias, cell_state, true, false)); + ARM_COMPUTE_RETURN_ON_ERROR(CLFullyConnectedLayer::validate(input, input_to_cell_weights, cell_bias, cell_state)); ARM_COMPUTE_RETURN_ON_ERROR(CLActivationLayerKernel::validate(cell_state, nullptr, activation_info)); ARM_COMPUTE_RETURN_ON_ERROR(CLPixelWiseMultiplicationKernel::validate(cell_state, cell_state, cell_state, 1, ConvertPolicy::SATURATE, RoundingPolicy::TO_NEAREST_EVEN)); @@ -364,7 +364,7 @@ Status CLLSTMLayer::validate(const ITensorInfo *input, const ITensorInfo *input_ ARM_COMPUTE_RETURN_ON_ERROR(CLActivationLayerKernel::validate(cell_state, nullptr, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, -cell_threshold, cell_threshold))); } - ARM_COMPUTE_RETURN_ON_ERROR(CLFullyConnectedLayer::validate(input, input_to_output_weights, output_gate_bias, cell_state, true, false)); + ARM_COMPUTE_RETURN_ON_ERROR(CLFullyConnectedLayer::validate(input, input_to_output_weights, output_gate_bias, cell_state)); if(lstm_params.has_peephole_opt()) { ARM_COMPUTE_RETURN_ON_ERROR(CLArithmeticAddition::validate(cell_state, cell_state, cell_state, ConvertPolicy::SATURATE)); @@ -376,7 +376,7 @@ Status CLLSTMLayer::validate(const ITensorInfo *input, const ITensorInfo *input_ ARM_COMPUTE_RETURN_ON_ERROR(CLPixelWiseMultiplicationKernel::validate(cell_state, output, output_state, 1, ConvertPolicy::SATURATE, RoundingPolicy::TO_NEAREST_EVEN)); if(lstm_params.has_projection()) { - ARM_COMPUTE_RETURN_ON_ERROR(CLFullyConnectedLayer::validate(output_state, lstm_params.projection_weights(), lstm_params.projection_bias(), cell_state, true, false)); + ARM_COMPUTE_RETURN_ON_ERROR(CLFullyConnectedLayer::validate(output_state, lstm_params.projection_weights(), lstm_params.projection_bias(), cell_state)); if(projection_threshold != 0.f) { ARM_COMPUTE_RETURN_ON_ERROR(CLActivationLayerKernel::validate(cell_state, output_state, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, -projection_threshold, diff --git a/src/runtime/CL/functions/CLRNNLayer.cpp b/src/runtime/CL/functions/CLRNNLayer.cpp index 0e1b9d5b58..1809e6e64e 100644 --- a/src/runtime/CL/functions/CLRNNLayer.cpp +++ b/src/runtime/CL/functions/CLRNNLayer.cpp @@ -58,7 +58,7 @@ Status CLRNNLayer::validate(const ITensorInfo *input, const ITensorInfo *weights auto shape_info = TensorInfo(compute_rnn_shape(recurrent_weights, hidden_state->dimension(idx_height)), 1, input->data_type()); - ARM_COMPUTE_RETURN_ON_ERROR(CLFullyConnectedLayer::validate(input, weights, bias, &shape_info, true, false)); + ARM_COMPUTE_RETURN_ON_ERROR(CLFullyConnectedLayer::validate(input, weights, bias, &shape_info)); ARM_COMPUTE_RETURN_ON_ERROR(CLGEMM::validate(hidden_state, recurrent_weights, nullptr, &shape_info, 1.f, 0.f)); ARM_COMPUTE_RETURN_ON_ERROR(CLArithmeticAdditionKernel::validate(&shape_info, &shape_info, &shape_info, ConvertPolicy::SATURATE)); ARM_COMPUTE_RETURN_ON_ERROR(CLActivationLayerKernel::validate(&shape_info, &shape_info, info)); @@ -82,7 +82,7 @@ void CLRNNLayer::configure(const ICLTensor *input, const ICLTensor *weights, con // Manage intermediate buffers and configure _memory_group.manage(&_fully_connected_out); - _fully_connected_kernel.configure(input, weights, bias, &_fully_connected_out, true, false); + _fully_connected_kernel.configure(input, weights, bias, &_fully_connected_out); _memory_group.manage(&_gemm_output); _gemm_state_f.configure(hidden_state, recurrent_weights, nullptr, &_gemm_output, 1.f, 0.f); diff --git a/src/runtime/GLES_COMPUTE/functions/GCFullyConnectedLayer.cpp b/src/runtime/GLES_COMPUTE/functions/GCFullyConnectedLayer.cpp index ab2c6c2813..6b8e341b14 100644 --- a/src/runtime/GLES_COMPUTE/functions/GCFullyConnectedLayer.cpp +++ b/src/runtime/GLES_COMPUTE/functions/GCFullyConnectedLayer.cpp @@ -80,14 +80,14 @@ void GCFullyConnectedLayer::configure_fc_fc(const IGCTensor *input, const IGCTen } void GCFullyConnectedLayer::configure(const IGCTensor *input, const IGCTensor *weights, const IGCTensor *biases, IGCTensor *output, - bool transpose_weights, bool are_weights_reshaped, bool retain_internal_weights) + FullyConnectedLayerInfo fc_info) { ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32, DataType::F16); ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights, output); ARM_COMPUTE_ERROR_ON(weights->info()->num_dimensions() > 2); _original_weights = weights; - _are_weights_reshaped = transpose_weights ? are_weights_reshaped : true; + _are_weights_reshaped = fc_info.transpose_weights ? fc_info.are_weights_reshaped : true; _is_fc_after_conv = true; _accumulate_biases = false; @@ -142,8 +142,8 @@ void GCFullyConnectedLayer::configure(const IGCTensor *input, const IGCTensor *w configure_fc_fc(input, weights_to_use, output); } - ARM_COMPUTE_ERROR_ON(retain_internal_weights && _reshape_weights_output.gc_buffer() == 0); - _are_weights_reshaped = _are_weights_reshaped || retain_internal_weights; + ARM_COMPUTE_ERROR_ON(fc_info.retain_internal_weights && _reshape_weights_output.gc_buffer() == 0); + _are_weights_reshaped = _are_weights_reshaped || fc_info.retain_internal_weights; } void GCFullyConnectedLayer::run() diff --git a/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp b/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp index 3126823e9c..1aab3a05e0 100644 --- a/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp +++ b/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp @@ -136,7 +136,8 @@ NEFullyConnectedLayer::NEFullyConnectedLayer(std::shared_ptr<IMemoryManager> mem { } -void NEFullyConnectedLayer::configure(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, bool transpose_weights, bool are_weights_reshaped) +void NEFullyConnectedLayer::configure(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, + FullyConnectedLayerInfo fc_info) { // With the Fully Connected layer we can have 4 different cases: // 1) Convolution layer -> Fully Connected layer without batches @@ -156,8 +157,7 @@ void NEFullyConnectedLayer::configure(const ITensor *input, const ITensor *weigh weights->info(), biases != nullptr ? biases->info() : nullptr, output->info(), - transpose_weights, - are_weights_reshaped)); + fc_info)); const int num_batch_dimensions = std::max(0, static_cast<int>(output->info()->tensor_shape().num_dimensions()) - 1); const int num_input_dimensions = input->info()->tensor_shape().num_dimensions() - num_batch_dimensions; @@ -167,7 +167,7 @@ void NEFullyConnectedLayer::configure(const ITensor *input, const ITensor *weigh _linearize_input = (input->info()->tensor_shape().x() != linear_input_size) || (num_input_dimensions > 1 && linear_input_size == 1); _accumulate_biases = biases != nullptr; _is_batched_fc_layer = num_batch_dimensions > 0; - _is_prepared = are_weights_reshaped || (!transpose_weights && !_is_batched_fc_layer); + _is_prepared = fc_info.are_weights_reshaped || (!fc_info.transpose_weights && !_is_batched_fc_layer); const size_t interleave_width = 16 / input->info()->element_size(); const ITensor *weights_to_use = weights; @@ -177,11 +177,11 @@ void NEFullyConnectedLayer::configure(const ITensor *input, const ITensor *weigh weights_to_use = &_reshape_weights_output; _reshape_weights_output.allocator()->init(input->info()->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(compute_fully_connected_reshaped_weights_shape(weights->info(), - transpose_weights, + fc_info.transpose_weights, _is_batched_fc_layer, interleave_width))); // Reshape the weights - _reshape_weights_function.configure(weights, &_reshape_weights_output, transpose_weights, _is_batched_fc_layer); + _reshape_weights_function.configure(weights, &_reshape_weights_output, fc_info.transpose_weights, _is_batched_fc_layer); } const ITensor *multiply_input = input; @@ -231,7 +231,8 @@ void NEFullyConnectedLayer::configure(const ITensor *input, const ITensor *weigh } } -Status NEFullyConnectedLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, bool transpose_weights, bool are_weights_reshaped) +Status NEFullyConnectedLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, + FullyConnectedLayerInfo fc_info) { ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights, output); @@ -251,11 +252,11 @@ Status NEFullyConnectedLayer::validate(const ITensorInfo *input, const ITensorIn const ITensorInfo *weights_to_use = weights; std::unique_ptr<ITensorInfo> reshape_weights_output = input->clone(); - if(!are_weights_reshaped && (transpose_weights || is_batched_fc_layer)) + if(!fc_info.are_weights_reshaped && (fc_info.transpose_weights || is_batched_fc_layer)) { - reshape_weights_output->set_tensor_shape(compute_fully_connected_reshaped_weights_shape(weights, transpose_weights, is_batched_fc_layer, interleave_width)); + reshape_weights_output->set_tensor_shape(compute_fully_connected_reshaped_weights_shape(weights, fc_info.transpose_weights, is_batched_fc_layer, interleave_width)); - ARM_COMPUTE_RETURN_ON_ERROR(NEFullyConnectedLayerReshapeWeights::validate(weights, reshape_weights_output.get(), transpose_weights, is_batched_fc_layer)); + ARM_COMPUTE_RETURN_ON_ERROR(NEFullyConnectedLayerReshapeWeights::validate(weights, reshape_weights_output.get(), fc_info.transpose_weights, is_batched_fc_layer)); weights_to_use = reshape_weights_output.get(); } diff --git a/src/runtime/NEON/functions/NERNNLayer.cpp b/src/runtime/NEON/functions/NERNNLayer.cpp index 08017e20c3..f77566a108 100644 --- a/src/runtime/NEON/functions/NERNNLayer.cpp +++ b/src/runtime/NEON/functions/NERNNLayer.cpp @@ -57,7 +57,7 @@ Status NERNNLayer::validate(const ITensorInfo *input, const ITensorInfo *weights auto shape_info = TensorInfo(misc::shape_calculator::compute_rnn_shape(recurrent_weights, hidden_state->dimension(idx_height)), 1, input->data_type()); - ARM_COMPUTE_RETURN_ON_ERROR(NEFullyConnectedLayer::validate(input, weights, bias, &shape_info, true, false)); + ARM_COMPUTE_RETURN_ON_ERROR(NEFullyConnectedLayer::validate(input, weights, bias, &shape_info)); ARM_COMPUTE_RETURN_ON_ERROR(NEArithmeticAdditionKernel::validate(&shape_info, &shape_info, &shape_info, ConvertPolicy::SATURATE)); ARM_COMPUTE_RETURN_ON_ERROR(NEActivationLayerKernel::validate(&shape_info, &shape_info, info)); @@ -79,7 +79,7 @@ void NERNNLayer::configure(const ITensor *input, const ITensor *weights, const I // Manage intermediate buffers and configure _fully_connected_out.allocator()->init(TensorInfo(shape, 1, input->info()->data_type())); _memory_group.manage(&_fully_connected_out); - _fully_connected_kernel.configure(input, weights, bias, &_fully_connected_out, true, false); + _fully_connected_kernel.configure(input, weights, bias, &_fully_connected_out); _gemm_output.allocator()->init(TensorInfo(shape, 1, input->info()->data_type())); _memory_group.manage(&_gemm_output); |