From 1562be3e8a449360a90af75f6f1481a30d41be75 Mon Sep 17 00:00:00 2001
From: Georgios Pinitas
Date: Thu, 8 Mar 2018 19:09:19 +0000
Subject: COMPMID-998: Release unused trainable parameters.

Change-Id: I361a520f34080016a25bc86e1e6789777c5152c1
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/124432
Reviewed-by: Anthony Barbier
Tested-by: Jenkins
---
 .../CL/functions/CLDepthwiseConvolutionLayer.cpp   | 23 +++++++++++++++-------
 src/runtime/CL/functions/CLFullyConnectedLayer.cpp |  8 +++++++-
 .../CL/functions/CLGEMMConvolutionLayer.cpp        | 14 +++++++++----
 .../CL/functions/CLLocallyConnectedLayer.cpp       | 12 ++++++++---
 .../NEON/functions/NEDepthwiseConvolutionLayer.cpp | 21 ++++++++++++++++----
 .../NEON/functions/NEFullyConnectedLayer.cpp       |  8 +++++++-
 .../NEON/functions/NEGEMMConvolutionLayer.cpp      |  8 +++++++-
 .../NEON/functions/NELocallyConnectedLayer.cpp     | 12 ++++++++---
 8 files changed, 82 insertions(+), 24 deletions(-)

(limited to 'src/runtime')

diff --git a/src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp b/src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp
index 112af60f35..8d7c92bdf1 100644
--- a/src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp
+++ b/src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp
@@ -64,7 +64,7 @@ void CLDepthwiseConvolutionLayer3x3::run()
 
 CLDepthwiseConvolutionLayer::CLDepthwiseConvolutionLayer()
     : _im2col_kernel(), _weights_reshape_kernel(), _v2mm_kernel(), _vector_to_tensor_kernel(), _output_stage_kernel(), _v2mm_input_fill_border(), _v2mm_weights_fill_border(), _input_reshaped(),
-      _weights_reshaped(), _v2mm_output(), _output_reshaped(), _is_quantized(false)
+      _weights_reshaped(), _v2mm_output(), _output_reshaped(), _is_first_run(true), _is_quantized(false), _original_weights(nullptr)
 {
 }
 
@@ -78,7 +78,9 @@ void CLDepthwiseConvolutionLayer::configure(ICLTensor *input, const ICLTensor *w
     const size_t weights_h = weights->info()->dimension(1);
     const size_t weights_z = weights->info()->dimension(2);
 
-    _is_quantized = is_data_type_quantized_asymmetric(input->info()->data_type());
+    _is_first_run     = true;
+    _original_weights = weights;
+    _is_quantized     = is_data_type_quantized_asymmetric(input->info()->data_type());
 
     bool            append_bias = (biases != nullptr) && !_is_quantized;
     const GPUTarget gpu_target  = CLScheduler::get().target();
@@ -154,16 +156,23 @@ void CLDepthwiseConvolutionLayer::configure(ICLTensor *input, const ICLTensor *w
 
 void CLDepthwiseConvolutionLayer::run()
 {
-    CLScheduler::get().enqueue(_im2col_kernel);
+    // Run weights reshaping (Runs once for every configure)
+    if(_is_first_run)
+    {
+        ARM_COMPUTE_ERROR_ON(!_original_weights->is_used());
 
-    CLScheduler::get().enqueue(_weights_reshape_kernel);
+        CLScheduler::get().enqueue(_weights_reshape_kernel);
+        CLScheduler::get().enqueue(_v2mm_weights_fill_border);
+        _is_first_run = false;
+
+        // Mark original weights tensor as unused
+        _original_weights->mark_as_unused();
+    }
 
+    CLScheduler::get().enqueue(_im2col_kernel);
     CLScheduler::get().enqueue(_v2mm_input_fill_border);
-    CLScheduler::get().enqueue(_v2mm_weights_fill_border);
     CLScheduler::get().enqueue(_v2mm_kernel);
-
     CLScheduler::get().enqueue(_vector_to_tensor_kernel);
-
     if(_is_quantized)
     {
         CLScheduler::get().enqueue(_output_stage_kernel);
diff --git a/src/runtime/CL/functions/CLFullyConnectedLayer.cpp b/src/runtime/CL/functions/CLFullyConnectedLayer.cpp
index 2b4670b98c..676706fb17 100644
--- a/src/runtime/CL/functions/CLFullyConnectedLayer.cpp
+++ b/src/runtime/CL/functions/CLFullyConnectedLayer.cpp
@@ -76,7 +76,7 @@ Status CLFullyConnectedLayerReshapeWeights::validate(const ITensorInfo *input, c
 
 CLFullyConnectedLayer::CLFullyConnectedLayer(std::shared_ptr<IMemoryManager> memory_manager)
     : _memory_group(memory_manager), _im2col_kernel(), _reshape_weights_kernel(), _mm_kernel(), _mm_gemmlowp(memory_manager), _gemmlowp_output_stage(), _accumulate_biases_kernel(), _im2col_output(),
-      _gemmlowp_output(), _reshape_weights_output(), _are_weights_reshaped(true), _is_fc_after_conv(true), _accumulate_biases(false), _is_quantized(false)
+      _gemmlowp_output(), _reshape_weights_output(), _are_weights_reshaped(true), _is_fc_after_conv(true), _accumulate_biases(false), _is_quantized(false), _original_weights(nullptr)
 {
 }
 
@@ -152,6 +152,7 @@ void CLFullyConnectedLayer::configure(const ICLTensor *input, const ICLTensor *w
     _is_fc_after_conv  = true;
     _accumulate_biases = false;
     _is_quantized      = is_data_type_quantized_asymmetric(input->info()->data_type());
+    _original_weights  = weights;
 
     // Configure gemmlowp output
     if(_is_quantized)
@@ -316,8 +317,13 @@ void CLFullyConnectedLayer::run()
     // Reshape of the weights (happens only once)
     if(!_are_weights_reshaped)
     {
+        ARM_COMPUTE_ERROR_ON(!_original_weights->is_used());
+
         _are_weights_reshaped = true;
         _reshape_weights_kernel.run();
+
+        // Mark original weights tensor as unused
+        _original_weights->mark_as_unused();
     }
 
     _memory_group.acquire();
diff --git a/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp b/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp
index e7ad62f5ff..f43e100565 100644
--- a/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp
+++ b/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp
@@ -90,8 +90,8 @@ void CLConvolutionLayerReshapeWeights::run()
 }
 
 CLGEMMConvolutionLayer::CLGEMMConvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager)
-    : _memory_group(memory_manager), _reshape_weights(), _im2col_kernel(), _mm_gemm(memory_manager), _mm_gemmlowp(memory_manager), _gemmlowp_output_stage(), _col2im_kernel(), _im2col_output(),
-      _weights_reshaped(), _gemm_output(), _tmp_output(), _is_quantized(false), _is_first_run(true)
+    : _memory_group(memory_manager), _reshape_weights(), _im2col_kernel(), _mm_gemm(memory_manager), _mm_gemmlowp(memory_manager), _gemmlowp_output_stage(), _col2im_kernel(), _original_weights(nullptr),
+      _im2col_output(), _weights_reshaped(), _gemm_output(), _tmp_output(), _is_quantized(false), _is_first_run(true)
 {
 }
 
@@ -164,7 +164,9 @@ void CLGEMMConvolutionLayer::configure(const ICLTensor *input, const ICLTensor *
                                                               weights_info,
                                                               dilation));
 
-    _is_quantized = is_data_type_quantized_asymmetric(input->info()->data_type());
+    _is_first_run     = true;
+    _original_weights = weights;
+    _is_quantized     = is_data_type_quantized_asymmetric(input->info()->data_type());
 
     const DataType dt = input->info()->data_type();
 
@@ -349,9 +351,13 @@ void CLGEMMConvolutionLayer::run()
     // Run weights reshaping (Runs once for every configure)
     if(_is_first_run)
     {
-        _reshape_weights.run();
+        ARM_COMPUTE_ERROR_ON(!_original_weights->is_used());
 
+        _reshape_weights.run();
         _is_first_run = false;
+
+        // Mark original weights tensor as unused
+        _original_weights->mark_as_unused();
     }
 
     _memory_group.acquire();
diff --git a/src/runtime/CL/functions/CLLocallyConnectedLayer.cpp b/src/runtime/CL/functions/CLLocallyConnectedLayer.cpp
index a3eb5010bd..986fe00973 100644
--- a/src/runtime/CL/functions/CLLocallyConnectedLayer.cpp
+++ b/src/runtime/CL/functions/CLLocallyConnectedLayer.cpp
@@ -73,7 +73,7 @@ void calculate_shapes(const ITensorInfo *input, const ITensorInfo *weights, cons
 
 CLLocallyConnectedLayer::CLLocallyConnectedLayer(std::shared_ptr<IMemoryManager> memory_manager)
     : _memory_group(std::move(memory_manager)), _input_im2col_kernel(), _weights_reshape_kernel(), _mm_kernel(), _output_col2im_kernel(), _input_im2col_reshaped(), _weights_reshaped(), _gemm_output(),
-      _is_first_run(false)
+      _is_first_run(false), _original_weights(nullptr)
 {
 }
 
@@ -126,8 +126,9 @@ void CLLocallyConnectedLayer::configure(const ICLTensor *input, const ICLTensor
     ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
     ARM_COMPUTE_ERROR_THROW_ON(CLLocallyConnectedLayer::validate(input->info(), weights->info(), biases == nullptr ? nullptr : biases->info(), output->info(), conv_info));
 
-    bool _has_bias = (biases != nullptr);
-    _is_first_run  = true;
+    bool _has_bias     = (biases != nullptr);
+    _original_weights  = weights;
+    _is_first_run      = true;
 
     const unsigned int kernel_width  = weights->info()->dimension(0);
     const unsigned int kernel_height = weights->info()->dimension(1);
@@ -169,8 +170,13 @@ void CLLocallyConnectedLayer::run()
     // Run weights reshaping (Runs once for every configure)
     if(_is_first_run)
     {
+        ARM_COMPUTE_ERROR_ON(!_original_weights->is_used());
+
         _is_first_run = false;
         CLScheduler::get().enqueue(_weights_reshape_kernel);
+
+        // Mark original weights tensor as unused
+        _original_weights->mark_as_unused();
     }
 
     _memory_group.acquire();
diff --git a/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp b/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp
index 95fcf8805e..f28ed715f6 100644
--- a/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp
+++ b/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp
@@ -153,7 +153,7 @@ void NEDepthwiseConvolutionLayer3x3::run()
 
 NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayer()
     : _im2col_kernel(), _weights_reshape_kernel(), _v2mm_kernel(), _vector_to_tensor_kernel(), _output_stage_kernel(), _v2mm_input_fill_border(), _v2mm_weights_fill_border(), _input_reshaped(),
-      _weights_reshaped(), _v2mm_output(), _output_reshaped(), _is_quantized(false)
+      _weights_reshaped(), _v2mm_output(), _output_reshaped(), _is_first_run(true), _is_quantized(false), _original_weights(nullptr)
 {
 }
 
@@ -167,7 +167,9 @@ void NEDepthwiseConvolutionLayer::configure(ITensor *input, const ITensor *weigh
     const size_t weights_h = weights->info()->dimension(1);
     const size_t weights_z = weights->info()->dimension(2);
 
-    _is_quantized = is_data_type_quantized_asymmetric(input->info()->data_type());
+    _is_quantized     = is_data_type_quantized_asymmetric(input->info()->data_type());
+    _is_first_run     = true;
+    _original_weights = weights;
 
     // Should bias be appended ?
     bool append_bias = (biases != nullptr) && !_is_quantized;
@@ -241,10 +243,21 @@ void NEDepthwiseConvolutionLayer::configure(ITensor *input, const ITensor *weigh
 
 void NEDepthwiseConvolutionLayer::run()
 {
+    // Run weights reshaping (Runs once for every configure)
+    if(_is_first_run)
+    {
+        ARM_COMPUTE_ERROR_ON(!_original_weights->is_used());
+
+        NEScheduler::get().schedule(&_weights_reshape_kernel, Window::DimX);
+        NEScheduler::get().schedule(&_v2mm_weights_fill_border, Window::DimX);
+        _is_first_run = false;
+
+        // Mark original weights tensor as unused
+        _original_weights->mark_as_unused();
+    }
+
     NEScheduler::get().schedule(&_im2col_kernel, Window::DimX);
-    NEScheduler::get().schedule(&_weights_reshape_kernel, Window::DimX);
     NEScheduler::get().schedule(&_v2mm_input_fill_border, Window::DimX);
-    NEScheduler::get().schedule(&_v2mm_weights_fill_border, Window::DimX);
     NEScheduler::get().schedule(&_v2mm_kernel, Window::DimX);
     NEScheduler::get().schedule(&_vector_to_tensor_kernel, Window::DimX);
     if(_is_quantized)
diff --git a/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp b/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp
index 26b7271710..b310ad35e3 100644
--- a/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp
+++ b/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp
@@ -132,7 +132,7 @@ void NEFullyConnectedLayerReshapeWeights::run()
 
 NEFullyConnectedLayer::NEFullyConnectedLayer(std::shared_ptr<IMemoryManager> memory_manager)
     : _memory_group(std::move(memory_manager)), _im2col_kernel(), _reshape_weights_kernel(), _interleave4x4_kernel(), _mm_kernel(), _accumulate_biases_kernel(), _im2col_output(), _interleave4x4_output(),
-      _reshape_weights_output(), _are_weights_reshaped(false), _is_batched_fc_layer(false), _linearize_input(false), _accumulate_biases(false)
+      _reshape_weights_output(), _are_weights_reshaped(false), _is_batched_fc_layer(false), _linearize_input(false), _accumulate_biases(false), _original_weights(nullptr)
 {
 }
 
@@ -163,6 +163,7 @@ void NEFullyConnectedLayer::configure(const ITensor *input, const ITensor *weigh
     const int    num_input_dimensions = input->info()->tensor_shape().num_dimensions() - num_batch_dimensions;
     const size_t linear_input_size    = input->info()->tensor_shape().total_size_lower(num_input_dimensions);
 
+    _original_weights     = weights;
     _linearize_input      = (input->info()->tensor_shape().x() != linear_input_size) || (num_input_dimensions > 1 && linear_input_size == 1);
     _are_weights_reshaped = are_weights_reshaped;
     _accumulate_biases    = biases != nullptr;
@@ -324,8 +325,13 @@ void NEFullyConnectedLayer::run()
     // Reshape of the weights (happens only once)
     if(!_are_weights_reshaped)
     {
+        ARM_COMPUTE_ERROR_ON(!_original_weights->is_used());
+
         _are_weights_reshaped = true;
         _reshape_weights_kernel.run();
+
+        // Mark original weights tensor as unused
+        _original_weights->mark_as_unused();
     }
 
     _memory_group.acquire();
diff --git a/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp b/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp
index d9707d95e0..b2dd0227a5 100644
--- a/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp
+++ b/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp
@@ -217,7 +217,7 @@ Status validate_and_initialize_values(const ITensorInfo *input, const ITensorInf
 
 NEGEMMConvolutionLayer::NEGEMMConvolutionLayer(const std::shared_ptr<IMemoryManager> &memory_manager)
     : _asm_glue(), _memory_group(memory_manager), _input_im2col_kernel(), _input_interleave_kernel(), _reshape_weights(), _mm_kernel(), _mm_gemmlowp(memory_manager), _gemmlowp_output_stage(),
-      _output_col2im_kernel(), _input_im2col_reshaped(), _input_interleaved_reshaped(), _weights_reshaped(), _gemm_output(), _tmp_output(), _workspace(), _append_bias(false),
+      _output_col2im_kernel(), _original_weights(nullptr), _input_im2col_reshaped(), _input_interleaved_reshaped(), _weights_reshaped(), _gemm_output(), _tmp_output(), _workspace(), _append_bias(false),
       _is_fully_connected_convolution(false), _are_weights_reshaped(false), _is_quantized(false), _is_interleaved(false)
 {
 }
 
@@ -267,6 +267,7 @@ void NEGEMMConvolutionLayer::configure(const ITensor *input, const ITensor *weig
 
     ARM_COMPUTE_ERROR_THROW_ON(status);
 
+    _original_weights = weights;
     const unsigned int fixed_point_position = input->info()->fixed_point_position();
 
     const ITensor *biases_to_use = (_append_bias) ? biases : nullptr;
@@ -549,8 +550,13 @@ void NEGEMMConvolutionLayer::run()
     // Run weights reshaping (Runs once for every configure)
     if(!_are_weights_reshaped)
    {
+        ARM_COMPUTE_ERROR_ON(!_original_weights->is_used());
+
         _are_weights_reshaped = true;
         _reshape_weights.run();
+
+        // Mark original weights tensor as unused
+        _original_weights->mark_as_unused();
     }
 
     _memory_group.acquire();
diff --git a/src/runtime/NEON/functions/NELocallyConnectedLayer.cpp b/src/runtime/NEON/functions/NELocallyConnectedLayer.cpp
index 973559441f..913acf86a2 100644
--- a/src/runtime/NEON/functions/NELocallyConnectedLayer.cpp
+++ b/src/runtime/NEON/functions/NELocallyConnectedLayer.cpp
@@ -73,7 +73,7 @@ void calculate_shapes(const ITensorInfo *input, const ITensorInfo *weights, cons
 
 NELocallyConnectedLayer::NELocallyConnectedLayer(std::shared_ptr<IMemoryManager> memory_manager)
     : _memory_group(std::move(memory_manager)), _input_im2col_kernel(), _weights_reshape_kernel(), _mm_kernel(), _output_col2im_kernel(), _input_im2col_reshaped(), _weights_reshaped(), _gemm_output(),
-      _is_first_run(false)
+      _is_first_run(false), _original_weights(nullptr)
 {
 }
 
@@ -126,8 +126,9 @@ void NELocallyConnectedLayer::configure(const ITensor *input, const ITensor *wei
     ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
     ARM_COMPUTE_ERROR_THROW_ON(NELocallyConnectedLayer::validate(input->info(), weights->info(), biases == nullptr ? nullptr : biases->info(), output->info(), conv_info));
 
-    bool _has_bias = (biases != nullptr);
-    _is_first_run  = true;
+    bool _has_bias     = (biases != nullptr);
+    _is_first_run      = true;
+    _original_weights  = weights;
 
     const unsigned int kernel_width  = weights->info()->dimension(0);
     const unsigned int kernel_height = weights->info()->dimension(1);
@@ -169,8 +170,13 @@ void NELocallyConnectedLayer::run()
     // Run weights reshaping (Runs once for every configure)
    if(_is_first_run)
     {
+        ARM_COMPUTE_ERROR_ON(!_original_weights->is_used());
+
         _is_first_run = false;
         NEScheduler::get().schedule(&_weights_reshape_kernel, 3);
+
+        // Mark original weights tensor as unused
+        _original_weights->mark_as_unused();
     }
 
     _memory_group.acquire();
--
cgit v1.2.1
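
Editor's note: the standalone C++ sketch below is not part of the patch. It illustrates, in minimal form, the pattern the commit applies to all eight functions: reshape the trainable parameters once, on the first run() after configure(), and then mark the original weights tensor as unused so that its backing memory can be released. The Tensor and ReshapingLayer types here are hypothetical stand-ins that only mirror the spirit of the arm_compute is_used()/mark_as_unused() interface; they are not the library's classes.

#include <cassert>
#include <iostream>
#include <utility>
#include <vector>

// Hypothetical tensor: owns a buffer and a "used" flag a memory manager could consult.
class Tensor
{
public:
    explicit Tensor(std::vector<float> data) : _data(std::move(data)), _is_used(true) {}

    bool is_used() const { return _is_used; }

    // Flag the tensor as no longer needed and free its buffer immediately
    // (a real memory manager would decide when to release it).
    void mark_as_unused()
    {
        _is_used = false;
        _data.clear();
        _data.shrink_to_fit();
    }

    const std::vector<float> &data() const { return _data; }

private:
    std::vector<float> _data;
    bool               _is_used;
};

// Hypothetical layer that needs its weights only in reshaped form after the first run.
class ReshapingLayer
{
public:
    void configure(Tensor *weights)
    {
        _original_weights = weights;
        _is_first_run     = true;
    }

    void run()
    {
        // Run weights reshaping once for every configure(), then release the originals.
        if(_is_first_run)
        {
            assert(_original_weights->is_used());

            _weights_reshaped = _original_weights->data(); // stand-in for the reshape kernel
            _is_first_run     = false;

            // The original weights are never read again; let their memory go.
            _original_weights->mark_as_unused();
        }

        // Remaining kernels would operate on the reshaped copy from here on.
        std::cout << "run with " << _weights_reshaped.size() << " reshaped weights\n";
    }

private:
    Tensor            *_original_weights{ nullptr };
    bool               _is_first_run{ true };
    std::vector<float> _weights_reshaped{};
};

int main()
{
    Tensor         weights({ 1.f, 2.f, 3.f, 4.f });
    ReshapingLayer layer;

    layer.configure(&weights);
    layer.run(); // first run: reshapes and marks the original weights unused
    layer.run(); // subsequent runs skip the reshape entirely

    std::cout << "original weights still used? " << std::boolalpha << weights.is_used() << '\n';
    return 0;
}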