author     Georgios Pinitas <georgios.pinitas@arm.com>  2018-05-02 14:07:55 +0100
committer  Anthony Barbier <anthony.barbier@arm.com>    2018-11-02 16:51:17 +0000
commit     e043767d068da389308507011d944e6db9e4d676 (patch)
tree       30c8965d8d03d141c7630420c6e945f78485efc7 /src/runtime
parent     019634f8befde24b19bae9b749e75a9f3ae44801 (diff)
download   ComputeLibrary-e043767d068da389308507011d944e6db9e4d676.tar.gz
COMPMID-920: Introduce prepare() stage
Change-Id: I08ddb7f6e061178e7566518b48e4e18f8f078596
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/129825
Tested-by: Jenkins <bsgcomp@arm.com>
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Diffstat (limited to 'src/runtime')
-rw-r--r--  src/runtime/CL/functions/CLConvolutionLayer.cpp          |  6
-rw-r--r--  src/runtime/CL/functions/CLFullyConnectedLayer.cpp       | 46
-rw-r--r--  src/runtime/CL/functions/CLGEMM.cpp                      | 39
-rw-r--r--  src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp      | 52
-rw-r--r--  src/runtime/CL/functions/CLWinogradConvolutionLayer.cpp  | 40
5 files changed, 111 insertions(+), 72 deletions(-)
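Before the per-file diffs, the shape of the change: each function gains a prepare() stage that performs its one-time work (weight reshaping, filter transforms) exactly once, guarded by an _is_prepared flag, and run() invokes it first so existing callers are unaffected. Below is a minimal self-contained sketch of that idiom; the class and member names are illustrative, not library code.

```cpp
#include <iostream>

// Minimal sketch of the two-stage idiom introduced here: prepare() does the
// one-time constant-tensor work behind a flag, and run() calls it first so
// callers that never call prepare() keep working unchanged.
class ExampleFunction
{
public:
    void prepare()
    {
        if(!_is_prepared)
        {
            std::cout << "one-time work: reshape constant weights\n";
            _is_prepared = true;
        }
    }

    void run()
    {
        prepare(); // idempotent: a no-op after the first call
        std::cout << "per-inference work: enqueue kernels\n";
    }

private:
    bool _is_prepared{ false };
};

int main()
{
    ExampleFunction fn;
    fn.prepare(); // optional: pay the one-time cost at load time
    fn.run();     // prepare() is now a no-op
    fn.run();
}
```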
diff --git a/src/runtime/CL/functions/CLConvolutionLayer.cpp b/src/runtime/CL/functions/CLConvolutionLayer.cpp
index 83281e1747..3d4fb113b2 100644
--- a/src/runtime/CL/functions/CLConvolutionLayer.cpp
+++ b/src/runtime/CL/functions/CLConvolutionLayer.cpp
@@ -135,5 +135,11 @@ ConvolutionMethod CLConvolutionLayer::get_convolution_method(const ITensorInfo *
void CLConvolutionLayer::run()
{
+ prepare();
_function->run();
}
+
+void CLConvolutionLayer::prepare()
+{
+ _function->prepare();
+}
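CLConvolutionLayer is a dispatcher: it selects a concrete function at configure() time and, as the hunk above shows, forwards both run() and prepare() to it. A small compilable sketch of that forwarding pattern, assuming a simplified IFunction-style interface (all names here are stand-ins):

```cpp
#include <memory>

// Sketch of the dispatcher pattern above: the wrapper owns the concrete
// implementation chosen at configure() time and forwards both stages to it.
struct IFunction
{
    virtual ~IFunction()   = default;
    virtual void run()     = 0;
    virtual void prepare() {} // default: nothing to prepare
};

struct WinogradImpl : IFunction
{
    void run() override {}
    void prepare() override { /* one-time filter transform */ }
};

struct Dispatcher : IFunction
{
    std::unique_ptr<IFunction> _function{ std::make_unique<WinogradImpl>() };

    void run() override
    {
        prepare(); // ensure the inner function's one-time work is done
        _function->run();
    }

    void prepare() override
    {
        _function->prepare();
    }
};

int main()
{
    Dispatcher conv;
    conv.run();
}
```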
diff --git a/src/runtime/CL/functions/CLFullyConnectedLayer.cpp b/src/runtime/CL/functions/CLFullyConnectedLayer.cpp
index 9b3bf48bca..151fa1b5fa 100644
--- a/src/runtime/CL/functions/CLFullyConnectedLayer.cpp
+++ b/src/runtime/CL/functions/CLFullyConnectedLayer.cpp
@@ -220,13 +220,6 @@ void CLFullyConnectedLayer::configure(const ICLTensor *input, const ICLTensor *w
_gemmlowp_output_stage.configure(&_gemmlowp_output, biases, output, output_multiplier, output_shift, output->info()->quantization_info().offset);
_gemmlowp_output.allocator()->allocate();
}
-
- // Allocate the transpose tensor if the are_weights_reshaped flag is false and once all the configure methods have been called
- if(!_are_weights_reshaped)
- {
- // Allocate the tensor for the weights reshaped
- _reshape_weights_output.allocator()->allocate();
- }
}
Status CLFullyConnectedLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, bool transpose_weights, bool are_weights_reshaped)
@@ -311,17 +304,7 @@ Status CLFullyConnectedLayer::validate(const ITensorInfo *input, const ITensorIn
void CLFullyConnectedLayer::run()
{
- // Reshape of the weights (happens only once)
- if(!_are_weights_reshaped)
- {
- ARM_COMPUTE_ERROR_ON(!_original_weights->is_used());
-
- _are_weights_reshaped = true;
- _reshape_weights_kernel.run();
-
- // Mark original weights tensor as unused
- _original_weights->mark_as_unused();
- }
+ prepare();
_memory_group.acquire();
@@ -356,3 +339,30 @@ void CLFullyConnectedLayer::run()
_memory_group.release();
}
+
+void CLFullyConnectedLayer::prepare()
+{
+ // Reshape of the weights (happens only once)
+ if(!_are_weights_reshaped)
+ {
+ ARM_COMPUTE_ERROR_ON(!_original_weights->is_used());
+
+ // Run reshape weights kernel and mark weights as unused
+ _reshape_weights_output.allocator()->allocate();
+ _reshape_weights_kernel.run();
+ _original_weights->mark_as_unused();
+
+ // Prepare GEMM and release unused weights
+ if(!_is_quantized)
+ {
+ _mm_gemm.prepare();
+ if(!_reshape_weights_output.is_used())
+ {
+ _reshape_weights_output.allocator()->free();
+ }
+ }
+
+ CLScheduler::get().queue().finish();
+ _are_weights_reshaped = true;
+ }
+}
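The fully connected layer's new prepare() moves the one-shot weight reshape out of run(): it allocates the reshaped tensor, runs the reshape kernel, retires the original weights, and, on the float path, lets the inner GEMM's prepare() decide whether the reshaped copy can be freed as well. A sketch of that lifetime handoff under mock Tensor/Gemm types (illustrative, not the library's):

```cpp
#include <cassert>
#include <cstddef>
#include <vector>

// Sketch of the weight-lifetime handoff in prepare(): reshape the weights
// once into a scratch tensor, retire the originals, then let the downstream
// GEMM's own prepare() decide whether the reshaped copy can be freed too.
struct Tensor
{
    std::vector<float> data;
    bool used{ true };

    void allocate(std::size_t n) { data.assign(n, 0.f); }
    void free()                  { data.clear(); data.shrink_to_fit(); }
    void mark_as_unused()        { used = false; }
    bool is_used() const         { return used; }
};

struct Gemm
{
    Tensor *rhs{ nullptr };

    void prepare()
    {
        // Assume the GEMM packs rhs into its own internal buffer here, so
        // the reshaped weights are no longer needed afterwards.
        rhs->mark_as_unused();
    }
};

int main()
{
    Tensor original, reshaped;
    original.allocate(64);

    // One-time: reshape into scratch, retire the original weights.
    reshaped.allocate(64);
    original.mark_as_unused();

    Gemm gemm{ &reshaped };
    gemm.prepare();
    if(!reshaped.is_used())
    {
        reshaped.free(); // neither copy survives past preparation
    }
    assert(reshaped.data.empty());
}
```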
diff --git a/src/runtime/CL/functions/CLGEMM.cpp b/src/runtime/CL/functions/CLGEMM.cpp
index 37fa0c5ba2..e735adba39 100644
--- a/src/runtime/CL/functions/CLGEMM.cpp
+++ b/src/runtime/CL/functions/CLGEMM.cpp
@@ -98,7 +98,7 @@ Status validate_arguments(const ITensorInfo *a, const ITensorInfo *b, const ICLT
CLGEMM::CLGEMM(std::shared_ptr<IMemoryManager> memory_manager)
: _memory_group(std::move(memory_manager)), _interleave_kernel(), _transpose_kernel(), _mm_kernel(), _ma_kernel(), _tmp_a(), _tmp_b(), _original_b(nullptr), _is_interleaved_transposed(false),
- _run_addition(false), _is_first_run(true), _reshape_b_only_on_first_run(false)
+ _run_addition(false), _reshape_b_only_on_first_run(false), _is_prepared(false)
{
}
@@ -114,6 +114,7 @@ void CLGEMM::configure(const ICLTensor *a, const ICLTensor *b, const ICLTensor *
// Check if we need to reshape the matrix B only on the first run
_reshape_b_only_on_first_run = gemm_info.reshape_b_only_on_first_run();
+ _is_prepared = false;
const ICLTensor *matrix_a = a;
const ICLTensor *matrix_b = b;
@@ -169,7 +170,10 @@ void CLGEMM::configure(const ICLTensor *a, const ICLTensor *b, const ICLTensor *
{
// Allocate intermediate tensors
_tmp_a.allocator()->allocate();
- _tmp_b.allocator()->allocate();
+ if(!_reshape_b_only_on_first_run)
+ {
+ _tmp_b.allocator()->allocate();
+ }
}
// Configure matrix addition kernel
@@ -188,6 +192,8 @@ Status CLGEMM::validate(const ITensorInfo *a, const ITensorInfo *b, const ICLTen
void CLGEMM::run()
{
+ prepare();
+
_memory_group.acquire();
if(_is_interleaved_transposed)
@@ -195,18 +201,7 @@ void CLGEMM::run()
// Run interleave kernel
CLScheduler::get().enqueue(_interleave_kernel, false);
- if(_is_first_run)
- {
- // Run transpose kernel
- CLScheduler::get().enqueue(_transpose_kernel, false);
-
- // Mark original b matrix as unused
- if(_reshape_b_only_on_first_run)
- {
- _original_b->mark_as_unused();
- }
- }
- else if(!_reshape_b_only_on_first_run)
+ if(!_reshape_b_only_on_first_run)
{
// Run transpose kernel
CLScheduler::get().enqueue(_transpose_kernel, false);
@@ -223,6 +218,20 @@ void CLGEMM::run()
}
_memory_group.release();
+}

- _is_first_run = false;
+void CLGEMM::prepare()
+{
+ if(!_is_prepared)
+ {
+ if(_is_interleaved_transposed && _reshape_b_only_on_first_run)
+ {
+ // Run transpose kernel
+ _tmp_b.allocator()->allocate();
+ CLScheduler::get().enqueue(_transpose_kernel, false);
+ _original_b->mark_as_unused();
+ }
+ CLScheduler::get().queue().finish();
+ _is_prepared = true;
+ }
}
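Note the matching change in configure(): when reshape_b_only_on_first_run is set, _tmp_b is no longer allocated eagerly; prepare() allocates it right before the one-time transpose. A compilable sketch of that lazy-allocation split, with names loosely mirroring the diff (everything else is illustrative):

```cpp
#include <cassert>
#include <cstddef>
#include <vector>

// Sketch of the allocation split in CLGEMM above: when B is only reshaped
// on the first run, its transposed copy is allocated lazily in prepare(),
// next to the one-time transpose, instead of eagerly in configure().
struct GemmSketch
{
    std::vector<float> tmp_b; // transposed copy of B
    bool reshape_b_only_on_first_run{ true };
    bool is_prepared{ false };

    void configure(std::size_t n)
    {
        b_elems = n;
        if(!reshape_b_only_on_first_run)
        {
            tmp_b.resize(b_elems); // B changes every run: allocate up front
        }
    }

    void prepare()
    {
        if(!is_prepared)
        {
            if(reshape_b_only_on_first_run)
            {
                tmp_b.resize(b_elems); // one-time transpose target
                // ... enqueue the transpose kernel, mark original B unused ...
            }
            is_prepared = true;
        }
    }

    std::size_t b_elems{ 0 };
};

int main()
{
    GemmSketch gemm;
    gemm.configure(256);
    assert(gemm.tmp_b.empty()); // nothing committed before prepare()
    gemm.prepare();
    assert(gemm.tmp_b.size() == 256);
}
```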
diff --git a/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp b/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp
index cf8a6a8a78..610eec4d67 100644
--- a/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp
+++ b/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp
@@ -91,7 +91,7 @@ void CLConvolutionLayerReshapeWeights::run()
CLGEMMConvolutionLayer::CLGEMMConvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager)
: _memory_group(memory_manager), _reshape_weights(), _im2col_kernel(), _mm_gemm(memory_manager), _mm_gemmlowp(memory_manager), _gemmlowp_output_stage(), _col2im_kernel(), _activationlayer_function(),
- _original_weights(nullptr), _im2col_output(), _weights_reshaped(), _gemm_output(), _tmp_output(), _is_quantized(false), _is_first_run(true), _is_activationlayer_enabled(false)
+ _original_weights(nullptr), _im2col_output(), _weights_reshaped(), _gemm_output(), _tmp_output(), _is_quantized(false), _is_activationlayer_enabled(false), _is_prepared(false)
{
}
@@ -165,7 +165,7 @@ void CLGEMMConvolutionLayer::configure(const ICLTensor *input, const ICLTensor *
dilation,
act_info));
- _is_first_run = true;
+ _is_prepared = false;
_original_weights = weights;
_is_quantized = is_data_type_quantized_asymmetric(input->info()->data_type());
@@ -258,9 +258,6 @@ void CLGEMMConvolutionLayer::configure(const ICLTensor *input, const ICLTensor *
ARM_COMPUTE_ERROR_ON_MSG((output->info()->dimension(0) != conv_w) || (output->info()->dimension(1) != conv_h), "Output shape does not match the expected one");
- // Allocate intermediate tensor
- _weights_reshaped.allocator()->allocate();
-
//Configure Activation Layer
_is_activationlayer_enabled = act_info.enabled();
@@ -305,7 +302,7 @@ Status CLGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorI
unsigned int mat_weights_cols = weights->dimension(3);
unsigned int mat_weights_rows = weights->dimension(0) * weights->dimension(1) * weights->dimension(2) + bias_element;
- ARM_COMPUTE_RETURN_ON_ERROR(CLConvolutionLayerReshapeWeights::validate(weights, is_quantized? nullptr:biases, nullptr));
+ ARM_COMPUTE_RETURN_ON_ERROR(CLConvolutionLayerReshapeWeights::validate(weights, is_quantized ? nullptr : biases, nullptr));
// Create tensor info for im2col reshaped inputs
const unsigned int mat_input_cols = mat_weights_rows;
@@ -369,16 +366,7 @@ Status CLGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorI
void CLGEMMConvolutionLayer::run()
{
- // Run weights reshaping (Runs once for every configure)
- if(_is_first_run)
- {
- ARM_COMPUTE_ERROR_ON(!_original_weights->is_used());
-
- _reshape_weights.run();
-
- // Mark original weights tensor as unused
- _original_weights->mark_as_unused();
- }
+ prepare();
_memory_group.acquire();
@@ -398,13 +386,6 @@ void CLGEMMConvolutionLayer::run()
{
// Run gemm
_mm_gemm.run();
-
- // Release reshaped weights if marked unused by CLGEMM
- if(_is_first_run && !_weights_reshaped.is_used())
- {
- CLScheduler::get().queue().finish();
- _weights_reshaped.allocator()->free();
- }
}
// Reshape output matrix
@@ -417,6 +398,29 @@ void CLGEMMConvolutionLayer::run()
}
_memory_group.release();
+}

- _is_first_run = false;
+void CLGEMMConvolutionLayer::prepare()
+{
+ if(!_is_prepared)
+ {
+ // Run weights reshaping and mark as unused
+ ARM_COMPUTE_ERROR_ON(!_original_weights->is_used());
+ _weights_reshaped.allocator()->allocate();
+ _reshape_weights.run();
+ _original_weights->mark_as_unused();
+
+ // Run GEMM prepare
+ if(!_is_quantized)
+ {
+ _mm_gemm.prepare();
+ if(!_weights_reshaped.is_used())
+ {
+ _weights_reshaped.allocator()->free();
+ }
+ }
+
+ CLScheduler::get().queue().finish();
+ _is_prepared = true;
+ }
}
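Both convolution variants end prepare() with CLScheduler::get().queue().finish() before setting _is_prepared. The one-time kernels are only enqueued, so the host must wait for the queue to drain before any buffer they read is freed. A mock-queue sketch of that ordering constraint (CommandQueue is a stand-in, not a CL API):

```cpp
#include <functional>
#include <queue>

// Sketch of why prepare() ends with queue().finish() before the flag flips:
// the one-time kernels are merely *enqueued*, so the host must wait for them
// to complete before freeing buffers they read from.
struct CommandQueue
{
    std::queue<std::function<void()>> pending;

    void enqueue(std::function<void()> job) { pending.push(std::move(job)); }

    void finish()
    {
        while(!pending.empty())
        {
            pending.front()();
            pending.pop();
        }
    }
};

int main()
{
    CommandQueue queue;
    bool reshape_done = false;

    // One-time reshape kernel, enqueued asynchronously.
    queue.enqueue([&reshape_done] { reshape_done = true; });

    // Freeing the source buffer before finish() could race the kernel;
    // after finish() the free is safe.
    queue.finish();
    return reshape_done ? 0 : 1;
}
```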
diff --git a/src/runtime/CL/functions/CLWinogradConvolutionLayer.cpp b/src/runtime/CL/functions/CLWinogradConvolutionLayer.cpp
index 5ff4fbceee..025a16b4fb 100644
--- a/src/runtime/CL/functions/CLWinogradConvolutionLayer.cpp
+++ b/src/runtime/CL/functions/CLWinogradConvolutionLayer.cpp
@@ -69,7 +69,7 @@ bool check_support_fast_math(const Size2D &output_tile, const Size2D &kernel_siz
CLWinogradConvolutionLayer::CLWinogradConvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager)
: _memory_group(memory_manager), _batched_mm(memory_manager), _input_transform(), _filter_transform(), _output_transform(), _activationlayer_function(), _input0(), _input1(), _batched_mm_output(),
- _is_first_run(true), _is_activationlayer_enabled(false)
+ _original_weights(nullptr), _is_prepared(false), _is_activationlayer_enabled(false)
{
}
@@ -97,6 +97,9 @@ void CLWinogradConvolutionLayer::configure(ICLTensor *input, const ICLTensor *we
conv_info,
input->info()->data_layout());
+ _is_prepared = false;
+ _original_weights = weights;
+
// Manage intermediate tensors
_memory_group.manage(&_input0);
_memory_group.manage(&_batched_mm_output);
@@ -124,7 +127,6 @@ void CLWinogradConvolutionLayer::configure(ICLTensor *input, const ICLTensor *we
// Allocate temporary tensors
_input0.allocator()->allocate();
- _input1.allocator()->allocate();
_batched_mm_output.allocator()->allocate();
}
@@ -182,11 +184,7 @@ Status CLWinogradConvolutionLayer::validate(const ITensorInfo *input, const ITen
void CLWinogradConvolutionLayer::run()
{
- if(_is_first_run)
- {
- // Run filter transform
- CLScheduler::get().enqueue(_filter_transform, false);
- }
+ prepare();
_memory_group.acquire();
@@ -196,13 +194,6 @@ void CLWinogradConvolutionLayer::run()
// Run batched matrix multiplication
_batched_mm.run();
- // Release reshaped weights if marked unused by CLGEMM
- if(_is_first_run && !_input1.is_used())
- {
- CLScheduler::get().queue().finish();
- _input1.allocator()->free();
- }
-
// Run output transform
CLScheduler::get().enqueue(_output_transform);
@@ -212,6 +203,25 @@ void CLWinogradConvolutionLayer::run()
}
_memory_group.release();
+}
+
+void CLWinogradConvolutionLayer::prepare()
+{
+ if(!_is_prepared)
+ {
+ // Run filter transform and mark original weights as unused
+ _input1.allocator()->allocate();
+ CLScheduler::get().enqueue(_filter_transform, false);
+ _original_weights->mark_as_unused();
+
+ // Prepare GEMM and release reshaped weights if marked unused by CLGEMM
+ _batched_mm.prepare();
+ if(!_input1.is_used())
+ {
+ _input1.allocator()->free();
+ }

- _is_first_run = false;
+ CLScheduler::get().queue().finish();
+ _is_prepared = true;
+ }
}
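Finally, every configure() in this patch resets _is_prepared to false, so reconfiguring a function re-arms its one-time preparation on the next run(). A sketch of that reset-on-configure contract, with illustrative names:

```cpp
#include <cassert>

// Sketch of the reset-on-configure behaviour threaded through every diff
// above (_is_prepared = false in configure()): re-configuring a function
// re-triggers its one-time preparation on the next run().
struct FunctionSketch
{
    int  prepare_count{ 0 };
    bool is_prepared{ false };

    void configure() { is_prepared = false; } // new weights/shapes

    void prepare()
    {
        if(!is_prepared)
        {
            ++prepare_count; // the one-time work happens here
            is_prepared = true;
        }
    }

    void run() { prepare(); /* ... enqueue per-inference kernels ... */ }
};

int main()
{
    FunctionSketch fn;
    fn.configure();
    fn.run();
    fn.run();
    assert(fn.prepare_count == 1); // prepared once per configuration

    fn.configure(); // reconfigured: must prepare again
    fn.run();
    assert(fn.prepare_count == 2);
}
```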