Diffstat (limited to 'src/runtime/CL/functions/CLWinogradConvolutionLayer.cpp')
-rw-r--r--  src/runtime/CL/functions/CLWinogradConvolutionLayer.cpp  |  40
1 file changed, 25 insertions(+), 15 deletions(-)
diff --git a/src/runtime/CL/functions/CLWinogradConvolutionLayer.cpp b/src/runtime/CL/functions/CLWinogradConvolutionLayer.cpp
index 5ff4fbceee..025a16b4fb 100644
--- a/src/runtime/CL/functions/CLWinogradConvolutionLayer.cpp
+++ b/src/runtime/CL/functions/CLWinogradConvolutionLayer.cpp
@@ -69,7 +69,7 @@ bool check_support_fast_math(const Size2D &output_tile, const Size2D &kernel_siz
 CLWinogradConvolutionLayer::CLWinogradConvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager)
     : _memory_group(memory_manager), _batched_mm(memory_manager), _input_transform(), _filter_transform(), _output_transform(), _activationlayer_function(), _input0(), _input1(), _batched_mm_output(),
-      _is_first_run(true), _is_activationlayer_enabled(false)
+      _original_weights(nullptr), _is_prepared(false), _is_activationlayer_enabled(false)
 {
 }
@@ -97,6 +97,9 @@ void CLWinogradConvolutionLayer::configure(ICLTensor *input, const ICLTensor *we
                                                      conv_info,
                                                      input->info()->data_layout());
+    _is_prepared      = false;
+    _original_weights = weights;
+
     // Manage intermediate tensors
     _memory_group.manage(&_input0);
     _memory_group.manage(&_batched_mm_output);
@@ -124,7 +127,6 @@ void CLWinogradConvolutionLayer::configure(ICLTensor *input, const ICLTensor *we
     // Allocate temporary tensors
     _input0.allocator()->allocate();
-    _input1.allocator()->allocate();
     _batched_mm_output.allocator()->allocate();
 }
@@ -182,11 +184,7 @@ Status CLWinogradConvolutionLayer::validate(const ITensorInfo *input, const ITen
 void CLWinogradConvolutionLayer::run()
 {
-    if(_is_first_run)
-    {
-        // Run filter transform
-        CLScheduler::get().enqueue(_filter_transform, false);
-    }
+    prepare();
 
     _memory_group.acquire();
@@ -196,13 +194,6 @@ void CLWinogradConvolutionLayer::run()
     // Run batched matrix multiplication
     _batched_mm.run();
-    // Release reshaped weights if marked unused by CLGEMM
-    if(_is_first_run && !_input1.is_used())
-    {
-        CLScheduler::get().queue().finish();
-        _input1.allocator()->free();
-    }
-
     // Run output transform
     CLScheduler::get().enqueue(_output_transform);
@@ -212,6 +203,25 @@ void CLWinogradConvolutionLayer::run()
     }
     _memory_group.release();
+}
+
+void CLWinogradConvolutionLayer::prepare()
+{
+    if(!_is_prepared)
+    {
+        // Run filter transform and mark original weights as unused
+        _input1.allocator()->allocate();
+        CLScheduler::get().enqueue(_filter_transform, false);
+        _original_weights->mark_as_unused();
+
+        // Prepare GEMM and release reshaped weights if marked unused by CLGEMM
+        _batched_mm.prepare();
+        if(!_input1.is_used())
+        {
+            _input1.allocator()->free();
+        }
-    _is_first_run = false;
+        CLScheduler::get().queue().finish();
+        _is_prepared = true;
+    }
 }
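
The change above swaps the _is_first_run flag for the prepare()/_is_prepared idiom: one-time work (running the filter transform, dropping the original weights, and preparing the batched GEMM) moves out of run() into an idempotent prepare() that run() calls unconditionally but that only does work on its first invocation. The sketch below is a minimal standalone illustration of that idiom, assuming nothing beyond what the diff shows; the class, tensor type, and member names are invented for the example and are not Compute Library API.

#include <iostream>
#include <vector>

// Stand-in for a tensor: owns a buffer that can be freed once it is no longer needed.
struct Tensor
{
    std::vector<float> data;

    void free()
    {
        data.clear();
        data.shrink_to_fit();
    }
    bool empty() const
    {
        return data.empty();
    }
};

// Hypothetical layer following the prepare()/run() split introduced by the patch.
class WinogradLikeLayer
{
public:
    void configure(Tensor *weights)
    {
        _original_weights = weights;
        _is_prepared      = false; // configure() can be called again with new weights
    }

    // One-time setup: transform the weights, then release the original copy.
    // Safe to call from every run() because the flag makes it a no-op afterwards.
    void prepare()
    {
        if(!_is_prepared)
        {
            // Placeholder for the real filter transform: here we just copy the data.
            _transformed_weights.data.assign(_original_weights->data.begin(), _original_weights->data.end());
            _original_weights->free(); // original weights no longer needed
            _is_prepared = true;
        }
    }

    void run()
    {
        prepare(); // deferred one-time setup
        std::cout << "running with " << _transformed_weights.data.size() << " transformed weights\n";
    }

private:
    Tensor *_original_weights{ nullptr };
    Tensor  _transformed_weights{};
    bool    _is_prepared{ false };
};

int main()
{
    Tensor weights{ { 1.f, 2.f, 3.f } };
    WinogradLikeLayer layer;
    layer.configure(&weights);

    layer.run(); // first run triggers prepare(): weights transformed, originals freed
    layer.run(); // later runs skip the transform entirely

    std::cout << "original weights freed: " << std::boolalpha << weights.empty() << '\n';
    return 0;
}

The point of the split is that prepare() is idempotent, so run() can invoke it unconditionally (and callers could invoke it early to front-load the filter transform), while the memory held by the untransformed weights is released as soon as the transformed copy exists.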