diff options
Diffstat (limited to 'src/runtime/CL/functions/CLWinogradConvolutionLayer.cpp')
-rw-r--r-- | src/runtime/CL/functions/CLWinogradConvolutionLayer.cpp | 40 |
1 file changed, 25 insertions, 15 deletions
diff --git a/src/runtime/CL/functions/CLWinogradConvolutionLayer.cpp b/src/runtime/CL/functions/CLWinogradConvolutionLayer.cpp index 5ff4fbceee..025a16b4fb 100644 --- a/src/runtime/CL/functions/CLWinogradConvolutionLayer.cpp +++ b/src/runtime/CL/functions/CLWinogradConvolutionLayer.cpp @@ -69,7 +69,7 @@ bool check_support_fast_math(const Size2D &output_tile, const Size2D &kernel_siz CLWinogradConvolutionLayer::CLWinogradConvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager) : _memory_group(memory_manager), _batched_mm(memory_manager), _input_transform(), _filter_transform(), _output_transform(), _activationlayer_function(), _input0(), _input1(), _batched_mm_output(), - _is_first_run(true), _is_activationlayer_enabled(false) + _original_weights(nullptr), _is_prepared(false), _is_activationlayer_enabled(false) { } @@ -97,6 +97,9 @@ void CLWinogradConvolutionLayer::configure(ICLTensor *input, const ICLTensor *we conv_info, input->info()->data_layout()); + _is_prepared = false; + _original_weights = weights; + // Manage intermediate tensors _memory_group.manage(&_input0); _memory_group.manage(&_batched_mm_output); @@ -124,7 +127,6 @@ void CLWinogradConvolutionLayer::configure(ICLTensor *input, const ICLTensor *we // Allocate temporary tensors _input0.allocator()->allocate(); - _input1.allocator()->allocate(); _batched_mm_output.allocator()->allocate(); } @@ -182,11 +184,7 @@ Status CLWinogradConvolutionLayer::validate(const ITensorInfo *input, const ITen void CLWinogradConvolutionLayer::run() { - if(_is_first_run) - { - // Run filter transform - CLScheduler::get().enqueue(_filter_transform, false); - } + prepare(); _memory_group.acquire(); @@ -196,13 +194,6 @@ void CLWinogradConvolutionLayer::run() // Run batched matrix multiplication _batched_mm.run(); - // Release reshaped weights if marked unused by CLGEMM - if(_is_first_run && !_input1.is_used()) - { - CLScheduler::get().queue().finish(); - _input1.allocator()->free(); - } - // Run output transform 
CLScheduler::get().enqueue(_output_transform); @@ -212,6 +203,25 @@ void CLWinogradConvolutionLayer::run() } _memory_group.release(); +} + +void CLWinogradConvolutionLayer::prepare() +{ + if(!_is_prepared) + { + // Run filter transform and mark original weights as unused + _input1.allocator()->allocate(); + CLScheduler::get().enqueue(_filter_transform, false); + _original_weights->mark_as_unused(); + + // Prepare GEMM and release reshaped weights if marked unused by CLGEMM + _batched_mm.prepare(); + if(!_input1.is_used()) + { + _input1.allocator()->free(); + } - _is_first_run = false; + CLScheduler::get().queue().finish(); + _is_prepared = true; + } } |