-rw-r--r--  arm_compute/runtime/CL/functions/CLGEMM.h                |  9 +++++++++
-rw-r--r--  src/graph/backends/CL/CLFunctionsFactory.cpp             |  4 ++--
-rw-r--r--  src/runtime/CL/functions/CLConvolutionLayer.cpp          |  2 +-
-rw-r--r--  src/runtime/CL/functions/CLGEMM.cpp                      | 15 ++++++++++++---
-rw-r--r--  src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp      | 10 +++++++++-
-rw-r--r--  src/runtime/CL/functions/CLWinogradConvolutionLayer.cpp  | 11 +++++++++--
6 files changed, 42 insertions(+), 9 deletions(-)
diff --git a/arm_compute/runtime/CL/functions/CLGEMM.h b/arm_compute/runtime/CL/functions/CLGEMM.h
index f2dd60340c..c5d7b86384 100644
--- a/arm_compute/runtime/CL/functions/CLGEMM.h
+++ b/arm_compute/runtime/CL/functions/CLGEMM.h
@@ -56,6 +56,14 @@ public:
      * @param[in] memory_manager (Optional) Memory manager.
      */
     CLGEMM(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    CLGEMM(const CLGEMM &) = delete;
+    /** Default move constructor */
+    CLGEMM(CLGEMM &&) = default;
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    CLGEMM &operator=(const CLGEMM &) = delete;
+    /** Default move assignment operator */
+    CLGEMM &operator=(CLGEMM &&) = default;
     /** Initialise the kernel's inputs and output
      *
      * @note GEMM: General Matrix Multiply - [alpha * A * B + beta * C].
@@ -101,6 +109,7 @@ private:
     CLGEMMMatrixAdditionKernel _ma_kernel;
     CLTensor _tmp_a;
     CLTensor _tmp_b;
+    const ICLTensor *_original_b;
     bool _is_interleaved_transposed;
     bool _run_addition;
     bool _is_first_run;
diff --git a/src/graph/backends/CL/CLFunctionsFactory.cpp b/src/graph/backends/CL/CLFunctionsFactory.cpp
index ad73a797e3..ece63646ea 100644
--- a/src/graph/backends/CL/CLFunctionsFactory.cpp
+++ b/src/graph/backends/CL/CLFunctionsFactory.cpp
@@ -174,8 +174,8 @@ std::unique_ptr<IFunction> create_convolution_layer(ConvolutionLayerNode &node,
 
     if(conv_algorithm == ConvolutionMethod::WINOGRAD)
     {
-        std::tie(func, func_name) = create_named_function<CLWinogradConvolutionLayer>(
-            std::string("CLWinogradConvolutionLayer"), input, weights, biases, output, conv_info);
+        std::tie(func, func_name) = create_named_memory_managed_function<CLWinogradConvolutionLayer>(
+            std::string("CLWinogradConvolutionLayer"), mm, input, weights, biases, output, conv_info);
     }
     else if(conv_algorithm == ConvolutionMethod::DIRECT)
     {
diff --git a/src/runtime/CL/functions/CLConvolutionLayer.cpp b/src/runtime/CL/functions/CLConvolutionLayer.cpp
index 643e24d638..97ef895434 100644
--- a/src/runtime/CL/functions/CLConvolutionLayer.cpp
+++ b/src/runtime/CL/functions/CLConvolutionLayer.cpp
@@ -53,7 +53,7 @@ void CLConvolutionLayer::configure(ICLTensor *input, const ICLTensor *weights, c
     {
         case ConvolutionMethod::WINOGRAD:
         {
-            auto f = arm_compute::support::cpp14::make_unique<CLWinogradConvolutionLayer>();
+            auto f = arm_compute::support::cpp14::make_unique<CLWinogradConvolutionLayer>(_memory_manager);
             f->configure(input, weights, biases, output, conv_info);
             _function = std::move(f);
             break;
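A note on the CLGEMM.h change above: because the class now caches a raw, non-owning pointer (_original_b), its copy constructor and copy assignment are deleted while the moves stay defaulted, so two instances can never alias the same cached tensor. A minimal self-contained sketch of that pattern, using a hypothetical KernelFunction class rather than the real CLGEMM:

#include <memory>
#include <utility>

// Hypothetical stand-in for CLGEMM: it stores a non-owning raw pointer,
// so copying is deleted (two copies must not alias the cached tensor)
// while moving, which merely transfers the pointer, stays defaulted.
class KernelFunction
{
public:
    KernelFunction() = default;
    KernelFunction(const KernelFunction &) = delete;
    KernelFunction(KernelFunction &&) = default;
    KernelFunction &operator=(const KernelFunction &) = delete;
    KernelFunction &operator=(KernelFunction &&) = default;

    void configure(const float *b)
    {
        _original_b = b; // cache the caller's tensor, mirroring '_original_b = b;' below
    }

private:
    const float *_original_b{ nullptr };
};

int main()
{
    KernelFunction f;
    f.configure(nullptr);
    KernelFunction g = std::move(f); // OK: move is allowed
    // KernelFunction h = g;         // error: copy constructor is deleted
    (void)g;
}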
diff --git a/src/runtime/CL/functions/CLGEMM.cpp b/src/runtime/CL/functions/CLGEMM.cpp
index cf41eccc35..bff5781300 100644
--- a/src/runtime/CL/functions/CLGEMM.cpp
+++ b/src/runtime/CL/functions/CLGEMM.cpp
@@ -92,8 +92,8 @@ Status validate_arguments(const ITensorInfo *a, const ITensorInfo *b, const ICLT
 } // namespace
 
 CLGEMM::CLGEMM(std::shared_ptr<IMemoryManager> memory_manager)
-    : _memory_group(std::move(memory_manager)), _interleave_kernel(), _transpose_kernel(), _mm_kernel(), _ma_kernel(), _tmp_a(), _tmp_b(), _is_interleaved_transposed(false), _run_addition(false),
-      _is_first_run(true), _reshape_b_only_on_first_run(false)
+    : _memory_group(std::move(memory_manager)), _interleave_kernel(), _transpose_kernel(), _mm_kernel(), _ma_kernel(), _tmp_a(), _tmp_b(), _original_b(nullptr), _is_interleaved_transposed(false),
+      _run_addition(false), _is_first_run(true), _reshape_b_only_on_first_run(false)
 {
 }
 
@@ -104,6 +104,9 @@ void CLGEMM::configure(const ICLTensor *a, const ICLTensor *b, const ICLTensor *
     // Perform validation step
     ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(a->info(), b->info(), c, output->info(), alpha, beta, gemm_info));
 
+    // Store original b matrix
+    _original_b = b;
+
     // Check if we need to reshape the matrix B only on the first run
     _reshape_b_only_on_first_run = gemm_info.reshape_b_only_on_first_run();
 
@@ -192,7 +195,11 @@ void CLGEMM::run()
         // Run transpose kernel
         CLScheduler::get().enqueue(_transpose_kernel, false);
 
-        _is_first_run = false;
+        // Mark original b matrix as unused
+        if(_reshape_b_only_on_first_run)
+        {
+            _original_b->mark_as_unused();
+        }
     }
     else if(!_reshape_b_only_on_first_run)
     {
@@ -211,4 +218,6 @@ void CLGEMM::run()
     }
 
     _memory_group.release();
+
+    _is_first_run = false;
 }
diff --git a/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp b/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp
index 084c4df718..87c4a306aa 100644
--- a/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp
+++ b/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp
@@ -374,7 +374,6 @@ void CLGEMMConvolutionLayer::run()
         ARM_COMPUTE_ERROR_ON(!_original_weights->is_used());
 
         _reshape_weights.run();
-        _is_first_run = false;
 
         // Mark original weights tensor as unused
         _original_weights->mark_as_unused();
@@ -398,6 +397,13 @@ void CLGEMMConvolutionLayer::run()
     {
         // Run gemm
         _mm_gemm.run();
+
+        // Release reshaped weights if marked unused by CLGEMM
+        if(_is_first_run && !_weights_reshaped.is_used())
+        {
+            CLScheduler::get().queue().finish();
+            _weights_reshaped.allocator()->free();
+        }
     }
 
     // Reshape output matrix
@@ -410,4 +416,6 @@
     }
 
     _memory_group.release();
+
+    _is_first_run = false;
 }
diff --git a/src/runtime/CL/functions/CLWinogradConvolutionLayer.cpp b/src/runtime/CL/functions/CLWinogradConvolutionLayer.cpp
index 86ccddac88..65747cf5d7 100644
--- a/src/runtime/CL/functions/CLWinogradConvolutionLayer.cpp
+++ b/src/runtime/CL/functions/CLWinogradConvolutionLayer.cpp
@@ -151,8 +151,6 @@ void CLWinogradConvolutionLayer::run()
     {
         // Run filter transform
         CLScheduler::get().enqueue(_filter_transform, false);
-
-        _is_first_run = false;
     }
 
     _memory_group.acquire();
@@ -163,6 +161,13 @@ void CLWinogradConvolutionLayer::run()
     // Run batched matrix multiplication
     _batched_mm.run();
 
+    // Release reshaped weights if marked unused by CLGEMM
+    if(_is_first_run && !_input1.is_used())
+    {
+        CLScheduler::get().queue().finish();
+        _input1.allocator()->free();
+    }
+
     // Run output transform
     CLScheduler::get().enqueue(_output_transform);
 
@@ -172,4 +177,6 @@
     }
 
     _memory_group.release();
+
+    _is_first_run = false;
 }
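Taken together, the runtime changes implement a two-phase handshake: on its first run CLGEMM reshapes B into an internal buffer and calls mark_as_unused() on the caller's original tensor, and the enclosing layer then checks is_used() and frees whichever buffer is no longer needed, calling queue().finish() first so that no in-flight kernel is still reading the buffer being freed. This is also why each run() method now sets _is_first_run = false; at the very end: the release check must still observe the first-run state after the GEMM has executed. A minimal sketch of the handshake with hypothetical Tensor and Gemm types, not the real ACL classes:

#include <memory>

// Hypothetical stand-ins for ICLTensor and CLGEMM, illustrating the
// first-run release protocol shared by the three run() methods above.
struct Tensor
{
    bool                   used{ true };
    std::unique_ptr<int[]> storage{ new int[16] };

    void mark_as_unused() { used = false; }
    bool is_used() const  { return used; }
    void free()           { storage.reset(); }
};

struct Gemm
{
    Tensor *original_b{ nullptr };
    bool    reshape_b_only_on_first_run{ true };
    bool    is_first_run{ true };

    void run()
    {
        if(is_first_run && reshape_b_only_on_first_run)
        {
            // ... reshape B into an internal buffer here ...
            // Signal the owner that its copy of B is no longer needed.
            original_b->mark_as_unused();
        }
        is_first_run = false;
    }
};

int main()
{
    Tensor weights;
    Gemm   gemm;
    gemm.original_b = &weights;

    gemm.run(); // first run: B is reshaped and the original marked unused

    // The enclosing layer owns the tensor and frees it once the inner
    // function has flagged it; the real code synchronizes the CL queue
    // first so no enqueued kernel is still reading the buffer.
    if(!weights.is_used())
    {
        weights.free();
    }
}

The split of responsibilities is deliberate: only CLGEMM knows whether B was reshaped and cached, but only the enclosing layer owns the tensor, so the unused flag is the narrow channel between them.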