From 72219330fd85b1271e714d4ba894d6d8e26340c9 Mon Sep 17 00:00:00 2001 From: Georgios Pinitas Date: Tue, 5 Jun 2018 14:56:06 +0100 Subject: COMPMID-1145: (API) Introduce prepare() stage (NEON/CL/GLES) Change-Id: I5b46764f9c3154ec3e3b9c951cc9e6dfbcb81dfb Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/134255 Reviewed-by: Anthony Barbier Tested-by: Jenkins Reviewed-by: Pablo Tello Reviewed-by: Michele DiGiorgio --- .../NEON/functions/NEWinogradConvolutionLayer.cpp | 40 +++++++++++++++------- 1 file changed, 28 insertions(+), 12 deletions(-) (limited to 'src/runtime/NEON/functions/NEWinogradConvolutionLayer.cpp') diff --git a/src/runtime/NEON/functions/NEWinogradConvolutionLayer.cpp b/src/runtime/NEON/functions/NEWinogradConvolutionLayer.cpp index d6bc5cfd9a..d9f6c0e0f8 100644 --- a/src/runtime/NEON/functions/NEWinogradConvolutionLayer.cpp +++ b/src/runtime/NEON/functions/NEWinogradConvolutionLayer.cpp @@ -113,7 +113,7 @@ bool check_support_fast_math(const Size2D &output_tile, const Size2D &kernel_siz NEWinogradConvolutionLayer::NEWinogradConvolutionLayer(std::shared_ptr memory_manager) : _memory_group(std::move(memory_manager)), _arm_gemm(nullptr), _gemm_kernel(nullptr), _transform_input_kernel(nullptr), _transform_output_kernel(nullptr), _transform_weights_kernel(nullptr), _activationlayer_function(), _permute_input(), _permute_weights(), _permute_output(), _input_workspace(), _output_workspace(), _kernel_storage(), _input_nhwc(), _output_nhwc(), _weights_hwio(), - _workspace(), _input(), _weights(), _output(), _reshaped_kernel(false), _is_activationlayer_enabled(false) + _workspace(), _input(), _weights(), _output(), _is_prepared(false), _is_activationlayer_enabled(false) { } /* arm_compute */ @@ -139,9 +139,10 @@ void NEWinogradConvolutionLayer::configure(const ITensor *input, const ITensor * ARM_COMPUTE_ERROR_ON_MSG(check_support_fast_math(output_tile, kernel_size), "This Winograd configuration requires enable_fast_math=true"); } - _weights = weights; - _input = input; - _output = output; + _weights = weights; + _input = input; + _output = output; + _is_prepared = false; std::unique_ptr> transform_input_kernel; std::unique_ptr> transform_weights_kernel; @@ -198,10 +199,13 @@ void NEWinogradConvolutionLayer::configure(const ITensor *input, const ITensor * const Tensor4DShape in_shape(internal_get_input_shape(input)); const size_t data_type_size = input->info()->element_size(); // Get the memory required to instantiate a new Winograd operator. - constexpr size_t storage_alignment = 64; - const size_t kernel_storage_size = transform_weights_kernel->get_weight_storage_size(out_channels, in_channels) * data_type_size; + constexpr size_t storage_alignment = 64; + + // Kernel Storage + const size_t kernel_storage_size = transform_weights_kernel->get_weight_storage_size(out_channels, in_channels) * data_type_size; _kernel_storage.allocator()->init(TensorInfo(TensorShape{ (kernel_storage_size + storage_alignment - 1) }, 1, DataType::U8)); _kernel_storage.allocator()->allocate(); + // Input storage const size_t input_storage_size = transform_input_kernel->get_input_storage_size(in_shape.n_batches, in_shape.n_channels, in_shape.n_rows, in_shape.n_cols, use_same_padding) * data_type_size; _input_workspace.allocator()->init(TensorInfo(TensorShape{ (input_storage_size + storage_alignment - 1) }, 1, DataType::U8)); @@ -331,13 +335,9 @@ void NEWinogradConvolutionLayer::run() { const DataLayout data_layout = _input->info()->data_layout(); + prepare(); + _memory_group.acquire(); - if(!_reshaped_kernel) - { - _reshaped_kernel = true; - _permute_weights.run(); - NEScheduler::get().schedule(_transform_weights_kernel.get(), Window::DimX); - } if(data_layout == DataLayout::NCHW) { @@ -491,4 +491,20 @@ Status NEWinogradConvolutionLayer::validate(const ITensorInfo *input, const ITen return Status{}; } +void NEWinogradConvolutionLayer::prepare() +{ + if(!_is_prepared) + { + // Permute weights + _permute_weights.run(); + _weights->mark_as_unused(); + + // Transform weights + NEScheduler::get().schedule(_transform_weights_kernel.get(), Window::DimX); + _weights_hwio.allocator()->free(); + + _is_prepared = true; + } +} + } // namespace arm_compute -- cgit v1.2.1