about summary refs log tree commit diff
path: root/src
diff options
context:
space:
mode:
author Georgios Pinitas <georgios.pinitas@arm.com> 2017-09-15 16:30:50 +0100
committer Anthony Barbier <anthony.barbier@arm.com> 2018-11-02 16:35:24 +0000
commit 658039bc4e06be34272eccf559a516a6b52f75f5 (patch)
tree 7ab17a9cb5dad7b18f7e3a67b731f2c462b1ac24 /src
parent 9e40cf7c21ed0136150101d6c930417d475ecff1 (diff)
download ComputeLibrary-658039bc4e06be34272eccf559a516a6b52f75f5.tar.gz
COMPMID-534: Add MemoryManager support in NEON functions
Adds support for:
-NECannyEdge
-NEConvolution
-NEDirectConvolution
-NEGEMM
-NEGEMMLowp
-NEGaussian5x5
-NEHOGDescriptor
-NEHOGGradient
-NEL2Normalize
-NELocallyConnectedLayer
-NENormalizationLayer
-NEScale
-NESobel5x5
-NESobel7x7

Change-Id: I68e05aa6054372fa873a882633a15fb97882c00d
Reviewed-on: http://mpd-gerrit.cambridge.arm.com/87926
Reviewed-by: Pablo Tello <pablo.tello@arm.com>
Tested-by: Kaizen <jeremy.johnson+kaizengerrit@arm.com>
Diffstat (limited to 'src')
-rw-r--r-- src/runtime/NEON/functions/NECannyEdge.cpp 32
-rw-r--r-- src/runtime/NEON/functions/NEConvolution.cpp 12
-rw-r--r-- src/runtime/NEON/functions/NEDirectConvolutionLayer.cpp 11
-rw-r--r-- src/runtime/NEON/functions/NEGEMM.cpp 12
-rw-r--r-- src/runtime/NEON/functions/NEGEMMLowp.cpp 12
-rw-r--r-- src/runtime/NEON/functions/NEGaussian5x5.cpp 11
-rw-r--r-- src/runtime/NEON/functions/NEHOGDescriptor.cpp 15
-rw-r--r-- src/runtime/NEON/functions/NEHOGGradient.cpp 13
-rw-r--r-- src/runtime/NEON/functions/NEHOGMultiDetection.cpp 37
-rw-r--r-- src/runtime/NEON/functions/NEL2Normalize.cpp 11
-rw-r--r-- src/runtime/NEON/functions/NELocallyConnectedLayer.cpp 13
-rw-r--r-- src/runtime/NEON/functions/NENormalizationLayer.cpp 11
-rw-r--r-- src/runtime/NEON/functions/NEOpticalFlow.cpp 13
-rw-r--r-- src/runtime/NEON/functions/NEScale.cpp 38
-rw-r--r-- src/runtime/NEON/functions/NESobel5x5.cpp 13
-rw-r--r-- src/runtime/NEON/functions/NESobel7x7.cpp 13
16 files changed, 214 insertions, 53 deletions
diff --git a/src/runtime/NEON/functions/NECannyEdge.cpp b/src/runtime/NEON/functions/NECannyEdge.cpp
index 318cea2342..9be1df6ea4 100644
--- a/src/runtime/NEON/functions/NECannyEdge.cpp
+++ b/src/runtime/NEON/functions/NECannyEdge.cpp
@@ -41,8 +41,9 @@
using namespace arm_compute;
-NECannyEdge::NECannyEdge() // NOLINT
- : _sobel(),
+NECannyEdge::NECannyEdge(std::shared_ptr<IMemoryManager> memory_manager) // NOLINT
+ : _memory_group(std::move(memory_manager)),
+ _sobel(),
_gradient(),
_non_max_suppr(),
_edge_trace(),
@@ -93,6 +94,10 @@ void NECannyEdge::configure(ITensor *input, ITensor *output, int32_t upper_thr,
_phase.allocator()->init(info);
_nonmax.allocator()->init(info);
+ // Manage intermediate buffers
+ _memory_group.manage(&_gx);
+ _memory_group.manage(&_gy);
+
// Configure/Init sobelNxN
if(gradient_size == 3)
{
@@ -117,6 +122,10 @@ void NECannyEdge::configure(ITensor *input, ITensor *output, int32_t upper_thr,
ARM_COMPUTE_ERROR("Gradient size not supported\n");
}
+ // Manage intermediate buffers
+ _memory_group.manage(&_magnitude);
+ _memory_group.manage(&_phase);
+
// Configure gradient
if(use_fp16)
{
@@ -131,6 +140,13 @@ void NECannyEdge::configure(ITensor *input, ITensor *output, int32_t upper_thr,
_gradient = std::move(k);
}
+ // Allocate intermediate tensors
+ _gx.allocator()->allocate();
+ _gy.allocator()->allocate();
+
+ // Manage intermediate buffers
+ _memory_group.manage(&_nonmax);
+
// Configure non-maxima suppression
_non_max_suppr.configure(&_magnitude, &_phase, &_nonmax, upper_thr, lower_thr, border_mode == BorderMode::UNDEFINED);
@@ -138,6 +154,10 @@ void NECannyEdge::configure(ITensor *input, ITensor *output, int32_t upper_thr,
// it. If border mode is undefined filling the border is a nop.
_border_mag_gradient.configure(&_magnitude, _non_max_suppr.border_size(), border_mode, constant_border_value);
+ // Allocate intermediate tensors
+ _phase.allocator()->allocate();
+ _magnitude.allocator()->allocate();
+
// Configure edge tracing
_edge_trace.configure(&_nonmax, output);
@@ -145,10 +165,6 @@ void NECannyEdge::configure(ITensor *input, ITensor *output, int32_t upper_thr,
_border_edge_trace.configure(&_nonmax, _edge_trace.border_size(), BorderMode::CONSTANT, 0);
// Allocate intermediate tensors
- _gx.allocator()->allocate();
- _gy.allocator()->allocate();
- _phase.allocator()->allocate();
- _magnitude.allocator()->allocate();
_nonmax.allocator()->allocate();
}
@@ -157,6 +173,8 @@ void NECannyEdge::run()
ARM_COMPUTE_ERROR_ON_MSG(_sobel == nullptr, "Unconfigured function");
ARM_COMPUTE_ERROR_ON(_output == nullptr);
+ _memory_group.acquire();
+
// Run sobelNxN
_sobel->run();
@@ -177,4 +195,6 @@ void NECannyEdge::run()
// Run edge tracing
NEScheduler::get().schedule(&_edge_trace, Window::DimY);
+
+ _memory_group.release();
}
diff --git a/src/runtime/NEON/functions/NEConvolution.cpp b/src/runtime/NEON/functions/NEConvolution.cpp
index 249274ba32..f10ffa6d14 100644
--- a/src/runtime/NEON/functions/NEConvolution.cpp
+++ b/src/runtime/NEON/functions/NEConvolution.cpp
@@ -48,8 +48,8 @@ void NEConvolution3x3::configure(ITensor *input, ITensor *output, const int16_t
}
template <unsigned int matrix_size>
-NEConvolutionSquare<matrix_size>::NEConvolutionSquare()
- : _tmp(), _is_separable(false), _kernel_hor(), _kernel_vert(), _kernel(), _border_handler()
+NEConvolutionSquare<matrix_size>::NEConvolutionSquare(std::shared_ptr<IMemoryManager> memory_manager)
+ : _memory_group(std::move(memory_manager)), _tmp(), _is_separable(false), _kernel_hor(), _kernel_vert(), _kernel(), _border_handler()
{
}
@@ -72,6 +72,10 @@ void NEConvolutionSquare<matrix_size>::configure(ITensor *input, ITensor *output
_tmp.allocator()->init(TensorInfo(input->info()->tensor_shape(), 1, intermediate_type));
+ // Manage intermediate buffers
+ _memory_group.manage(&_tmp);
+
+ // Calculate scale
if(scale == 0)
{
scale = calculate_matrix_scale(conv, matrix_size);
@@ -98,8 +102,12 @@ void NEConvolutionSquare<matrix_size>::run()
if(_is_separable)
{
+ _memory_group.acquire();
+
NEScheduler::get().schedule(&_kernel_hor, Window::DimY);
NEScheduler::get().schedule(&_kernel_vert, Window::DimY);
+
+ _memory_group.release();
}
else
{
diff --git a/src/runtime/NEON/functions/NEDirectConvolutionLayer.cpp b/src/runtime/NEON/functions/NEDirectConvolutionLayer.cpp
index 810efe539f..a56a73c44a 100644
--- a/src/runtime/NEON/functions/NEDirectConvolutionLayer.cpp
+++ b/src/runtime/NEON/functions/NEDirectConvolutionLayer.cpp
@@ -33,8 +33,8 @@
using namespace arm_compute;
-NEDirectConvolutionLayer::NEDirectConvolutionLayer()
- : _accumulate_bias_kernel(), _conv_kernel(), _input_border_handler(), _accumulator()
+NEDirectConvolutionLayer::NEDirectConvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager)
+ : _memory_group(std::move(memory_manager)), _accumulate_bias_kernel(), _conv_kernel(), _input_border_handler(), _accumulator()
{
}
@@ -46,6 +46,9 @@ void NEDirectConvolutionLayer::configure(ITensor *input, const ITensor *weights,
_accumulator.allocator()->free();
}
+ // Manage intermediate buffers
+ _memory_group.manage(&_accumulator);
+
// Allocate the intermediate accumulator tensor in case of fixed point input
switch(output->info()->data_type())
{
@@ -87,6 +90,10 @@ void NEDirectConvolutionLayer::run()
{
NEScheduler::get().schedule(&_input_border_handler, Window::DimZ);
+ _memory_group.acquire();
+
NEScheduler::get().schedule(&_conv_kernel, Window::DimZ);
NEScheduler::get().schedule(&_accumulate_bias_kernel, Window::DimY);
+
+ _memory_group.release();
}
diff --git a/src/runtime/NEON/functions/NEGEMM.cpp b/src/runtime/NEON/functions/NEGEMM.cpp
index dfcb3954ea..85b283cd41 100644
--- a/src/runtime/NEON/functions/NEGEMM.cpp
+++ b/src/runtime/NEON/functions/NEGEMM.cpp
@@ -36,8 +36,8 @@
using namespace arm_compute;
-NEGEMM::NEGEMM()
- : _interleave_kernel(), _transpose_kernel(), _mm_kernel(), _ma_kernel(), _tmp_a(), _tmp_b(), _run_vector_matrix_multiplication(false), _run_addition(false)
+NEGEMM::NEGEMM(std::shared_ptr<IMemoryManager> memory_manager)
+ : _memory_group(std::move(memory_manager)), _interleave_kernel(), _transpose_kernel(), _mm_kernel(), _ma_kernel(), _tmp_a(), _tmp_b(), _run_vector_matrix_multiplication(false), _run_addition(false)
{
}
@@ -85,6 +85,10 @@ void NEGEMM::configure(const ITensor *a, const ITensor *b, const ITensor *c, ITe
_tmp_a.allocator()->init(info_a);
_tmp_b.allocator()->init(info_b);
+ // Manage intermediate buffers
+ _memory_group.manage(&_tmp_a);
+ _memory_group.manage(&_tmp_b);
+
// Configure interleave kernel
_interleave_kernel.configure(a, &_tmp_a);
@@ -109,6 +113,8 @@ void NEGEMM::configure(const ITensor *a, const ITensor *b, const ITensor *c, ITe
void NEGEMM::run()
{
+ _memory_group.acquire();
+
if(!_run_vector_matrix_multiplication)
{
// Run interleave kernel
@@ -121,6 +127,8 @@ void NEGEMM::run()
// Run matrix multiply kernel
NEScheduler::get().schedule(&_mm_kernel, _run_vector_matrix_multiplication ? Window::DimX : Window::DimY);
+ _memory_group.release();
+
// Run matrix addition kernel
if(_run_addition)
{
diff --git a/src/runtime/NEON/functions/NEGEMMLowp.cpp b/src/runtime/NEON/functions/NEGEMMLowp.cpp
index b64f769459..7413b28d03 100644
--- a/src/runtime/NEON/functions/NEGEMMLowp.cpp
+++ b/src/runtime/NEON/functions/NEGEMMLowp.cpp
@@ -34,8 +34,8 @@
using namespace arm_compute;
-NEGEMMLowp::NEGEMMLowp()
- : _interleave_kernel(), _transpose_kernel(), _mm_kernel(), _tmp_a(), _tmp_b()
+NEGEMMLowp::NEGEMMLowp(std::shared_ptr<IMemoryManager> memory_manager)
+ : _memory_group(std::move(memory_manager)), _interleave_kernel(), _transpose_kernel(), _mm_kernel(), _tmp_a(), _tmp_b()
{
}
@@ -63,6 +63,10 @@ void NEGEMMLowp::configure(const ITensor *a, const ITensor *b, ITensor *output,
_tmp_a.allocator()->init(info_a);
_tmp_b.allocator()->init(info_b);
+ // Manage intermediate buffers
+ _memory_group.manage(&_tmp_a);
+ _memory_group.manage(&_tmp_b);
+
_interleave_kernel.configure(a, &_tmp_a);
_transpose_kernel.configure(b, &_tmp_b);
_mm_kernel.configure(&_tmp_a, &_tmp_b, output, a_offset, b_offset, output_offset, output_mult_int, shift);
@@ -73,6 +77,8 @@ void NEGEMMLowp::configure(const ITensor *a, const ITensor *b, ITensor *output,
void NEGEMMLowp::run()
{
+ _memory_group.acquire();
+
/* Run interleave kernel */
NEScheduler::get().schedule(&_interleave_kernel, Window::DimY);
@@ -81,4 +87,6 @@ void NEGEMMLowp::run()
/* Run matrix multiply kernel */
NEScheduler::get().schedule(&_mm_kernel, Window::DimY);
+
+ _memory_group.release();
}
diff --git a/src/runtime/NEON/functions/NEGaussian5x5.cpp b/src/runtime/NEON/functions/NEGaussian5x5.cpp
index a1ce985633..f085975b1e 100644
--- a/src/runtime/NEON/functions/NEGaussian5x5.cpp
+++ b/src/runtime/NEON/functions/NEGaussian5x5.cpp
@@ -32,8 +32,8 @@
using namespace arm_compute;
-NEGaussian5x5::NEGaussian5x5()
- : _kernel_hor(), _kernel_vert(), _tmp(), _border_handler()
+NEGaussian5x5::NEGaussian5x5(std::shared_ptr<IMemoryManager> memory_manager)
+ : _memory_group(std::move(memory_manager)), _kernel_hor(), _kernel_vert(), _tmp(), _border_handler()
{
}
@@ -43,6 +43,9 @@ void NEGaussian5x5::configure(ITensor *input, ITensor *output, BorderMode border
TensorInfo tensor_info(input->info()->tensor_shape(), 1, DataType::S16);
_tmp.allocator()->init(tensor_info);
+ // Manage intermediate buffers
+ _memory_group.manage(&_tmp);
+
// Create and configure kernels for the two passes
_kernel_hor.configure(input, &_tmp, border_mode == BorderMode::UNDEFINED);
_kernel_vert.configure(&_tmp, output, border_mode == BorderMode::UNDEFINED);
@@ -54,7 +57,11 @@ void NEGaussian5x5::configure(ITensor *input, ITensor *output, BorderMode border
void NEGaussian5x5::run()
{
+ _memory_group.acquire();
+
NEScheduler::get().schedule(&_border_handler, Window::DimZ);
NEScheduler::get().schedule(&_kernel_hor, Window::DimY);
NEScheduler::get().schedule(&_kernel_vert, Window::DimY);
+
+ _memory_group.release();
}
diff --git a/src/runtime/NEON/functions/NEHOGDescriptor.cpp b/src/runtime/NEON/functions/NEHOGDescriptor.cpp
index a592f53d44..5e98269f47 100644
--- a/src/runtime/NEON/functions/NEHOGDescriptor.cpp
+++ b/src/runtime/NEON/functions/NEHOGDescriptor.cpp
@@ -31,8 +31,8 @@
using namespace arm_compute;
-NEHOGDescriptor::NEHOGDescriptor()
- : _gradient(), _orient_bin(), _block_norm(), _mag(), _phase(), _hog_space()
+NEHOGDescriptor::NEHOGDescriptor(std::shared_ptr<IMemoryManager> memory_manager)
+ : _memory_group(std::move(memory_manager)), _gradient(), _orient_bin(), _block_norm(), _mag(), _phase(), _hog_space()
{
}
@@ -71,9 +71,16 @@ void NEHOGDescriptor::configure(ITensor *input, ITensor *output, const IHOG *hog
TensorInfo info_space(shape_hog_space, num_bins, DataType::F32);
_hog_space.allocator()->init(info_space);
+ // Manage intermediate buffers
+ _memory_group.manage(&_mag);
+ _memory_group.manage(&_phase);
+
// Initialise gradient kernel
_gradient.configure(input, &_mag, &_phase, hog_info->phase_type(), border_mode, constant_border_value);
+ // Manage intermediate buffers
+ _memory_group.manage(&_hog_space);
+
// Initialise orientation binning kernel
_orient_bin.configure(&_mag, &_phase, &_hog_space, hog->info());
@@ -88,6 +95,8 @@ void NEHOGDescriptor::configure(ITensor *input, ITensor *output, const IHOG *hog
void NEHOGDescriptor::run()
{
+ _memory_group.acquire();
+
// Run gradient
_gradient.run();
@@ -96,4 +105,6 @@ void NEHOGDescriptor::run()
// Run block normalization kernel
NEScheduler::get().schedule(&_block_norm, Window::DimY);
+
+ _memory_group.release();
}
diff --git a/src/runtime/NEON/functions/NEHOGGradient.cpp b/src/runtime/NEON/functions/NEHOGGradient.cpp
index 3e2640d631..efc8690ede 100644
--- a/src/runtime/NEON/functions/NEHOGGradient.cpp
+++ b/src/runtime/NEON/functions/NEHOGGradient.cpp
@@ -30,8 +30,9 @@
using namespace arm_compute;
-NEHOGGradient::NEHOGGradient() // NOLINT
- : _derivative(),
+NEHOGGradient::NEHOGGradient(std::shared_ptr<IMemoryManager> memory_manager) // NOLINT
+ : _memory_group(std::move(memory_manager)),
+ _derivative(),
_mag_phase(nullptr),
_gx(),
_gy()
@@ -51,6 +52,10 @@ void NEHOGGradient::configure(ITensor *input, ITensor *output_magnitude, ITensor
_gx.allocator()->init(info);
_gy.allocator()->init(info);
+ // Manage intermediate buffers
+ _memory_group.manage(&_gx);
+ _memory_group.manage(&_gy);
+
// Initialise derivate kernel
_derivative.configure(input, &_gx, &_gy, border_mode, constant_border_value);
@@ -75,9 +80,13 @@ void NEHOGGradient::configure(ITensor *input, ITensor *output_magnitude, ITensor
void NEHOGGradient::run()
{
+ _memory_group.acquire();
+
// Run derivative
_derivative.run();
// Run magnitude/phase kernel
NEScheduler::get().schedule(_mag_phase.get(), Window::DimY);
+
+ _memory_group.release();
}
diff --git a/src/runtime/NEON/functions/NEHOGMultiDetection.cpp b/src/runtime/NEON/functions/NEHOGMultiDetection.cpp
index 1a038a2f62..8c834e2a93 100644
--- a/src/runtime/NEON/functions/NEHOGMultiDetection.cpp
+++ b/src/runtime/NEON/functions/NEHOGMultiDetection.cpp
@@ -32,8 +32,9 @@
using namespace arm_compute;
-NEHOGMultiDetection::NEHOGMultiDetection() // NOLINT
- : _gradient_kernel(),
+NEHOGMultiDetection::NEHOGMultiDetection(std::shared_ptr<IMemoryManager> memory_manager) // NOLINT
+ : _memory_group(std::move(memory_manager)),
+ _gradient_kernel(),
_orient_bin_kernel(),
_block_norm_kernel(),
_hog_detect_kernel(),
@@ -139,6 +140,10 @@ void NEHOGMultiDetection::configure(ITensor *input, const IMultiHOG *multi_hog,
TensorInfo info_phase(shape_img, Format::U8);
_phase.allocator()->init(info_phase);
+ // Manage intermediate buffers
+ _memory_group.manage(&_mag);
+ _memory_group.manage(&_phase);
+
// Initialise gradient kernel
_gradient_kernel.configure(input, &_mag, &_phase, phase_type, border_mode, constant_border_value);
@@ -164,10 +169,17 @@ void NEHOGMultiDetection::configure(ITensor *input, const IMultiHOG *multi_hog,
TensorInfo info_space(shape_hog_space, num_bins, DataType::F32);
_hog_space[i].allocator()->init(info_space);
+ // Manage intermediate buffers
+ _memory_group.manage(_hog_space.get() + i);
+
// Initialise orientation binning kernel
_orient_bin_kernel[i].configure(&_mag, &_phase, _hog_space.get() + i, multi_hog->model(idx_multi_hog)->info());
}
+ // Allocate intermediate tensors
+ _mag.allocator()->allocate();
+ _phase.allocator()->allocate();
+
// Configure NETensor for the normalized HOG space and block normalization kernel
for(size_t i = 0; i < _num_block_norm_kernel; ++i)
{
@@ -178,10 +190,19 @@ void NEHOGMultiDetection::configure(ITensor *input, const IMultiHOG *multi_hog,
TensorInfo tensor_info(*(multi_hog->model(idx_multi_hog)->info()), width, height);
_hog_norm_space[i].allocator()->init(tensor_info);
+ // Manage intermediate buffers
+ _memory_group.manage(_hog_norm_space.get() + i);
+
// Initialize block normalization kernel
_block_norm_kernel[i].configure(_hog_space.get() + idx_orient_bin, _hog_norm_space.get() + i, multi_hog->model(idx_multi_hog)->info());
}
+ // Allocate intermediate tensors
+ for(size_t i = 0; i < _num_orient_bin_kernel; ++i)
+ {
+ _hog_space[i].allocator()->allocate();
+ }
+
// Configure HOG detector kernel
for(size_t i = 0; i < _num_hog_detect_kernel; ++i)
{
@@ -194,14 +215,6 @@ void NEHOGMultiDetection::configure(ITensor *input, const IMultiHOG *multi_hog,
_non_maxima_kernel->configure(_detection_windows, min_distance);
// Allocate intermediate tensors
- _mag.allocator()->allocate();
- _phase.allocator()->allocate();
-
- for(size_t i = 0; i < _num_orient_bin_kernel; ++i)
- {
- _hog_space[i].allocator()->allocate();
- }
-
for(size_t i = 0; i < _num_block_norm_kernel; ++i)
{
_hog_norm_space[i].allocator()->allocate();
@@ -212,6 +225,8 @@ void NEHOGMultiDetection::run()
{
ARM_COMPUTE_ERROR_ON_MSG(_detection_windows == nullptr, "Unconfigured function");
+ _memory_group.acquire();
+
// Reset detection window
_detection_windows->clear();
@@ -241,4 +256,6 @@ void NEHOGMultiDetection::run()
{
NEScheduler::get().schedule(_non_maxima_kernel.get(), Window::DimY);
}
+
+ _memory_group.release();
}
diff --git a/src/runtime/NEON/functions/NEL2Normalize.cpp b/src/runtime/NEON/functions/NEL2Normalize.cpp
index 378d78e3f3..349a781b0b 100644
--- a/src/runtime/NEON/functions/NEL2Normalize.cpp
+++ b/src/runtime/NEON/functions/NEL2Normalize.cpp
@@ -28,13 +28,16 @@
using namespace arm_compute;
-NEL2Normalize::NEL2Normalize()
- : _reduce_func(), _normalize_kernel(), _sumsq()
+NEL2Normalize::NEL2Normalize(std::shared_ptr<IMemoryManager> memory_manager)
+ : _memory_group(std::move(memory_manager)), _reduce_func(), _normalize_kernel(), _sumsq()
{
}
void NEL2Normalize::configure(ITensor *input, ITensor *output, unsigned int axis, float epsilon)
{
+ // Manage intermediate buffers
+ _memory_group.manage(&_sumsq);
+
// Configure Kernels
_reduce_func.configure(input, &_sumsq, axis, ReductionOperation::SUM_SQUARE);
_normalize_kernel.configure(input, &_sumsq, output, axis, epsilon);
@@ -45,6 +48,10 @@ void NEL2Normalize::configure(ITensor *input, ITensor *output, unsigned int axis
void NEL2Normalize::run()
{
+ _memory_group.acquire();
+
_reduce_func.run();
NEScheduler::get().schedule(&_normalize_kernel, Window::DimY);
+
+ _memory_group.release();
}
diff --git a/src/runtime/NEON/functions/NELocallyConnectedLayer.cpp b/src/runtime/NEON/functions/NELocallyConnectedLayer.cpp
index e7c71e04d1..cb48598921 100644
--- a/src/runtime/NEON/functions/NELocallyConnectedLayer.cpp
+++ b/src/runtime/NEON/functions/NELocallyConnectedLayer.cpp
@@ -33,8 +33,9 @@
using namespace arm_compute;
-NELocallyConnectedLayer::NELocallyConnectedLayer()
- : _input_im2col_kernel(), _weights_reshape_kernel(), _mm_kernel(), _output_col2im_kernel(), _input_im2col_reshaped(), _weights_reshaped(), _gemm_output(), _is_first_run(false)
+NELocallyConnectedLayer::NELocallyConnectedLayer(std::shared_ptr<IMemoryManager> memory_manager)
+ : _memory_group(std::move(memory_manager)), _input_im2col_kernel(), _weights_reshape_kernel(), _mm_kernel(), _output_col2im_kernel(), _input_im2col_reshaped(), _weights_reshaped(), _gemm_output(),
+ _is_first_run(false)
{
}
@@ -102,6 +103,10 @@ void NELocallyConnectedLayer::configure(const ITensor *input, const ITensor *wei
shape_gemm.set(1, mat_input_rows);
_gemm_output.allocator()->init(TensorInfo(shape_gemm, 1, input->info()->data_type()));
+ // Manage intermediate buffers
+ _memory_group.manage(&_input_im2col_reshaped);
+ _memory_group.manage(&_gemm_output);
+
// Configure kernels
_input_im2col_kernel.configure(input, &_input_im2col_reshaped, Size2D(kernel_width, kernel_height), conv_info, _has_bias);
_weights_reshape_kernel.configure(weights, biases, &_weights_reshaped);
@@ -123,6 +128,8 @@ void NELocallyConnectedLayer::run()
NEScheduler::get().schedule(&_weights_reshape_kernel, 3);
}
+ _memory_group.acquire();
+
// Run input reshaping
NEScheduler::get().schedule(&_input_im2col_kernel, Window::DimY);
@@ -131,4 +138,6 @@ void NELocallyConnectedLayer::run()
// Reshape output matrix
NEScheduler::get().schedule(&_output_col2im_kernel, Window::DimY);
+
+ _memory_group.release();
}
diff --git a/src/runtime/NEON/functions/NENormalizationLayer.cpp b/src/runtime/NEON/functions/NENormalizationLayer.cpp
index 69ff32591f..e01ef6660d 100644
--- a/src/runtime/NEON/functions/NENormalizationLayer.cpp
+++ b/src/runtime/NEON/functions/NENormalizationLayer.cpp
@@ -32,8 +32,8 @@
using namespace arm_compute;
-NENormalizationLayer::NENormalizationLayer()
- : _norm_kernel(), _multiply_kernel(), _border_handler(), _input_squared()
+NENormalizationLayer::NENormalizationLayer(std::shared_ptr<IMemoryManager> memory_manager)
+ : _memory_group(std::move(memory_manager)), _norm_kernel(), _multiply_kernel(), _border_handler(), _input_squared()
{
}
@@ -44,6 +44,9 @@ void NENormalizationLayer::configure(const ITensor *input, ITensor *output, Norm
TensorInfo tensor_info(input->info()->tensor_shape(), 1, input->info()->data_type(), input->info()->fixed_point_position());
_input_squared.allocator()->init(tensor_info);
+ // Manage intermediate buffers
+ _memory_group.manage(&_input_squared);
+
// Configure kernels
_norm_kernel.configure(input, &_input_squared, output, norm_info);
_multiply_kernel.configure(input, input, &_input_squared, 1.0f, ConvertPolicy::SATURATE, RoundingPolicy::TO_ZERO);
@@ -55,7 +58,11 @@ void NENormalizationLayer::configure(const ITensor *input, ITensor *output, Norm
void NENormalizationLayer::run()
{
+ _memory_group.acquire();
+
NEScheduler::get().schedule(&_multiply_kernel, Window::DimY);
NEScheduler::get().schedule(&_border_handler, Window::DimY);
NEScheduler::get().schedule(&_norm_kernel, Window::DimY);
+
+ _memory_group.release();
}
diff --git a/src/runtime/NEON/functions/NEOpticalFlow.cpp b/src/runtime/NEON/functions/NEOpticalFlow.cpp
index 3e69a33897..e90d8f6270 100644
--- a/src/runtime/NEON/functions/NEOpticalFlow.cpp
+++ b/src/runtime/NEON/functions/NEOpticalFlow.cpp
@@ -37,8 +37,9 @@
using namespace arm_compute;
-NEOpticalFlow::NEOpticalFlow() // NOLINT
- : _func_scharr(),
+NEOpticalFlow::NEOpticalFlow(std::shared_ptr<IMemoryManager> memory_manager) // NOLINT
+ : _memory_group(std::move(memory_manager)),
+ _func_scharr(),
_kernel_tracker(),
_scharr_gx(),
_scharr_gy(),
@@ -97,6 +98,10 @@ void NEOpticalFlow::configure(const Pyramid *old_pyramid, const Pyramid *new_pyr
_scharr_gx[i].allocator()->init(tensor_info);
_scharr_gy[i].allocator()->init(tensor_info);
+ // Manage intermediate buffers
+ _memory_group.manage(_scharr_gx.get() + i);
+ _memory_group.manage(_scharr_gy.get() + i);
+
// Init Scharr kernel
_func_scharr[i].configure(old_ith_input, _scharr_gx.get() + i, _scharr_gy.get() + i, border_mode, constant_border_value);
@@ -116,6 +121,8 @@ void NEOpticalFlow::run()
{
ARM_COMPUTE_ERROR_ON_MSG(_num_levels == 0, "Unconfigured function");
+ _memory_group.acquire();
+
for(unsigned int level = _num_levels; level > 0; --level)
{
// Run Scharr kernel
@@ -124,4 +131,6 @@ void NEOpticalFlow::run()
// Run Lucas-Kanade kernel
NEScheduler::get().schedule(_kernel_tracker.get() + level - 1, Window::DimX);
}
+
+ _memory_group.release();
}
diff --git a/src/runtime/NEON/functions/NEScale.cpp b/src/runtime/NEON/functions/NEScale.cpp
index 7fc352ab1f..6c5ac3c45b 100644
--- a/src/runtime/NEON/functions/NEScale.cpp
+++ b/src/runtime/NEON/functions/NEScale.cpp
@@ -27,10 +27,10 @@
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/ITensor.h"
-#include "arm_compute/core/NEON/kernels/NEScaleKernel.h"
#include "arm_compute/core/PixelValue.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Window.h"
+#include "arm_compute/runtime/NEON/NEScheduler.h"
#include "arm_compute/runtime/TensorAllocator.h"
#include "support/ToolchainSupport.h"
@@ -86,10 +86,13 @@ void precompute_dx_dy_offsets(ITensor *dx, ITensor *dy, ITensor *offsets, float
}
} // namespace
-NEScale::NEScale() // NOLINT
- : _offsets(),
+NEScale::NEScale(std::shared_ptr<IMemoryManager> memory_manager) // NOLINT
+ : _memory_group(std::move(memory_manager)),
+ _offsets(),
_dx(),
- _dy()
+ _dy(),
+ _scale_kernel(),
+ _border_handler()
{
}
@@ -119,8 +122,6 @@ void NEScale::configure(ITensor *input, ITensor *output, InterpolationPolicy pol
policy = InterpolationPolicy::NEAREST_NEIGHBOR;
}
- auto k = arm_compute::support::cpp14::make_unique<NEScaleKernel>();
-
// Check if the border mode is UNDEFINED
const bool border_undefined = border_mode == BorderMode::UNDEFINED;
@@ -130,8 +131,9 @@ void NEScale::configure(ITensor *input, ITensor *output, InterpolationPolicy pol
{
TensorInfo tensor_info_offsets(shape, Format::S32);
_offsets.allocator()->init(tensor_info_offsets);
+ _memory_group.manage(&_offsets);
- k->configure(input, nullptr, nullptr, &_offsets, output, policy, border_undefined);
+ _scale_kernel.configure(input, nullptr, nullptr, &_offsets, output, policy, border_undefined);
// Allocate once the configure methods have been called
_offsets.allocator()->allocate();
@@ -149,7 +151,12 @@ void NEScale::configure(ITensor *input, ITensor *output, InterpolationPolicy pol
_dx.allocator()->init(tensor_info_dxdy);
_dy.allocator()->init(tensor_info_dxdy);
- k->configure(input, &_dx, &_dy, &_offsets, output, policy, border_undefined);
+ // Manage intermediate buffers
+ _memory_group.manage(&_offsets);
+ _memory_group.manage(&_dx);
+ _memory_group.manage(&_dy);
+
+ _scale_kernel.configure(input, &_dx, &_dy, &_offsets, output, policy, border_undefined);
// Allocate once the configure methods have been called
_offsets.allocator()->allocate();
@@ -162,13 +169,22 @@ void NEScale::configure(ITensor *input, ITensor *output, InterpolationPolicy pol
}
case InterpolationPolicy::AREA:
{
- k->configure(input, nullptr, nullptr, nullptr, output, policy, border_undefined);
+ _scale_kernel.configure(input, nullptr, nullptr, nullptr, output, policy, border_undefined);
break;
}
default:
ARM_COMPUTE_ERROR("Unsupported interpolation mode");
}
- _kernel = std::move(k);
- _border_handler.configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
+ _border_handler.configure(input, _scale_kernel.border_size(), border_mode, PixelValue(constant_border_value));
+}
+
+void NEScale::run()
+{
+ _memory_group.acquire();
+
+ NEScheduler::get().schedule(&_border_handler, Window::DimZ);
+ NEScheduler::get().schedule(&_scale_kernel, Window::DimY);
+
+ _memory_group.release();
}
diff --git a/src/runtime/NEON/functions/NESobel5x5.cpp b/src/runtime/NEON/functions/NESobel5x5.cpp
index 305d21122e..d8f4eda2ff 100644
--- a/src/runtime/NEON/functions/NESobel5x5.cpp
+++ b/src/runtime/NEON/functions/NESobel5x5.cpp
@@ -32,8 +32,8 @@
using namespace arm_compute;
-NESobel5x5::NESobel5x5()
- : _sobel_hor(), _sobel_vert(), _tmp_x(), _tmp_y(), _border_handler()
+NESobel5x5::NESobel5x5(std::shared_ptr<IMemoryManager> memory_manager)
+ : _memory_group(std::move(memory_manager)), _sobel_hor(), _sobel_vert(), _tmp_x(), _tmp_y(), _border_handler()
{
}
@@ -50,6 +50,8 @@ void NESobel5x5::configure(ITensor *input, ITensor *output_x, ITensor *output_y,
{
_tmp_x.allocator()->init(tensor_info);
_tmp_y.allocator()->init(tensor_info);
+ _memory_group.manage(&_tmp_x);
+ _memory_group.manage(&_tmp_y);
_sobel_hor.configure(input, &_tmp_x, &_tmp_y, border_mode == BorderMode::UNDEFINED);
_sobel_vert.configure(&_tmp_x, &_tmp_y, output_x, output_y, border_mode == BorderMode::UNDEFINED);
_tmp_x.allocator()->allocate();
@@ -58,6 +60,7 @@ void NESobel5x5::configure(ITensor *input, ITensor *output_x, ITensor *output_y,
else if(run_sobel_x)
{
_tmp_x.allocator()->init(tensor_info);
+ _memory_group.manage(&_tmp_x);
_sobel_hor.configure(input, &_tmp_x, nullptr, border_mode == BorderMode::UNDEFINED);
_sobel_vert.configure(&_tmp_x, nullptr, output_x, nullptr, border_mode == BorderMode::UNDEFINED);
_tmp_x.allocator()->allocate();
@@ -65,6 +68,7 @@ void NESobel5x5::configure(ITensor *input, ITensor *output_x, ITensor *output_y,
else if(run_sobel_y)
{
_tmp_y.allocator()->init(tensor_info);
+ _memory_group.manage(&_tmp_y);
_sobel_hor.configure(input, nullptr, &_tmp_y, border_mode == BorderMode::UNDEFINED);
_sobel_vert.configure(nullptr, &_tmp_y, nullptr, output_y, border_mode == BorderMode::UNDEFINED);
_tmp_y.allocator()->allocate();
@@ -76,6 +80,11 @@ void NESobel5x5::configure(ITensor *input, ITensor *output_x, ITensor *output_y,
void NESobel5x5::run()
{
NEScheduler::get().schedule(&_border_handler, Window::DimZ);
+
+ _memory_group.acquire();
+
NEScheduler::get().schedule(&_sobel_hor, Window::DimY);
NEScheduler::get().schedule(&_sobel_vert, Window::DimY);
+
+ _memory_group.release();
}
diff --git a/src/runtime/NEON/functions/NESobel7x7.cpp b/src/runtime/NEON/functions/NESobel7x7.cpp
index 57fe028567..5b6f60b338 100644
--- a/src/runtime/NEON/functions/NESobel7x7.cpp
+++ b/src/runtime/NEON/functions/NESobel7x7.cpp
@@ -32,8 +32,8 @@
using namespace arm_compute;
-NESobel7x7::NESobel7x7()
- : _sobel_hor(), _sobel_vert(), _tmp_x(), _tmp_y(), _border_handler()
+NESobel7x7::NESobel7x7(std::shared_ptr<IMemoryManager> memory_manager)
+ : _memory_group(std::move(memory_manager)), _sobel_hor(), _sobel_vert(), _tmp_x(), _tmp_y(), _border_handler()
{
}
@@ -50,6 +50,8 @@ void NESobel7x7::configure(ITensor *input, ITensor *output_x, ITensor *output_y,
{
_tmp_x.allocator()->init(tensor_info);
_tmp_y.allocator()->init(tensor_info);
+ _memory_group.manage(&_tmp_x);
+ _memory_group.manage(&_tmp_y);
_sobel_hor.configure(input, &_tmp_x, &_tmp_y, border_mode == BorderMode::UNDEFINED);
_sobel_vert.configure(&_tmp_x, &_tmp_y, output_x, output_y, border_mode == BorderMode::UNDEFINED);
_tmp_x.allocator()->allocate();
@@ -58,6 +60,7 @@ void NESobel7x7::configure(ITensor *input, ITensor *output_x, ITensor *output_y,
else if(run_sobel_x)
{
_tmp_x.allocator()->init(tensor_info);
+ _memory_group.manage(&_tmp_x);
_sobel_hor.configure(input, &_tmp_x, nullptr, border_mode == BorderMode::UNDEFINED);
_sobel_vert.configure(&_tmp_x, nullptr, output_x, nullptr, border_mode == BorderMode::UNDEFINED);
_tmp_x.allocator()->allocate();
@@ -65,6 +68,7 @@ void NESobel7x7::configure(ITensor *input, ITensor *output_x, ITensor *output_y,
else if(run_sobel_y)
{
_tmp_y.allocator()->init(tensor_info);
+ _memory_group.manage(&_tmp_y);
_sobel_hor.configure(input, nullptr, &_tmp_y, border_mode == BorderMode::UNDEFINED);
_sobel_vert.configure(nullptr, &_tmp_y, nullptr, output_y, border_mode == BorderMode::UNDEFINED);
_tmp_y.allocator()->allocate();
@@ -76,6 +80,11 @@ void NESobel7x7::configure(ITensor *input, ITensor *output_x, ITensor *output_y,
void NESobel7x7::run()
{
NEScheduler::get().schedule(&_border_handler, Window::DimZ);
+
+ _memory_group.acquire();
+
NEScheduler::get().schedule(&_sobel_hor, Window::DimY);
NEScheduler::get().schedule(&_sobel_vert, Window::DimY);
+
+ _memory_group.release();
}