From 5701e2a41ddf0a12042ac648993fc39701961f66 Mon Sep 17 00:00:00 2001 From: Georgios Pinitas Date: Mon, 18 Sep 2017 17:43:33 +0100 Subject: COMPMID-534: Port MemoryManager to CL functions (Images) Adds support for: -CLCannyEdge -CLFastCorners -CLGaussian5x5 -CLHarrisCorners -CLSobel5x5 -CLSobel7x7 Change-Id: I712a76d4ceda915b5cf85a4d12c1b7a059d4d909 Reviewed-on: http://mpd-gerrit.cambridge.arm.com/88118 Tested-by: Kaizen Reviewed-by: Moritz Pflanzer Reviewed-by: Pablo Tello --- arm_compute/runtime/CL/functions/CLCannyEdge.h | 5 ++- arm_compute/runtime/CL/functions/CLFastCorners.h | 6 +++- arm_compute/runtime/CL/functions/CLGaussian5x5.h | 6 +++- arm_compute/runtime/CL/functions/CLHarrisCorners.h | 6 ++-- arm_compute/runtime/CL/functions/CLSobel5x5.h | 6 +++- arm_compute/runtime/CL/functions/CLSobel7x7.h | 6 +++- src/runtime/CL/functions/CLCannyEdge.cpp | 41 ++++++++++++++++++---- src/runtime/CL/functions/CLFastCorners.cpp | 11 ++++-- src/runtime/CL/functions/CLGaussian5x5.cpp | 13 +++++-- src/runtime/CL/functions/CLHarrisCorners.cpp | 35 +++++++++++++----- src/runtime/CL/functions/CLSobel5x5.cpp | 13 +++++-- src/runtime/CL/functions/CLSobel7x7.cpp | 13 +++++-- src/runtime/NEON/functions/NEGaussian5x5.cpp | 3 +- 13 files changed, 134 insertions(+), 30 deletions(-) diff --git a/arm_compute/runtime/CL/functions/CLCannyEdge.h b/arm_compute/runtime/CL/functions/CLCannyEdge.h index e5a82b2263..1d5a5aaeaa 100644 --- a/arm_compute/runtime/CL/functions/CLCannyEdge.h +++ b/arm_compute/runtime/CL/functions/CLCannyEdge.h @@ -28,7 +28,9 @@ #include "arm_compute/core/CL/kernels/CLCannyEdgeKernel.h" #include "arm_compute/core/CL/kernels/CLFillBorderKernel.h" +#include "arm_compute/runtime/CL/CLMemoryGroup.h" #include "arm_compute/runtime/CL/CLTensor.h" +#include "arm_compute/runtime/IMemoryManager.h" #include @@ -49,7 +51,7 @@ class CLCannyEdge : public IFunction { public: /** Constructor */ - CLCannyEdge(); + CLCannyEdge(std::shared_ptr memory_manager = nullptr); /** Initialise 
the function's source, destination, thresholds, gradient size, normalization type and border mode. * * @param[in,out] input Source tensor. Data types supported: U8. (Written to only for border_mode != UNDEFINED) @@ -68,6 +70,7 @@ public: virtual void run() override; private: + CLMemoryGroup _memory_group; /**< Function's memory group */ std::unique_ptr _sobel; /**< Pointer to Sobel kernel. */ CLGradientKernel _gradient; /**< Gradient kernel. */ CLFillBorderKernel _border_mag_gradient; /**< Fill border on magnitude tensor kernel */ diff --git a/arm_compute/runtime/CL/functions/CLFastCorners.h b/arm_compute/runtime/CL/functions/CLFastCorners.h index 79d82af462..9afec71bc3 100644 --- a/arm_compute/runtime/CL/functions/CLFastCorners.h +++ b/arm_compute/runtime/CL/functions/CLFastCorners.h @@ -29,11 +29,14 @@ #include "arm_compute/core/Types.h" #include "arm_compute/core/Window.h" #include "arm_compute/runtime/CL/CLArray.h" +#include "arm_compute/runtime/CL/CLMemoryGroup.h" #include "arm_compute/runtime/CL/CLTensor.h" #include "arm_compute/runtime/CL/functions/CLNonMaximaSuppression3x3.h" #include "arm_compute/runtime/IFunction.h" +#include "arm_compute/runtime/IMemoryManager.h" #include +#include namespace arm_compute { @@ -51,7 +54,7 @@ class CLFastCorners : public IFunction { public: /** Constructor */ - CLFastCorners(); + CLFastCorners(std::shared_ptr memory_manager = nullptr); /** Prevent instances of this class from being copied (As this class contains pointers) */ CLFastCorners(const CLFastCorners &) = delete; /** Prevent instances of this class from being copied (As this class contains pointers) */ @@ -72,6 +75,7 @@ public: void run() override; private: + CLMemoryGroup _memory_group; CLFastCornersKernel _fast_corners_kernel; CLNonMaximaSuppression3x3 _suppr_func; CLCopyToArrayKernel _copy_array_kernel; diff --git a/arm_compute/runtime/CL/functions/CLGaussian5x5.h b/arm_compute/runtime/CL/functions/CLGaussian5x5.h index 148b9a9924..3c60cc66a3 100644 --- 
a/arm_compute/runtime/CL/functions/CLGaussian5x5.h +++ b/arm_compute/runtime/CL/functions/CLGaussian5x5.h @@ -27,10 +27,13 @@ #include "arm_compute/core/CL/kernels/CLFillBorderKernel.h" #include "arm_compute/core/CL/kernels/CLGaussian5x5Kernel.h" #include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/CLMemoryGroup.h" #include "arm_compute/runtime/CL/CLTensor.h" #include "arm_compute/runtime/IFunction.h" +#include "arm_compute/runtime/IMemoryManager.h" #include +#include namespace arm_compute { @@ -47,7 +50,7 @@ class CLGaussian5x5 : public IFunction { public: /** Default Constructor. */ - CLGaussian5x5(); + CLGaussian5x5(std::shared_ptr memory_manager = nullptr); /** Initialise the function's source, destinations and border mode. * * @param[in,out] input Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED) @@ -61,6 +64,7 @@ public: void run() override; protected: + CLMemoryGroup _memory_group; /**< Function's memory group */ CLGaussian5x5HorKernel _kernel_hor; /**< Horizontal pass kernel */ CLGaussian5x5VertKernel _kernel_vert; /**< Vertical pass kernel */ CLFillBorderKernel _border_handler; /**< Kernel to handle image borders */ diff --git a/arm_compute/runtime/CL/functions/CLHarrisCorners.h b/arm_compute/runtime/CL/functions/CLHarrisCorners.h index f9a1275f68..e09e67060f 100644 --- a/arm_compute/runtime/CL/functions/CLHarrisCorners.h +++ b/arm_compute/runtime/CL/functions/CLHarrisCorners.h @@ -31,11 +31,12 @@ #include "arm_compute/core/CL/kernels/CLHarrisCornersKernel.h" #include "arm_compute/core/NEON/kernels/NEHarrisCornersKernel.h" #include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/CLMemoryGroup.h" #include "arm_compute/runtime/CL/CLTensor.h" #include "arm_compute/runtime/CL/functions/CLNonMaximaSuppression3x3.h" +#include "arm_compute/runtime/IMemoryManager.h" #include - #include namespace arm_compute @@ -60,7 +61,7 @@ class CLHarrisCorners : public IFunction { public: /** Constructor */ 
- CLHarrisCorners(); + CLHarrisCorners(std::shared_ptr memory_manager = nullptr); /** Prevent instances of this class from being copied (As this class contains pointers) */ CLHarrisCorners(const CLHarrisCorners &) = delete; /** Prevent instances of this class from being copied (As this class contains pointers) */ @@ -85,6 +86,7 @@ public: void run() override; private: + CLMemoryGroup _memory_group; /**< Function's memory group */ std::unique_ptr _sobel; /**< Sobel function */ CLHarrisScoreKernel _harris_score; /**< Harris score kernel */ CLNonMaximaSuppression3x3 _non_max_suppr; /**< Non-maxima suppression function */ diff --git a/arm_compute/runtime/CL/functions/CLSobel5x5.h b/arm_compute/runtime/CL/functions/CLSobel5x5.h index ad1f72faf8..3e603f8311 100644 --- a/arm_compute/runtime/CL/functions/CLSobel5x5.h +++ b/arm_compute/runtime/CL/functions/CLSobel5x5.h @@ -27,10 +27,13 @@ #include "arm_compute/core/CL/kernels/CLFillBorderKernel.h" #include "arm_compute/core/CL/kernels/CLSobel5x5Kernel.h" #include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/CLMemoryGroup.h" #include "arm_compute/runtime/CL/CLTensor.h" #include "arm_compute/runtime/IFunction.h" +#include "arm_compute/runtime/IMemoryManager.h" #include +#include namespace arm_compute { @@ -47,7 +50,7 @@ class CLSobel5x5 : public IFunction { public: /** Default Constructor. */ - CLSobel5x5(); + CLSobel5x5(std::shared_ptr memory_manager = nullptr); /** Initialise the function's source, destinations and border mode. * * @note At least one of output_x or output_y must be not NULL. 
@@ -64,6 +67,7 @@ public: void run() override; protected: + CLMemoryGroup _memory_group; /**< Function's memory group */ CLSobel5x5HorKernel _sobel_hor; /**< Sobel Horizontal 5x5 kernel */ CLSobel5x5VertKernel _sobel_vert; /**< Sobel Vertical 5x5 kernel */ CLFillBorderKernel _border_handler; /**< Kernel to handle image borders */ diff --git a/arm_compute/runtime/CL/functions/CLSobel7x7.h b/arm_compute/runtime/CL/functions/CLSobel7x7.h index 1a3fe1a50a..0dc0a1c5e9 100644 --- a/arm_compute/runtime/CL/functions/CLSobel7x7.h +++ b/arm_compute/runtime/CL/functions/CLSobel7x7.h @@ -27,10 +27,13 @@ #include "arm_compute/core/CL/kernels/CLFillBorderKernel.h" #include "arm_compute/core/CL/kernels/CLSobel7x7Kernel.h" #include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/CLMemoryGroup.h" #include "arm_compute/runtime/CL/CLTensor.h" #include "arm_compute/runtime/IFunction.h" +#include "arm_compute/runtime/IMemoryManager.h" #include +#include namespace arm_compute { @@ -47,7 +50,7 @@ class CLSobel7x7 : public IFunction { public: /** Default Constructor. */ - CLSobel7x7(); + CLSobel7x7(std::shared_ptr memory_manager = nullptr); /** Initialise the function's source, destinations and border mode. * * @note At least one of output_x or output_y must be not NULL. 
@@ -64,6 +67,7 @@ public: void run() override; protected: + CLMemoryGroup _memory_group; /**< Function's memory group */ CLSobel7x7HorKernel _sobel_hor; /**< Sobel Horizontal 7x7 kernel */ CLSobel7x7VertKernel _sobel_vert; /**< Sobel Vertical 7x7 kernel */ CLFillBorderKernel _border_handler; /**< Kernel to handle image borders */ diff --git a/src/runtime/CL/functions/CLCannyEdge.cpp b/src/runtime/CL/functions/CLCannyEdge.cpp index 448ca9289d..5acb8e7ddb 100644 --- a/src/runtime/CL/functions/CLCannyEdge.cpp +++ b/src/runtime/CL/functions/CLCannyEdge.cpp @@ -35,8 +35,9 @@ using namespace arm_compute; -CLCannyEdge::CLCannyEdge() // NOLINT - : _sobel(), +CLCannyEdge::CLCannyEdge(std::shared_ptr memory_manager) // NOLINT + : _memory_group(std::move(memory_manager)), + _sobel(), _gradient(), _border_mag_gradient(), _non_max_suppr(), @@ -96,6 +97,10 @@ void CLCannyEdge::configure(ICLTensor *input, ICLTensor *output, int32_t upper_t TensorInfo info_s32(shape_l1_stack, 1, arm_compute::DataType::S32); _l1_stack.allocator()->init(info_s32); + // Manage intermediate buffers + _memory_group.manage(&_gx); + _memory_group.manage(&_gy); + // Configure/Init sobelNxN if(gradient_size == 3) { @@ -120,23 +125,43 @@ void CLCannyEdge::configure(ICLTensor *input, ICLTensor *output, int32_t upper_t ARM_COMPUTE_ERROR("Gradient %d size not supported", gradient_size); } + // Manage intermediate buffers + _memory_group.manage(&_mag); + _memory_group.manage(&_phase); + // Configure gradient _gradient.configure(&_gx, &_gy, &_mag, &_phase, norm_type); + // Allocate intermediate buffers + _gx.allocator()->allocate(); + _gy.allocator()->allocate(); + + // Manage intermediate buffers + _memory_group.manage(&_nonmax); + // Configure non-maxima suppression _non_max_suppr.configure(&_mag, &_phase, &_nonmax, lower_thr, border_mode == BorderMode::UNDEFINED); + // Allocate intermediate buffers + _phase.allocator()->allocate(); + // Fill border around magnitude image as non-maxima suppression will access 
// it. If border mode is undefined filling the border is a nop. _border_mag_gradient.configure(&_mag, _non_max_suppr.border_size(), border_mode, constant_border_value); + // Allocate intermediate buffers + _mag.allocator()->allocate(); + + // Manage intermediate buffers + _memory_group.manage(&_visited); + _memory_group.manage(&_recorded); + _memory_group.manage(&_l1_stack); + _memory_group.manage(&_l1_list_counter); + // Configure edge tracing _edge_trace.configure(&_nonmax, output, upper_thr, lower_thr, &_visited, &_recorded, &_l1_stack, &_l1_list_counter); - _gx.allocator()->allocate(); - _gy.allocator()->allocate(); - _phase.allocator()->allocate(); - _mag.allocator()->allocate(); + // Allocate intermediate buffers _visited.allocator()->allocate(); _recorded.allocator()->allocate(); _l1_stack.allocator()->allocate(); @@ -146,6 +171,8 @@ void CLCannyEdge::configure(ICLTensor *input, ICLTensor *output, int32_t upper_t void CLCannyEdge::run() { + _memory_group.acquire(); + // Run sobel _sobel->run(); @@ -165,4 +192,6 @@ void CLCannyEdge::run() _l1_list_counter.clear(CLScheduler::get().queue()); _l1_stack.clear(CLScheduler::get().queue()); CLScheduler::get().enqueue(_edge_trace, true); + + _memory_group.release(); } diff --git a/src/runtime/CL/functions/CLFastCorners.cpp b/src/runtime/CL/functions/CLFastCorners.cpp index d2903fb849..7a0dd09fbe 100644 --- a/src/runtime/CL/functions/CLFastCorners.cpp +++ b/src/runtime/CL/functions/CLFastCorners.cpp @@ -36,8 +36,9 @@ using namespace arm_compute; -CLFastCorners::CLFastCorners() - : _fast_corners_kernel(), +CLFastCorners::CLFastCorners(std::shared_ptr memory_manager) + : _memory_group(std::move(memory_manager)), + _fast_corners_kernel(), _suppr_func(), _copy_array_kernel(), _output(), @@ -70,6 +71,7 @@ void CLFastCorners::configure(const ICLImage *input, float threshold, bool nonma const bool update_number = (nullptr != _num_corners); + _memory_group.manage(&_output); _fast_corners_kernel.configure(input, &_output, 
threshold, nonmax_suppression, border_mode); if(!_non_max) @@ -79,6 +81,7 @@ void CLFastCorners::configure(const ICLImage *input, float threshold, bool nonma else { _suppr.allocator()->init(tensor_info); + _memory_group.manage(&_suppr); _suppr_func.configure(&_output, &_suppr, border_mode); _copy_array_kernel.configure(&_suppr, update_number, corners, &_num_buffer); @@ -94,6 +97,8 @@ void CLFastCorners::run() { cl::CommandQueue q = CLScheduler::get().queue(); + _memory_group.acquire(); + if(_non_max) { ARM_COMPUTE_ERROR_ON_MSG(_output.cl_buffer().get() == nullptr, "Unconfigured function"); @@ -124,4 +129,6 @@ void CLFastCorners::run() } q.flush(); + + _memory_group.release(); } diff --git a/src/runtime/CL/functions/CLGaussian5x5.cpp b/src/runtime/CL/functions/CLGaussian5x5.cpp index e83a8fb857..f30eee1df7 100644 --- a/src/runtime/CL/functions/CLGaussian5x5.cpp +++ b/src/runtime/CL/functions/CLGaussian5x5.cpp @@ -35,8 +35,8 @@ using namespace arm_compute; -CLGaussian5x5::CLGaussian5x5() - : _kernel_hor(), _kernel_vert(), _border_handler(), _tmp() +CLGaussian5x5::CLGaussian5x5(std::shared_ptr memory_manager) + : _memory_group(std::move(memory_manager)), _kernel_hor(), _kernel_vert(), _border_handler(), _tmp() { } @@ -46,6 +46,10 @@ void CLGaussian5x5::configure(ICLTensor *input, ICLTensor *output, BorderMode bo _tmp.allocator()->init(TensorInfo(input->info()->tensor_shape(), 1, DataType::U16)); + // Manage intermediate buffers + _memory_group.manage(&_tmp); + + // Configure kernels _kernel_hor.configure(input, &_tmp, border_mode == BorderMode::UNDEFINED); _kernel_vert.configure(&_tmp, output, border_mode == BorderMode::UNDEFINED); _border_handler.configure(input, _kernel_hor.border_size(), border_mode, PixelValue(constant_border_value)); @@ -57,6 +61,11 @@ void CLGaussian5x5::configure(ICLTensor *input, ICLTensor *output, BorderMode bo void CLGaussian5x5::run() { CLScheduler::get().enqueue(_border_handler, false); + + _memory_group.acquire(); + 
CLScheduler::get().enqueue(_kernel_hor, false); CLScheduler::get().enqueue(_kernel_vert); + + _memory_group.release(); } diff --git a/src/runtime/CL/functions/CLHarrisCorners.cpp b/src/runtime/CL/functions/CLHarrisCorners.cpp index 2140240753..059528fe30 100644 --- a/src/runtime/CL/functions/CLHarrisCorners.cpp +++ b/src/runtime/CL/functions/CLHarrisCorners.cpp @@ -42,8 +42,9 @@ using namespace arm_compute; -CLHarrisCorners::CLHarrisCorners() // NOLINT - : _sobel(nullptr), +CLHarrisCorners::CLHarrisCorners(std::shared_ptr memory_manager) // NOLINT + : _memory_group(std::move(memory_manager)), + _sobel(nullptr), _harris_score(), _non_max_suppr(), _candidates(), @@ -84,6 +85,10 @@ void CLHarrisCorners::configure(ICLImage *input, float threshold, float min_dist _corners_list = arm_compute::support::cpp14::make_unique(shape.x() * shape.y()); + // Manage intermediate buffers + _memory_group.manage(&_gx); + _memory_group.manage(&_gy); + /* Set/init Sobel kernel accordingly with gradient_size */ switch(gradient_size) { @@ -116,6 +121,9 @@ void CLHarrisCorners::configure(ICLImage *input, float threshold, float min_dist const float norm_factor = 1.0f / (255.0f * pow(4.0f, gradient_size / 2) * block_size); const float pow4_normalization_factor = pow(norm_factor, 4); + // Manage intermediate buffers + _memory_group.manage(&_score); + // Set/init Harris Score kernel accordingly with block_size _harris_score.configure(&_gx, &_gy, &_score, block_size, pow4_normalization_factor, threshold, sensitivity, border_mode == BorderMode::UNDEFINED); @@ -123,26 +131,35 @@ void CLHarrisCorners::configure(ICLImage *input, float threshold, float min_dist _border_gx.configure(&_gx, _harris_score.border_size(), border_mode, PixelValue(constant_border_value)); _border_gy.configure(&_gy, _harris_score.border_size(), border_mode, PixelValue(constant_border_value)); + // Allocate intermediate buffers + _gx.allocator()->allocate(); + _gy.allocator()->allocate(); + + // Manage intermediate buffers + 
_memory_group.manage(&_nonmax); + // Init non-maxima suppression function _non_max_suppr.configure(&_score, &_nonmax, border_mode); + // Allocate intermediate buffers + _score.allocator()->allocate(); + // Init corner candidates kernel _candidates.configure(&_nonmax, _corners_list.get(), &_num_corner_candidates); - // Init euclidean distance - _sort_euclidean.configure(_corners_list.get(), _corners, &_num_corner_candidates, min_dist); - // Allocate intermediate buffers - _gx.allocator()->allocate(); - _gy.allocator()->allocate(); - _score.allocator()->allocate(); _nonmax.allocator()->allocate(); + + // Init euclidean distance + _sort_euclidean.configure(_corners_list.get(), _corners, &_num_corner_candidates, min_dist); } void CLHarrisCorners::run() { ARM_COMPUTE_ERROR_ON_MSG(_sobel == nullptr, "Unconfigured function"); + _memory_group.acquire(); + // Init to 0 number of corner candidates _num_corner_candidates = 0; @@ -167,4 +184,6 @@ void CLHarrisCorners::run() _corners->map(CLScheduler::get().queue(), true); Scheduler::get().schedule(&_sort_euclidean, Window::DimY); _corners->unmap(CLScheduler::get().queue()); + + _memory_group.release(); } diff --git a/src/runtime/CL/functions/CLSobel5x5.cpp b/src/runtime/CL/functions/CLSobel5x5.cpp index 098b546c1a..d4bc85524e 100644 --- a/src/runtime/CL/functions/CLSobel5x5.cpp +++ b/src/runtime/CL/functions/CLSobel5x5.cpp @@ -33,8 +33,8 @@ using namespace arm_compute; -CLSobel5x5::CLSobel5x5() - : _sobel_hor(), _sobel_vert(), _border_handler(), _tmp_x(), _tmp_y() +CLSobel5x5::CLSobel5x5(std::shared_ptr memory_manager) + : _memory_group(std::move(memory_manager)), _sobel_hor(), _sobel_vert(), _border_handler(), _tmp_x(), _tmp_y() { } @@ -51,6 +51,8 @@ void CLSobel5x5::configure(ICLTensor *input, ICLTensor *output_x, ICLTensor *out { _tmp_x.allocator()->init(tensor_info); _tmp_y.allocator()->init(tensor_info); + _memory_group.manage(&_tmp_x); + _memory_group.manage(&_tmp_y); _sobel_hor.configure(input, &_tmp_x, &_tmp_y, 
border_mode == BorderMode::UNDEFINED); _sobel_vert.configure(&_tmp_x, &_tmp_y, output_x, output_y, border_mode == BorderMode::UNDEFINED); _tmp_x.allocator()->allocate(); @@ -59,6 +61,7 @@ void CLSobel5x5::configure(ICLTensor *input, ICLTensor *output_x, ICLTensor *out else if(run_sobel_x) { _tmp_x.allocator()->init(tensor_info); + _memory_group.manage(&_tmp_x); _sobel_hor.configure(input, &_tmp_x, nullptr, border_mode == BorderMode::UNDEFINED); _sobel_vert.configure(&_tmp_x, nullptr, output_x, nullptr, border_mode == BorderMode::UNDEFINED); _tmp_x.allocator()->allocate(); @@ -66,6 +69,7 @@ void CLSobel5x5::configure(ICLTensor *input, ICLTensor *output_x, ICLTensor *out else if(run_sobel_y) { _tmp_y.allocator()->init(tensor_info); + _memory_group.manage(&_tmp_y); _sobel_hor.configure(input, nullptr, &_tmp_y, border_mode == BorderMode::UNDEFINED); _sobel_vert.configure(nullptr, &_tmp_y, nullptr, output_y, border_mode == BorderMode::UNDEFINED); _tmp_y.allocator()->allocate(); @@ -76,6 +80,11 @@ void CLSobel5x5::configure(ICLTensor *input, ICLTensor *output_x, ICLTensor *out void CLSobel5x5::run() { CLScheduler::get().enqueue(_border_handler, false); + + _memory_group.acquire(); + CLScheduler::get().enqueue(_sobel_hor, false); CLScheduler::get().enqueue(_sobel_vert); + + _memory_group.release(); } diff --git a/src/runtime/CL/functions/CLSobel7x7.cpp b/src/runtime/CL/functions/CLSobel7x7.cpp index db84fa99ae..60830905df 100644 --- a/src/runtime/CL/functions/CLSobel7x7.cpp +++ b/src/runtime/CL/functions/CLSobel7x7.cpp @@ -33,8 +33,8 @@ using namespace arm_compute; -CLSobel7x7::CLSobel7x7() - : _sobel_hor(), _sobel_vert(), _border_handler(), _tmp_x(), _tmp_y() +CLSobel7x7::CLSobel7x7(std::shared_ptr memory_manager) + : _memory_group(std::move(memory_manager)), _sobel_hor(), _sobel_vert(), _border_handler(), _tmp_x(), _tmp_y() { } @@ -51,6 +51,8 @@ void CLSobel7x7::configure(ICLTensor *input, ICLTensor *output_x, ICLTensor *out { _tmp_x.allocator()->init(tensor_info); 
_tmp_y.allocator()->init(tensor_info); + _memory_group.manage(&_tmp_x); + _memory_group.manage(&_tmp_y); _sobel_hor.configure(input, &_tmp_x, &_tmp_y, border_mode == BorderMode::UNDEFINED); _sobel_vert.configure(&_tmp_x, &_tmp_y, output_x, output_y, border_mode == BorderMode::UNDEFINED); _tmp_x.allocator()->allocate(); @@ -59,6 +61,7 @@ void CLSobel7x7::configure(ICLTensor *input, ICLTensor *output_x, ICLTensor *out else if(run_sobel_x) { _tmp_x.allocator()->init(tensor_info); + _memory_group.manage(&_tmp_x); _sobel_hor.configure(input, &_tmp_x, nullptr, border_mode == BorderMode::UNDEFINED); _sobel_vert.configure(&_tmp_x, nullptr, output_x, nullptr, border_mode == BorderMode::UNDEFINED); _tmp_x.allocator()->allocate(); @@ -66,6 +69,7 @@ void CLSobel7x7::configure(ICLTensor *input, ICLTensor *output_x, ICLTensor *out else if(run_sobel_y) { _tmp_y.allocator()->init(tensor_info); + _memory_group.manage(&_tmp_y); _sobel_hor.configure(input, nullptr, &_tmp_y, border_mode == BorderMode::UNDEFINED); _sobel_vert.configure(nullptr, &_tmp_y, nullptr, output_y, border_mode == BorderMode::UNDEFINED); _tmp_y.allocator()->allocate(); @@ -76,6 +80,11 @@ void CLSobel7x7::configure(ICLTensor *input, ICLTensor *output_x, ICLTensor *out void CLSobel7x7::run() { CLScheduler::get().enqueue(_border_handler, false); + + _memory_group.acquire(); + CLScheduler::get().enqueue(_sobel_hor, false); CLScheduler::get().enqueue(_sobel_vert); + + _memory_group.release(); } diff --git a/src/runtime/NEON/functions/NEGaussian5x5.cpp b/src/runtime/NEON/functions/NEGaussian5x5.cpp index f085975b1e..b010ca0104 100644 --- a/src/runtime/NEON/functions/NEGaussian5x5.cpp +++ b/src/runtime/NEON/functions/NEGaussian5x5.cpp @@ -57,9 +57,10 @@ void NEGaussian5x5::configure(ITensor *input, ITensor *output, BorderMode border void NEGaussian5x5::run() { + NEScheduler::get().schedule(&_border_handler, Window::DimZ); + _memory_group.acquire(); - NEScheduler::get().schedule(&_border_handler, Window::DimZ); 
NEScheduler::get().schedule(&_kernel_hor, Window::DimY); NEScheduler::get().schedule(&_kernel_vert, Window::DimY); -- cgit v1.2.1