aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGeorgios Pinitas <georgios.pinitas@arm.com>2017-09-18 17:43:33 +0100
committerAnthony Barbier <anthony.barbier@arm.com>2018-11-02 16:35:24 +0000
commit5701e2a41ddf0a12042ac648993fc39701961f66 (patch)
treedd66e82e7f7fdc30a636e748a774422bc1bec40d
parente938175997b973e1ea288f5b95cc8710e6abc7aa (diff)
downloadComputeLibrary-5701e2a41ddf0a12042ac648993fc39701961f66.tar.gz
COMPMID-534: Port MemoryManager to CL functions (Images)
Adds support for:
-CLCannyEdge
-CLFastCorners
-CLGaussian5x5
-CLHarrisCorners
-CLSobel5x5
-CLSobel7x7

Change-Id: I712a76d4ceda915b5cf85a4d12c1b7a059d4d909
Reviewed-on: http://mpd-gerrit.cambridge.arm.com/88118
Tested-by: Kaizen <jeremy.johnson+kaizengerrit@arm.com>
Reviewed-by: Moritz Pflanzer <moritz.pflanzer@arm.com>
Reviewed-by: Pablo Tello <pablo.tello@arm.com>
-rw-r--r--arm_compute/runtime/CL/functions/CLCannyEdge.h5
-rw-r--r--arm_compute/runtime/CL/functions/CLFastCorners.h6
-rw-r--r--arm_compute/runtime/CL/functions/CLGaussian5x5.h6
-rw-r--r--arm_compute/runtime/CL/functions/CLHarrisCorners.h6
-rw-r--r--arm_compute/runtime/CL/functions/CLSobel5x5.h6
-rw-r--r--arm_compute/runtime/CL/functions/CLSobel7x7.h6
-rw-r--r--src/runtime/CL/functions/CLCannyEdge.cpp41
-rw-r--r--src/runtime/CL/functions/CLFastCorners.cpp11
-rw-r--r--src/runtime/CL/functions/CLGaussian5x5.cpp13
-rw-r--r--src/runtime/CL/functions/CLHarrisCorners.cpp35
-rw-r--r--src/runtime/CL/functions/CLSobel5x5.cpp13
-rw-r--r--src/runtime/CL/functions/CLSobel7x7.cpp13
-rw-r--r--src/runtime/NEON/functions/NEGaussian5x5.cpp3
13 files changed, 134 insertions, 30 deletions
diff --git a/arm_compute/runtime/CL/functions/CLCannyEdge.h b/arm_compute/runtime/CL/functions/CLCannyEdge.h
index e5a82b2263..1d5a5aaeaa 100644
--- a/arm_compute/runtime/CL/functions/CLCannyEdge.h
+++ b/arm_compute/runtime/CL/functions/CLCannyEdge.h
@@ -28,7 +28,9 @@
#include "arm_compute/core/CL/kernels/CLCannyEdgeKernel.h"
#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h"
+#include "arm_compute/runtime/CL/CLMemoryGroup.h"
#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/IMemoryManager.h"
#include <memory>
@@ -49,7 +51,7 @@ class CLCannyEdge : public IFunction
{
public:
/** Constructor */
- CLCannyEdge();
+ CLCannyEdge(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
/** Initialise the function's source, destination, thresholds, gradient size, normalization type and border mode.
*
* @param[in,out] input Source tensor. Data types supported: U8. (Written to only for border_mode != UNDEFINED)
@@ -68,6 +70,7 @@ public:
virtual void run() override;
private:
+ CLMemoryGroup _memory_group; /**< Function's memory group */
std::unique_ptr<IFunction> _sobel; /**< Pointer to Sobel kernel. */
CLGradientKernel _gradient; /**< Gradient kernel. */
CLFillBorderKernel _border_mag_gradient; /**< Fill border on magnitude tensor kernel */
diff --git a/arm_compute/runtime/CL/functions/CLFastCorners.h b/arm_compute/runtime/CL/functions/CLFastCorners.h
index 79d82af462..9afec71bc3 100644
--- a/arm_compute/runtime/CL/functions/CLFastCorners.h
+++ b/arm_compute/runtime/CL/functions/CLFastCorners.h
@@ -29,11 +29,14 @@
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Window.h"
#include "arm_compute/runtime/CL/CLArray.h"
+#include "arm_compute/runtime/CL/CLMemoryGroup.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/functions/CLNonMaximaSuppression3x3.h"
#include "arm_compute/runtime/IFunction.h"
+#include "arm_compute/runtime/IMemoryManager.h"
#include <cstdint>
+#include <memory>
namespace arm_compute
{
@@ -51,7 +54,7 @@ class CLFastCorners : public IFunction
{
public:
/** Constructor */
- CLFastCorners();
+ CLFastCorners(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
/** Prevent instances of this class from being copied (As this class contains pointers) */
CLFastCorners(const CLFastCorners &) = delete;
/** Prevent instances of this class from being copied (As this class contains pointers) */
@@ -72,6 +75,7 @@ public:
void run() override;
private:
+ CLMemoryGroup _memory_group;
CLFastCornersKernel _fast_corners_kernel;
CLNonMaximaSuppression3x3 _suppr_func;
CLCopyToArrayKernel _copy_array_kernel;
diff --git a/arm_compute/runtime/CL/functions/CLGaussian5x5.h b/arm_compute/runtime/CL/functions/CLGaussian5x5.h
index 148b9a9924..3c60cc66a3 100644
--- a/arm_compute/runtime/CL/functions/CLGaussian5x5.h
+++ b/arm_compute/runtime/CL/functions/CLGaussian5x5.h
@@ -27,10 +27,13 @@
#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h"
#include "arm_compute/core/CL/kernels/CLGaussian5x5Kernel.h"
#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/CLMemoryGroup.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/IFunction.h"
+#include "arm_compute/runtime/IMemoryManager.h"
#include <cstdint>
+#include <memory>
namespace arm_compute
{
@@ -47,7 +50,7 @@ class CLGaussian5x5 : public IFunction
{
public:
/** Default Constructor. */
- CLGaussian5x5();
+ CLGaussian5x5(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
/** Initialise the function's source, destinations and border mode.
*
* @param[in,out] input Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED)
@@ -61,6 +64,7 @@ public:
void run() override;
protected:
+ CLMemoryGroup _memory_group; /**< Function's memory group */
CLGaussian5x5HorKernel _kernel_hor; /**< Horizontal pass kernel */
CLGaussian5x5VertKernel _kernel_vert; /**< Vertical pass kernel */
CLFillBorderKernel _border_handler; /**< Kernel to handle image borders */
diff --git a/arm_compute/runtime/CL/functions/CLHarrisCorners.h b/arm_compute/runtime/CL/functions/CLHarrisCorners.h
index f9a1275f68..e09e67060f 100644
--- a/arm_compute/runtime/CL/functions/CLHarrisCorners.h
+++ b/arm_compute/runtime/CL/functions/CLHarrisCorners.h
@@ -31,11 +31,12 @@
#include "arm_compute/core/CL/kernels/CLHarrisCornersKernel.h"
#include "arm_compute/core/NEON/kernels/NEHarrisCornersKernel.h"
#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/CLMemoryGroup.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/functions/CLNonMaximaSuppression3x3.h"
+#include "arm_compute/runtime/IMemoryManager.h"
#include <cstdint>
-
#include <memory>
namespace arm_compute
@@ -60,7 +61,7 @@ class CLHarrisCorners : public IFunction
{
public:
/** Constructor */
- CLHarrisCorners();
+ CLHarrisCorners(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
/** Prevent instances of this class from being copied (As this class contains pointers) */
CLHarrisCorners(const CLHarrisCorners &) = delete;
/** Prevent instances of this class from being copied (As this class contains pointers) */
@@ -85,6 +86,7 @@ public:
void run() override;
private:
+ CLMemoryGroup _memory_group; /**< Function's memory group */
std::unique_ptr<IFunction> _sobel; /**< Sobel function */
CLHarrisScoreKernel _harris_score; /**< Harris score kernel */
CLNonMaximaSuppression3x3 _non_max_suppr; /**< Non-maxima suppression function */
diff --git a/arm_compute/runtime/CL/functions/CLSobel5x5.h b/arm_compute/runtime/CL/functions/CLSobel5x5.h
index ad1f72faf8..3e603f8311 100644
--- a/arm_compute/runtime/CL/functions/CLSobel5x5.h
+++ b/arm_compute/runtime/CL/functions/CLSobel5x5.h
@@ -27,10 +27,13 @@
#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h"
#include "arm_compute/core/CL/kernels/CLSobel5x5Kernel.h"
#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/CLMemoryGroup.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/IFunction.h"
+#include "arm_compute/runtime/IMemoryManager.h"
#include <cstdint>
+#include <memory>
namespace arm_compute
{
@@ -47,7 +50,7 @@ class CLSobel5x5 : public IFunction
{
public:
/** Default Constructor. */
- CLSobel5x5();
+ CLSobel5x5(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
/** Initialise the function's source, destinations and border mode.
*
* @note At least one of output_x or output_y must be not NULL.
@@ -64,6 +67,7 @@ public:
void run() override;
protected:
+ CLMemoryGroup _memory_group; /**< Function's memory group */
CLSobel5x5HorKernel _sobel_hor; /**< Sobel Horizontal 5x5 kernel */
CLSobel5x5VertKernel _sobel_vert; /**< Sobel Vertical 5x5 kernel */
CLFillBorderKernel _border_handler; /**< Kernel to handle image borders */
diff --git a/arm_compute/runtime/CL/functions/CLSobel7x7.h b/arm_compute/runtime/CL/functions/CLSobel7x7.h
index 1a3fe1a50a..0dc0a1c5e9 100644
--- a/arm_compute/runtime/CL/functions/CLSobel7x7.h
+++ b/arm_compute/runtime/CL/functions/CLSobel7x7.h
@@ -27,10 +27,13 @@
#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h"
#include "arm_compute/core/CL/kernels/CLSobel7x7Kernel.h"
#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/CLMemoryGroup.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/IFunction.h"
+#include "arm_compute/runtime/IMemoryManager.h"
#include <cstdint>
+#include <memory>
namespace arm_compute
{
@@ -47,7 +50,7 @@ class CLSobel7x7 : public IFunction
{
public:
/** Default Constructor. */
- CLSobel7x7();
+ CLSobel7x7(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
/** Initialise the function's source, destinations and border mode.
*
* @note At least one of output_x or output_y must be not NULL.
@@ -64,6 +67,7 @@ public:
void run() override;
protected:
+ CLMemoryGroup _memory_group; /**< Function's memory group */
CLSobel7x7HorKernel _sobel_hor; /**< Sobel Horizontal 7x7 kernel */
CLSobel7x7VertKernel _sobel_vert; /**< Sobel Vertical 7x7 kernel */
CLFillBorderKernel _border_handler; /**< Kernel to handle image borders */
diff --git a/src/runtime/CL/functions/CLCannyEdge.cpp b/src/runtime/CL/functions/CLCannyEdge.cpp
index 448ca9289d..5acb8e7ddb 100644
--- a/src/runtime/CL/functions/CLCannyEdge.cpp
+++ b/src/runtime/CL/functions/CLCannyEdge.cpp
@@ -35,8 +35,9 @@
using namespace arm_compute;
-CLCannyEdge::CLCannyEdge() // NOLINT
- : _sobel(),
+CLCannyEdge::CLCannyEdge(std::shared_ptr<IMemoryManager> memory_manager) // NOLINT
+ : _memory_group(std::move(memory_manager)),
+ _sobel(),
_gradient(),
_border_mag_gradient(),
_non_max_suppr(),
@@ -96,6 +97,10 @@ void CLCannyEdge::configure(ICLTensor *input, ICLTensor *output, int32_t upper_t
TensorInfo info_s32(shape_l1_stack, 1, arm_compute::DataType::S32);
_l1_stack.allocator()->init(info_s32);
+ // Manage intermediate buffers
+ _memory_group.manage(&_gx);
+ _memory_group.manage(&_gy);
+
// Configure/Init sobelNxN
if(gradient_size == 3)
{
@@ -120,23 +125,43 @@ void CLCannyEdge::configure(ICLTensor *input, ICLTensor *output, int32_t upper_t
ARM_COMPUTE_ERROR("Gradient %d size not supported", gradient_size);
}
+ // Manage intermediate buffers
+ _memory_group.manage(&_mag);
+ _memory_group.manage(&_phase);
+
// Configure gradient
_gradient.configure(&_gx, &_gy, &_mag, &_phase, norm_type);
+ // Allocate intermediate buffers
+ _gx.allocator()->allocate();
+ _gy.allocator()->allocate();
+
+ // Manage intermediate buffers
+ _memory_group.manage(&_nonmax);
+
// Configure non-maxima suppression
_non_max_suppr.configure(&_mag, &_phase, &_nonmax, lower_thr, border_mode == BorderMode::UNDEFINED);
+ // Allocate intermediate buffers
+ _phase.allocator()->allocate();
+
// Fill border around magnitude image as non-maxima suppression will access
// it. If border mode is undefined filling the border is a nop.
_border_mag_gradient.configure(&_mag, _non_max_suppr.border_size(), border_mode, constant_border_value);
+ // Allocate intermediate buffers
+ _mag.allocator()->allocate();
+
+ // Manage intermediate buffers
+ _memory_group.manage(&_visited);
+ _memory_group.manage(&_recorded);
+ _memory_group.manage(&_l1_stack);
+ _memory_group.manage(&_l1_list_counter);
+
// Configure edge tracing
_edge_trace.configure(&_nonmax, output, upper_thr, lower_thr, &_visited, &_recorded, &_l1_stack, &_l1_list_counter);
- _gx.allocator()->allocate();
- _gy.allocator()->allocate();
- _phase.allocator()->allocate();
- _mag.allocator()->allocate();
+ // Allocate intermediate buffers
_visited.allocator()->allocate();
_recorded.allocator()->allocate();
_l1_stack.allocator()->allocate();
@@ -146,6 +171,8 @@ void CLCannyEdge::configure(ICLTensor *input, ICLTensor *output, int32_t upper_t
void CLCannyEdge::run()
{
+ _memory_group.acquire();
+
// Run sobel
_sobel->run();
@@ -165,4 +192,6 @@ void CLCannyEdge::run()
_l1_list_counter.clear(CLScheduler::get().queue());
_l1_stack.clear(CLScheduler::get().queue());
CLScheduler::get().enqueue(_edge_trace, true);
+
+ _memory_group.release();
}
diff --git a/src/runtime/CL/functions/CLFastCorners.cpp b/src/runtime/CL/functions/CLFastCorners.cpp
index d2903fb849..7a0dd09fbe 100644
--- a/src/runtime/CL/functions/CLFastCorners.cpp
+++ b/src/runtime/CL/functions/CLFastCorners.cpp
@@ -36,8 +36,9 @@
using namespace arm_compute;
-CLFastCorners::CLFastCorners()
- : _fast_corners_kernel(),
+CLFastCorners::CLFastCorners(std::shared_ptr<IMemoryManager> memory_manager)
+ : _memory_group(std::move(memory_manager)),
+ _fast_corners_kernel(),
_suppr_func(),
_copy_array_kernel(),
_output(),
@@ -70,6 +71,7 @@ void CLFastCorners::configure(const ICLImage *input, float threshold, bool nonma
const bool update_number = (nullptr != _num_corners);
+ _memory_group.manage(&_output);
_fast_corners_kernel.configure(input, &_output, threshold, nonmax_suppression, border_mode);
if(!_non_max)
@@ -79,6 +81,7 @@ void CLFastCorners::configure(const ICLImage *input, float threshold, bool nonma
else
{
_suppr.allocator()->init(tensor_info);
+ _memory_group.manage(&_suppr);
_suppr_func.configure(&_output, &_suppr, border_mode);
_copy_array_kernel.configure(&_suppr, update_number, corners, &_num_buffer);
@@ -94,6 +97,8 @@ void CLFastCorners::run()
{
cl::CommandQueue q = CLScheduler::get().queue();
+ _memory_group.acquire();
+
if(_non_max)
{
ARM_COMPUTE_ERROR_ON_MSG(_output.cl_buffer().get() == nullptr, "Unconfigured function");
@@ -124,4 +129,6 @@ void CLFastCorners::run()
}
q.flush();
+
+ _memory_group.release();
}
diff --git a/src/runtime/CL/functions/CLGaussian5x5.cpp b/src/runtime/CL/functions/CLGaussian5x5.cpp
index e83a8fb857..f30eee1df7 100644
--- a/src/runtime/CL/functions/CLGaussian5x5.cpp
+++ b/src/runtime/CL/functions/CLGaussian5x5.cpp
@@ -35,8 +35,8 @@
using namespace arm_compute;
-CLGaussian5x5::CLGaussian5x5()
- : _kernel_hor(), _kernel_vert(), _border_handler(), _tmp()
+CLGaussian5x5::CLGaussian5x5(std::shared_ptr<IMemoryManager> memory_manager)
+ : _memory_group(std::move(memory_manager)), _kernel_hor(), _kernel_vert(), _border_handler(), _tmp()
{
}
@@ -46,6 +46,10 @@ void CLGaussian5x5::configure(ICLTensor *input, ICLTensor *output, BorderMode bo
_tmp.allocator()->init(TensorInfo(input->info()->tensor_shape(), 1, DataType::U16));
+ // Manage intermediate buffers
+ _memory_group.manage(&_tmp);
+
+ // Configure kernels
_kernel_hor.configure(input, &_tmp, border_mode == BorderMode::UNDEFINED);
_kernel_vert.configure(&_tmp, output, border_mode == BorderMode::UNDEFINED);
_border_handler.configure(input, _kernel_hor.border_size(), border_mode, PixelValue(constant_border_value));
@@ -57,6 +61,11 @@ void CLGaussian5x5::configure(ICLTensor *input, ICLTensor *output, BorderMode bo
void CLGaussian5x5::run()
{
CLScheduler::get().enqueue(_border_handler, false);
+
+ _memory_group.acquire();
+
CLScheduler::get().enqueue(_kernel_hor, false);
CLScheduler::get().enqueue(_kernel_vert);
+
+ _memory_group.release();
}
diff --git a/src/runtime/CL/functions/CLHarrisCorners.cpp b/src/runtime/CL/functions/CLHarrisCorners.cpp
index 2140240753..059528fe30 100644
--- a/src/runtime/CL/functions/CLHarrisCorners.cpp
+++ b/src/runtime/CL/functions/CLHarrisCorners.cpp
@@ -42,8 +42,9 @@
using namespace arm_compute;
-CLHarrisCorners::CLHarrisCorners() // NOLINT
- : _sobel(nullptr),
+CLHarrisCorners::CLHarrisCorners(std::shared_ptr<IMemoryManager> memory_manager) // NOLINT
+ : _memory_group(std::move(memory_manager)),
+ _sobel(nullptr),
_harris_score(),
_non_max_suppr(),
_candidates(),
@@ -84,6 +85,10 @@ void CLHarrisCorners::configure(ICLImage *input, float threshold, float min_dist
_corners_list = arm_compute::support::cpp14::make_unique<InternalKeypoint[]>(shape.x() * shape.y());
+ // Manage intermediate buffers
+ _memory_group.manage(&_gx);
+ _memory_group.manage(&_gy);
+
/* Set/init Sobel kernel accordingly with gradient_size */
switch(gradient_size)
{
@@ -116,6 +121,9 @@ void CLHarrisCorners::configure(ICLImage *input, float threshold, float min_dist
const float norm_factor = 1.0f / (255.0f * pow(4.0f, gradient_size / 2) * block_size);
const float pow4_normalization_factor = pow(norm_factor, 4);
+ // Manage intermediate buffers
+ _memory_group.manage(&_score);
+
// Set/init Harris Score kernel accordingly with block_size
_harris_score.configure(&_gx, &_gy, &_score, block_size, pow4_normalization_factor, threshold, sensitivity, border_mode == BorderMode::UNDEFINED);
@@ -123,26 +131,35 @@ void CLHarrisCorners::configure(ICLImage *input, float threshold, float min_dist
_border_gx.configure(&_gx, _harris_score.border_size(), border_mode, PixelValue(constant_border_value));
_border_gy.configure(&_gy, _harris_score.border_size(), border_mode, PixelValue(constant_border_value));
+ // Allocate intermediate buffers
+ _gx.allocator()->allocate();
+ _gy.allocator()->allocate();
+
+ // Manage intermediate buffers
+ _memory_group.manage(&_nonmax);
+
// Init non-maxima suppression function
_non_max_suppr.configure(&_score, &_nonmax, border_mode);
+ // Allocate intermediate buffers
+ _score.allocator()->allocate();
+
// Init corner candidates kernel
_candidates.configure(&_nonmax, _corners_list.get(), &_num_corner_candidates);
- // Init euclidean distance
- _sort_euclidean.configure(_corners_list.get(), _corners, &_num_corner_candidates, min_dist);
-
// Allocate intermediate buffers
- _gx.allocator()->allocate();
- _gy.allocator()->allocate();
- _score.allocator()->allocate();
_nonmax.allocator()->allocate();
+
+ // Init euclidean distance
+ _sort_euclidean.configure(_corners_list.get(), _corners, &_num_corner_candidates, min_dist);
}
void CLHarrisCorners::run()
{
ARM_COMPUTE_ERROR_ON_MSG(_sobel == nullptr, "Unconfigured function");
+ _memory_group.acquire();
+
// Init to 0 number of corner candidates
_num_corner_candidates = 0;
@@ -167,4 +184,6 @@ void CLHarrisCorners::run()
_corners->map(CLScheduler::get().queue(), true);
Scheduler::get().schedule(&_sort_euclidean, Window::DimY);
_corners->unmap(CLScheduler::get().queue());
+
+ _memory_group.release();
}
diff --git a/src/runtime/CL/functions/CLSobel5x5.cpp b/src/runtime/CL/functions/CLSobel5x5.cpp
index 098b546c1a..d4bc85524e 100644
--- a/src/runtime/CL/functions/CLSobel5x5.cpp
+++ b/src/runtime/CL/functions/CLSobel5x5.cpp
@@ -33,8 +33,8 @@
using namespace arm_compute;
-CLSobel5x5::CLSobel5x5()
- : _sobel_hor(), _sobel_vert(), _border_handler(), _tmp_x(), _tmp_y()
+CLSobel5x5::CLSobel5x5(std::shared_ptr<IMemoryManager> memory_manager)
+ : _memory_group(std::move(memory_manager)), _sobel_hor(), _sobel_vert(), _border_handler(), _tmp_x(), _tmp_y()
{
}
@@ -51,6 +51,8 @@ void CLSobel5x5::configure(ICLTensor *input, ICLTensor *output_x, ICLTensor *out
{
_tmp_x.allocator()->init(tensor_info);
_tmp_y.allocator()->init(tensor_info);
+ _memory_group.manage(&_tmp_x);
+ _memory_group.manage(&_tmp_y);
_sobel_hor.configure(input, &_tmp_x, &_tmp_y, border_mode == BorderMode::UNDEFINED);
_sobel_vert.configure(&_tmp_x, &_tmp_y, output_x, output_y, border_mode == BorderMode::UNDEFINED);
_tmp_x.allocator()->allocate();
@@ -59,6 +61,7 @@ void CLSobel5x5::configure(ICLTensor *input, ICLTensor *output_x, ICLTensor *out
else if(run_sobel_x)
{
_tmp_x.allocator()->init(tensor_info);
+ _memory_group.manage(&_tmp_x);
_sobel_hor.configure(input, &_tmp_x, nullptr, border_mode == BorderMode::UNDEFINED);
_sobel_vert.configure(&_tmp_x, nullptr, output_x, nullptr, border_mode == BorderMode::UNDEFINED);
_tmp_x.allocator()->allocate();
@@ -66,6 +69,7 @@ void CLSobel5x5::configure(ICLTensor *input, ICLTensor *output_x, ICLTensor *out
else if(run_sobel_y)
{
_tmp_y.allocator()->init(tensor_info);
+ _memory_group.manage(&_tmp_y);
_sobel_hor.configure(input, nullptr, &_tmp_y, border_mode == BorderMode::UNDEFINED);
_sobel_vert.configure(nullptr, &_tmp_y, nullptr, output_y, border_mode == BorderMode::UNDEFINED);
_tmp_y.allocator()->allocate();
@@ -76,6 +80,11 @@ void CLSobel5x5::configure(ICLTensor *input, ICLTensor *output_x, ICLTensor *out
void CLSobel5x5::run()
{
CLScheduler::get().enqueue(_border_handler, false);
+
+ _memory_group.acquire();
+
CLScheduler::get().enqueue(_sobel_hor, false);
CLScheduler::get().enqueue(_sobel_vert);
+
+ _memory_group.release();
}
diff --git a/src/runtime/CL/functions/CLSobel7x7.cpp b/src/runtime/CL/functions/CLSobel7x7.cpp
index db84fa99ae..60830905df 100644
--- a/src/runtime/CL/functions/CLSobel7x7.cpp
+++ b/src/runtime/CL/functions/CLSobel7x7.cpp
@@ -33,8 +33,8 @@
using namespace arm_compute;
-CLSobel7x7::CLSobel7x7()
- : _sobel_hor(), _sobel_vert(), _border_handler(), _tmp_x(), _tmp_y()
+CLSobel7x7::CLSobel7x7(std::shared_ptr<IMemoryManager> memory_manager)
+ : _memory_group(std::move(memory_manager)), _sobel_hor(), _sobel_vert(), _border_handler(), _tmp_x(), _tmp_y()
{
}
@@ -51,6 +51,8 @@ void CLSobel7x7::configure(ICLTensor *input, ICLTensor *output_x, ICLTensor *out
{
_tmp_x.allocator()->init(tensor_info);
_tmp_y.allocator()->init(tensor_info);
+ _memory_group.manage(&_tmp_x);
+ _memory_group.manage(&_tmp_y);
_sobel_hor.configure(input, &_tmp_x, &_tmp_y, border_mode == BorderMode::UNDEFINED);
_sobel_vert.configure(&_tmp_x, &_tmp_y, output_x, output_y, border_mode == BorderMode::UNDEFINED);
_tmp_x.allocator()->allocate();
@@ -59,6 +61,7 @@ void CLSobel7x7::configure(ICLTensor *input, ICLTensor *output_x, ICLTensor *out
else if(run_sobel_x)
{
_tmp_x.allocator()->init(tensor_info);
+ _memory_group.manage(&_tmp_x);
_sobel_hor.configure(input, &_tmp_x, nullptr, border_mode == BorderMode::UNDEFINED);
_sobel_vert.configure(&_tmp_x, nullptr, output_x, nullptr, border_mode == BorderMode::UNDEFINED);
_tmp_x.allocator()->allocate();
@@ -66,6 +69,7 @@ void CLSobel7x7::configure(ICLTensor *input, ICLTensor *output_x, ICLTensor *out
else if(run_sobel_y)
{
_tmp_y.allocator()->init(tensor_info);
+ _memory_group.manage(&_tmp_y);
_sobel_hor.configure(input, nullptr, &_tmp_y, border_mode == BorderMode::UNDEFINED);
_sobel_vert.configure(nullptr, &_tmp_y, nullptr, output_y, border_mode == BorderMode::UNDEFINED);
_tmp_y.allocator()->allocate();
@@ -76,6 +80,11 @@ void CLSobel7x7::configure(ICLTensor *input, ICLTensor *output_x, ICLTensor *out
void CLSobel7x7::run()
{
CLScheduler::get().enqueue(_border_handler, false);
+
+ _memory_group.acquire();
+
CLScheduler::get().enqueue(_sobel_hor, false);
CLScheduler::get().enqueue(_sobel_vert);
+
+ _memory_group.release();
}
diff --git a/src/runtime/NEON/functions/NEGaussian5x5.cpp b/src/runtime/NEON/functions/NEGaussian5x5.cpp
index f085975b1e..b010ca0104 100644
--- a/src/runtime/NEON/functions/NEGaussian5x5.cpp
+++ b/src/runtime/NEON/functions/NEGaussian5x5.cpp
@@ -57,9 +57,10 @@ void NEGaussian5x5::configure(ITensor *input, ITensor *output, BorderMode border
void NEGaussian5x5::run()
{
+ NEScheduler::get().schedule(&_border_handler, Window::DimZ);
+
_memory_group.acquire();
- NEScheduler::get().schedule(&_border_handler, Window::DimZ);
NEScheduler::get().schedule(&_kernel_hor, Window::DimY);
NEScheduler::get().schedule(&_kernel_vert, Window::DimY);