author    Abe Mbise <abe.mbise@arm.com>             2017-12-19 13:51:59 +0000
committer Anthony Barbier <anthony.barbier@arm.com> 2018-11-02 16:52:54 +0000
commit    1b993389a3ac0cd1b0edc0b11e92fbdee127576f (patch)
tree      1ffc39fa69baabaf2849058eb4ac8c204075630c /src
parent    76c8564936a1e0d1be022a2f56dc0a52d638f5d7 (diff)
COMPMID-568: Implement Canny edge function for CL/NEON
Change-Id: Ic5f197463f962bac4b23663bcef7ac744be6fc2a
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/114250
Tested-by: Jenkins <bsgcomp@arm.com>
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Diffstat (limited to 'src')
-rw-r--r-- src/core/CL/cl_kernels/canny.cl              | 11
-rw-r--r-- src/core/NEON/kernels/NECannyEdgeKernel.cpp  | 50
-rw-r--r-- src/runtime/CL/functions/CLCannyEdge.cpp     | 15
-rw-r--r-- src/runtime/NEON/functions/NECannyEdge.cpp   | 19
4 files changed, 52 insertions(+), 43 deletions(-)
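
For context, a minimal usage sketch of the NEON function this patch touches; the tensor setup, header paths and parameter values are assumptions for illustration and are not part of the commit:

    #include "arm_compute/runtime/NEON/functions/NECannyEdge.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    // src and dst are assumed to be U8 tensors of the same shape, already allocated.
    void run_canny_edge(Tensor &src, Tensor &dst)
    {
        NECannyEdge canny;
        // The validation added in this patch requires gradient_size to be 3, 5 or 7,
        // norm_type to be 1 (L1) or 2 (L2), and lower_thr <= upper_thr.
        canny.configure(&src, &dst,
                        /* upper_thr */ 120, /* lower_thr */ 60,
                        /* gradient_size */ 3, /* norm_type */ 1,
                        BorderMode::REPLICATE, /* constant_border_value */ 0,
                        /* use_fp16 */ false);
        canny.run();
    }
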
diff --git a/src/core/CL/cl_kernels/canny.cl b/src/core/CL/cl_kernels/canny.cl
index 166d681755..f60359f0f4 100644
--- a/src/core/CL/cl_kernels/canny.cl
+++ b/src/core/CL/cl_kernels/canny.cl
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -148,6 +148,9 @@ __kernel void combine_gradients_L2(
vstore4(convert_uchar4_sat_rte(p), 0, angle.ptr);
}
+#define EDGE 255
+#define NO_EDGE 0
+
/** Array that holds the relative coordinates offset for the neighbouring pixels.
*/
__constant short4 neighbours_coords[] =
@@ -203,6 +206,7 @@ __kernel void suppress_non_maximum(
DATA_TYPE_IN gradient = *((__global DATA_TYPE_IN *)grad.ptr);
uchar an = convert_ushort(*angle.ptr);
+ // Early return if not greater than lower threshold
if(gradient <= lower_thr)
{
return;
@@ -224,7 +228,6 @@ __kernel void suppress_non_maximum(
}
}
-#define EDGE 255
#define hysteresis_local_stack_L1 8 // The size of level 1 stack. This has to agree with the host side
#define hysteresis_local_stack_L2 16 // The size of level 2 stack, adjust this can impact the match rate with VX implementation
@@ -333,7 +336,7 @@ kernel void hysteresis(
// If less than upper threshold set to NO_EDGE and return
if(val <= up_thr)
{
- *offset(&out, x, y) = 0;
+ *offset(&out, x, y) = NO_EDGE;
return;
}
@@ -372,7 +375,7 @@ kernel void hysteresis(
// Get direction pixel indices
int N = max(y - 1, 0), S = min(y + 1, height - 2), W = max(x - 1, 0), E = min(x + 1, width - 2);
- // Check 8 pixels around for week edges where low_thr < val <= up_thr
+ // Check 8 pixels around for weak edges where low_thr < val <= up_thr
x_tmp = vload4(0, (__global DATA_TYPE_IN *)offset(&src, W, N));
v_tmp = vload4(0, (__global uint *)offset(&visited, W, N));
check_pixel(((x_tmp.s0 <= low_thr) || v_tmp.s0 || (x_tmp.s0 > up_thr)), W, N, x, y); // NW
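
The EDGE/NO_EDGE macros and the weak-edge check above follow the usual hysteresis classification; a scalar C++ sketch of that per-pixel decision (an illustration, not the kernel code):

    #include <cstdint>

    constexpr uint8_t EDGE    = 255; // mirrors the macros moved in canny.cl
    constexpr uint8_t NO_EDGE = 0;

    enum class PixelClass { Strong, Weak, None };

    // Strong pixels seed the edge trace; weak pixels (low_thr < val <= up_thr) are
    // kept only if the trace reaches them from a strong pixel; the rest are dropped.
    PixelClass classify(uint32_t val, uint32_t low_thr, uint32_t up_thr)
    {
        if(val > up_thr)
        {
            return PixelClass::Strong;
        }
        if(val > low_thr)
        {
            return PixelClass::Weak;
        }
        return PixelClass::None;
    }
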
diff --git a/src/core/NEON/kernels/NECannyEdgeKernel.cpp b/src/core/NEON/kernels/NECannyEdgeKernel.cpp
index 9dfd580a25..dc37452415 100644
--- a/src/core/NEON/kernels/NECannyEdgeKernel.cpp
+++ b/src/core/NEON/kernels/NECannyEdgeKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -567,29 +567,29 @@ inline uint16x4_t non_max_U32_helper(const uint32_t *in, const uint16x4_t pc, co
const uint32x4_t mk0_0 = vld1q_u32(in - 1);
const uint32x4_t mk0_1 = vld1q_u32(in + 1);
uint32x4_t mask0 = vceqq_u32(pc32, vdupq_n_u32(0));
- mask0 = vandq_u32(mask0, vcgeq_u32(mc, mk0_0));
- mask0 = vandq_u32(mask0, vcgeq_u32(mc, mk0_1));
+ mask0 = vandq_u32(mask0, vcgtq_u32(mc, mk0_0));
+ mask0 = vandq_u32(mask0, vcgtq_u32(mc, mk0_1));
// 45 degree
const uint32x4_t mk45_0 = vld1q_u32(in - stride_mag - 1);
const uint32x4_t mk45_1 = vld1q_u32(in + stride_mag + 1);
uint32x4_t mask1 = vceqq_u32(pc32, vdupq_n_u32(1));
- mask1 = vandq_u32(mask1, vcgeq_u32(mc, mk45_0));
- mask1 = vandq_u32(mask1, vcgeq_u32(mc, mk45_1));
+ mask1 = vandq_u32(mask1, vcgtq_u32(mc, mk45_0));
+ mask1 = vandq_u32(mask1, vcgtq_u32(mc, mk45_1));
// 90 degree
const uint32x4_t mk90_0 = vld1q_u32(in - stride_mag);
const uint32x4_t mk90_1 = vld1q_u32(in + stride_mag);
uint32x4_t mask2 = vceqq_u32(pc32, vdupq_n_u32(2));
- mask2 = vandq_u32(mask2, vcgeq_u32(mc, mk90_0));
- mask2 = vandq_u32(mask2, vcgeq_u32(mc, mk90_1));
+ mask2 = vandq_u32(mask2, vcgtq_u32(mc, mk90_0));
+ mask2 = vandq_u32(mask2, vcgtq_u32(mc, mk90_1));
// 135 degree
const uint32x4_t mk135_0 = vld1q_u32(in - stride_mag + 1);
const uint32x4_t mk135_1 = vld1q_u32(in + stride_mag - 1);
uint32x4_t mask3 = vceqq_u32(pc32, vdupq_n_u32(3));
- mask3 = vandq_u32(mask3, vcgeq_u32(mc, mk135_0));
- mask3 = vandq_u32(mask3, vcgeq_u32(mc, mk135_1));
+ mask3 = vandq_u32(mask3, vcgtq_u32(mc, mk135_0));
+ mask3 = vandq_u32(mask3, vcgtq_u32(mc, mk135_1));
// Merge masks
mask0 = vorrq_u32(mask0, mask1);
@@ -1338,29 +1338,29 @@ void non_max_suppression_U16_U8_U8(const void *__restrict magnitude_ptr, const v
const uint16x8_t mk0_0 = vld1q_u16(magnitude - 1);
const uint16x8_t mk0_1 = vld1q_u16(magnitude + 1);
uint16x8_t mask0 = vceqq_u16(pc16, vdupq_n_u16(0));
- mask0 = vandq_u16(mask0, vcgeq_u16(mc, mk0_0));
- mask0 = vandq_u16(mask0, vcgeq_u16(mc, mk0_1));
+ mask0 = vandq_u16(mask0, vcgtq_u16(mc, mk0_0));
+ mask0 = vandq_u16(mask0, vcgtq_u16(mc, mk0_1));
// 45 degree
const uint16x8_t mk45_0 = vld1q_u16(magnitude - stride_mag - 1);
const uint16x8_t mk45_1 = vld1q_u16(magnitude + stride_mag + 1);
uint16x8_t mask1 = vceqq_u16(pc16, vdupq_n_u16(1));
- mask1 = vandq_u16(mask1, vcgeq_u16(mc, mk45_0));
- mask1 = vandq_u16(mask1, vcgeq_u16(mc, mk45_1));
+ mask1 = vandq_u16(mask1, vcgtq_u16(mc, mk45_0));
+ mask1 = vandq_u16(mask1, vcgtq_u16(mc, mk45_1));
// 90 degree
const uint16x8_t mk90_0 = vld1q_u16(magnitude - stride_mag);
const uint16x8_t mk90_1 = vld1q_u16(magnitude + stride_mag);
uint16x8_t mask2 = vceqq_u16(pc16, vdupq_n_u16(2));
- mask2 = vandq_u16(mask2, vcgeq_u16(mc, mk90_0));
- mask2 = vandq_u16(mask2, vcgeq_u16(mc, mk90_1));
+ mask2 = vandq_u16(mask2, vcgtq_u16(mc, mk90_0));
+ mask2 = vandq_u16(mask2, vcgtq_u16(mc, mk90_1));
// 135 degree
const uint16x8_t mk135_0 = vld1q_u16(magnitude - stride_mag + 1);
const uint16x8_t mk135_1 = vld1q_u16(magnitude + stride_mag - 1);
uint16x8_t mask3 = vceqq_u16(pc16, vdupq_n_u16(3));
- mask3 = vandq_u16(mask3, vcgeq_u16(mc, mk135_0));
- mask3 = vandq_u16(mask3, vcgeq_u16(mc, mk135_1));
+ mask3 = vandq_u16(mask3, vcgtq_u16(mc, mk135_0));
+ mask3 = vandq_u16(mask3, vcgtq_u16(mc, mk135_1));
// Merge masks
mask0 = vorrq_u16(mask0, mask1);
@@ -1399,29 +1399,29 @@ inline uint16x4_t non_max_U32_helper(const uint32_t *input, const uint16x4_t pc,
const uint32x4_t mk0_0 = vld1q_u32(input - 1);
const uint32x4_t mk0_1 = vld1q_u32(input + 1);
uint32x4_t mask0 = vceqq_u32(pc32, vdupq_n_u32(0));
- mask0 = vandq_u32(mask0, vcgeq_u32(mc, mk0_0));
- mask0 = vandq_u32(mask0, vcgeq_u32(mc, mk0_1));
+ mask0 = vandq_u32(mask0, vcgtq_u32(mc, mk0_0));
+ mask0 = vandq_u32(mask0, vcgtq_u32(mc, mk0_1));
// 45 degree
const uint32x4_t mk45_0 = vld1q_u32(input - stride_mag - 1);
const uint32x4_t mk45_1 = vld1q_u32(input + stride_mag + 1);
uint32x4_t mask1 = vceqq_u32(pc32, vdupq_n_u32(1));
- mask1 = vandq_u32(mask1, vcgeq_u32(mc, mk45_0));
- mask1 = vandq_u32(mask1, vcgeq_u32(mc, mk45_1));
+ mask1 = vandq_u32(mask1, vcgtq_u32(mc, mk45_0));
+ mask1 = vandq_u32(mask1, vcgtq_u32(mc, mk45_1));
// 90 degree
const uint32x4_t mk90_0 = vld1q_u32(input - stride_mag);
const uint32x4_t mk90_1 = vld1q_u32(input + stride_mag);
uint32x4_t mask2 = vceqq_u32(pc32, vdupq_n_u32(2));
- mask2 = vandq_u32(mask2, vcgeq_u32(mc, mk90_0));
- mask2 = vandq_u32(mask2, vcgeq_u32(mc, mk90_1));
+ mask2 = vandq_u32(mask2, vcgtq_u32(mc, mk90_0));
+ mask2 = vandq_u32(mask2, vcgtq_u32(mc, mk90_1));
// 135 degree
const uint32x4_t mk135_0 = vld1q_u32(input - stride_mag + 1);
const uint32x4_t mk135_1 = vld1q_u32(input + stride_mag - 1);
uint32x4_t mask3 = vceqq_u32(pc32, vdupq_n_u32(3));
- mask3 = vandq_u32(mask3, vcgeq_u32(mc, mk135_0));
- mask3 = vandq_u32(mask3, vcgeq_u32(mc, mk135_1));
+ mask3 = vandq_u32(mask3, vcgtq_u32(mc, mk135_0));
+ mask3 = vandq_u32(mask3, vcgtq_u32(mc, mk135_1));
// Merge masks
mask0 = vorrq_u32(mask0, mask1);
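
All of the comparison changes in this kernel follow the same pattern: each direction mask is now built with vcgtq_* instead of vcgeq_*, so a pixel survives non-maximum suppression only when its magnitude is strictly greater than both neighbours along its quantised gradient direction. A scalar sketch of the new condition (illustration only):

    #include <cstdint>

    // With the strict comparison, neighbouring pixels of equal magnitude
    // (a plateau) no longer all pass the test, whereas >= kept them.
    bool is_local_maximum(uint32_t centre, uint32_t neighbour_a, uint32_t neighbour_b)
    {
        return (centre > neighbour_a) && (centre > neighbour_b);
    }
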
diff --git a/src/runtime/CL/functions/CLCannyEdge.cpp b/src/runtime/CL/functions/CLCannyEdge.cpp
index 5acb8e7ddb..ed5834531e 100644
--- a/src/runtime/CL/functions/CLCannyEdge.cpp
+++ b/src/runtime/CL/functions/CLCannyEdge.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -50,17 +50,23 @@ CLCannyEdge::CLCannyEdge(std::shared_ptr<IMemoryManager> memory_manager) // NOLI
_visited(),
_recorded(),
_l1_list_counter(),
- _l1_stack()
+ _l1_stack(),
+ _output(nullptr)
{
}
-void CLCannyEdge::configure(ICLTensor *input, ICLTensor *output, int32_t upper_thr, int32_t lower_thr, int32_t gradient_size, int32_t norm_type, BorderMode border_mode, uint8_t constant_border_value)
+void CLCannyEdge::configure(ICLTensor *input, ICLTensor *output, int32_t upper_thr, int32_t lower_thr, int32_t gradient_size, int32_t norm_type, BorderMode border_mode,
+ uint8_t constant_border_value)
{
+ ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8);
ARM_COMPUTE_ERROR_ON((1 != norm_type) && (2 != norm_type));
+ ARM_COMPUTE_ERROR_ON((gradient_size != 3) && (gradient_size != 5) && (gradient_size != 7));
ARM_COMPUTE_ERROR_ON(lower_thr > upper_thr);
+ _output = output;
+
const unsigned int L1_hysteresis_stack_size = 8;
const TensorShape shape = input->info()->tensor_shape();
@@ -122,7 +128,7 @@ void CLCannyEdge::configure(ICLTensor *input, ICLTensor *output, int32_t upper_t
}
else
{
- ARM_COMPUTE_ERROR("Gradient %d size not supported", gradient_size);
+ ARM_COMPUTE_ERROR("Gradient size %d not supported", gradient_size);
}
// Manage intermediate buffers
@@ -187,6 +193,7 @@ void CLCannyEdge::run()
CLScheduler::get().enqueue(_non_max_suppr, false);
// Clear temporary structures and run edge trace
+ _output->clear(CLScheduler::get().queue());
_visited.clear(CLScheduler::get().queue());
_recorded.clear(CLScheduler::get().queue());
_l1_list_counter.clear(CLScheduler::get().queue());
diff --git a/src/runtime/NEON/functions/NECannyEdge.cpp b/src/runtime/NEON/functions/NECannyEdge.cpp
index c27ff2f935..1d73148f47 100644
--- a/src/runtime/NEON/functions/NECannyEdge.cpp
+++ b/src/runtime/NEON/functions/NECannyEdge.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -61,12 +61,12 @@ NECannyEdge::NECannyEdge(std::shared_ptr<IMemoryManager> memory_manager) // NOLI
void NECannyEdge::configure(ITensor *input, ITensor *output, int32_t upper_thr, int32_t lower_thr, int32_t gradient_size, int32_t norm_type, BorderMode border_mode, uint8_t constant_border_value,
bool use_fp16)
{
+ ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8);
- ARM_COMPUTE_ERROR_ON(gradient_size < 3);
- ARM_COMPUTE_ERROR_ON(gradient_size > 7);
- ARM_COMPUTE_ERROR_ON(lower_thr > upper_thr);
ARM_COMPUTE_ERROR_ON((1 != norm_type) && (2 != norm_type));
+ ARM_COMPUTE_ERROR_ON((gradient_size != 3) && (gradient_size != 5) && (gradient_size != 7));
+ ARM_COMPUTE_ERROR_ON(lower_thr > upper_thr);
_output = output;
@@ -119,7 +119,7 @@ void NECannyEdge::configure(ITensor *input, ITensor *output, int32_t upper_thr,
}
else
{
- ARM_COMPUTE_ERROR("Gradient size not supported\n");
+ ARM_COMPUTE_ERROR("Gradient size %d not supported\n", gradient_size);
}
// Manage intermediate buffers
@@ -171,24 +171,23 @@ void NECannyEdge::configure(ITensor *input, ITensor *output, int32_t upper_thr,
void NECannyEdge::run()
{
ARM_COMPUTE_ERROR_ON_MSG(_sobel == nullptr, "Unconfigured function");
- ARM_COMPUTE_ERROR_ON(_output == nullptr);
_memory_group.acquire();
// Run sobelNxN
_sobel->run();
- // Fill border before non-maxima suppression. Nop for border mode undefined.
- NEScheduler::get().schedule(&_border_mag_gradient, Window::DimZ);
-
// Run gradient
NEScheduler::get().schedule(_gradient.get(), Window::DimY);
+ // Fill border before non-maxima suppression. Nop for border mode undefined.
+ NEScheduler::get().schedule(&_border_mag_gradient, Window::DimZ);
+
// Run non-maxima suppression
NEScheduler::get().schedule(&_non_max_suppr, Window::DimY);
ARM_COMPUTE_ERROR_ON(_output->buffer() == nullptr);
- memset(_output->buffer(), 0, _output->info()->total_size());
+ std::fill_n(_output->buffer(), _output->info()->total_size(), 0);
// Fill border before edge trace
NEScheduler::get().schedule(&_border_edge_trace, Window::DimZ);
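
The last hunk swaps memset for std::fill_n when zeroing the output before the edge trace; both zero the same bytes, as in this small sketch (illustration only):

    #include <algorithm>
    #include <cstddef>
    #include <cstdint>
    #include <cstring>

    void zero_buffer(uint8_t *buf, std::size_t size)
    {
        std::memset(buf, 0, size);  // before this patch
        std::fill_n(buf, size, 0);  // after this patch: same result, type-aware
    }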