aboutsummaryrefslogtreecommitdiff
path: root/src/core/CL/cl_kernels/warp_helpers.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/core/CL/cl_kernels/warp_helpers.h')
-rw-r--r--src/core/CL/cl_kernels/warp_helpers.h65
1 files changed, 32 insertions, 33 deletions
diff --git a/src/core/CL/cl_kernels/warp_helpers.h b/src/core/CL/cl_kernels/warp_helpers.h
index 005861ddfa..6595bd1981 100644
--- a/src/core/CL/cl_kernels/warp_helpers.h
+++ b/src/core/CL/cl_kernels/warp_helpers.h
@@ -31,11 +31,13 @@
* @param[in] border_size Border size of the image
*
*/
-inline const float8 clamp_to_border_with_size(float8 coords, const float width, const float height, const float border_size)
+inline const float8
+clamp_to_border_with_size(float8 coords, const float width, const float height, const float border_size)
{
const float4 clamped_x = clamp(coords.even, 0.0f - border_size, width - 1 + border_size);
const float4 clamped_y = clamp(coords.odd, 0.0f - border_size, height - 1 + border_size);
- return (float8)(clamped_x.s0, clamped_y.s0, clamped_x.s1, clamped_y.s1, clamped_x.s2, clamped_y.s2, clamped_x.s3, clamped_y.s3);
+ return (float8)(clamped_x.s0, clamped_y.s0, clamped_x.s1, clamped_y.s1, clamped_x.s2, clamped_y.s2, clamped_x.s3,
+ clamped_y.s3);
}
/** Clamps the given coordinates to the borders.
@@ -63,12 +65,6 @@ inline const VEC_DATA_TYPE(DATA_TYPE, 4) read_texels4(const Image *in, const int
*((__global DATA_TYPE *)offset(in, coords.s6, coords.s7)));
}
-/** Returns the current thread coordinates. */
-inline const float2 get_current_coords()
-{
- return (float2)(get_global_id(0) * 4, get_global_id(1));
-}
-
/** Given a texel coordinates this function will return the following array of coordinates:
* [ P, right neighbour, below neighbour, below right neighbour ]
*
@@ -80,7 +76,8 @@ inline const float2 get_current_coords()
*/
inline const float8 get_neighbour_coords(const float2 coord)
{
- return (float8)(/*tl*/ coord.s0, coord.s1, /*tr*/ coord.s0 + 1, coord.s1, /*bl*/ coord.s0, coord.s1 + 1, /*br*/ coord.s0 + 1, coord.s1 + 1);
+ return (float8)(/*tl*/ coord.s0, coord.s1, /*tr*/ coord.s0 + 1, coord.s1, /*bl*/ coord.s0, coord.s1 + 1,
+ /*br*/ coord.s0 + 1, coord.s1 + 1);
}
/** Computes the bilinear interpolation for each set of coordinates in the vector coords and returns the values
@@ -91,37 +88,38 @@ inline const float8 get_neighbour_coords(const float2 coord)
* @param[in] height Height of the image
* @param[in] border_size Border size
*/
-inline const VEC_DATA_TYPE(DATA_TYPE, 4) bilinear_interpolate_with_border(const Image *in, const float8 coords, const float width, const float height, const float border_size)
+inline const VEC_DATA_TYPE(DATA_TYPE, 4) bilinear_interpolate_with_border(
+ const Image *in, const float8 coords, const float width, const float height, const float border_size)
{
// If any of the 4 texels is out of the image's boundaries we use the border value (REPLICATE or CONSTANT) for any texel out of the image.
// Sets the 4x4 coordinates for each of the four input texels
const float8 fc = floor(coords);
- const float16 c1 = (float16)(
- clamp_to_border_with_size(get_neighbour_coords((float2)(fc.s0, fc.s1)), width, height, border_size),
- clamp_to_border_with_size(get_neighbour_coords((float2)(fc.s2, fc.s3)), width, height, border_size));
- const float16 c2 = (float16)(
- clamp_to_border_with_size(get_neighbour_coords((float2)(fc.s4, fc.s5)), width, height, border_size),
- clamp_to_border_with_size(get_neighbour_coords((float2)(fc.s6, fc.s7)), width, height, border_size));
+ const float16 c1 =
+ (float16)(clamp_to_border_with_size(get_neighbour_coords((float2)(fc.s0, fc.s1)), width, height, border_size),
+ clamp_to_border_with_size(get_neighbour_coords((float2)(fc.s2, fc.s3)), width, height, border_size));
+ const float16 c2 =
+ (float16)(clamp_to_border_with_size(get_neighbour_coords((float2)(fc.s4, fc.s5)), width, height, border_size),
+ clamp_to_border_with_size(get_neighbour_coords((float2)(fc.s6, fc.s7)), width, height, border_size));
// Loads the values from the input image
const float16 t = (float16)(
- /* tl, tr, bl, br */
- * ((__global DATA_TYPE *)offset(in, c1.s0, c1.s1)), *((__global DATA_TYPE *)offset(in, c1.s2, c1.s3)),
- *((__global DATA_TYPE *)offset(in, c1.s4, c1.s5)), *((__global DATA_TYPE *)offset(in, c1.s6, c1.s7)),
- *((__global DATA_TYPE *)offset(in, c1.s8, c1.s9)), *((__global DATA_TYPE *)offset(in, c1.sa, c1.sb)),
- *((__global DATA_TYPE *)offset(in, c1.sc, c1.sd)), *((__global DATA_TYPE *)offset(in, c1.se, c1.sf)),
- *((__global DATA_TYPE *)offset(in, c2.s0, c2.s1)), *((__global DATA_TYPE *)offset(in, c2.s2, c2.s3)),
- *((__global DATA_TYPE *)offset(in, c2.s4, c2.s5)), *((__global DATA_TYPE *)offset(in, c2.s6, c2.s7)),
- *((__global DATA_TYPE *)offset(in, c2.s8, c2.s9)), *((__global DATA_TYPE *)offset(in, c2.sa, c2.sb)),
- *((__global DATA_TYPE *)offset(in, c2.sc, c2.sd)), *((__global DATA_TYPE *)offset(in, c2.se, c2.sf)));
- const float8 a = coords - fc;
- const float8 b = ((float8)(1.f)) - a;
- const float4 fr = (float4)(
- ((t.s0 * b.s0 * b.s1) + (t.s1 * a.s0 * b.s1) + (t.s2 * b.s0 * a.s1) + (t.s3 * a.s0 * a.s1)),
- ((t.s4 * b.s2 * b.s3) + (t.s5 * a.s2 * b.s3) + (t.s6 * b.s2 * a.s3) + (t.s7 * a.s2 * a.s3)),
- ((t.s8 * b.s4 * b.s5) + (t.s9 * a.s4 * b.s5) + (t.sa * b.s4 * a.s5) + (t.sb * a.s4 * a.s5)),
- ((t.sc * b.s6 * b.s7) + (t.sd * a.s6 * b.s7) + (t.se * b.s6 * a.s7) + (t.sf * a.s6 * a.s7)));
+ /* tl, tr, bl, br */
+ *((__global DATA_TYPE *)offset(in, c1.s0, c1.s1)), *((__global DATA_TYPE *)offset(in, c1.s2, c1.s3)),
+ *((__global DATA_TYPE *)offset(in, c1.s4, c1.s5)), *((__global DATA_TYPE *)offset(in, c1.s6, c1.s7)),
+ *((__global DATA_TYPE *)offset(in, c1.s8, c1.s9)), *((__global DATA_TYPE *)offset(in, c1.sa, c1.sb)),
+ *((__global DATA_TYPE *)offset(in, c1.sc, c1.sd)), *((__global DATA_TYPE *)offset(in, c1.se, c1.sf)),
+ *((__global DATA_TYPE *)offset(in, c2.s0, c2.s1)), *((__global DATA_TYPE *)offset(in, c2.s2, c2.s3)),
+ *((__global DATA_TYPE *)offset(in, c2.s4, c2.s5)), *((__global DATA_TYPE *)offset(in, c2.s6, c2.s7)),
+ *((__global DATA_TYPE *)offset(in, c2.s8, c2.s9)), *((__global DATA_TYPE *)offset(in, c2.sa, c2.sb)),
+ *((__global DATA_TYPE *)offset(in, c2.sc, c2.sd)), *((__global DATA_TYPE *)offset(in, c2.se, c2.sf)));
+ const float8 a = coords - fc;
+ const float8 b = ((float8)(1.f)) - a;
+ const float4 fr =
+ (float4)(((t.s0 * b.s0 * b.s1) + (t.s1 * a.s0 * b.s1) + (t.s2 * b.s0 * a.s1) + (t.s3 * a.s0 * a.s1)),
+ ((t.s4 * b.s2 * b.s3) + (t.s5 * a.s2 * b.s3) + (t.s6 * b.s2 * a.s3) + (t.s7 * a.s2 * a.s3)),
+ ((t.s8 * b.s4 * b.s5) + (t.s9 * a.s4 * b.s5) + (t.sa * b.s4 * a.s5) + (t.sb * a.s4 * a.s5)),
+ ((t.sc * b.s6 * b.s7) + (t.sd * a.s6 * b.s7) + (t.se * b.s6 * a.s7) + (t.sf * a.s6 * a.s7)));
return CONVERT(fr, VEC_DATA_TYPE(DATA_TYPE, 4));
}
@@ -132,7 +130,8 @@ inline const VEC_DATA_TYPE(DATA_TYPE, 4) bilinear_interpolate_with_border(const
* @param[in] width Width of the image
* @param[in] height Height of the image
*/
-inline const VEC_DATA_TYPE(DATA_TYPE, 4) bilinear_interpolate(const Image *in, const float8 coords, const float width, const float height)
+inline const VEC_DATA_TYPE(DATA_TYPE, 4)
+ bilinear_interpolate(const Image *in, const float8 coords, const float width, const float height)
{
return bilinear_interpolate_with_border(in, coords, width, height, 1);
}