aboutsummaryrefslogtreecommitdiff
path: root/src/core/CL/cl_kernels/tile_helpers.h
diff options
context:
space:
mode:
authorGian Marco Iodice <gianmarco.iodice@arm.com>2022-11-17 11:03:39 +0000
committerGian Marco Iodice <gianmarco.iodice@arm.com>2022-12-09 11:06:23 +0000
commit76335eb8d8733b0bbc0110546797211540870c50 (patch)
tree812fc44de593c9e1e45ac8b534094511b06163bf /src/core/CL/cl_kernels/tile_helpers.h
parentf16973b8b4605f12608bffa9f0ca6ed590202d41 (diff)
downloadComputeLibrary-76335eb8d8733b0bbc0110546797211540870c50.tar.gz
Implement the OpenCL kernel to compute the indirect convolution
- Implement indirect convolution kernel - Add operator support - Add test Resolves COMPMID-5709 Change-Id: I9272304163471a5a40da7fdec204599f3c1d8e32 Signed-off-by: Gian Marco Iodice <gianmarco.iodice@arm.com> Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/8701 Comments-Addressed: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Gunes Bayir <gunes.bayir@arm.com> Tested-by: Arm Jenkins <bsgcomp@arm.com> Benchmark: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core/CL/cl_kernels/tile_helpers.h')
-rw-r--r--src/core/CL/cl_kernels/tile_helpers.h20
1 files changed, 17 insertions, 3 deletions
diff --git a/src/core/CL/cl_kernels/tile_helpers.h b/src/core/CL/cl_kernels/tile_helpers.h
index 998bc9efb2..861ea63eca 100644
--- a/src/core/CL/cl_kernels/tile_helpers.h
+++ b/src/core/CL/cl_kernels/tile_helpers.h
@@ -653,13 +653,27 @@
}) \
})
-#define T_LOAD2D_INDIRECT(DATA_TYPE, TILE_AREA, TILE_CHANNELS, TENSOR_TYPE, TENSOR, B, Y, X, C, TENSOR_WIDTH, TENSOR_HEIGHT, STRIDE_Y, yi, dst) \
+/** Load a tile from global memory (tensor) using an indirect buffer for the Y coordinates
+ *
+ * @param[in] DATA_TYPE Data type
+ * @param[in] TILE_AREA Number of elements to load from Y (height) dimension * Number of elements to load from X (width) dimension
+ * @param[in] TILE_CHANNELS Number of elements to load from C (channel) dimension
+ * @param[in] TENSOR_TYPE Type of cl_type used to store the tensor in global memory (BUFFER=cl_buffer, IMAGE=cl_image). Currently only BUFFER is supported
+ * In case of cl_image, only TILE_CHANNELS multiples of 4 are supported (4, 8, 16)
+ * @param[in] TENSOR Tensor basename
+ * @param[in] C Starting C index
+ * @param[in] STRIDE_Y Stride Y (in bytes)
+ * @param[in] yi A tile with TILE_AREA (TILE_WIDTH x TILE_HEIGHT) indirect Y coordinates, read as yi[0].s[i]. Negative coordinates are skipped (no load is issued)
+ * 16 is the maximum indirect buffer size.
+ * @param[out] dst Output tile
+ */
+#define T_LOAD2D_INDIRECT(DATA_TYPE, TILE_AREA, TILE_CHANNELS, TENSOR_TYPE, TENSOR, C, STRIDE_Y, yi, dst) \
({ \
LOOP_UNROLLING(int, _i, 0, 1, TILE_AREA, \
{ \
- if(yi[_i].v >= 0) \
+ if(yi[0].s[_i] >= 0) \
{ \
- dst[_i].v = V_LOAD(DATA_TYPE, TILE_CHANNELS, TENSOR_TYPE, TENSOR, C, yi[_i].v, STRIDE_Y); \
+ dst[_i].v = V_LOAD(DATA_TYPE, TILE_CHANNELS, TENSOR_TYPE, TENSOR, C, yi[0].s[_i], STRIDE_Y); \
} \
}) \
})