aboutsummaryrefslogtreecommitdiff
path: root/src/core/CL/cl_kernels/tile_helpers.h
diff options
context:
space:
mode:
authorGian Marco Iodice <gianmarco.iodice@arm.com>2021-03-24 14:48:22 +0000
committerGian Marco Iodice <gianmarco.iodice@arm.com>2021-03-25 16:42:03 +0000
commita8903c871252412c70623b5f8e284c181238a9b2 (patch)
tree2760f8e7d005826f917a8cbed6b88577e8915d74 /src/core/CL/cl_kernels/tile_helpers.h
parentf73db971cfc36c82c1aa6409257a11f987aaea92 (diff)
downloadComputeLibrary-a8903c871252412c70623b5f8e284c181238a9b2.tar.gz
Improve performance of Winograd Output Transform 3x3
This patch reworks the winograd output transform 3x3 NHWC on OpenCL - Use utility macros in tile_helpers.h to rewrite the kernel - Implement the tile utility macro for the activation Resolves COMPMID-4144 Change-Id: I86a9bb9ea96b9629a18642b56bb63750710e6af5 Signed-off-by: Gian Marco Iodice <gianmarco.iodice@arm.com> Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5324 Reviewed-by: Giorgio Arena <giorgio.arena@arm.com> Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com> Comments-Addressed: Arm Jenkins <bsgcomp@arm.com> Tested-by: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core/CL/cl_kernels/tile_helpers.h')
-rw-r--r--src/core/CL/cl_kernels/tile_helpers.h59
1 files changed, 40 insertions, 19 deletions
diff --git a/src/core/CL/cl_kernels/tile_helpers.h b/src/core/CL/cl_kernels/tile_helpers.h
index 19241cf219..b72430c026 100644
--- a/src/core/CL/cl_kernels/tile_helpers.h
+++ b/src/core/CL/cl_kernels/tile_helpers.h
@@ -160,7 +160,7 @@
#define V_LOAD_STR(DATA_TYPE, WIDTH, TENSOR_TYPE, TENSOR, X, Y, STRIDE_Y) V_LOAD_##TENSOR_TYPE(DATA_TYPE, WIDTH, TENSOR, X, Y, STRIDE_Y)
#define V_LOAD_BUFFER(DATA_TYPE, WIDTH, TENSOR, X, Y, STRIDE_Y) \
VLOAD(WIDTH) \
- (0, (__global DATA_TYPE *)(TENSOR##_ptr + (X) * sizeof(DATA_TYPE) + (Y)*STRIDE_Y))
+ (0, (__global DATA_TYPE *)(TENSOR##_ptr + TENSOR##_offset_first_element_in_bytes + (X) * sizeof(DATA_TYPE) + (Y)*STRIDE_Y))
#define V_LOAD_IMAGE(DATA_TYPE, WIDTH, TENSOR, X, Y, STRIDE_Y) READ_IMAGE2D(DATA_TYPE, CONVERT_VECTOR_SIZE_TO_PIXEL_UNIT(WIDTH), TENSOR##_img, (X) / 4, (Y))
/** Load a tile from global memory (tensor)
@@ -223,24 +223,24 @@
* @param[in] src Input tile
* @param[in] indirect_y Indirect Y index tile
*/
-#define T_STORE_INDIRECT_WIDTH_SELECT(DATA_TYPE, HEIGHT, WIDTH0, WIDTH1, TENSOR_TYPE, TENSOR, X, STRIDE_Y, WIDTH1_CONDITION, src, indirect_y) \
- ({ \
- if(WIDTH1_CONDITION) \
- { \
- LOOP_UNROLLING(int, _i, 0, HEIGHT, 1) \
- { \
- VSTORE_PARTIAL(WIDTH0, WIDTH1) \
- (src[HEIGHT - 1 - _i].v, 0, (__global DATA_TYPE *)(TENSOR##_ptr + (X) * sizeof(DATA_TYPE) + (indirect_y[HEIGHT - 1 - _i].v) * STRIDE_Y)); \
- } \
- } \
- else \
- { \
- LOOP_UNROLLING(int, _i, 0, HEIGHT, 1) \
- { \
- VSTORE(WIDTH0) \
- (src[HEIGHT - 1 - _i].v, 0, (__global DATA_TYPE *)(TENSOR##_ptr + (X) * sizeof(DATA_TYPE) + (indirect_y[HEIGHT - 1 - _i].v) * STRIDE_Y)); \
- } \
- } \
+#define T_STORE_INDIRECT_WIDTH_SELECT(DATA_TYPE, HEIGHT, WIDTH0, WIDTH1, TENSOR_TYPE, TENSOR, X, STRIDE_Y, WIDTH1_CONDITION, src, indirect_y) \
+ ({ \
+ if(WIDTH1_CONDITION) \
+ { \
+ LOOP_UNROLLING(int, _i, 0, HEIGHT, 1) \
+ { \
+ VSTORE_PARTIAL(WIDTH0, WIDTH1) \
+ (src[HEIGHT - 1 - _i].v, 0, (__global DATA_TYPE *)(TENSOR##_ptr + TENSOR##_offset_first_element_in_bytes + (X) * sizeof(DATA_TYPE) + (indirect_y[HEIGHT - 1 - _i].v) * STRIDE_Y)); \
+ } \
+ } \
+ else \
+ { \
+ LOOP_UNROLLING(int, _i, 0, HEIGHT, 1) \
+ { \
+ VSTORE(WIDTH0) \
+ (src[HEIGHT - 1 - _i].v, 0, (__global DATA_TYPE *)(TENSOR##_ptr + TENSOR##_offset_first_element_in_bytes + (X) * sizeof(DATA_TYPE) + (indirect_y[HEIGHT - 1 - _i].v) * STRIDE_Y)); \
+ } \
+ } \
})
/** Offset correction for the QASYMM8 computation
@@ -330,6 +330,27 @@
} \
})
+/** Element-wise activation
+ *
+ * @note Performs: activation(LHS) = DST
+ *
+ * @param[in] DATA_TYPE SRC/DST data type
+ * @param[in] M0 Number of SRC/DST rows
+ * @param[in] N0 Number of SRC/DST columns
+ * @param[in] ACTIVATION_TYPE Activation type
+ * @param[in] A_VAL A value used for the activation (e.g. tanh_op, brelu,..)
+ * @param[in] B_VAL B value used for the activation (e.g. tanh_op, brelu,..)
+ * @param[out] src SRC tile
+ * @param[out] dst DST tile
+ */
+#define T_ACTIVATION(DATA_TYPE, M0, N0, ACTIVATION_TYPE, A_VAL, B_VAL, src, dst) \
+ ({ \
+ LOOP_UNROLLING(int, _m0, 0, M0, 1) \
+ { \
+ dst[_m0].v = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, N0, src[_m0].v, A_VAL, B_VAL); \
+ } \
+ })
+
/** Element-wise addition with a constant value
*
* @note Performs: LHS + constant = DST