aboutsummaryrefslogtreecommitdiff
path: root/src/core/CL/cl_kernels/softmax_layer.cl
diff options
context:
space:
mode:
authorSang-Hoon Park <sang-hoon.park@arm.com>2019-10-29 13:13:19 +0000
committerGeorgios Pinitas <georgios.pinitas@arm.com>2019-10-31 22:26:59 +0000
commit62eeb53a5eee9d388a6074553175909fd1b441b5 (patch)
tree62e051ba5b4f73adb5ba909d623fd0323d2704e9 /src/core/CL/cl_kernels/softmax_layer.cl
parent44bfc3fe8dacfc4297702ca88323ea675a7c52e2 (diff)
downloadComputeLibrary-62eeb53a5eee9d388a6074553175909fd1b441b5.tar.gz
COMPMID-2266: [CL] add support for Log Softmax
Change-Id: I4a8f3519328553e24cbb4fe45a8ca4d47c90975d Signed-off-by: Sang-Hoon Park <sang-hoon.park@arm.com> Reviewed-on: https://review.mlplatform.org/c/2182 Tested-by: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com> Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core/CL/cl_kernels/softmax_layer.cl')
-rw-r--r--src/core/CL/cl_kernels/softmax_layer.cl41
1 files changed, 40 insertions, 1 deletions
diff --git a/src/core/CL/cl_kernels/softmax_layer.cl b/src/core/CL/cl_kernels/softmax_layer.cl
index e549b44245..767cf4c4f7 100644
--- a/src/core/CL/cl_kernels/softmax_layer.cl
+++ b/src/core/CL/cl_kernels/softmax_layer.cl
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -111,7 +111,11 @@ __kernel void softmax_layer_norm(
DATA_TYPE sum_val = *((__global DATA_TYPE *)offset(&sum, 0, get_global_id(1)));
VEC_DATA_TYPE(DATA_TYPE, 16)
data = vload16(0, (__global DATA_TYPE *)offset(&src, 0, 0));
+#ifdef LOG_SOFTMAX
+ vstore16(SUB_OP(data, sum_val, DATA_TYPE, 16), 0, (__global DATA_TYPE *)offset(&dst, 0, 0));
+#else /* LOG_SOFTMAX */
vstore16(DIV_OP(data, sum_val, DATA_TYPE, 16), 0, (__global DATA_TYPE *)offset(&dst, 0, 0));
+#endif /* LOG_SOFTMAX */
}
/** Identifies the maximum value across the 1st dimension and shifts the values of the input tensor by this maximum value,
@@ -226,9 +230,15 @@ __kernel void softmax_layer_max_shift_exp_sum_serial(
#ifdef BETA
data = MUL_OP(data, beta, DATA_TYPE, VECTOR_SIZE);
#endif /* BETA */
+#ifdef LOG_SOFTMAX
+ VSTORE(VECTOR_SIZE)
+ (data, 0, (__global DATA_TYPE *)offset(&dst, i << LOG_VECTOR_SIZE, 0));
+ data = EXP_OP(data, DATA_TYPE, VECTOR_SIZE);
+#else /* LOG_SOFTMAX */
data = EXP_OP(data, DATA_TYPE, VECTOR_SIZE);
VSTORE(VECTOR_SIZE)
(data, 0, (__global DATA_TYPE *)offset(&dst, i << LOG_VECTOR_SIZE, 0));
+#endif /* LOG_SOFTMAX */
sum1D = ADD_OP(sum1D, data, DATA_TYPE, VECTOR_SIZE);
}
@@ -239,11 +249,19 @@ __kernel void softmax_layer_max_shift_exp_sum_serial(
#ifdef BETA
data = MUL_OP(data, beta, DATA_TYPE, VECTOR_SIZE);
#endif /* BETA */
+#ifdef LOG_SOFTMAX
+ VSTORE(VECTOR_SIZE)
+ (data, 0, (__global DATA_TYPE *)offset(&dst, width_ << LOG_VECTOR_SIZE, 0));
+ data = EXP_OP(data, DATA_TYPE, VECTOR_SIZE);
+ widx = CONVERT((EXPAND((CL_VEC_DATA_TYPE(uint, VECTOR_SIZE)))(width_ << LOG_VECTOR_SIZE) + idx__) < width, VEC_DATA_TYPE(SELECT_DATA_TYPE, VECTOR_SIZE));
+ data = select(0, data, widx);
+#else /* LOG_SOFTMAX */
data = EXP_OP(data, DATA_TYPE, VECTOR_SIZE);
widx = CONVERT((EXPAND((CL_VEC_DATA_TYPE(uint, VECTOR_SIZE)))(width_ << LOG_VECTOR_SIZE) + idx__) < width, VEC_DATA_TYPE(SELECT_DATA_TYPE, VECTOR_SIZE));
data = select(0, data, widx);
VSTORE(VECTOR_SIZE)
(data, 0, (__global DATA_TYPE *)offset(&dst, width_ << LOG_VECTOR_SIZE, 0));
+#endif /* LOG_SOFTMAX */
sum1D = ADD_OP(sum1D, data, DATA_TYPE, VECTOR_SIZE);
#endif /* NON_MULTIPLE_OF_VECTOR_SIZE */
@@ -455,9 +473,15 @@ __kernel void softmax_layer_max_shift_exp_sum_parallel(
#ifdef BETA
data = MUL_OP(data, beta, DATA_TYPE, 4);
#endif /* BETA */
+#ifdef LOG_SOFTMAX
+ VSTORE(4)
+ (data, 0, (__global DATA_TYPE *)offset(&dst, i * GRID_SIZE * 4, 0));
+ data = EXP_OP(data, DATA_TYPE, 4);
+#else /* LOG_SOFTMAX */
data = EXP_OP(data, DATA_TYPE, 4);
VSTORE(4)
(data, 0, (__global DATA_TYPE *)offset(&dst, i * GRID_SIZE * 4, 0));
+#endif /* LOG_SOFTMAX */
sum1D = ADD_OP(sum1D, data, DATA_TYPE, 4);
}
#ifdef NON_MULTIPLE_OF_GRID_SIZE
@@ -471,9 +495,15 @@ __kernel void softmax_layer_max_shift_exp_sum_parallel(
#ifdef BETA
data = MUL_OP(data, beta, DATA_TYPE, 4);
#endif /* BETA */
+#ifdef LOG_SOFTMAX
+ VSTORE(4)
+ (data, 0, (__global DATA_TYPE *)offset(&dst, i * GRID_SIZE * 4, 0));
+ data = EXP_OP(data, DATA_TYPE, 4);
+#else /* LOG_SOFTMAX */
data = EXP_OP(data, DATA_TYPE, 4);
VSTORE(4)
(data, 0, (__global DATA_TYPE *)offset(&dst, i * GRID_SIZE * 4, 0));
+#endif /* LOG_SOFTMAX */
sum1D = ADD_OP(sum1D, data, DATA_TYPE, 4);
}
#ifdef NON_MULTIPLE_OF_VECTOR_SIZE
@@ -491,12 +521,21 @@ __kernel void softmax_layer_max_shift_exp_sum_parallel(
#ifdef BETA
data = MUL_OP(data, beta, DATA_TYPE, 4);
#endif /* BETA */
+#ifdef LOG_SOFTMAX
+ VSTORE(4)
+ (data, 0, (__global DATA_TYPE *)offset(&dst, (GRID_SIZE * i * 4) + 4, 0));
+ data = EXP_OP(data, DATA_TYPE, 4);
+ VEC_DATA_TYPE(SELECT_DATA_TYPE, 4)
+ widx = CONVERT(((uint4)(GRID_SIZE * i * 4) + boundary_workitems * 4 + idx4) < width, VEC_DATA_TYPE(SELECT_DATA_TYPE, 4));
+ data = select(0, data, widx);
+#else /* LOG_SOFTMAX */
data = EXP_OP(data, DATA_TYPE, 4);
VEC_DATA_TYPE(SELECT_DATA_TYPE, 4)
widx = CONVERT(((uint4)(GRID_SIZE * i * 4) + boundary_workitems * 4 + idx4) < width, VEC_DATA_TYPE(SELECT_DATA_TYPE, 4));
data = select(0, data, widx);
VSTORE(4)
(data, 0, (__global DATA_TYPE *)offset(&dst, (GRID_SIZE * i * 4) + 4, 0));
+#endif /* LOG_SOFTMAX */
sum1D = ADD_OP(sum1D, data, DATA_TYPE, 4);
}
#endif /* NON_MULTIPLE_OF_VECTOR_SIZE */