author    | Sang-Hoon Park <sang-hoon.park@arm.com>     | 2019-10-29 13:13:19 +0000
committer | Georgios Pinitas <georgios.pinitas@arm.com> | 2019-10-31 22:26:59 +0000
commit    | 62eeb53a5eee9d388a6074553175909fd1b441b5 (patch)
tree      | 62e051ba5b4f73adb5ba909d623fd0323d2704e9 /src/core/CL/cl_kernels
parent    | 44bfc3fe8dacfc4297702ca88323ea675a7c52e2 (diff)
download  | ComputeLibrary-62eeb53a5eee9d388a6074553175909fd1b441b5.tar.gz
COMPMID-2266: [CL] add support for Log Softmax
Change-Id: I4a8f3519328553e24cbb4fe45a8ca4d47c90975d
Signed-off-by: Sang-Hoon Park <sang-hoon.park@arm.com>
Reviewed-on: https://review.mlplatform.org/c/2182
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
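
For context, the change relies on the identity that turns softmax's closing division into a subtraction. With the usual stability shift by the row maximum, in LaTeX:

\log \operatorname{softmax}(x)_i
    = \log \frac{e^{x_i - m}}{\sum_j e^{x_j - m}}
    = (x_i - m) - \log \sum_j e^{x_j - m},
    \qquad m = \max_j x_j .

This is why the LOG_SOFTMAX branches below store the shifted logits before applying EXP_OP (the pre-exponential values are exactly what the log-softmax output needs), and why softmax_layer_norm finishes with SUB_OP where the softmax path uses DIV_OP.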
Diffstat (limited to 'src/core/CL/cl_kernels')
-rw-r--r-- | src/core/CL/cl_kernels/softmax_layer.cl           | 41
-rw-r--r-- | src/core/CL/cl_kernels/softmax_layer_quantized.cl | 11
2 files changed, 49 insertions, 3 deletions
diff --git a/src/core/CL/cl_kernels/softmax_layer.cl b/src/core/CL/cl_kernels/softmax_layer.cl
index e549b44245..767cf4c4f7 100644
--- a/src/core/CL/cl_kernels/softmax_layer.cl
+++ b/src/core/CL/cl_kernels/softmax_layer.cl
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -111,7 +111,11 @@ __kernel void softmax_layer_norm(
     DATA_TYPE sum_val = *((__global DATA_TYPE *)offset(&sum, 0, get_global_id(1)));
     VEC_DATA_TYPE(DATA_TYPE, 16)
     data = vload16(0, (__global DATA_TYPE *)offset(&src, 0, 0));
+#ifdef LOG_SOFTMAX
+    vstore16(SUB_OP(data, sum_val, DATA_TYPE, 16), 0, (__global DATA_TYPE *)offset(&dst, 0, 0));
+#else /* LOG_SOFTMAX */
     vstore16(DIV_OP(data, sum_val, DATA_TYPE, 16), 0, (__global DATA_TYPE *)offset(&dst, 0, 0));
+#endif /* LOG_SOFTMAX */
 }
 
 /** Identifies the maximum value across the 1st dimension and shifts the values of the input tensor by this maximum value,
@@ -226,9 +230,15 @@ __kernel void softmax_layer_max_shift_exp_sum_serial(
 #ifdef BETA
         data = MUL_OP(data, beta, DATA_TYPE, VECTOR_SIZE);
 #endif /* BETA */
+#ifdef LOG_SOFTMAX
+        VSTORE(VECTOR_SIZE)
+        (data, 0, (__global DATA_TYPE *)offset(&dst, i << LOG_VECTOR_SIZE, 0));
+        data = EXP_OP(data, DATA_TYPE, VECTOR_SIZE);
+#else /* LOG_SOFTMAX */
         data = EXP_OP(data, DATA_TYPE, VECTOR_SIZE);
         VSTORE(VECTOR_SIZE)
         (data, 0, (__global DATA_TYPE *)offset(&dst, i << LOG_VECTOR_SIZE, 0));
+#endif /* LOG_SOFTMAX */
         sum1D = ADD_OP(sum1D, data, DATA_TYPE, VECTOR_SIZE);
     }
 
@@ -239,11 +249,19 @@ __kernel void softmax_layer_max_shift_exp_sum_serial(
 #ifdef BETA
     data = MUL_OP(data, beta, DATA_TYPE, VECTOR_SIZE);
 #endif /* BETA */
+#ifdef LOG_SOFTMAX
+    VSTORE(VECTOR_SIZE)
+    (data, 0, (__global DATA_TYPE *)offset(&dst, width_ << LOG_VECTOR_SIZE, 0));
+    data = EXP_OP(data, DATA_TYPE, VECTOR_SIZE);
+    widx = CONVERT((EXPAND((CL_VEC_DATA_TYPE(uint, VECTOR_SIZE)))(width_ << LOG_VECTOR_SIZE) + idx__) < width, VEC_DATA_TYPE(SELECT_DATA_TYPE, VECTOR_SIZE));
+    data = select(0, data, widx);
+#else /* LOG_SOFTMAX */
     data = EXP_OP(data, DATA_TYPE, VECTOR_SIZE);
     widx = CONVERT((EXPAND((CL_VEC_DATA_TYPE(uint, VECTOR_SIZE)))(width_ << LOG_VECTOR_SIZE) + idx__) < width, VEC_DATA_TYPE(SELECT_DATA_TYPE, VECTOR_SIZE));
     data = select(0, data, widx);
     VSTORE(VECTOR_SIZE)
     (data, 0, (__global DATA_TYPE *)offset(&dst, width_ << LOG_VECTOR_SIZE, 0));
+#endif /* LOG_SOFTMAX */
     sum1D = ADD_OP(sum1D, data, DATA_TYPE, VECTOR_SIZE);
 #endif /* NON_MULTIPLE_OF_VECTOR_SIZE */
 
@@ -455,9 +473,15 @@ __kernel void softmax_layer_max_shift_exp_sum_parallel(
 #ifdef BETA
         data = MUL_OP(data, beta, DATA_TYPE, 4);
 #endif /* BETA */
+#ifdef LOG_SOFTMAX
+        VSTORE(4)
+        (data, 0, (__global DATA_TYPE *)offset(&dst, i * GRID_SIZE * 4, 0));
+        data = EXP_OP(data, DATA_TYPE, 4);
+#else /* LOG_SOFTMAX */
         data = EXP_OP(data, DATA_TYPE, 4);
         VSTORE(4)
         (data, 0, (__global DATA_TYPE *)offset(&dst, i * GRID_SIZE * 4, 0));
+#endif /* LOG_SOFTMAX */
         sum1D = ADD_OP(sum1D, data, DATA_TYPE, 4);
     }
 #ifdef NON_MULTIPLE_OF_GRID_SIZE
@@ -471,9 +495,15 @@ __kernel void softmax_layer_max_shift_exp_sum_parallel(
 #ifdef BETA
         data = MUL_OP(data, beta, DATA_TYPE, 4);
 #endif /* BETA */
+#ifdef LOG_SOFTMAX
+        VSTORE(4)
+        (data, 0, (__global DATA_TYPE *)offset(&dst, i * GRID_SIZE * 4, 0));
+        data = EXP_OP(data, DATA_TYPE, 4);
+#else /* LOG_SOFTMAX */
         data = EXP_OP(data, DATA_TYPE, 4);
         VSTORE(4)
         (data, 0, (__global DATA_TYPE *)offset(&dst, i * GRID_SIZE * 4, 0));
+#endif /* LOG_SOFTMAX */
         sum1D = ADD_OP(sum1D, data, DATA_TYPE, 4);
     }
 #ifdef NON_MULTIPLE_OF_VECTOR_SIZE
@@ -491,12 +521,21 @@ __kernel void softmax_layer_max_shift_exp_sum_parallel(
 #ifdef BETA
         data = MUL_OP(data, beta, DATA_TYPE, 4);
 #endif /* BETA */
+#ifdef LOG_SOFTMAX
+        VSTORE(4)
+        (data, 0, (__global DATA_TYPE *)offset(&dst, (GRID_SIZE * i * 4) + 4, 0));
+        data = EXP_OP(data, DATA_TYPE, 4);
+        VEC_DATA_TYPE(SELECT_DATA_TYPE, 4)
+        widx = CONVERT(((uint4)(GRID_SIZE * i * 4) + boundary_workitems * 4 + idx4) < width, VEC_DATA_TYPE(SELECT_DATA_TYPE, 4));
+        data = select(0, data, widx);
+#else /* LOG_SOFTMAX */
         data = EXP_OP(data, DATA_TYPE, 4);
         VEC_DATA_TYPE(SELECT_DATA_TYPE, 4)
         widx = CONVERT(((uint4)(GRID_SIZE * i * 4) + boundary_workitems * 4 + idx4) < width, VEC_DATA_TYPE(SELECT_DATA_TYPE, 4));
         data = select(0, data, widx);
         VSTORE(4)
         (data, 0, (__global DATA_TYPE *)offset(&dst, (GRID_SIZE * i * 4) + 4, 0));
+#endif /* LOG_SOFTMAX */
         sum1D = ADD_OP(sum1D, data, DATA_TYPE, 4);
     }
 #endif /* NON_MULTIPLE_OF_VECTOR_SIZE */
diff --git a/src/core/CL/cl_kernels/softmax_layer_quantized.cl b/src/core/CL/cl_kernels/softmax_layer_quantized.cl
index 95d6d4bcc5..8ccc5d3dd5 100644
--- a/src/core/CL/cl_kernels/softmax_layer_quantized.cl
+++ b/src/core/CL/cl_kernels/softmax_layer_quantized.cl
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -25,6 +25,7 @@
 
 #define MAX_OP(x, y, type, size) max((x), (y))
 #define ADD_OP(x, y, type, size) ((x) + (y))
+#define SUB_OP(x, y, type, size) ((x) - (y))
 
 /* Number of workitems in dimension 0. */
 #if !defined(GRID_SIZE)
@@ -559,12 +560,14 @@ __kernel void softmax_layer_norm_quantized(
     int sum_val = *((__global int *)offset(&sum, 0, get_global_id(1)));
 
     // It will be better to calculate this in prev layer and pass here as parameter
-    uint sum_val_u               = convert_uint(sum_val);
+    uint sum_val_u = convert_uint(sum_val);
+#ifndef LOG_SOFTMAX
     int headroom_plus_one       = clz(sum_val_u);
     int num_bits_over_unit      = EXP_ACCUMULATION_INT_BITS - headroom_plus_one;
     int shifted_sum_minus_one_1 = convert_int((sum_val_u << headroom_plus_one) - (1u << 31));
     int16 shifted_sum_minus_one = shifted_sum_minus_one_1;
     int16 shifted_scale         = ASYMM_ONE_OVER_ONE_PLUS_X_FOR_X_IN_0_1(shifted_sum_minus_one, 16);
+#endif /* LOG_SOFTMAX */
 
     // It was already calculated in prev layer, should be stored into tmp output and reused
     int16 data_diff = vload16(0, (__global int *)offset(&src, 0, 0));
@@ -577,8 +580,12 @@ __kernel void softmax_layer_norm_quantized(
 #endif /* defined(INPUT_BETA_MULTIPLIER) && defined(INPUT_BETA_LEFT_SHIFT) */
 
     int16 data = ASYMM_EXP_ON_NEGATIVE_VALUES(data_diff_mult, SCALED_DIFF_INT_BITS, 16);
+#ifdef LOG_SOFTMAX
+    data = SUB_OP(data_diff_mult, (int16)sum_val_u, int, 16);
+#else /* LOG_SOFTMAX */
     data = ASYMM_MULT(shifted_scale, data, 16);
     data = ASYMM_ROUNDING_DIVIDE_BY_POW2(data, num_bits_over_unit + 31 - 8, 16);
+#endif /* LOG_SOFTMAX */
     data = select(0, data, data_diff >= (int16)(DIFF_MIN));
     vstore16(convert_uchar16_sat(data), 0, (__global uchar *)offset(&dst, 0, 0));
 }
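
For the shape of the algorithm outside the OpenCL macros, here is a minimal scalar C sketch of the same three-stage structure (max shift, exp + sum, normalize). It is illustrative only: the function names are invented, the real kernels vectorize and split work across work-items, and the hunks above do not show where log() is applied to the accumulated sum, so this sketch simply takes it in the final stage.

#include <math.h>
#include <stddef.h>
#include <stdio.h>

/* Stage 1+2: shift one row of `width` values by its max, then exponentiate
 * and accumulate the sum. Mirroring the LOG_SOFTMAX branches above, the
 * intermediate buffer holds the shifted (post-beta, pre-exp) logits for log
 * softmax, but exp(shifted) for plain softmax. */
static float max_shift_exp_sum(const float *src, float *tmp, size_t width,
                               float beta, int log_softmax)
{
    float max_val = src[0];
    for (size_t i = 1; i < width; ++i)
        if (src[i] > max_val)
            max_val = src[i];

    float sum = 0.0f;
    for (size_t i = 0; i < width; ++i)
    {
        float shifted = (src[i] - max_val) * beta; /* SUB_OP then MUL_OP by beta */
        tmp[i] = log_softmax ? shifted : expf(shifted); /* VSTORE before/after EXP_OP */
        sum += expf(shifted); /* sum of exponentials is needed either way */
    }
    return sum;
}

/* Stage 3 (cf. softmax_layer_norm): divide by the sum for softmax,
 * subtract its log for log softmax -- the DIV_OP vs. SUB_OP split. */
static void norm(float *dst, const float *tmp, size_t width, float sum,
                 int log_softmax)
{
    for (size_t i = 0; i < width; ++i)
        dst[i] = log_softmax ? tmp[i] - logf(sum) : tmp[i] / sum;
}

int main(void)
{
    const float x[4] = { 1.0f, 2.0f, 3.0f, 4.0f };
    float tmp[4], y[4];
    float sum = max_shift_exp_sum(x, tmp, 4, 1.0f, /*log_softmax=*/1);
    norm(y, tmp, 4, sum, 1);
    for (int i = 0; i < 4; ++i)
        printf("%f\n", y[i]); /* log-softmax values; exp(y) sums to 1 */
    return 0;
}

Note how the log_softmax flag mirrors the LOG_SOFTMAX branches: the only structural differences are what gets stored to the intermediate tensor and whether the final step divides or subtracts.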
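A note on the quantized variant's design: the non-log path approximates division by the sum in fixed point (clz() normalizes sum_val_u, ASYMM_ONE_OVER_ONE_PLUS_X_FOR_X_IN_0_1 yields a reciprocal-style scale, and ASYMM_MULT plus ASYMM_ROUNDING_DIVIDE_BY_POW2 apply it; these ASYMM_* helpers appear to be gemmlowp-style fixed-point routines). The commit wraps that whole pipeline in #ifndef LOG_SOFTMAX, so the log variant compiles it out entirely and forms its result with the newly added SUB_OP instead. Because LOG_SOFTMAX, like BETA, is selected through build-time defines when the CL program is compiled, the choice between the two paths costs nothing at kernel run time.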