about | summary | refs | log | tree | commit | diff
path: root/src/core/CL/cl_kernels/softmax_layer_quantized.cl
diff options
context:
space:
mode:
author Sang-Hoon Park <sang-hoon.park@arm.com> 2020-08-10 22:50:17 +0100
committer Sang-Hoon Park <sang-hoon.park@arm.com> 2020-08-11 12:55:59 +0000
commit 77d3d2445fa590dee505c4171daef9dd6c8124ca (patch)
tree e799cd4005d569feba5e4bb5f9f44f5dd9e99bd2 /src/core/CL/cl_kernels/softmax_layer_quantized.cl
parent 03d8a8991c6d87f227ed149ca653e897ed47dfc0 (diff)
download ComputeLibrary-77d3d2445fa590dee505c4171daef9dd6c8124ca.tar.gz
COMPMID-3607: Fix softmax summation logic for QASYMM8_SIGNED
For the elements that shouldn't contribute to the sum, zero is used to compute the correct sum.

Change-Id: I5360534b5b0f81ee3d3aaaf5a046b99ecd943894
Signed-off-by: Sang-Hoon Park <sang-hoon.park@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3703
Reviewed-by: Pablo Marquez <pablo.tello@arm.com>
Reviewed-by: TeresaARM <teresa.charlinreyes@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core/CL/cl_kernels/softmax_layer_quantized.cl')
-rw-r--r-- src/core/CL/cl_kernels/softmax_layer_quantized.cl 15
1 files changed, 8 insertions, 7 deletions
diff --git a/src/core/CL/cl_kernels/softmax_layer_quantized.cl b/src/core/CL/cl_kernels/softmax_layer_quantized.cl
index f4c5c4b60e..22b8df8f74 100644
--- a/src/core/CL/cl_kernels/softmax_layer_quantized.cl
+++ b/src/core/CL/cl_kernels/softmax_layer_quantized.cl
@@ -194,7 +194,7 @@ __kernel void softmax_layer_max_shift_exp_sum_quantized_serial(
data_fp = asymm_rescale(data_fp, 0, EXP_ACCUMULATION_INT_BITS);
VSTORE(VECTOR_SIZE)
(data_diff, 0, (__global int *)offset(&dst, i << LOG_VECTOR_SIZE, 0));
- sum1D = sum1D + select(MIN_VALUE, data_fp, data_diff >= (VEC_INT)(DIFF_MIN));
+ sum1D = sum1D + select(0, data_fp, data_diff >= (VEC_INT)(DIFF_MIN));
}
#ifdef NON_MULTIPLE_OF_VECTOR_SIZE
@@ -208,8 +208,8 @@ __kernel void softmax_layer_max_shift_exp_sum_quantized_serial(
VEC_INT widx_ = CONVERT(((VEC_UINT)(width4 << LOG_VECTOR_SIZE) + idx__) < width, VEC_INT);
VSTORE(VECTOR_SIZE)
(data_diff, 0, (__global int *)offset(&dst, width4 << LOG_VECTOR_SIZE, 0));
- data_fp = select(MIN_VALUE, data_fp, data_diff >= (VEC_INT)(DIFF_MIN));
- sum1D = sum1D + select(MIN_VALUE, data_fp, widx_);
+ data_fp = select(0, data_fp, data_diff >= (VEC_INT)(DIFF_MIN));
+ sum1D = sum1D + select(0, data_fp, widx_);
#endif /* NON_MULTIPLE_OF_VECTOR_SIZE */
// Perform sum reduction
@@ -417,7 +417,7 @@ __kernel void softmax_layer_max_shift_exp_sum_quantized_parallel(
data_fp = ASYMM_EXP_ON_NEGATIVE_VALUES(data_diff_mult, SCALED_DIFF_INT_BITS, 4);
data_fp = ASYMM_RESCALE(data_fp, 0, EXP_ACCUMULATION_INT_BITS, 4);
vstore4(data_diff, 0, (__global int *)offset(&dst, i * GRID_SIZE * 4, 0));
- sum1D = sum1D + select(MIN_VALUE, data_fp, data_diff >= (int4)(DIFF_MIN));
+ sum1D = sum1D + select(0, data_fp, data_diff >= (int4)(DIFF_MIN));
}
#ifdef NON_MULTIPLE_OF_GRID_SIZE
//TODO: Optimize the calculation (avoid %).
@@ -432,7 +432,7 @@ __kernel void softmax_layer_max_shift_exp_sum_quantized_parallel(
data_fp = ASYMM_EXP_ON_NEGATIVE_VALUES(data_diff_mult, SCALED_DIFF_INT_BITS, 4);
data_fp = ASYMM_RESCALE(data_fp, 0, EXP_ACCUMULATION_INT_BITS, 4);
vstore4(data_diff, 0, (__global int *)offset(&dst, i * GRID_SIZE * 4, 0));
- sum1D = sum1D + select(MIN_VALUE, data_fp, data_diff >= (int4)(DIFF_MIN));
+ sum1D = sum1D + select(0, data_fp, data_diff >= (int4)(DIFF_MIN));
}
#ifdef NON_MULTIPLE_OF_VECTOR_SIZE
if(boundary_workitems == 0)
@@ -451,9 +451,10 @@ __kernel void softmax_layer_max_shift_exp_sum_quantized_parallel(
data_fp = ASYMM_EXP_ON_NEGATIVE_VALUES(data_diff_mult, SCALED_DIFF_INT_BITS, 4);
data_fp = ASYMM_RESCALE(data_fp, 0, EXP_ACCUMULATION_INT_BITS, 4);
int4 widx = convert_int4(((uint4)(GRID_SIZE * i * 4) + boundary_workitems * 4 + idx4) < width);
- data_fp = select(MIN_VALUE, data_fp, widx);
vstore4(data_diff, 0, (__global int *)offset(&dst, i * GRID_SIZE * 4 + 4, 0));
- sum1D = sum1D + select(MIN_VALUE, data_fp, data_diff >= (int4)(DIFF_MIN));
+ data_fp = select(MIN_VALUE, data_fp, data_diff >= (int4)(DIFF_MIN));
+ data_fp = select(0, data_fp, widx);
+ sum1D = sum1D + data_fp;
}
#endif /* NON_MULTIPLE_OF_VECTOR_SIZE */
#endif /* NON_MULTIPLE_OF_GRID_SIZE */