about | summary | refs | log | tree | commit | diff
path: root/src/core/CL/cl_kernels/softmax_layer_quantized.cl
diff options
context:
space:
mode:
author Giorgio Arena <giorgio.arena@arm.com> 2018-02-19 15:33:41 +0000
committer Anthony Barbier <anthony.barbier@arm.com> 2018-11-02 16:47:18 +0000
commit 72f39be2f372b9a810cb27320dba5d0722407549 (patch)
tree 3ac73795c23107f7eab88cec553be9b9ed248214 /src/core/CL/cl_kernels/softmax_layer_quantized.cl
parent 7bfb199c1461fd553a78ca1947855c7d25106803 (diff)
download ComputeLibrary-72f39be2f372b9a810cb27320dba5d0722407549.tar.gz
COMPMID-939 Fix mismatches and finalize CLSoftmaxLayer optimization
Change-Id: I4404f91a270e0ba7bbb7451c4c43a485fd4a3f6c
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/121105
Tested-by: Jenkins <bsgcomp@arm.com>
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Diffstat (limited to 'src/core/CL/cl_kernels/softmax_layer_quantized.cl')
-rw-r--r-- src/core/CL/cl_kernels/softmax_layer_quantized.cl 7
1 file changed, 3 insertions, 4 deletions
diff --git a/src/core/CL/cl_kernels/softmax_layer_quantized.cl b/src/core/CL/cl_kernels/softmax_layer_quantized.cl
index 7521c8e1ee..c055381fc5 100644
--- a/src/core/CL/cl_kernels/softmax_layer_quantized.cl
+++ b/src/core/CL/cl_kernels/softmax_layer_quantized.cl
@@ -232,7 +232,6 @@ __kernel void softmax_layer_max_shift_exp_sum_quantized_serial(
* @note Datatype must be given as a preprocessor argument using -DDATA_TYPE=type. e.g. -DDATA_TYPE=short
* @note Fixed point position must be given as a preprocessor argument using -DFIXED_POINT_POSITION=pos. e.g. DFIXED_POINT_POSITION=4
* @note In case the input is not a multiple of VECTOR_SIZE (2,4,8,16) -DNON_MULTIPLE_OF_VECTOR_SIZE must be passed.
- * @note Beta can be optionally passed at compile time using -DBETA (by default, it is 1.0).
*
* @param[in] src_ptr Pointer to the source tensor slice. Supported data types: QS8/QS16/F16/F32
* @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes)
@@ -288,7 +287,7 @@ __kernel void softmax_layer_max_shift_exp_sum_quantized_parallel(
__local uchar max_local;
uchar4 uchar_min = (uchar4)0;
- uchar4 max_val_vec = (uchar4)uchar_min;
+ uchar4 max_val_vec = uchar_min;
// Number of elements per work-item.
const uint row = width / GRID_SIZE;
@@ -441,8 +440,8 @@ __kernel void softmax_layer_max_shift_exp_sum_quantized_parallel(
int4 data_diff_mult = mult_by_quantized_multiplier_parallel(data_diff);
data_fp = ASYMM_EXP_ON_NEGATIVE_VALUES(data_diff_mult, SCALED_DIFF_INT_BITS, 4);
data_fp = ASYMM_RESCALE(data_fp, 0, EXP_ACCUMULATION_INT_BITS, 4);
- uchar4 widx = convert_uchar4(((uint4)(GRID_SIZE * i * 4) + boundary_workitems * 4 + idx4) < width);
- data = select(uchar_min, data, widx);
+ int4 widx = convert_int4(((uint4)(GRID_SIZE * i * 4) + boundary_workitems * 4 + idx4) < width);
+ data_fp = select(0, data_fp, widx);
vstore4(data_diff, 0, (__global int *)offset(&dst, i * GRID_SIZE * 4 + 4, 0));
sum1D = sum1D + select(0, data_fp, data_diff >= (int4)(DIFF_MIN));
}