about | summary | refs | log | tree | commit | diff
path: root/src/core/GLES_COMPUTE/cs_shaders/gemm.cs
diff options
context:
space:
mode:
author: zhenglin <zhenglin.li@arm.com> 2018-01-30 18:15:52 +0800
committer: Anthony Barbier <anthony.barbier@arm.com> 2018-11-02 16:45:00 +0000
commit: 2219dea47ff74cadf2a6ee1ab95e57cd96e60596 (patch)
tree: 7c74a3353ce3df529c77caf6052b2c2a768e6274 /src/core/GLES_COMPUTE/cs_shaders/gemm.cs
parent: 11c3b33215225f5baf34c045a68982e0058af74a (diff)
download: ComputeLibrary-2219dea47ff74cadf2a6ee1ab95e57cd96e60596.tar.gz
APPBROWSER-390,397,398: bugfix and fully connected validation issue on specific dataset
Change-Id: I227e90445715c3bd394e49930b010c0a5f5ca177
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/118108
Tested-by: Jenkins <bsgcomp@arm.com>
Reviewed-by: Joel Liang <joel.liang@arm.com>
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Diffstat (limited to 'src/core/GLES_COMPUTE/cs_shaders/gemm.cs')
-rw-r--r-- src/core/GLES_COMPUTE/cs_shaders/gemm.cs | 10
1 files changed, 5 insertions, 5 deletions
diff --git a/src/core/GLES_COMPUTE/cs_shaders/gemm.cs b/src/core/GLES_COMPUTE/cs_shaders/gemm.cs
index c81bed7066..580acc16a7 100644
--- a/src/core/GLES_COMPUTE/cs_shaders/gemm.cs
+++ b/src/core/GLES_COMPUTE/cs_shaders/gemm.cs
@@ -509,7 +509,7 @@ void main()
vec4 acc3 = vec4(0.0f);
#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
- for(; int(CURRENT_ITEM_OFFSET_IN_BYTES(src0_iter)) < int(end_row_vec_a - uint(2));
+ for(; int(CURRENT_ITEM_OFFSET_IN_BYTES(src0_iter)) <= int(end_row_vec_a - uint(4));
TENSOR_ITERATOR_ADVANCE_IN_BYTES(src0_iter, 2 * 2), TENSOR_ITERATOR_ADVANCE_IN_BYTES(src1_iter, uint(2) * src1_attrs.stride_y))
{
vec2 a0 = LOAD_UNPACK2_CURRENT_ITEM_HALF(src0_ptr, src0_iter);
@@ -549,7 +549,7 @@ void main()
vec2 a1 = LOAD_UNPACK2_HALF(src0_ptr, IMAGE_OFFSET(src0_iter, 0, 1));
#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
- vec a2 = LOAD_UNPACK2_HALF(src0_ptr, IMAGE_OFFSET(src0_iter, 0, 2));
+ vec2 a2 = LOAD_UNPACK2_HALF(src0_ptr, IMAGE_OFFSET(src0_iter, 0, 2));
#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
vec2 a3 = LOAD_UNPACK2_HALF(src0_ptr, IMAGE_OFFSET(src0_iter, 0, 3));
@@ -615,7 +615,7 @@ void main()
vec4 acc3 = vec4(0.0f);
#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
- for(; int(CURRENT_ITEM_OFFSET_IN_BYTES(src0_iter)) < int(end_row_vec_a - uint(16));
+ for(; int(CURRENT_ITEM_OFFSET_IN_BYTES(src0_iter)) <= int(end_row_vec_a - uint(16));
TENSOR_ITERATOR_ADVANCE_IN_BYTES(src0_iter, uint(8) * src0_attrs.stride_x), TENSOR_ITERATOR_ADVANCE_IN_BYTES(src1_iter, uint(8) * src1_attrs.stride_y))
{
vec4 a0[2] = LOAD_UNPACK8_CURRENT_ITEM_HALF(src0_ptr, src0_iter);
@@ -729,7 +729,7 @@ void main()
acc[0] = vec4(0.0f);
acc[1] = vec4(0.0f);
- for(; int(CURRENT_ITEM_OFFSET_IN_BYTES(src0_iter)) < int(end_row_vec_a - uint(16));
+ for(; int(CURRENT_ITEM_OFFSET_IN_BYTES(src0_iter)) <= int(end_row_vec_a - uint(16));
TENSOR_ITERATOR_ADVANCE_IN_BYTES(src0_iter, uint(8) * src0_attrs.stride_x), TENSOR_ITERATOR_ADVANCE_IN_BYTES(src1_iter, uint(8) * src1_attrs.stride_y))
{
vec4 a[2] = LOAD_UNPACK8_CURRENT_ITEM_HALF(src0_ptr, src0_iter);
@@ -823,7 +823,7 @@ void main(void)
VectorIterator biases_iter = CONVERT_TO_VECTOR_ITERATOR(biases_attrs, biases_shift);
vec4 u[2] = LOAD_UNPACK8_CURRENT_ITEM_HALF(accum_ptr, accum_iter);
- vec4 v[2] = LOAD_UNPACK8_CURRENT_ITEM_HALF(biases_ptr, bias_iter);
+ vec4 v[2] = LOAD_UNPACK8_CURRENT_ITEM_HALF(biases_ptr, biases_iter);
vec4 r[2];
r[0] = u[0] + v[0];