aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorXinghang Zhou <xinghang.zhou@arm.com>2017-11-02 16:37:24 +0800
committerAnthony Barbier <anthony.barbier@arm.com>2018-11-02 16:42:17 +0000
commit4af62a0202bbc103a95a1c2d04c2425418935a0f (patch)
tree0800efdcec65a4eee899ca033d432a1082a08147
parent7c435f2e32e3441ac6c288e786f25c86b65e1453 (diff)
downloadComputeLibrary-4af62a0202bbc103a95a1c2d04c2425418935a0f.tar.gz
APPBROWSER-289 DirectConvolution1*1 optimization for FP16, and fix a typo in PoolingLayer
Change-Id: I9cd3d872e353a9a404ab1d188d0d48a0965c5916 Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/112047 Tested-by: BSG Visual Compute Jenkins server to access repositories on http://mpd-gerrit.cambridge.arm.com <bsgcomp@arm.com> Reviewed-by: Joel Liang <joel.liang@arm.com> Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
-rw-r--r--src/core/GLES_COMPUTE/cs_shaders/direct_convolution1x1.cs799
-rw-r--r--src/core/GLES_COMPUTE/cs_shaders/pooling_layer.cs4
-rw-r--r--src/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.cpp66
-rw-r--r--tests/validation/GLES_COMPUTE/PoolingLayer.cpp3
4 files changed, 859 insertions, 13 deletions
diff --git a/src/core/GLES_COMPUTE/cs_shaders/direct_convolution1x1.cs b/src/core/GLES_COMPUTE/cs_shaders/direct_convolution1x1.cs
index 1338299f8c..190d7d6f7c 100644
--- a/src/core/GLES_COMPUTE/cs_shaders/direct_convolution1x1.cs
+++ b/src/core/GLES_COMPUTE/cs_shaders/direct_convolution1x1.cs
@@ -33,6 +33,8 @@ precision mediump float;
/** This kernel performs a direct convolution to convolve the low three dimensions.
*
* @note The data type must be passed at compile time using "#define DATA_TYPE_NAME". e.g. "#define DATA_TYPE_FP32"
+ * @note This kernel has multiple optimized direct convolution options for FP16.
+ * The direct convolution option must be passed at compile time using "#define PROCESS_nX_nY_nZ" e.g. "#define PROCESS_8X_1Y_1Z"
* @note The convolution stride x must be passed at compile time using "#define STRIDE_X n" e.g. "#define STRIDE_X 1"
* @note In case biases will be added to the convolution "#define HAS_BIAS" has to be passed to append the final matrix with 1 in each row.
*
@@ -99,8 +101,657 @@ void main()
STORE_CURRENT_ITEM(dst_ptr, dst_iter, pixels);
}
+
#elif defined(DATA_TYPE_FP16)
+#if defined(PROCESS_4X_1Y_1Z)
+TENSOR_DECLARATION(1, srcBuffer, uvec2, src_ptr, src_shift, 3, readonly);
+TENSOR_DECLARATION(2, dstBuffer, uvec2, dst_ptr, dst_shift, 3, writeonly);
+TENSOR_DECLARATION(3, weightsBuffer, uint, weights_ptr, weights_shift, 2, readonly);
+#ifdef BIAS
+TENSOR_DECLARATION(4, biasesBuffer, uint, biases_ptr, biases_shift, 2, readonly);
+#endif /* BIAS */
+
+#if STRIDE_X == 2
+#define CONVOLVE(s, w) convolve_stride2(s, w)
+#elif STRIDE_X == 1 /* STRIDE_X == 1 */
+#define CONVOLVE(s, w) convolve_stride1(s, w)
+#else /* STRIDE_X not equals 1 or 2 */
+#error STRIDE_X larger than 2 is not supported
+#endif /* STRIDE_X == 2 */
+
+vec4 convolve_stride1(ImageIterator src_iter, float w)
+{
+ vec4 s;
+ s = LOAD_UNPACK4_CURRENT_ITEM_HALF(src_ptr, src_iter);
+
+ s *= w;
+
+ return s;
+}
+
+vec4 convolve_stride2(ImageIterator src_iter, float w)
+{
+ vec4 s[2];
+ vec4 r;
+
+ s[0] = LOAD_UNPACK4_CURRENT_ITEM_HALF(src_ptr, src_iter);
+ s[1] = LOAD_UNPACK4_HALF(src_ptr, IMAGE_OFFSET(src_iter, 4, 0));
+ r = vec4(s[0].xz, s[1].xz);
+
+ r *= w;
+
+ return r;
+}
+
+void main()
+{
+ ImageIterator src_iter = CONVERT_TO_IMAGE_ITERATOR(src_attrs, src_shift);
+ Tensor3DIterator weights_iter = CONVERT_TO_TENSOR3D_ITERATOR_NO_STEP(weights_attrs, weights_shift);
+ Tensor3DIterator dst_iter = CONVERT_TO_TENSOR3D_ITERATOR(dst_attrs, dst_shift);
+
+#ifdef BIAS
+ VectorIterator biases_iter = CONVERT_TO_VECTOR_ITERATOR_NO_STEP(biases_attrs, biases_shift);
+#endif /* BIAS */
+
+ vec4 pixels = vec4(0.f);
+
+ uint z_index = gl_GlobalInvocationID.z;
+ TENSOR_ITERATOR_ADVANCE_IN_BYTES(weights_iter, z_index * weights_stride_w);
+
+#ifdef WEIGHTS_OPTIMIZATION
+ float w1, w2;
+ int nums = (int(weights_depth)) / 2;
+ for(int d = 0; d < nums; ++d)
+ {
+ vec2 vec2_w = LOAD_UNPACK2_CURRENT_ITEM_HALF(weights_ptr, weights_iter);
+
+ w1 = vec2_w.x;
+ vec4 r1 = CONVOLVE(src_iter, w1);
+ pixels += r1;
+
+ TENSOR_ITERATOR_ADVANCE_IN_BYTES(src_iter, src_attrs.stride_z);
+
+ w2 = vec2_w.y;
+ vec4 r2 = CONVOLVE(src_iter, w2);
+ pixels += r2;
+
+ TENSOR_ITERATOR_ADVANCE_IN_BYTES(src_iter, src_attrs.stride_z);
+ TENSOR_ITERATOR_ADVANCE_IN_BYTES(weights_iter, weights_attrs.stride_z * uint(2));
+ }
+#else /* WEIGHTS_OPTIMIZATION */
+ float w;
+ for(int d = 0; d < int(weights_depth); ++d)
+ {
+ w = LOAD_UNPACK2_CURRENT_ITEM_HALF(weights_ptr, weights_iter).x;
+
+ vec4 r = CONVOLVE(src_iter, w);
+ pixels += r;
+
+ TENSOR_ITERATOR_ADVANCE_IN_BYTES(src_iter, src_attrs.stride_z);
+ TENSOR_ITERATOR_ADVANCE_IN_BYTES(weights_iter, weights_attrs.stride_z);
+ }
+#endif /* WEIGHTS_OPTIMIZATION */
+
+#ifdef BIAS
+ vec2 vec2_b;
+ float b;
+
+ vec2_b = LOAD_UNPACK2_HALF(biases_ptr, VECTOR_OFFSET(biases_iter, z_index));
+
+ if(z_index % uint(2) == uint(0))
+ {
+ b = vec2_b.x;
+ }
+ else
+ {
+ b = vec2_b.y;
+ }
+
+ pixels += b;
+#endif /* BIAS */
+
+ STORE_PACK4_CURRENT_ITEM_HALF(dst_ptr, dst_iter, pixels);
+}
+#elif defined(PROCESS_4X_2Y_1Z)
+TENSOR_DECLARATION(1, srcBuffer, uvec2, src_ptr, src_shift, 3, readonly);
+TENSOR_DECLARATION(2, dstBuffer, uvec2, dst_ptr, dst_shift, 3, writeonly);
+TENSOR_DECLARATION(3, weightsBuffer, uint, weights_ptr, weights_shift, 2, readonly);
+#ifdef BIAS
+TENSOR_DECLARATION(4, biasesBuffer, uint, biases_ptr, biases_shift, 2, readonly);
+#endif /* BIAS */
+
+#if STRIDE_X == 2
+#define CONVOLVE(s, w) convolve_stride2(s, w)
+#elif STRIDE_X == 1 /* STRIDE_X == 1 */
+#define CONVOLVE(s, w) convolve_stride1(s, w)
+#else /* STRIDE_X not equals 1 or 2 */
+#error STRIDE_X larger than 2 is not supported
+#endif /* STRIDE_X == 2 */
+
+vec4[2] convolve_stride1(ImageIterator src_iter, float w)
+{
+ vec4 s[2];
+ s[0] = LOAD_UNPACK4_CURRENT_ITEM_HALF(src_ptr, src_iter);
+ s[1] = LOAD_UNPACK4_HALF(src_ptr, IMAGE_OFFSET(src_iter, 0, int(STRIDE_Y)));
+
+ s[0] *= w;
+ s[1] *= w;
+ return s;
+}
+
+vec4[2] convolve_stride2(ImageIterator src_iter, float w)
+{
+ vec4 s1[2];
+ vec4 s2[2];
+ vec4 r[2];
+
+ s1[0] = LOAD_UNPACK4_CURRENT_ITEM_HALF(src_ptr, src_iter);
+ s1[1] = LOAD_UNPACK4_HALF(src_ptr, IMAGE_OFFSET(src_iter, 4, 0));
+ r[0] = vec4(s1[0].xz, s1[1].xz);
+
+ s2[0] = LOAD_UNPACK4_HALF(src_ptr, IMAGE_OFFSET(src_iter, 0, int(STRIDE_Y)));
+ s2[1] = LOAD_UNPACK4_HALF(src_ptr, IMAGE_OFFSET(src_iter, 4, int(STRIDE_Y)));
+ r[1] = vec4(s2[0].xz, s2[1].xz);
+
+ r[0] *= w;
+ r[1] *= w;
+
+ return r;
+}
+
+void main()
+{
+ ImageIterator src_iter = CONVERT_TO_IMAGE_ITERATOR(src_attrs, src_shift);
+ Tensor3DIterator weights_iter = CONVERT_TO_TENSOR3D_ITERATOR_NO_STEP(weights_attrs, weights_shift);
+ Tensor3DIterator dst_iter = CONVERT_TO_TENSOR3D_ITERATOR(dst_attrs, dst_shift);
+
+#ifdef BIAS
+ VectorIterator biases_iter = CONVERT_TO_VECTOR_ITERATOR_NO_STEP(biases_attrs, biases_shift);
+#endif /* BIAS */
+
+ vec4 pixels[2];
+ pixels[0] = vec4(0.f);
+ pixels[1] = vec4(0.f);
+
+ uint z_index = gl_GlobalInvocationID.z;
+ TENSOR_ITERATOR_ADVANCE_IN_BYTES(weights_iter, z_index * weights_stride_w);
+
+#ifdef WEIGHTS_OPTIMIZATION
+ float w1, w2;
+ int nums = (int(weights_depth)) / 2;
+ for(int d = 0; d < nums; ++d)
+ {
+ vec2 vec2_w = LOAD_UNPACK2_CURRENT_ITEM_HALF(weights_ptr, weights_iter);
+
+ w1 = vec2_w.x;
+ vec4 r1[2] = CONVOLVE(src_iter, w1);
+ pixels[0] += r1[0];
+ pixels[1] += r1[1];
+
+ TENSOR_ITERATOR_ADVANCE_IN_BYTES(src_iter, src_attrs.stride_z);
+
+ w2 = vec2_w.y;
+ vec4 r2[2] = CONVOLVE(src_iter, w2);
+ pixels[0] += r2[0];
+ pixels[1] += r2[1];
+
+ TENSOR_ITERATOR_ADVANCE_IN_BYTES(src_iter, src_attrs.stride_z);
+ TENSOR_ITERATOR_ADVANCE_IN_BYTES(weights_iter, weights_attrs.stride_z * uint(2));
+ }
+#else /* WEIGHTS_OPTIMIZATION */
+ float w;
+ for(int d = 0; d < int(weights_depth); ++d)
+ {
+ w = LOAD_UNPACK2_CURRENT_ITEM_HALF(weights_ptr, weights_iter).x;
+
+ vec4 r[2] = CONVOLVE(src_iter, w);
+ pixels[0] += r[0];
+ pixels[1] += r[1];
+
+ TENSOR_ITERATOR_ADVANCE_IN_BYTES(src_iter, src_attrs.stride_z);
+ TENSOR_ITERATOR_ADVANCE_IN_BYTES(weights_iter, weights_attrs.stride_z);
+ }
+#endif /* WEIGHTS_OPTIMIZATION */
+
+#ifdef BIAS
+ vec2 vec2_b;
+ float b;
+
+ vec2_b = LOAD_UNPACK2_HALF(biases_ptr, VECTOR_OFFSET(biases_iter, z_index));
+
+ if(z_index % uint(2) == uint(0))
+ {
+ b = vec2_b.x;
+ }
+ else
+ {
+ b = vec2_b.y;
+ }
+
+ pixels[0] += b;
+ pixels[1] += b;
+#endif /* BIAS */
+
+ STORE_PACK4_CURRENT_ITEM_HALF(dst_ptr, dst_iter, pixels[0]);
+ STORE_PACK4_HALF(dst_ptr, TENSOR3D_OFFSET(dst_iter, 0, 1, 0), pixels[1]);
+}
+#elif defined(PROCESS_4X_3Y_1Z)
+TENSOR_DECLARATION(1, srcBuffer, uvec2, src_ptr, src_shift, 3, readonly);
+TENSOR_DECLARATION(2, dstBuffer, uvec2, dst_ptr, dst_shift, 3, writeonly);
+TENSOR_DECLARATION(3, weightsBuffer, uint, weights_ptr, weights_shift, 2, readonly);
+#ifdef BIAS
+TENSOR_DECLARATION(4, biasesBuffer, uint, biases_ptr, biases_shift, 2, readonly);
+#endif /* BIAS */
+
+#if STRIDE_X == 2
+#define CONVOLVE(s, w) convolve_stride2(s, w)
+#elif STRIDE_X == 1 /* STRIDE_X == 1 */
+#define CONVOLVE(s, w) convolve_stride1(s, w)
+#else /* STRIDE_X not equals 1 or 2 */
+#error STRIDE_X larger than 2 is not supported
+#endif /* STRIDE_X == 2 */
+
+vec4[3] convolve_stride1(ImageIterator src_iter, float w)
+{
+ vec4 s[3];
+ s[0] = LOAD_UNPACK4_CURRENT_ITEM_HALF(src_ptr, src_iter);
+ s[1] = LOAD_UNPACK4_HALF(src_ptr, IMAGE_OFFSET(src_iter, 0, int(STRIDE_Y)));
+ s[2] = LOAD_UNPACK4_HALF(src_ptr, IMAGE_OFFSET(src_iter, 0, (2 * int(STRIDE_Y))));
+
+ s[0] *= w;
+ s[1] *= w;
+ s[2] *= w;
+
+ return s;
+}
+
+vec4[3] convolve_stride2(ImageIterator src_iter, float w)
+{
+ vec4 s1[2];
+ vec4 s2[2];
+ vec4 s3[2];
+ vec4 r[3];
+
+ s1[0] = LOAD_UNPACK4_CURRENT_ITEM_HALF(src_ptr, src_iter);
+ s1[1] = LOAD_UNPACK4_HALF(src_ptr, IMAGE_OFFSET(src_iter, 4, 0));
+ r[0] = vec4(s1[0].xz, s1[1].xz);
+
+ s2[0] = LOAD_UNPACK4_HALF(src_ptr, IMAGE_OFFSET(src_iter, 0, int(STRIDE_Y)));
+ s2[1] = LOAD_UNPACK4_HALF(src_ptr, IMAGE_OFFSET(src_iter, 4, int(STRIDE_Y)));
+ r[1] = vec4(s2[0].xz, s2[1].xz);
+
+ s3[0] = LOAD_UNPACK4_HALF(src_ptr, IMAGE_OFFSET(src_iter, 0, (2 * int(STRIDE_Y))));
+ s3[1] = LOAD_UNPACK4_HALF(src_ptr, IMAGE_OFFSET(src_iter, 4, (2 * int(STRIDE_Y))));
+ r[2] = vec4(s3[0].xz, s3[1].xz);
+
+ r[0] *= w;
+ r[1] *= w;
+ r[2] *= w;
+
+ return r;
+}
+
+void main()
+{
+ ImageIterator src_iter = CONVERT_TO_IMAGE_ITERATOR(src_attrs, src_shift);
+ Tensor3DIterator weights_iter = CONVERT_TO_TENSOR3D_ITERATOR_NO_STEP(weights_attrs, weights_shift);
+ Tensor3DIterator dst_iter = CONVERT_TO_TENSOR3D_ITERATOR(dst_attrs, dst_shift);
+
+#ifdef BIAS
+ VectorIterator biases_iter = CONVERT_TO_VECTOR_ITERATOR_NO_STEP(biases_attrs, biases_shift);
+#endif /* BIAS */
+
+ vec4 pixels[3];
+ pixels[0] = vec4(0.f);
+ pixels[1] = vec4(0.f);
+ pixels[2] = vec4(0.f);
+
+ uint z_index = gl_GlobalInvocationID.z;
+ TENSOR_ITERATOR_ADVANCE_IN_BYTES(weights_iter, z_index * weights_stride_w);
+
+#ifdef WEIGHTS_OPTIMIZATION
+ float w1, w2;
+ int nums = (int(weights_depth)) / 2;
+ for(int d = 0; d < nums; ++d)
+ {
+ vec2 vec2_w = LOAD_UNPACK2_CURRENT_ITEM_HALF(weights_ptr, weights_iter);
+
+ w1 = vec2_w.x;
+ vec4 r1[3] = CONVOLVE(src_iter, w1);
+ pixels[0] += r1[0];
+ pixels[1] += r1[1];
+ pixels[2] += r1[2];
+
+ TENSOR_ITERATOR_ADVANCE_IN_BYTES(src_iter, src_attrs.stride_z);
+
+ w2 = vec2_w.y;
+ vec4 r2[3] = CONVOLVE(src_iter, w2);
+ pixels[0] += r2[0];
+ pixels[1] += r2[1];
+ pixels[2] += r2[2];
+
+ TENSOR_ITERATOR_ADVANCE_IN_BYTES(src_iter, src_attrs.stride_z);
+ TENSOR_ITERATOR_ADVANCE_IN_BYTES(weights_iter, weights_attrs.stride_z * uint(2));
+ }
+#else /* WEIGHTS_OPTIMIZATION */
+ float w;
+ for(int d = 0; d < int(weights_depth); ++d)
+ {
+ w = LOAD_UNPACK2_CURRENT_ITEM_HALF(weights_ptr, weights_iter).x;
+
+ vec4 r[3] = CONVOLVE(src_iter, w);
+ pixels[0] += r[0];
+ pixels[1] += r[1];
+ pixels[2] += r[2];
+
+ TENSOR_ITERATOR_ADVANCE_IN_BYTES(src_iter, src_attrs.stride_z);
+ TENSOR_ITERATOR_ADVANCE_IN_BYTES(weights_iter, weights_attrs.stride_z);
+ }
+#endif /* WEIGHTS_OPTIMIZATION */
+
+#ifdef BIAS
+ vec2 vec2_b;
+ float b;
+
+ vec2_b = LOAD_UNPACK2_HALF(biases_ptr, VECTOR_OFFSET(biases_iter, z_index));
+
+ if(z_index % uint(2) == uint(0))
+ {
+ b = vec2_b.x;
+ }
+ else
+ {
+ b = vec2_b.y;
+ }
+
+ pixels[0] += b;
+ pixels[1] += b;
+ pixels[2] += b;
+#endif /* BIAS */
+
+ STORE_PACK4_CURRENT_ITEM_HALF(dst_ptr, dst_iter, pixels[0]);
+ STORE_PACK4_HALF(dst_ptr, TENSOR3D_OFFSET(dst_iter, 0, 1, 0), pixels[1]);
+ STORE_PACK4_HALF(dst_ptr, TENSOR3D_OFFSET(dst_iter, 0, 2, 0), pixels[2]);
+}
+#elif defined(PROCESS_4X_4Y_1Z)
+TENSOR_DECLARATION(1, srcBuffer, uvec2, src_ptr, src_shift, 3, readonly);
+TENSOR_DECLARATION(2, dstBuffer, uvec2, dst_ptr, dst_shift, 3, writeonly);
+TENSOR_DECLARATION(3, weightsBuffer, uint, weights_ptr, weights_shift, 2, readonly);
+#ifdef BIAS
+TENSOR_DECLARATION(4, biasesBuffer, uint, biases_ptr, biases_shift, 2, readonly);
+#endif /* BIAS */
+
+#if STRIDE_X == 2
+#define CONVOLVE(s, w, x1, y1) convolve_stride2(s, w, x1, y1)
+#elif STRIDE_X == 1 /* STRIDE_X == 1 */
+#define CONVOLVE(s, w, x1, y1) convolve_stride1(s, w, x1, y1)
+#else /* STRIDE_X not equals 1 or 2 */
+#error STRIDE_X larger than 2 is not supported
+#endif /* STRIDE_X == 2 */
+
+vec4[2] convolve_stride1(ImageIterator src_iter, float w, int x1, int y1)
+{
+ vec4 s[2];
+ s[0] = LOAD_UNPACK4_HALF(src_ptr, IMAGE_OFFSET(src_iter, x1, y1));
+ s[1] = LOAD_UNPACK4_HALF(src_ptr, IMAGE_OFFSET(src_iter, x1, (y1 + int(STRIDE_Y))));
+
+ s[0] *= w;
+ s[1] *= w;
+
+ return s;
+}
+
+vec4[2] convolve_stride2(ImageIterator src_iter, float w, int x1, int y1)
+{
+ vec4 s1[2];
+ vec4 s2[2];
+ vec4 r[2];
+
+ s1[0] = LOAD_UNPACK4_HALF(src_ptr, IMAGE_OFFSET(src_iter, x1, y1));
+ s1[1] = LOAD_UNPACK4_HALF(src_ptr, IMAGE_OFFSET(src_iter, (4 + x1), y1));
+ r[0] = vec4(s1[0].xz, s1[1].xz);
+
+ s2[0] = LOAD_UNPACK4_HALF(src_ptr, IMAGE_OFFSET(src_iter, x1, (y1 + int(STRIDE_Y))));
+ s2[1] = LOAD_UNPACK4_HALF(src_ptr, IMAGE_OFFSET(src_iter, (4 + x1), (y1 + int(STRIDE_Y))));
+ r[1] = vec4(s2[0].xz, s2[1].xz);
+
+ r[0] *= w;
+ r[1] *= w;
+
+ return r;
+}
+
+void main()
+{
+ ImageIterator src_iter = CONVERT_TO_IMAGE_ITERATOR(src_attrs, src_shift);
+ Tensor3DIterator weights_iter = CONVERT_TO_TENSOR3D_ITERATOR_NO_STEP(weights_attrs, weights_shift);
+ Tensor3DIterator dst_iter = CONVERT_TO_TENSOR3D_ITERATOR(dst_attrs, dst_shift);
+
+#ifdef BIAS
+ VectorIterator biases_iter = CONVERT_TO_VECTOR_ITERATOR_NO_STEP(biases_attrs, biases_shift);
+#endif /* BIAS */
+
+ vec4 pixels[2];
+ vec4 pixels1[2];
+ pixels[0] = vec4(0.f);
+ pixels[1] = vec4(0.f);
+ pixels1[0] = vec4(0.f);
+ pixels1[1] = vec4(0.f);
+
+ uint z_index = gl_GlobalInvocationID.z;
+ TENSOR_ITERATOR_ADVANCE_IN_BYTES(weights_iter, z_index * weights_stride_w);
+
+#ifdef WEIGHTS_OPTIMIZATION
+ float w1, w2;
+ int nums = (int(weights_depth)) / 2;
+ for(int d = 0; d < nums; ++d)
+ {
+ vec2 vec2_w = LOAD_UNPACK2_CURRENT_ITEM_HALF(weights_ptr, weights_iter);
+
+ w1 = vec2_w.x;
+ vec4 r1[2] = CONVOLVE(src_iter, w1, 0, 0);
+ vec4 r2[2] = CONVOLVE(src_iter, w1, 0, (2 * int(STRIDE_Y)));
+ pixels[0] += r1[0];
+ pixels[1] += r1[1];
+ pixels1[0] += r2[0];
+ pixels1[1] += r2[1];
+
+ TENSOR_ITERATOR_ADVANCE_IN_BYTES(src_iter, src_attrs.stride_z);
+
+ w2 = vec2_w.y;
+ vec4 r3[2] = CONVOLVE(src_iter, w2, 0, 0);
+ vec4 r4[2] = CONVOLVE(src_iter, w2, 0, (2 * int(STRIDE_Y)));
+ pixels[0] += r3[0];
+ pixels[1] += r3[1];
+ pixels1[0] += r4[0];
+ pixels1[1] += r4[1];
+
+ TENSOR_ITERATOR_ADVANCE_IN_BYTES(src_iter, src_attrs.stride_z);
+ TENSOR_ITERATOR_ADVANCE_IN_BYTES(weights_iter, weights_attrs.stride_z * uint(2));
+ }
+#else /* WEIGHTS_OPTIMIZATION */
+ float w;
+ for(int d = 0; d < int(weights_depth); ++d)
+ {
+ w = LOAD_UNPACK2_CURRENT_ITEM_HALF(weights_ptr, weights_iter).x;
+
+ vec4 r1[2] = CONVOLVE(src_iter, w, 0, 0);
+ vec4 r2[2] = CONVOLVE(src_iter, w, 0, (2 * int(STRIDE_Y)));
+ pixels[0] += r1[0];
+ pixels[1] += r1[1];
+ pixels1[0] += r2[0];
+ pixels1[1] += r2[1];
+
+ TENSOR_ITERATOR_ADVANCE_IN_BYTES(src_iter, src_attrs.stride_z);
+ TENSOR_ITERATOR_ADVANCE_IN_BYTES(weights_iter, weights_attrs.stride_z);
+ }
+#endif /* WEIGHTS_OPTIMIZATION */
+
+#ifdef BIAS
+ vec2 vec2_b;
+ float b;
+
+ vec2_b = LOAD_UNPACK2_HALF(biases_ptr, VECTOR_OFFSET(biases_iter, z_index));
+
+ if(z_index % uint(2) == uint(0))
+ {
+ b = vec2_b.x;
+ }
+ else
+ {
+ b = vec2_b.y;
+ }
+
+ pixels[0] += b;
+ pixels[1] += b;
+ pixels1[0] += b;
+ pixels1[1] += b;
+#endif /* BIAS */
+
+ STORE_PACK4_CURRENT_ITEM_HALF(dst_ptr, dst_iter, pixels[0]);
+ STORE_PACK4_HALF(dst_ptr, TENSOR3D_OFFSET(dst_iter, 0, 1, 0), pixels[1]);
+ STORE_PACK4_HALF(dst_ptr, TENSOR3D_OFFSET(dst_iter, 0, 2, 0), pixels1[0]);
+ STORE_PACK4_HALF(dst_ptr, TENSOR3D_OFFSET(dst_iter, 0, 3, 0), pixels1[1]);
+}
+#elif defined(PROCESS_4X_2Y_2Z)
+TENSOR_DECLARATION(1, srcBuffer, uvec2, src_ptr, src_shift, 3, readonly);
+TENSOR_DECLARATION(2, dstBuffer, uvec2, dst_ptr, dst_shift, 3, writeonly);
+TENSOR_DECLARATION(3, weightsBuffer, uint, weights_ptr, weights_shift, 2, readonly);
+#ifdef BIAS
+TENSOR_DECLARATION(4, biasesBuffer, uint, biases_ptr, biases_shift, 2, readonly);
+#endif /* BIAS */
+
+#if STRIDE_X == 2
+#define CONVOLVE(s, w) convolve_stride2(s, w)
+#elif STRIDE_X == 1 /* STRIDE_X == 1 */
+#define CONVOLVE(s, w) convolve_stride1(s, w)
+#else /* STRIDE_X not equals 1 or 2 */
+#error STRIDE_X larger than 2 is not supported
+#endif /* STRIDE_X == 2 */
+
+vec4[2] convolve_stride1(ImageIterator src_iter, float w)
+{
+ vec4 s[2];
+ s[0] = LOAD_UNPACK4_CURRENT_ITEM_HALF(src_ptr, src_iter);
+ s[1] = LOAD_UNPACK4_HALF(src_ptr, IMAGE_OFFSET(src_iter, 0, int(STRIDE_Y)));
+
+ s[0] *= w;
+ s[1] *= w;
+
+ return s;
+}
+
+vec4[2] convolve_stride2(ImageIterator src_iter, float w)
+{
+ vec4 s1[2];
+ vec4 s2[2];
+ vec4 r[2];
+
+ s1[0] = LOAD_UNPACK4_CURRENT_ITEM_HALF(src_ptr, src_iter);
+ s1[1] = LOAD_UNPACK4_HALF(src_ptr, IMAGE_OFFSET(src_iter, 4, 0));
+ r[0] = vec4(s1[0].xz, s1[1].xz);
+
+ s2[0] = LOAD_UNPACK4_HALF(src_ptr, IMAGE_OFFSET(src_iter, 0, int(STRIDE_Y)));
+ s2[1] = LOAD_UNPACK4_HALF(src_ptr, IMAGE_OFFSET(src_iter, 4, int(STRIDE_Y)));
+ r[1] = vec4(s2[0].xz, s2[1].xz);
+
+ r[0] *= w;
+ r[1] *= w;
+
+ return r;
+}
+
+void main()
+{
+ ImageIterator src_iter = CONVERT_TO_IMAGE_ITERATOR(src_attrs, src_shift);
+ Tensor3DIterator weights_iter = CONVERT_TO_TENSOR3D_ITERATOR_NO_STEP(weights_attrs, weights_shift);
+ Tensor3DIterator dst_iter = CONVERT_TO_TENSOR3D_ITERATOR(dst_attrs, dst_shift);
+
+#ifdef BIAS
+ VectorIterator biases_iter = CONVERT_TO_VECTOR_ITERATOR_NO_STEP(biases_attrs, biases_shift);
+#endif /* BIAS */
+
+ uint z_base_index = uint(gl_GlobalInvocationID.z) << uint(1);
+
+ // store orginal src current offset
+ int s_offset_in_bytes = src_iter.current_offset_in_bytes;
+
+ TENSOR_ITERATOR_ADVANCE_IN_BYTES(weights_iter, z_base_index * weights_stride_w);
+
+ for(int z = 0; z < 2; ++z)
+ {
+ uint z_index = z_base_index + uint(z);
+
+ src_iter.current_offset_in_bytes = s_offset_in_bytes;
+
+ vec4 pixels[2];
+ pixels[0] = vec4(0.f);
+ pixels[1] = vec4(0.f);
+
+#ifdef WEIGHTS_OPTIMIZATION
+ float w1, w2;
+ int nums = (int(weights_depth)) / 2;
+ for(int d = 0; d < nums; ++d)
+ {
+ vec2 vec2_w = LOAD_UNPACK2_CURRENT_ITEM_HALF(weights_ptr, weights_iter);
+
+ w1 = vec2_w.x;
+ vec4 r1[2] = CONVOLVE(src_iter, w1);
+ pixels[0] += r1[0];
+ pixels[1] += r1[1];
+
+ TENSOR_ITERATOR_ADVANCE_IN_BYTES(src_iter, src_attrs.stride_z);
+
+ w2 = vec2_w.y;
+ vec4 r2[2] = CONVOLVE(src_iter, w2);
+ pixels[0] += r2[0];
+ pixels[1] += r2[1];
+
+ TENSOR_ITERATOR_ADVANCE_IN_BYTES(src_iter, src_attrs.stride_z);
+ TENSOR_ITERATOR_ADVANCE_IN_BYTES(weights_iter, weights_attrs.stride_z * uint(2));
+ }
+#else /* WEIGHTS_OPTIMIZATION */
+ float w;
+ for(int d = 0; d < int(weights_depth); ++d)
+ {
+ w = LOAD_UNPACK2_CURRENT_ITEM_HALF(weights_ptr, weights_iter).x;
+
+ vec4 r[2] = CONVOLVE(src_iter, w);
+ pixels[0] += r[0];
+ pixels[1] += r[1];
+
+ TENSOR_ITERATOR_ADVANCE_IN_BYTES(src_iter, src_attrs.stride_z);
+ TENSOR_ITERATOR_ADVANCE_IN_BYTES(weights_iter, weights_attrs.stride_z);
+ }
+#endif /* WEIGHTS_OPTIMIZATION */
+
+#ifdef BIAS
+ vec2 vec2_b;
+ float b;
+
+ vec2_b = LOAD_UNPACK2_HALF(biases_ptr, VECTOR_OFFSET(biases_iter, z_index));
+
+ if(z_index % uint(2) == uint(0))
+ {
+ b = vec2_b.x;
+ }
+ else
+ {
+ b = vec2_b.y;
+ }
+
+ pixels[0] += b;
+ pixels[1] += b;
+#endif /* BIAS */
+
+ STORE_PACK4_CURRENT_ITEM_HALF(dst_ptr, dst_iter, pixels[0]);
+ STORE_PACK4_HALF(dst_ptr, TENSOR3D_OFFSET(dst_iter, 0, 1, 0), pixels[1]);
+
+ TENSOR_ITERATOR_ADVANCE_IN_BYTES(dst_iter, dst_attrs.stride_z);
+ }
+}
+#elif defined(PROCESS_8X_1Y_1Z)
TENSOR_DECLARATION(1, srcBuffer, uvec4, src_ptr, src_shift, 4, readonly);
TENSOR_DECLARATION(2, dstBuffer, uvec4, dst_ptr, dst_shift, 4, writeonly);
TENSOR_DECLARATION(3, weightsBuffer, uint, weights_ptr, weights_shift, 2, readonly);
@@ -129,13 +780,14 @@ vec4[2] convolve_stride1(ImageIterator src_iter, float w)
vec4[2] convolve_stride2(ImageIterator src_iter, float w)
{
- vec4 s[2];
+ vec4 s1[2];
+ vec4 s2[2];
vec4 r[2];
- s = LOAD_UNPACK8_CURRENT_ITEM_HALF(src_ptr, src_iter);
- r[0] = vec4(s[0].xz, s[1].xz);
- s = LOAD_UNPACK8_HALF(src_ptr, IMAGE_OFFSET(src_iter, 8, 0));
- r[1] = vec4(s[0].xz, s[1].xz);
+ s1 = LOAD_UNPACK8_CURRENT_ITEM_HALF(src_ptr, src_iter);
+ r[0] = vec4(s1[0].xz, s1[1].xz);
+ s2 = LOAD_UNPACK8_HALF(src_ptr, IMAGE_OFFSET(src_iter, 8, 0));
+ r[1] = vec4(s2[0].xz, s2[1].xz);
r[0] *= w;
r[1] *= w;
@@ -218,6 +870,141 @@ void main()
STORE_PACK8_CURRENT_ITEM_HALF(dst_ptr, dst_iter, pixels);
}
-#else /* DATA_TYPE_FP32 */
+#elif defined(PROCESS_8X_2Y_1Z)
+TENSOR_DECLARATION(1, srcBuffer, uvec4, src_ptr, src_shift, 4, readonly);
+TENSOR_DECLARATION(2, dstBuffer, uvec4, dst_ptr, dst_shift, 4, writeonly);
+TENSOR_DECLARATION(3, weightsBuffer, uint, weights_ptr, weights_shift, 2, readonly);
+#ifdef BIAS
+TENSOR_DECLARATION(4, biasesBuffer, uint, biases_ptr, biases_shift, 2, readonly);
+#endif /* BIAS */
+
+#if STRIDE_X == 2
+#define CONVOLVE(s, w, x1, y1) convolve_stride2(s, w, x1, y1)
+#elif STRIDE_X == 1 /* STRIDE_X == 1 */
+#define CONVOLVE(s, w, x1, y1) convolve_stride1(s, w, x1, y1)
+#else /* STRIDE_X not equals 1 or 2 */
+#error STRIDE_X larger than 2 is not supported
+#endif /* STRIDE_X == 2 */
+
+vec4[2] convolve_stride1(ImageIterator src_iter, float w, int x1, int y1)
+{
+ vec4 s[2];
+ s = LOAD_UNPACK8_HALF(src_ptr, IMAGE_OFFSET(src_iter, x1, y1));
+
+ s[0] *= w;
+ s[1] *= w;
+
+ return s;
+}
+
+vec4[2] convolve_stride2(ImageIterator src_iter, float w, int x1, int y1)
+{
+ vec4 s1[2];
+ vec4 s2[2];
+ vec4 r[2];
+
+ s1 = LOAD_UNPACK8_HALF(src_ptr, IMAGE_OFFSET(src_iter, x1, y1));
+ r[0] = vec4(s1[0].xz, s1[1].xz);
+ s2 = LOAD_UNPACK8_HALF(src_ptr, IMAGE_OFFSET(src_iter, (8 + x1), y1));
+ r[1] = vec4(s2[0].xz, s2[1].xz);
+
+ r[0] *= w;
+ r[1] *= w;
+
+ return r;
+}
+
+void main()
+{
+ ImageIterator src_iter = CONVERT_TO_IMAGE_ITERATOR(src_attrs, src_shift);
+ Tensor3DIterator weights_iter = CONVERT_TO_TENSOR3D_ITERATOR_NO_STEP(weights_attrs, weights_shift);
+ Tensor3DIterator dst_iter = CONVERT_TO_TENSOR3D_ITERATOR(dst_attrs, dst_shift);
+
+#ifdef BIAS
+ VectorIterator biases_iter = CONVERT_TO_VECTOR_ITERATOR_NO_STEP(biases_attrs, biases_shift);
+#endif /* BIAS */
+
+ vec4 pixels[2];
+ vec4 pixels1[2];
+ pixels[0] = vec4(0.f);
+ pixels[1] = vec4(0.f);
+ pixels1[0] = vec4(0.f);
+ pixels1[1] = vec4(0.f);
+
+ uint z_index = gl_GlobalInvocationID.z;
+ TENSOR_ITERATOR_ADVANCE_IN_BYTES(weights_iter, z_index * weights_stride_w);
+
+#ifdef WEIGHTS_OPTIMIZATION
+ float w1, w2;
+ int nums = (int(weights_depth)) / 2;
+ for(int d = 0; d < nums; ++d)
+ {
+ vec2 vec2_w = LOAD_UNPACK2_CURRENT_ITEM_HALF(weights_ptr, weights_iter);
+
+ w1 = vec2_w.x;
+ vec4 r1[2] = CONVOLVE(src_iter, w1, 0, 0);
+ vec4 r2[2] = CONVOLVE(src_iter, w1, 0, (int(STRIDE_Y)));
+ pixels[0] += r1[0];
+ pixels[1] += r1[1];
+ pixels1[0] += r2[0];
+ pixels1[1] += r2[1];
+
+ TENSOR_ITERATOR_ADVANCE_IN_BYTES(src_iter, src_attrs.stride_z);
+
+ w2 = vec2_w.y;
+ vec4 r3[2] = CONVOLVE(src_iter, w2, 0, 0);
+ vec4 r4[2] = CONVOLVE(src_iter, w2, 0, (int(STRIDE_Y)));
+ pixels[0] += r3[0];
+ pixels[1] += r3[1];
+ pixels1[0] += r4[0];
+ pixels1[1] += r4[1];
+
+ TENSOR_ITERATOR_ADVANCE_IN_BYTES(src_iter, src_attrs.stride_z);
+ TENSOR_ITERATOR_ADVANCE_IN_BYTES(weights_iter, weights_attrs.stride_z * uint(2));
+ }
+#else /* WEIGHTS_OPTIMIZATION */
+ float w;
+ for(int d = 0; d < int(weights_depth); ++d)
+ {
+ w = LOAD_UNPACK2_CURRENT_ITEM_HALF(weights_ptr, weights_iter).x;
+
+ vec4 r1[2] = CONVOLVE(src_iter, w, 0, 0);
+ vec4 r2[2] = CONVOLVE(src_iter, w, 0, (int(STRIDE_Y)));
+ pixels[0] += r1[0];
+ pixels[1] += r1[1];
+ pixels1[0] += r2[0];
+ pixels1[1] += r2[1];
+
+ TENSOR_ITERATOR_ADVANCE_IN_BYTES(src_iter, src_attrs.stride_z);
+ TENSOR_ITERATOR_ADVANCE_IN_BYTES(weights_iter, weights_attrs.stride_z);
+ }
+#endif /* WEIGHTS_OPTIMIZATION */
+
+#ifdef BIAS
+ vec2 vec2_b;
+ float b;
+
+ vec2_b = LOAD_UNPACK2_HALF(biases_ptr, VECTOR_OFFSET(biases_iter, z_index));
+
+ if(z_index % uint(2) == uint(0))
+ {
+ b = vec2_b.x;
+ }
+ else
+ {
+ b = vec2_b.y;
+ }
+
+ pixels[0] += b;
+ pixels[1] += b;
+ pixels1[0] += b;
+ pixels1[1] += b;
+#endif /* BIAS */
+
+ STORE_PACK8_CURRENT_ITEM_HALF(dst_ptr, dst_iter, pixels);
+ STORE_PACK8_HALF(dst_ptr, TENSOR3D_OFFSET(dst_iter, 0, 1, 0), pixels1);
+}
+#endif /* PROCESS_4X_1Y_1Z */
+#else /* DATA_TYPE_F32 */
#error Data type not supported
#endif /* DATA_TYPE_FP32 */
diff --git a/src/core/GLES_COMPUTE/cs_shaders/pooling_layer.cs b/src/core/GLES_COMPUTE/cs_shaders/pooling_layer.cs
index 1e0fee4688..401b002111 100644
--- a/src/core/GLES_COMPUTE/cs_shaders/pooling_layer.cs
+++ b/src/core/GLES_COMPUTE/cs_shaders/pooling_layer.cs
@@ -608,8 +608,8 @@ void main(void)
// Divide by pool region in case of average pooling
int start_x = int(gl_GlobalInvocationID.x) * STRIDE_X - PAD_X;
int start_y = int(gl_GlobalInvocationID.y) * STRIDE_Y - PAD_Y;
- int end_x = int(min(STRIDE_X + POOL_SIZE, MAX_WIDTH));
- int end_y = int(min(STRIDE_Y + POOL_SIZE, MAX_HEIGHT));
+ int end_x = int(min(start_x + POOL_SIZE, MAX_WIDTH));
+ int end_y = int(min(start_y + POOL_SIZE, MAX_HEIGHT));
float res1 = float((end_y - start_y) * (end_x - start_x));
res = DIV_OP(res, res1);
}
diff --git a/src/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.cpp
index a7d721d035..ab78fb994b 100644
--- a/src/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.cpp
+++ b/src/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.cpp
@@ -102,6 +102,7 @@ void GCDirectConvolutionLayerKernel<kernel_size>::configure(const IGCTensor *inp
options.emplace("#define LOCAL_SIZE_Y " + support::cpp11::to_string(_lws[1]));
options.emplace("#define LOCAL_SIZE_Z " + support::cpp11::to_string(_lws[2]));
options.emplace("#define STRIDE_X " + support::cpp11::to_string(_conv_stride_x));
+ options.emplace("#define STRIDE_Y " + support::cpp11::to_string(_conv_stride_y));
std::string dt_name = (input->info()->data_type() == DataType::F32) ? "DATA_TYPE_FP32" : "DATA_TYPE_FP16";
options.emplace(("#define " + dt_name));
@@ -148,6 +149,10 @@ void GCDirectConvolutionLayerKernel<kernel_size>::configure(const IGCTensor *inp
num_elems_written_per_iteration_y = 3;
num_elems_written_per_iteration_z = 2;
#endif /* PROCESS_X_8ELEMENTS_Y_3ELEMENTS_FP16 */
+#undef PROCESS_X_8ELEMENTS_Y_3ELEMENTS_FP16
+#undef PROCESS_X_4ELEMENTS_Y_3ELEMENTS_FP16
+#undef PROCESS_X_4ELEMENTS_Y_4ELEMENTS_FP16
+#undef PROCESS_X_4ELEMENTS_Y_3ELEMENTS_Z_2ELEMENTS_FP16
break;
case DataType::F32:
@@ -193,6 +198,9 @@ void GCDirectConvolutionLayerKernel<kernel_size>::configure(const IGCTensor *inp
#else /* PROCESS_1_ELEMENT */
#error Have to declare how many elements to process in one thread.
#endif /* PROCESS_1_ELEMENT */
+#undef PROCESS_1_ELEMENT
+#undef PROCESS_4_ELEMENT
+#undef PROCESS_8_ELEMENT
break;
default:
@@ -203,15 +211,65 @@ void GCDirectConvolutionLayerKernel<kernel_size>::configure(const IGCTensor *inp
}
else if(kernel_size == 1)
{
+ if(weights->info()->dimension(2) % 2 == 0)
+ {
+ options.emplace("#define WEIGHTS_OPTIMIZATION");
+ }
switch(input->info()->data_type())
{
case DataType::F16:
+#define PROCESS_8X_2Y_1Z
+
+#if defined(PROCESS_4X_1Y_1Z)
+ options.emplace("#define PROCESS_4X_1Y_1Z");
+ num_elems_read_per_iteration_x = 4;
+ num_elems_written_per_iteration_x = 4;
+#elif defined(PROCESS_4X_2Y_1Z)
+ options.emplace("#define PROCESS_4X_2Y_1Z");
+ num_elems_read_per_iteration_x = 4;
+ num_elems_read_per_iteration_y = 2;
+ num_elems_written_per_iteration_x = 4;
+ num_elems_written_per_iteration_y = 2;
+#elif defined(PROCESS_4X_3Y_1Z)
+ options.emplace("#define PROCESS_4X_3Y_1Z");
+ num_elems_read_per_iteration_x = 4;
+ num_elems_read_per_iteration_y = 3;
+ num_elems_written_per_iteration_x = 4;
+ num_elems_written_per_iteration_y = 3;
+#elif defined(PROCESS_4X_4Y_1Z)
+ options.emplace("#define PROCESS_4X_4Y_1Z");
+ num_elems_read_per_iteration_x = 4;
+ num_elems_read_per_iteration_y = 4;
+ num_elems_written_per_iteration_x = 4;
+ num_elems_written_per_iteration_y = 4;
+#elif defined(PROCESS_4X_2Y_2Z)
+ ARM_COMPUTE_ERROR_ON_MSG((weights->info()->dimension(4) % 2) == 1, "Current 'weights->info()->dimension(4) % 2) == 1' is not supported");
+ options.emplace("#define PROCESS_4X_2Y_2Z");
+ num_elems_read_per_iteration_x = 4;
+ num_elems_read_per_iteration_y = 2;
+ num_elems_written_per_iteration_x = 4;
+ num_elems_written_per_iteration_y = 2;
+ num_elems_written_per_iteration_z = 2;
+#elif defined(PROCESS_8X_1Y_1Z)
+ options.emplace("#define PROCESS_8X_1Y_1Z");
num_elems_read_per_iteration_x = 8;
num_elems_written_per_iteration_x = 8;
- if(weights->info()->dimension(2) % 2 == 0)
- {
- options.emplace("#define WEIGHTS_OPTIMIZATION");
- }
+#elif defined(PROCESS_8X_2Y_1Z)
+ options.emplace("#define PROCESS_8X_2Y_1Z");
+ num_elems_read_per_iteration_x = 8;
+ num_elems_read_per_iteration_y = 2;
+ num_elems_written_per_iteration_x = 8;
+ num_elems_written_per_iteration_y = 2;
+#else /* PROCESS_4X_1Y_1Z */
+#error Have to declare how many elements to process in one thread.
+#endif /* PROCESS_4X_1Y_1Z */
+#undef PROCESS_4X_1Y_1Z
+#undef PROCESS_4X_2Y_1Z
+#undef PROCESS_4X_3Y_1Z
+#undef PROCESS_4X_4Y_1Z
+#undef PROCESS_4X_2Y_2Z
+#undef PROCESS_8X_1Y_1Z
+#undef PROCESS_8X_2Y_1Z
break;
case DataType::F32:
diff --git a/tests/validation/GLES_COMPUTE/PoolingLayer.cpp b/tests/validation/GLES_COMPUTE/PoolingLayer.cpp
index a78b27edc2..4bfaf245be 100644
--- a/tests/validation/GLES_COMPUTE/PoolingLayer.cpp
+++ b/tests/validation/GLES_COMPUTE/PoolingLayer.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-
+#if 0 // FIXME(APPBROWSER-304): Add exclude padding support for OpenGL ES implementation
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/GLES_COMPUTE/GCTensor.h"
#include "arm_compute/runtime/GLES_COMPUTE/GCTensorAllocator.h"
@@ -103,3 +103,4 @@ TEST_SUITE_END()
} // namespace validation
} // namespace test
} // namespace arm_compute
+#endif // 0