aboutsummaryrefslogtreecommitdiff
path: root/src/core/GLES_COMPUTE/cs_shaders/helpers_cs.h
diff options
context:
space:
mode:
author: Joel Liang <joel.liang@arm.com> 2018-01-05 15:12:53 +0800
committer: Anthony Barbier <anthony.barbier@arm.com> 2018-11-02 16:42:33 +0000
commit: 09849a0e7128731473f37cf6045147db68b1c495 (patch)
tree: 865841c62c4910a34c7f0e86bd5bbc7e14a53e30 /src/core/GLES_COMPUTE/cs_shaders/helpers_cs.h
parent: 07d4054812d56afdae081294503f98c6dcaea048 (diff)
download: ComputeLibrary-09849a0e7128731473f37cf6045147db68b1c495.tar.gz
APPBROWSER-372: Rewrite the direct_convolution5x5.cs with the new common code
Change-Id: Ie2f398d62dea97e9201f77d22c9f0796db297b63
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/115280
Tested-by: Jenkins <bsgcomp@arm.com>
Reviewed-by: Zhenglin Li <zhenglin.li@arm.com>
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Diffstat (limited to 'src/core/GLES_COMPUTE/cs_shaders/helpers_cs.h')
-rwxr-xr-x src/core/GLES_COMPUTE/cs_shaders/helpers_cs.h 35
1 files changed, 34 insertions, 1 deletions
diff --git a/src/core/GLES_COMPUTE/cs_shaders/helpers_cs.h b/src/core/GLES_COMPUTE/cs_shaders/helpers_cs.h
index fffc87d90d..dd9e1a3864 100755
--- a/src/core/GLES_COMPUTE/cs_shaders/helpers_cs.h
+++ b/src/core/GLES_COMPUTE/cs_shaders/helpers_cs.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017, 2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -326,6 +326,23 @@ uint tensor3D_offset_in_bytes(Tensor3DIterator tensor_iter, int x, int y, int z)
#define VLOAD4_CURRENT_ITEM(return_type, tensor_ptr, tensor_iter) VLOAD4(return_type, tensor_ptr, CURRENT_ITEM_OFFSET(tensor_iter))
#define VSTORE4_CURRENT_ITEM(tensor_ptr, tensor_iter, data) VSTORE4(tensor_ptr, CURRENT_ITEM_OFFSET(tensor_iter), data)
+/** Issue five LOADs at @p offset, @p offset + 1, ... @p offset + 4 and pass the results to @p return_type
+ *
+ * @param[in] return_type Constructor (or function) name invoked with the five loaded values, in ascending offset order
+ * @param[in] tensor_ptr  Forwarded unchanged as the first argument of every LOAD
+ * @param[in] offset      Offset of the first load; must be usable in `(offset) + uint(k)`, and is evaluated five times
+ *
+ * @return Whatever @p return_type yields for the five loaded values
+ */
+#define VLOAD5(return_type, tensor_ptr, offset) \
+ return_type(LOAD(tensor_ptr, offset), \
+ LOAD(tensor_ptr, (offset) + uint(1)), \
+ LOAD(tensor_ptr, (offset) + uint(2)), \
+ LOAD(tensor_ptr, (offset) + uint(3)), \
+ LOAD(tensor_ptr, (offset) + uint(4)))
+
+/** Issue five STOREs writing @p data[0] ... @p data[4] at @p offset, @p offset + 1, ... @p offset + 4
+ *
+ * @param[in] tensor_ptr Forwarded unchanged as the first argument of every STORE
+ * @param[in] offset     Offset of the first store; must be usable in `(offset) + uint(k)`
+ * @param[in] data       Indexable value with at least five elements. It is parenthesised before
+ *                       subscripting so that a compound expression argument is indexed as a whole.
+ *
+ * @note The expansion is five separate statements and the caller supplies the final `;`.
+ *       Do not use it as the unbraced body of an `if`/`for`/`while`: only the first STORE would be guarded.
+ * @note @p tensor_ptr, @p offset and @p data are each evaluated five times; avoid arguments with side effects.
+ */
+#define VSTORE5(tensor_ptr, offset, data) \
+ STORE(tensor_ptr, offset, (data)[0]); \
+ STORE(tensor_ptr, (offset) + uint(1), (data)[1]); \
+ STORE(tensor_ptr, (offset) + uint(2), (data)[2]); \
+ STORE(tensor_ptr, (offset) + uint(3), (data)[3]); \
+ STORE(tensor_ptr, (offset) + uint(4), (data)[4])
+
+/* VLOAD5 with the offset taken from CURRENT_ITEM_OFFSET(tensor_iter). */
+#define VLOAD5_CURRENT_ITEM(return_type, tensor_ptr, tensor_iter) VLOAD5(return_type, tensor_ptr, CURRENT_ITEM_OFFSET(tensor_iter))
+/* VSTORE5 with the offset taken from CURRENT_ITEM_OFFSET(tensor_iter); expands to several statements, like VSTORE5. */
+#define VSTORE5_CURRENT_ITEM(tensor_ptr, tensor_iter, data) VSTORE5(tensor_ptr, CURRENT_ITEM_OFFSET(tensor_iter), data)
+
/** Converting the vec4 object to 4 half-precision (16-bits) floating point values and packing into a uvec2 object
*
* @param[in] data The vec4 object to be packed
@@ -348,6 +365,19 @@ mediump vec4 unpack4_half(highp uvec2 packed_data)
return vec4(unpackHalf2x16(packed_data.x), unpackHalf2x16(packed_data.y));
}
+/** Expand a uvec3 into six half-precision (16-bit) floating point values, returned as three vec2 pairs
+ *
+ * Each component of @p packed_data is passed to unpackHalf2x16, which yields one vec2 per component.
+ *
+ * @param[in] packed_data The uvec3 object whose x, y and z components are unpacked in that order
+ *
+ * @return A vec2[3] object: element 0 comes from x, element 1 from y, element 2 from z
+ */
+mediump vec2[3] unpack6_half(highp uvec3 packed_data)
+{
+    mediump vec2 pair_x = unpackHalf2x16(packed_data.x);
+    mediump vec2 pair_y = unpackHalf2x16(packed_data.y);
+    mediump vec2 pair_z = unpackHalf2x16(packed_data.z);
+
+    return vec2[3](pair_x, pair_y, pair_z);
+}
+
/** Converting the vec4[2] object to 8 half-precision (16-bits) floating point values and packing into a uvec4 object
*
* @param[in] data The vec4[2] object to be packed
@@ -396,6 +426,9 @@ mediump vec4[3] unpack12_half(highp uvec2[3] packed_data)
#define VLOAD2_UNPACK4_CURRENT_ITEM_HALF(tensor_ptr, tensor_iter) VLOAD2_UNPACK4_HALF(tensor_ptr, CURRENT_ITEM_OFFSET(tensor_iter))
#define VSTORE2_PACK4_CURRENT_ITEM_HALF(tensor_ptr, tensor_iter, data) VSTORE2_PACK4_HALF(tensor_ptr, CURRENT_ITEM_OFFSET(tensor_iter), data)
+/* Load a uvec3 via VLOAD3 at the given offset and expand it with unpack6_half (result: vec2[3]). */
+#define VLOAD3_UNPACK6_HALF(tensor_ptr, offset) unpack6_half(VLOAD3(uvec3, tensor_ptr, offset))
+/* Same as VLOAD3_UNPACK6_HALF, with the offset taken from CURRENT_ITEM_OFFSET(tensor_iter). */
+#define VLOAD3_UNPACK6_CURRENT_ITEM_HALF(tensor_ptr, tensor_iter) VLOAD3_UNPACK6_HALF(tensor_ptr, CURRENT_ITEM_OFFSET(tensor_iter))
+
#define VLOAD4_UNPACK8_HALF(tensor_ptr, offset) unpack8_half(VLOAD4(uvec4, tensor_ptr, offset))
#define VSTORE4_PACK8_HALF(tensor_ptr, offset, data) VSTORE4(tensor_ptr, offset, pack8_half(data))
#define VLOAD4_UNPACK8_CURRENT_ITEM_HALF(tensor_ptr, tensor_iter) VLOAD4_UNPACK8_HALF(tensor_ptr, CURRENT_ITEM_OFFSET(tensor_iter))