aboutsummaryrefslogtreecommitdiff
path: root/src/core/GLES_COMPUTE/cs_shaders/dropout.cs
diff options
context:
space:
mode:
authorAnthony Barbier <anthony.barbier@arm.com>2017-10-26 15:23:08 +0100
committerAnthony Barbier <anthony.barbier@arm.com>2018-11-02 16:35:24 +0000
commit7068f9900d136312318ff430aef588b14e0c87ad (patch)
treeb57ca81231860f1d8755e6f18e5be7c959fb60c6 /src/core/GLES_COMPUTE/cs_shaders/dropout.cs
parentd60737592736715dcfd0520535c48190d4ac77d2 (diff)
downloadComputeLibrary-7068f9900d136312318ff430aef588b14e0c87ad.tar.gz
COMPMID-631: Merge branches/gles_compute branch
Last commit: commit b25c5f68042b0c81bf611d59a1bb8535e1c42497 Author: Xinghang Zhou <xinghang.zhou@arm.com> Date: Wed Oct 25 18:48:10 2017 +0800 Synced validation's tolerances of GCSoftmax from cl side Change-Id: Ibe72054205c1c8721845d679a31af7ed0a7c5cf6 Reviewed-on: http://mpd-gerrit.cambridge.arm.com/93283 Reviewed-by: Anthony Barbier <anthony.barbier@arm.com> Tested-by: Kaizen <jeremy.johnson+kaizengerrit@arm.com>
Diffstat (limited to 'src/core/GLES_COMPUTE/cs_shaders/dropout.cs')
-rw-r--r--src/core/GLES_COMPUTE/cs_shaders/dropout.cs204
1 files changed, 204 insertions, 0 deletions
diff --git a/src/core/GLES_COMPUTE/cs_shaders/dropout.cs b/src/core/GLES_COMPUTE/cs_shaders/dropout.cs
new file mode 100644
index 0000000000..54e08b1306
--- /dev/null
+++ b/src/core/GLES_COMPUTE/cs_shaders/dropout.cs
@@ -0,0 +1,204 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+layout(local_size_x = LOCAL_SIZE_X, local_size_y = LOCAL_SIZE_Y, local_size_z = LOCAL_SIZE_Z) in;
+
+#include "helpers.h"
+
+layout(std140) uniform shader_params
+{
+ TENSOR3D_PARAM_DECLARATION(src);
+ TENSOR3D_PARAM_DECLARATION(mask);
+ TENSOR3D_PARAM_DECLARATION(dst);
+};
+
+uint hash(uint x)
+{
+ x += (x << 10u);
+ x ^= (x >> 6u);
+ x += (x << 3u);
+ x ^= (x >> 11u);
+ x += (x << 15u);
+ return x;
+}
+
+uint hash(uvec3 v)
+{
+ return hash(v.x ^ hash(v.y) ^ hash(v.z));
+}
+
+float float_construct(uint m)
+{
+ const uint ieee_mantissa = 0x007FFFFFu;
+ const uint ieee_one = 0x3F800000u;
+
+ m &= ieee_mantissa;
+ m |= ieee_one;
+
+ float f = uintBitsToFloat(m);
+ return f - 1.0;
+}
+
+float rand(vec3 v, float seed)
+{
+ return float_construct(hash(floatBitsToUint(v + seed)));
+}
+
+#ifdef DATA_TYPE_FP32
+
+precision highp float;
+
+BUFFER_DECLARATION(src, 1, float, readonly);
+BUFFER_DECLARATION(mask, 2, float, );
+BUFFER_DECLARATION(dst, 3, float, writeonly);
+
+/** Dropout is used to improve over-fit on neural networks.
+ *
+ * @note The data type must be passed at compile time using "#define DATA_TYPE_FP32"
+ *
+ * @param[in] src_ptr Pointer to the source tensor. Supported data types: F32
+ * @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes)
+ * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
+ * @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes)
+ * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes)
+ * @param[in] src_stride_z Stride of the source tensor in Z dimension (in bytes)
+ * @param[in] src_step_z src_stride_z * number of elements along Z processed per workitem(in bytes)
+ * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source tensor
+ * @param[out] mask_ptr Pointer to the mask tensor. Supported data types: same as @p src_ptr
+ * @param[in] mask_stride_x Stride of the mask tensor in X dimension (in bytes)
+ * @param[in] mask_step_x mask_stride_x * number of elements along X processed per workitem(in bytes)
+ * @param[in] mask_stride_y Stride of the mask tensor in Y dimension (in bytes)
+ * @param[in] mask_step_y mask_stride_y * number of elements along y processed per workitem(in bytes)
+ * @param[in] mask_stride_z Stride of the mask tensor in Z dimension (in bytes)
+ * @param[in] mask_step_z mask_stride_z * number of elements along Z processed per workitem(in bytes)
+ * @param[in] mask_offset_first_element_in_bytes The offset of the first element in the mask tensor
+ * @param[out] dst_ptr Pointer to the destination tensor. Supported data types: same as @p src_ptr
+ * @param[in] dst_stride_x Stride of the destination tensor in X dimension (in bytes)
+ * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes)
+ * @param[in] dst_stride_y Stride of the destination tensor in Y dimension (in bytes)
+ * @param[in] dst_step_y dst_stride_y * number of elements along Z processed per workitem(in bytes)
+ * @param[in] dst_stride_z Stride of the destination tensor in Z dimension (in bytes)
+ * @param[in] dst_step_z dst_stride_z * number of elements along Z processed per workitem(in bytes)
+ * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination tensor
+ */
+void main(void)
+{
+ Tensor3D src = GC_CONVERT_TO_TENSOR3D_STRUCT(src);
+ Tensor3D mask = GC_CONVERT_TO_TENSOR3D_STRUCT(mask);
+ Tensor3D dst = GC_CONVERT_TO_TENSOR3D_STRUCT(dst);
+
+ float random = 0.f;
+ float inputv = 0.f;
+ float maskv = 0.f;
+ float outputv = 0.f;
+
+#ifdef FORWARD
+ random = rand(vec3(gl_GlobalInvocationID.xyz), SEED);
+ maskv = (random > RATIO) ? 1.f : 0.f;
+ GC_STORE1_3D_OFFSET(maskv, mask, 0, 0, 0);
+#else /* FORWARD */
+ GC_LOAD1_3D_OFFSET(maskv, mask, 0, 0, 0);
+#endif /* FORWARD */
+
+ GC_LOAD1_3D_OFFSET(inputv, src, 0, 0, 0);
+ outputv = maskv * inputv * float(SCALE);
+ GC_STORE1_3D_OFFSET(outputv, dst, 0, 0, 0);
+}
+
+#elif defined(DATA_TYPE_FP16)
+
+precision mediump float;
+
+BUFFER_DECLARATION(src, 1, uint, readonly);
+BUFFER_DECLARATION(mask, 2, uint, );
+BUFFER_DECLARATION(dst, 3, uint, writeonly);
+
+/** Dropout is used to improve over-fit on neural networks.
+ *
+ * @note The data type must be passed at compile time using "#define DATA_TYPE_FP16"
+ *
+ * @param[in] src_ptr Pointer to the source tensor. Supported data types: F16
+ * @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes)
+ * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
+ * @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes)
+ * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes)
+ * @param[in] src_stride_z Stride of the source tensor in Z dimension (in bytes)
+ * @param[in] src_step_z src_stride_z * number of elements along Z processed per workitem(in bytes)
+ * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source tensor
+ * @param[out] mask_ptr Pointer to the mask tensor. Supported data types: same as @p src_ptr
+ * @param[in] mask_stride_x Stride of the mask tensor in X dimension (in bytes)
+ * @param[in] mask_step_x mask_stride_x * number of elements along X processed per workitem(in bytes)
+ * @param[in] mask_stride_y Stride of the mask tensor in Y dimension (in bytes)
+ * @param[in] mask_step_y mask_stride_y * number of elements along y processed per workitem(in bytes)
+ * @param[in] mask_stride_z Stride of the mask tensor in Z dimension (in bytes)
+ * @param[in] mask_step_z mask_stride_z * number of elements along Z processed per workitem(in bytes)
+ * @param[in] mask_offset_first_element_in_bytes The offset of the first element in the mask tensor
+ * @param[out] dst_ptr Pointer to the destination tensor. Supported data types: same as @p src_ptr
+ * @param[in] dst_stride_x Stride of the destination tensor in X dimension (in bytes)
+ * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes)
+ * @param[in] dst_stride_y Stride of the destination tensor in Y dimension (in bytes)
+ * @param[in] dst_step_y dst_stride_y * number of elements along Z processed per workitem(in bytes)
+ * @param[in] dst_stride_z Stride of the destination tensor in Z dimension (in bytes)
+ * @param[in] dst_step_z dst_stride_z * number of elements along Z processed per workitem(in bytes)
+ * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination tensor
+ */
+void main(void)
+{
+ Tensor3D src = GC_CONVERT_TO_TENSOR3D_STRUCT(src);
+ Tensor3D mask = GC_CONVERT_TO_TENSOR3D_STRUCT(mask);
+ Tensor3D dst = GC_CONVERT_TO_TENSOR3D_STRUCT(dst);
+
+ float random1 = 0.f;
+ float random2 = 0.f;
+ uint inputv = uint(0);
+ uint outputv = uint(0);
+ uint maskv = uint(0);
+ vec2 input_vec = vec2(0, 0);
+ vec2 output_vec = vec2(0, 0);
+ vec2 mask_vec = vec2(0, 0);
+
+#ifdef FORWARD
+ random1 = rand(vec3(gl_GlobalInvocationID.xyz), SEED);
+ random2 = rand(vec3(float(gl_GlobalInvocationID.x) + 0.5f, gl_GlobalInvocationID.yz), SEED);
+ mask_vec.x = (random1 > RATIO) ? 1.f : 0.f;
+ mask_vec.y = (random2 > RATIO) ? 1.f : 0.f;
+ maskv = packHalf2x16(mask_vec);
+ GC_STORE1_3D_OFFSET(maskv, mask, 0, 0, 0);
+#else /* FORWARD */
+ GC_LOAD1_3D_OFFSET(maskv, mask, 0, 0, 0);
+ mask_vec = unpackHalf2x16(maskv);
+#endif /* FORWARD */
+
+ GC_LOAD1_3D_OFFSET(inputv, src, 0, 0, 0);
+
+ input_vec = unpackHalf2x16(inputv);
+ output_vec = mask_vec * input_vec * float(SCALE);
+ outputv = packHalf2x16(output_vec);
+
+ GC_STORE1_3D_OFFSET(outputv, dst, 0, 0, 0);
+}
+
+#else /* DATA_TYPE_FP32 */
+
+#endif /* DATA_TYPE_FP32 */