From f1f3ebd517089e934cf3f06e64d90619a395ad87 Mon Sep 17 00:00:00 2001
From: Joel Liang
Date: Fri, 10 Nov 2017 09:59:19 +0800
Subject: APPBROWSER-298, APPBROWSER-306: Reimplement the common code of compute shader

The new common code of compute shader is in file helpers_cs.h
Rewrite the direct_convolution1x1.cs and softmax_layer.cs to use the new common code.

It will also remove the dependence of the token pasting operator (##).
We'll remove the "##" support after we rewrite all of the compute shader code.

Change-Id: Icd8553ef6b61ad484a8507590ac8ed499bd47061
Reviewed-on: http://mpd-gerrit.cambridge.arm.com/95455
Tested-by: Kaizen
Reviewed-by: Georgios Pinitas
Reviewed-by: Frank Lei
(cherry picked from commit 0a4f83570d261f839d9866b68979efe8d7a95883)
Reviewed-on: http://mpd-gerrit.cambridge.arm.com/95601
Reviewed-by: Jim He
---
 src/core/GLES_COMPUTE/IGCKernel.cpp | 19 ++++++++++++++-----
 1 file changed, 14 insertions(+), 5 deletions(-)

(limited to 'src/core/GLES_COMPUTE/IGCKernel.cpp')

diff --git a/src/core/GLES_COMPUTE/IGCKernel.cpp b/src/core/GLES_COMPUTE/IGCKernel.cpp
index 154a2c0c66..d6ad6c47d9 100644
--- a/src/core/GLES_COMPUTE/IGCKernel.cpp
+++ b/src/core/GLES_COMPUTE/IGCKernel.cpp
@@ -74,7 +74,8 @@ GCKernel &IGCKernel::kernel()
 template <unsigned int dimension_size>
 unsigned int IGCKernel::num_arguments_per_tensor() const
 {
-    return 2 + 2 * dimension_size;
+    // Rounding up the tensor attributes structure in compute shader to a multiple of a vec4
+    return ceil_to_multiple(1 + 2 * dimension_size, 4);
 }
 
 template <unsigned int dimension_size>
@@ -97,12 +98,20 @@ void IGCKernel::add_tensor_argument(unsigned int &idx, const IGCTensor *tensor,
 
     for(unsigned int dimension = 0; dimension < dimension_size; dimension++)
     {
-        _kernel.set_params(idx++, strides[dimension]);
-        _kernel.set_params(idx++, strides[dimension] * window[dimension].step());
+        _kernel.set_argument(idx++, strides[dimension]);
+        _kernel.set_argument(idx++, strides[dimension] * window[dimension].step());
     }
 
-    _kernel.set_params(idx++, offset_first_element);
-    _kernel.set_params(idx++, param.buffer_data_type_shift);
+    _kernel.set_argument(idx++, offset_first_element);
+    _kernel.set_argument(idx++, param.buffer_data_type_shift);
+
+    // Rounding up the tensor attributes structure in compute shader to a multiple of a vec4
+    unsigned int idx_end = ceil_to_multiple(idx, 4);
+    for(unsigned int i = idx; i < idx_end; ++i)
+    {
+        _kernel.set_argument(i, 0);
+    }
+    idx = idx_end;
 
     ARM_COMPUTE_GL_CHECK(glBindBufferBase(GL_SHADER_STORAGE_BUFFER, param.binding_point, tensor->gc_buffer()));
 
-- 
cgit v1.2.1