APPBROWSER-298: Remove the old shader common code

Remove token pasting operator support for GLES shaders. Remove cs_shaders/helpers.h (the old GLES shader common code). Remove class BufferParam: we don't need to pass the buffer_data_type_shift to the GLES shaders. Change-Id: Ic4fa6b2fb7647b8f69759f6077ae4a5b483cc04d Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/115448 Tested-by: Jenkins <bsgcomp@arm.com> Reviewed-by: Frank Lei <frank.lei@arm.com> Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
author: Joel Liang <joel.liang@arm.com> 2018-01-08 15:20:48 +0800
committer: Anthony Barbier <anthony.barbier@arm.com> 2018-11-02 16:42:33 +0000
commit: abd03cfc7ba96462bc5a1ca6f4faa9ca22792158 (patch)
tree: fef70cbe7b3e2ee18f69e880a0586189bd00a37b
parent: 088cc7ff4623b268275f38c1ffb6d373584335ed (diff)
download: ComputeLibrary-abd03cfc7ba96462bc5a1ca6f4faa9ca22792158.tar.gz
10 files changed, 37 insertions, 964 deletions
diff --git a/arm_compute/core/GLES_COMPUTE/IGCKernel.h b/arm_compute/core/GLES_COMPUTE/IGCKernel.h
index ee1e166424..ce7717e8ea 100644
--- a/arm_compute/core/GLES_COMPUTE/IGCKernel.h
+++ b/arm_compute/core/GLES_COMPUTE/IGCKernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017, 2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -46,25 +46,6 @@ public:
      */
     GCKernel &kernel();
 
-    class BufferParam
-    {
-    public:
-        /** Tensor's binding point in this kernel. */
-        unsigned int binding_point = 0;
-        /** The base 2 logarithm of SSBO buffer data type size (Number of bits to be shift for offset calculation) */
-        unsigned int buffer_data_type_shift = 0;
-
-        /** Constructor
-         *
-         * @param[in] binding Tensor's binding point.
-         * @param[in] shift   Number of bits to be shift for offset calculation
-         */
-        BufferParam(const unsigned int binding, const unsigned int shift)
-            : binding_point(binding), buffer_data_type_shift(shift)
-        {
-        }
-    };
-
     /** Add the passed 1D tensor's parameters to the object's kernel's arguments starting from the index idx.
      *
      * @param[in] idx           Index at which to start adding the tensor's arguments.Input and output tensor will have sperated index, multiple indices start from 1, single index have to be set to 0.
@@ -74,15 +55,6 @@ public:
      */
     void add_1D_tensor_argument(unsigned int &idx, const IGCTensor *tensor, const unsigned int binding_point, const Window &window);
 
-    /** Add the passed 1D tensor's parameters to the object's kernel's arguments starting from the index idx.
-     *
-     * @param[in] idx    Index at which to start adding the tensor's arguments.Input and output tensor will have sperated index, multiple indices start from 1, single index have to be set to 0.
-     * @param[in] tensor Tensor to set as an argument of the object's kernel.
-     * @param[in] param  Additional parameter for GLES SSBO buffer.
-     * @param[in] window Window the kernel will be executed on.
-     */
-    void add_1D_tensor_argument(unsigned int &idx, const IGCTensor *tensor, const BufferParam &param, const Window &window);
-
     /** Add the passed 2D tensor's parameters to the object's kernel's arguments starting from the index idx.
      *
      * @param[in] idx           Index at which to start adding the tensor's arguments.Input and output tensor will have sperated index, multiple indices start from 1, single index have to be set to 0.
@@ -92,15 +64,6 @@ public:
      */
     void add_2D_tensor_argument(unsigned int &idx, const IGCTensor *tensor, const unsigned int binding_point, const Window &window);
 
-    /** Add the passed 2D tensor's parameters to the object's kernel's arguments starting from the index idx.
-     *
-     * @param[in] idx    Index at which to start adding the tensor's arguments.Input and output tensor will have sperated index, multiple indices start from 1, single index have to be set to 0.
-     * @param[in] tensor Tensor to set as an argument of the object's kernel.
-     * @param[in] param  Additional parameter for GLES SSBO buffer.
-     * @param[in] window Window the kernel will be executed on.
-     */
-    void add_2D_tensor_argument(unsigned int &idx, const IGCTensor *tensor, const BufferParam &param, const Window &window);
-
     /** Add the passed 3D tensor's parameters to the object's kernel's arguments starting from the index idx.
      *
      * @param[in] idx           Index at which to start adding the tensor's arguments.Input and output tensor will have sperated index, multiple indices start from 1, single index have to be set to 0.
@@ -110,15 +73,6 @@ public:
      */
     void add_3D_tensor_argument(unsigned int &idx, const IGCTensor *tensor, const unsigned int binding_point, const Window &window);
 
-    /** Add the passed 3D tensor's parameters to the object's kernel's arguments starting from the index idx.
-     *
-     * @param[in] idx    Index at which to start adding the tensor's arguments.Input and output tensor will have sperated index, multiple indices start from 1, single index have to be set to 0.
-     * @param[in] tensor Tensor to set as an argument of the object's kernel.
-     * @param[in] param  Additional parameter for GLES SSBO buffer.
-     * @param[in] window Window the kernel will be executed on.
-     */
-    void add_3D_tensor_argument(unsigned int &idx, const IGCTensor *tensor, const BufferParam &param, const Window &window);
-
     /** Returns the number of arguments enqueued per 1D tensor object.
      *
      * @return The number of arguments enqueues per 1D tensor object.
@@ -154,13 +108,13 @@ public:
 private:
     /** Add the passed tensor's parameters to the object's kernel's arguments starting from the index idx.
      *
-     * @param[in] idx    Index at which to start adding the tensor's arguments.Input and output tensor will have sperated index, multiple indices start from 1, single index have to be set to 0.
-     * @param[in] tensor Tensor to set as an argument of the object's kernel.
-     * @param[in] param  Additional parameter for GLES SSBO buffer.
-     * @param[in] window Window the kernel will be executed on.
+     * @param[in] idx           Index at which to start adding the tensor's arguments.Input and output tensor will have sperated index, multiple indices start from 1, single index have to be set to 0.
+     * @param[in] tensor        Tensor to set as an argument of the object's kernel.
+     * @param[in] binding_point Tensor's binding point in this kernel.
+     * @param[in] window        Window the kernel will be executed on.
      */
     template <unsigned int dimension_size>
-    void add_tensor_argument(unsigned int &idx, const IGCTensor *tensor, const BufferParam &param, const Window &window);
+    void add_tensor_argument(unsigned int &idx, const IGCTensor *tensor, const unsigned int binding_point, const Window &window);
 
     /** Returns the number of arguments enqueued per tensor object.
      *
diff --git a/src/core/GLES_COMPUTE/GCKernelLibrary.cpp b/src/core/GLES_COMPUTE/GCKernelLibrary.cpp
index 7766f95bcc..4c84c674fc 100644
--- a/src/core/GLES_COMPUTE/GCKernelLibrary.cpp
+++ b/src/core/GLES_COMPUTE/GCKernelLibrary.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017, 2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -380,21 +380,9 @@ const std::string GCKernelLibrary::preprocess_shader(const std::string &shader_s
         FIRST,
         SKIP_COMMENTS = FIRST,
         RESOLVE_INCLUDES,
-        SKIP_PREPROCESSOR_DIRECTIVES,
-        SEARCH_MACRO_DEFINITIONS,
-        EXPAND_MACRO_USES,
         LAST
     };
 
-    struct MacroDefinitionInfo
-    {
-        const std::vector<std::string> param_list;
-        const std::string              content;
-    };
-
-    // Found macro definitions so far
-    std::map<const std::string, const MacroDefinitionInfo> macro_definitions;
-
     // Define a GLES compute shader parser function
     std::function<std::string(const std::string &, ParserStage, int)> cs_parser;
     cs_parser = [&](const std::string & src, ParserStage stage, int nested_level) -> std::string
@@ -416,35 +404,6 @@ const std::string GCKernelLibrary::preprocess_shader(const std::string &shader_s
             case ParserStage::RESOLVE_INCLUDES:
                 search_pattern = R"rgx((?:^|\n)[ \t]*#include "(.*)")rgx";
                 break;
-            case ParserStage::SKIP_PREPROCESSOR_DIRECTIVES:
-                search_pattern = R"((^|\n)[ \t]*(#ifdef|#ifndef|#if)[^\n]+)";
-                break;
-            case ParserStage::SEARCH_MACRO_DEFINITIONS:
-                search_pattern = R"((?:^|\n)[ \t]*#define[ \t]+(\w+)(?:\((\w+(?:[ \t]*,[ \t]*\w+)*)\))?(?: |\t|\\\n)*((?:(?:[^\\\n]|\\[^\n])*\\+\n)*(?:[ \t]*[^ \t\n]+)*)[ \t]*)";
-                break;
-            case ParserStage::EXPAND_MACRO_USES:
-            {
-                if(macro_definitions.empty())
-                {
-                    // Nothing to expand
-                    return src;
-                }
-                int i = 0;
-                for(auto &def : macro_definitions)
-                {
-                    if(i == 0)
-                    {
-                        search_pattern = R"((\b)" + def.first;
-                    }
-                    else
-                    {
-                        search_pattern += R"(\b|\b)" + def.first;
-                    }
-                    i++;
-                }
-                search_pattern += R"(\b))";
-                break;
-            }
             default:
                 break;
         }
@@ -469,126 +428,7 @@ const std::string GCKernelLibrary::preprocess_shader(const std::string &shader_s
                     dst.append(cs_parser(read_file(source_name, false), ParserStage::FIRST, 0));
                     break;
                 }
-                case ParserStage::SEARCH_MACRO_DEFINITIONS:
-                {
-                    std::regex                     params_regex(R"(\b\w+\b)");
-                    const std::string              macro_param_str = match.str(2);
-                    const std::vector<std::string> macro_param_list(
-                        std::sregex_token_iterator(macro_param_str.begin(),
-                                                   macro_param_str.end(),
-                                                   params_regex),
-                        std::sregex_token_iterator());
-
-                    const MacroDefinitionInfo info =
-                    {
-                        macro_param_list,
-                        match.str(3)
-                    };
-                    // Collect the macro definition data and not change the shader source
-                    macro_definitions.insert(std::pair<const std::string, const MacroDefinitionInfo>(match.str(1), info));
-                    dst.append(match.str());
-                    break;
-                }
-                case ParserStage::EXPAND_MACRO_USES:
-                {
-                    ptrdiff_t                args_str_length = 0;
-                    std::vector<std::string> args_list;
-
-                    // Walk through argument list, because the regular expression does NOT support nested parentheses
-                    size_t cur_args_str_pos = match.position() + match.length();
-                    if(src[cur_args_str_pos++] == '(')
-                    {
-                        int       nested_parentheses = 0;
-                        ptrdiff_t cur_arg_pos        = cur_args_str_pos;
-                        ptrdiff_t cur_arg_length     = 0;
-
-                        args_str_length++;
-                        while(src[cur_args_str_pos] != ')' || nested_parentheses != 0)
-                        {
-                            switch(src[cur_args_str_pos++])
-                            {
-                                case '(':
-                                    nested_parentheses++;
-                                    cur_arg_length++;
-                                    break;
-                                case ',':
-                                    if(nested_parentheses == 0)
-                                    {
-                                        args_list.push_back(src.substr(cur_arg_pos, cur_arg_length));
-                                        cur_arg_pos    = cur_args_str_pos;
-                                        cur_arg_length = 0;
-                                    }
-                                    else
-                                    {
-                                        cur_arg_length++;
-                                    }
-                                    break;
-                                case ' ':
-                                case '\t':
-                                    if(cur_arg_length == 0)
-                                    {
-                                        cur_arg_pos++;
-                                    }
-                                    else
-                                    {
-                                        cur_arg_length++;
-                                    }
-                                    break;
-                                case ')':
-                                    nested_parentheses--;
-                                // no break here!
-                                default:
-                                    cur_arg_length++;
-                                    break;
-                            }
-                            args_str_length++;
-                        }
-                        if(src[cur_args_str_pos] == ')' && nested_parentheses == 0)
-                        {
-                            args_list.push_back(src.substr(cur_arg_pos, cur_arg_length));
-                        }
-                        args_str_length++;
-                    }
-
-                    std::string                    expanded_content = match.str();
-                    const std::vector<std::string> macro_param_list = macro_definitions.at(match.str()).param_list;
-
-                    if((nested_level != 0 || !macro_param_list.empty()) && macro_param_list.size() == args_list.size())
-                    {
-                        parsed_pos += args_str_length;
-                        expanded_content = macro_definitions.at(match.str()).content;
-                        size_t i         = 0;
-                        for(auto &param_name : macro_param_list)
-                        {
-                            std::regex params_regex(R"(\b)" + param_name + R"(\b)");
-                            expanded_content.assign(std::regex_replace(expanded_content, params_regex, args_list[i]));
-                            ++i;
-                        }
-                        // Expand macro recursively
-                        expanded_content = cs_parser(expanded_content, stage, nested_level + 1);
-
-                        if(nested_level == 0)
-                        {
-                            const std::regex token_pasting_rgx = std::regex(R"(\b##\b)");
-                            if(std::regex_search(expanded_content, token_pasting_rgx))
-                            {
-                                // Remove token pasting operator "##"
-                                expanded_content.assign(std::regex_replace(expanded_content, std::regex(token_pasting_rgx), ""));
-                                // Trim trailing whitespace
-                                expanded_content.assign(std::regex_replace(expanded_content, std::regex(R"([ \t]*\\\n)"), "\n"));
-                            }
-                            else
-                            {
-                                // Do not expand the macro if the result does not have token pasting operator "##"
-                                expanded_content = src.substr(match.position(), match.length() + args_str_length);
-                            }
-                        }
-                    }
-                    dst.append(expanded_content);
-                    break;
-                }
                 case ParserStage::SKIP_COMMENTS:
-                case ParserStage::SKIP_PREPROCESSOR_DIRECTIVES:
                 default:
                     dst.append(match.str());
                     break;
@@ -622,12 +462,7 @@ const GCProgram &GCKernelLibrary::load_program(const std::string &program_name)
         ARM_COMPUTE_ERROR("Embedded program for %s does not exist.", program_name.c_str());
     }
 
-    // TODO(APPBROWSER-298): Do not call shader preprocessor here
-    //       We should do the preprocess at compile time
-    //       The preprocess_shader function is used for support "#include" directive and token pasting operator "##".
-    //       This job could be done at compile time by using a python script in order to get better performance at runtime.
-    //       BTW: We usually defined EMBEDDED_KERNELS in release build.
-    program = GCProgram(program_name, preprocess_shader(program_source_it->second));
+    program = GCProgram(program_name, program_source_it->second);
 #else  /* EMBEDDED_KERNELS */
     // Check for binary
     std::string source_name = _shader_path + program_name;
diff --git a/src/core/GLES_COMPUTE/IGCKernel.cpp b/src/core/GLES_COMPUTE/IGCKernel.cpp
index df9c798e42..6666c0f3ae 100644
--- a/src/core/GLES_COMPUTE/IGCKernel.cpp
+++ b/src/core/GLES_COMPUTE/IGCKernel.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017, 2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -79,7 +79,7 @@ unsigned int           IGCKernel::num_arguments_per_tensor() const
 }
 
 template <unsigned int dimension_size>
-void IGCKernel::add_tensor_argument(unsigned int &idx, const IGCTensor *tensor, const BufferParam &param, const Window &window)
+void IGCKernel::add_tensor_argument(unsigned int &idx, const IGCTensor *tensor, const unsigned int binding_point, const Window &window)
 {
     ARM_COMPUTE_ERROR_ON(tensor == nullptr);
 
@@ -103,7 +103,6 @@ void IGCKernel::add_tensor_argument(unsigned int &idx, const IGCTensor *tensor,
     }
 
     _kernel.set_argument(idx++, offset_first_element);
-    _kernel.set_argument(idx++, param.buffer_data_type_shift);
 
     // Rounding up the tensor attributes structure in compute shader to a multiple of a vec4
     unsigned int idx_end = ceil_to_multiple(idx, 4);
@@ -113,7 +112,7 @@ void IGCKernel::add_tensor_argument(unsigned int &idx, const IGCTensor *tensor,
     }
     idx = idx_end;
 
-    ARM_COMPUTE_GL_CHECK(glBindBufferBase(GL_SHADER_STORAGE_BUFFER, param.binding_point, tensor->gc_buffer()));
+    ARM_COMPUTE_GL_CHECK(glBindBufferBase(GL_SHADER_STORAGE_BUFFER, binding_point, tensor->gc_buffer()));
 
     ARM_COMPUTE_ERROR_ON_MSG(idx_start + num_arguments_per_tensor<dimension_size>() != idx,
                              "add_%dD_tensor_argument() is supposed to add exactly %d arguments to the kernel", dimension_size, num_arguments_per_tensor<dimension_size>());
@@ -122,32 +121,17 @@ void IGCKernel::add_tensor_argument(unsigned int &idx, const IGCTensor *tensor,
 
 void IGCKernel::add_1D_tensor_argument(unsigned int &idx, const IGCTensor *tensor, const unsigned int binding_point, const Window &window)
 {
-    add_tensor_argument<1>(idx, tensor, BufferParam(binding_point, 0), window);
-}
-
-void IGCKernel::add_1D_tensor_argument(unsigned int &idx, const IGCTensor *tensor, const BufferParam &param, const Window &window)
-{
-    add_tensor_argument<1>(idx, tensor, param, window);
+    add_tensor_argument<1>(idx, tensor, binding_point, window);
 }
 
 void IGCKernel::add_2D_tensor_argument(unsigned int &idx, const IGCTensor *tensor, const unsigned int binding_point, const Window &window)
 {
-    add_tensor_argument<2>(idx, tensor, BufferParam(binding_point, 0), window);
-}
-
-void IGCKernel::add_2D_tensor_argument(unsigned int &idx, const IGCTensor *tensor, const BufferParam &param, const Window &window)
-{
-    add_tensor_argument<2>(idx, tensor, param, window);
+    add_tensor_argument<2>(idx, tensor, binding_point, window);
 }
 
 void IGCKernel::add_3D_tensor_argument(unsigned int &idx, const IGCTensor *tensor, const unsigned int binding_point, const Window &window)
 {
-    add_tensor_argument<3>(idx, tensor, BufferParam(binding_point, 0), window);
-}
-
-void IGCKernel::add_3D_tensor_argument(unsigned int &idx, const IGCTensor *tensor, const BufferParam &param, const Window &window)
-{
-    add_tensor_argument<3>(idx, tensor, param, window);
+    add_tensor_argument<3>(idx, tensor, binding_point, window);
 }
 
 unsigned int IGCKernel::num_arguments_per_1D_tensor() const
diff --git a/src/core/GLES_COMPUTE/cs_shaders/helpers.h b/src/core/GLES_COMPUTE/cs_shaders/helpers.h
deleted file mode 100644
index ba27eec716..0000000000
--- a/src/core/GLES_COMPUTE/cs_shaders/helpers.h
+++ /dev/null
@@ -1,584 +0,0 @@
-/*
- * Copyright (c) 2017 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef ARM_COMPUTE_HELPER_H
-#define ARM_COMPUTE_HELPER_H
-
-#define CLAMP(x, min_val, max_val) min(max(x, min_val), max_val)
-
-#define VEC_DATA_TYPE_STR(type, size) type##size
-#define VEC_DATA_TYPE(type, size) VEC_DATA_TYPE_STR(type, size)
-
-#define CONVERT(x, type) type(x)
-
-#define PACK(value, stype, dtype) \
-    pack_##stype##_##dtype(value)
-
-#define UNPACK(value, stype, dtype) \
-    unpack_##stype##_##dtype(value)
-
-#define BUFFER_DECLARATION(name, location, type, access)          \
-    layout(std430, binding = location) access buffer name##Buffer \
-    {                                                             \
-        type name##_ptr[];                                        \
-    }
-
-#define VECTOR_PARAM_DECLARATION(name)         \
-    uint name##_stride_x;                      \
-    uint name##_step_x;                        \
-    uint name##_offset_first_element_in_bytes; \
-    uint name##_buffer_data_type_size
-
-#define IMAGE_PARAM_DECLARATION(name)          \
-    uint name##_stride_x;                      \
-    uint name##_step_x;                        \
-    uint name##_stride_y;                      \
-    uint name##_step_y;                        \
-    uint name##_offset_first_element_in_bytes; \
-    uint name##_buffer_data_type_size;         \
-    uint name##_padding1;                      \
-    uint name##_padding2
-
-#define TENSOR3D_PARAM_DECLARATION(name)       \
-    uint name##_stride_x;                      \
-    uint name##_step_x;                        \
-    uint name##_stride_y;                      \
-    uint name##_step_y;                        \
-    uint name##_stride_z;                      \
-    uint name##_step_z;                        \
-    uint name##_offset_first_element_in_bytes; \
-    uint name##_buffer_data_type_size
-
-/** Structure to hold Vector information */
-struct Vector
-{
-    uint current_offset;                /**< Current offset of vector */
-    uint offset_first_element_in_bytes; /**< The offset of the first element in the source image */
-    uint stride_x;                      /**< Stride of the image in X dimension (in bytes) */
-};
-
-/** Structure to hold Image information */
-struct Image
-{
-    uint current_offset;                /**< Current offset of image */
-    uint offset_first_element_in_bytes; /**< The offset of the first element in the source image */
-    uint stride_x;                      /**< Stride of the image in X dimension (in bytes) */
-    uint stride_y;                      /**< Stride of the image in Y dimension (in bytes) */
-};
-
-/** Structure to hold 3D tensor information */
-struct Tensor3D
-{
-    uint current_offset;                /**< Current offset of tensor */
-    uint offset_first_element_in_bytes; /**< The offset of the first element in the source image */
-    uint stride_x;                      /**< Stride of the image in X dimension (in bytes) */
-    uint stride_y;                      /**< Stride of the image in Y dimension (in bytes) */
-    uint stride_z;                      /**< Stride of the image in Z dimension (in bytes) */
-};
-
-/////////////////////////////////////////////////////////////
-// TODO: old to be removed
-
-#define CONVERT_TO_VECTOR_STRUCT(name) \
-    update_vector_workitem_offset(name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x)
-
-#define CONVERT_TO_VECTOR_STRUCT_FP16(name) \
-    update_vector_workitem_offset_fp16(name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x)
-
-#define CONVERT_TO_VECTOR_STRUCT_NO_STEP(name) \
-    update_vector_workitem_offset(name##_offset_first_element_in_bytes, name##_stride_x, uint(0))
-
-#define CONVERT_TO_VECTOR_STRUCT_NO_STEP_FP16(name) \
-    update_vector_workitem_offset_fp16(name##_offset_first_element_in_bytes, name##_stride_x, uint(0))
-
-#define CONVERT_TO_IMAGE_STRUCT(name) \
-    update_image_workitem_offset(name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x, name##_stride_y, name##_step_y)
-
-#define CONVERT_TO_IMAGE_STRUCT_FP16(name) \
-    update_image_workitem_offset_fp16(name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x, name##_stride_y, name##_step_y)
-
-#define CONVERT_TO_IMAGE_STRUCT_NO_STEP(name) \
-    update_image_workitem_offset(name##_offset_first_element_in_bytes, name##_stride_x, uint(0), name##_stride_y, uint(0))
-
-#define CONVERT_TO_IMAGE_STRUCT_NO_STEP_FP16(name) \
-    update_image_workitem_offset_fp16(name##_offset_first_element_in_bytes, name##_stride_x, uint(0), name##_stride_y, uint(0))
-
-#define CONVERT_TENSOR3D_TO_IMAGE_STRUCT_NO_STEP(name) \
-    update_image_from_tensor3D_workitem_offset(name##_offset_first_element_in_bytes, name##_stride_x, uint(0), name##_stride_y, uint(0), name##_stride_z, name##_step_z)
-
-#define CONVERT_TENSOR3D_TO_IMAGE_STRUCT_NO_STEP_FP16(name) \
-    update_image_from_tensor3D_workitem_offset_fp16(name##_offset_first_element_in_bytes, name##_stride_x, uint(0), name##_stride_y, uint(0), name##_stride_z, name##_step_z)
-
-#define CONVERT_TENSOR3D_TO_IMAGE_STRUCT(name) \
-    update_image_from_tensor3D_workitem_offset(name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x, name##_stride_y, name##_step_y, name##_stride_z, name##_step_z)
-
-#define CONVERT_TENSOR3D_TO_IMAGE_STRUCT_FP16(name) \
-    update_image_from_tensor3D_workitem_offset_fp16(name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x, name##_stride_y, name##_step_y, name##_stride_z, name##_step_z)
-
-#define CONVERT_TO_TENSOR3D_STRUCT(name)                                                                                                  \
-    update_tensor3D_workitem_offset(name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x, name##_stride_y, name##_step_y, \
-                                    name##_stride_z, name##_step_z)
-
-#define CONVERT_TO_TENSOR3D_STRUCT_FP16(name)                                                                                                  \
-    update_tensor3D_workitem_offset_fp16(name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x, name##_stride_y, name##_step_y, \
-                                         name##_stride_z, name##_step_z)
-
-#define CONVERT_TO_TENSOR3D_STRUCT_NO_STEP(name) \
-    update_tensor3D_workitem_offset(name##_offset_first_element_in_bytes, name##_stride_x, uint(0), name##_stride_y, uint(0), name##_stride_z, uint(0))
-
-#define CONVERT_TO_TENSOR3D_STRUCT_NO_STEP_FP16(name) \
-    update_tensor3D_workitem_offset_fp16(name##_offset_first_element_in_bytes, name##_stride_x, uint(0), name##_stride_y, uint(0), name##_stride_z, uint(0))
-
-// FIXME: Redesign the macros if different data types are supported.
-#define LOAD4(name, offset) \
-    name##_ptr[offset]
-
-#define STORE4(name, offset, value) \
-    name##_ptr[offset] = value
-
-// Load 1 element, which size is determined by ssbo type.
-#define LOAD1(r, name, offset) \
-    r = name##_ptr[offset]
-
-#define STORE1(name, offset, value) \
-    name##_ptr[offset] = value
-
-#define LOAD2(r, name, offset) \
-    LOAD1(r[0], name, offset); \
-    LOAD1(r[1], name, (offset) + uint(1))
-
-#define STORE2(name, offset, value)            \
-    name##_ptr[offset]             = value[0]; \
-    name##_ptr[(offset) + uint(1)] = value[1]
-
-#define LOAD3(r, name, offset)             \
-    LOAD1(r[0], name, offset);             \
-    LOAD1(r[1], name, (offset) + uint(1)); \
-    LOAD1(r[2], name, (offset) + uint(2))
-
-#define CURRENT_OFFSET(name) \
-    name.current_offset
-
-/** Wrap vector information into an Vector structure, and make the offset to be this workitem's position.
- *
- * @param[in] offset_first_element_in_bytes The offset of the first element in the source vector
- * @param[in] stride_x                      Stride of the vector in X dimension (in bytes)
- * @param[in] step_x                        stride_x * number of elements along X processed per workitem(in bytes)
- *
- * @return An vector object
- */
-Vector update_vector_workitem_offset(uint offset_first_element_in_bytes, uint stride_x, uint step_x)
-{
-    Vector vector;
-    vector.offset_first_element_in_bytes = offset_first_element_in_bytes;
-    vector.stride_x                      = stride_x;
-    vector.current_offset                = (vector.offset_first_element_in_bytes + gl_GlobalInvocationID.x * step_x) >> 2;
-
-    return vector;
-}
-
-/** Wrap vector information into an Vector structure, and make the offset to be this workitem's position.
- *
- * @param[in] offset_first_element_in_bytes The offset of the first element in the source vector
- * @param[in] stride_x                      Stride of the vector in X dimension (in bytes)
- * @param[in] step_x                        stride_x * number of elements along X processed per workitem(in bytes)
- *
- * @return An vector object
- */
-Vector update_vector_workitem_offset_fp16(uint offset_first_element_in_bytes, uint stride_x, uint step_x)
-{
-    Vector vector;
-    vector.offset_first_element_in_bytes = offset_first_element_in_bytes;
-    vector.stride_x                      = stride_x;
-    vector.current_offset                = vector.offset_first_element_in_bytes + gl_GlobalInvocationID.x * step_x;
-
-    return vector;
-}
-
-/** Wrap image information into an Image structure, and make the offset to be this workitem's position.
- *
- * @param[in] offset_first_element_in_bytes The offset of the first element in the source image
- * @param[in] stride_x                      Stride of the image in X dimension (in bytes)
- * @param[in] step_x                        stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] stride_y                      Stride of the image in Y dimension (in bytes)
- * @param[in] step_y                        stride_y * number of elements along Y processed per workitem(in bytes)
- *
- * @return An image object
- */
-Image update_image_workitem_offset(uint offset_first_element_in_bytes, uint stride_x, uint step_x, uint stride_y, uint step_y)
-{
-    Image img;
-    img.offset_first_element_in_bytes = offset_first_element_in_bytes;
-    img.stride_x                      = stride_x;
-    img.stride_y                      = stride_y;
-    img.current_offset                = (img.offset_first_element_in_bytes + gl_GlobalInvocationID.x * step_x + gl_GlobalInvocationID.y * step_y) >> 2;
-
-    return img;
-}
-
-/** Wrap image information into an Image structure, and make the offset to be this workitem's position.
- *
- * @param[in] offset_first_element_in_bytes The offset of the first element in the source image
- * @param[in] stride_x                      Stride of the image in X dimension (in bytes)
- * @param[in] step_x                        stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] stride_y                      Stride of the image in Y dimension (in bytes)
- * @param[in] step_y                        stride_y * number of elements along Y processed per workitem(in bytes)
- *
- * @return An image object
- */
-Image update_image_workitem_offset_fp16(uint offset_first_element_in_bytes, uint stride_x, uint step_x, uint stride_y, uint step_y)
-{
-    Image img;
-    img.offset_first_element_in_bytes = offset_first_element_in_bytes;
-    img.stride_x                      = stride_x;
-    img.stride_y                      = stride_y;
-    img.current_offset                = img.offset_first_element_in_bytes + gl_GlobalInvocationID.x * step_x + gl_GlobalInvocationID.y * step_y;
-
-    return img;
-}
-
-/** Wrap 3D tensor information into an image structure, and make the offset to be this workitem's position.
- *
- * @param[in] offset_first_element_in_bytes The offset of the first element in the source image
- * @param[in] stride_x                      Stride of the image in X dimension (in bytes)
- * @param[in] step_x                        stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] stride_y                      Stride of the image in Y dimension (in bytes)
- * @param[in] step_y                        stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] stride_z                      Stride of the image in Z dimension (in bytes)
- * @param[in] step_z                        stride_z * number of elements along Z processed per workitem(in bytes)
- *
- * @return A 2D Image object
- */
-Image update_image_from_tensor3D_workitem_offset(uint offset_first_element_in_bytes, uint stride_x, uint step_x, uint stride_y, uint step_y, uint stride_z, uint step_z)
-{
-    Image img;
-    img.offset_first_element_in_bytes = offset_first_element_in_bytes;
-    img.stride_x                      = stride_x;
-    img.stride_y                      = stride_y;
-    img.current_offset                = (img.offset_first_element_in_bytes + gl_GlobalInvocationID.x * step_x + gl_GlobalInvocationID.y * step_y + gl_GlobalInvocationID.z * step_z) >> 2;
-
-    return img;
-}
-
-/** Wrap 3D tensor information into an image structure, and make the offset to be this workitem's position.
- *
- * @param[in] offset_first_element_in_bytes The offset of the first element in the source image
- * @param[in] stride_x                      Stride of the image in X dimension (in bytes)
- * @param[in] step_x                        stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] stride_y                      Stride of the image in Y dimension (in bytes)
- * @param[in] step_y                        stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] stride_z                      Stride of the image in Z dimension (in bytes)
- * @param[in] step_z                        stride_z * number of elements along Z processed per workitem(in bytes)
- *
- * @return A 2D Image object
- */
-Image update_image_from_tensor3D_workitem_offset_fp16(uint offset_first_element_in_bytes, uint stride_x, uint step_x, uint stride_y, uint step_y, uint stride_z, uint step_z)
-{
-    Image img;
-    img.offset_first_element_in_bytes = offset_first_element_in_bytes;
-    img.stride_x                      = stride_x;
-    img.stride_y                      = stride_y;
-    img.current_offset                = img.offset_first_element_in_bytes + gl_GlobalInvocationID.x * step_x + gl_GlobalInvocationID.y * step_y + gl_GlobalInvocationID.z * step_z;
-
-    return img;
-}
-
-/** Wrap 3D tensor information into an tensor structure, and make the offset to be this workitem's position.
- *
- * @param[in] offset_first_element_in_bytes The offset of the first element in the source image
- * @param[in] stride_x                      Stride of the image in X dimension (in bytes)
- * @param[in] step_x                        stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] stride_y                      Stride of the image in Y dimension (in bytes)
- * @param[in] step_y                        stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] stride_z                      Stride of the image in Z dimension (in bytes)
- * @param[in] step_z                        stride_z * number of elements along Z processed per workitem(in bytes)
- *
- * @return A 3D tensor object
- */
-Tensor3D update_tensor3D_workitem_offset(uint offset_first_element_in_bytes, uint stride_x, uint step_x, uint stride_y, uint step_y, uint stride_z, uint step_z)
-{
-    Tensor3D tensor;
-    tensor.offset_first_element_in_bytes = offset_first_element_in_bytes;
-    tensor.stride_x                      = stride_x;
-    tensor.stride_y                      = stride_y;
-    tensor.stride_z                      = stride_z;
-    tensor.current_offset                = (tensor.offset_first_element_in_bytes + gl_GlobalInvocationID.x * step_x + gl_GlobalInvocationID.y * step_y + gl_GlobalInvocationID.z * step_z) >> 2;
-
-    return tensor;
-}
-
-/** Wrap 3D tensor information into an tensor structure, and make the offset to be this workitem's position.
- *
- * @param[in] offset_first_element_in_bytes The offset of the first element in the source image
- * @param[in] stride_x                      Stride of the image in X dimension (in bytes)
- * @param[in] step_x                        stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] stride_y                      Stride of the image in Y dimension (in bytes)
- * @param[in] step_y                        stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] stride_z                      Stride of the image in Z dimension (in bytes)
- * @param[in] step_z                        stride_z * number of elements along Z processed per workitem(in bytes)
- *
- * @return A 3D tensor object
- */
-Tensor3D update_tensor3D_workitem_offset_fp16(uint offset_first_element_in_bytes, uint stride_x, uint step_x, uint stride_y, uint step_y, uint stride_z, uint step_z)
-{
-    Tensor3D tensor;
-    tensor.offset_first_element_in_bytes = offset_first_element_in_bytes;
-    tensor.stride_x                      = stride_x;
-    tensor.stride_y                      = stride_y;
-    tensor.stride_z                      = stride_z;
-    tensor.current_offset                = tensor.offset_first_element_in_bytes + gl_GlobalInvocationID.x * step_x + gl_GlobalInvocationID.y * step_y + gl_GlobalInvocationID.z * step_z;
-
-    return tensor;
-}
-
-/** Get the pointer position of a Vector
- *
- * @param[in] vec Pointer to the starting position of the buffer
- * @param[in] x   Relative X position
- */
-uint vector_offset(Vector vec, int x)
-{
-    return CONVERT(CONVERT(vec.current_offset << 2, int) + x * CONVERT(vec.stride_x, int), uint) >> 2;
-}
-
-/** Get the pointer position of a Vector
- *
- * @param[in] vec Pointer to the starting position of the buffer
- * @param[in] x   Relative X position
- */
-uint vector_offset_fp16(Vector vec, int x)
-{
-    return CONVERT(CONVERT(vec.current_offset, int) + x * CONVERT(vec.stride_x, int), uint);
-}
-
-/** Get the pointer position of a Image
- *
- * @param[in] img Pointer to the starting position of the buffer
- * @param[in] x   Relative X position
- * @param[in] y   Relative Y position
- */
-uint offset(Image img, int x, int y)
-{
-    return CONVERT(CONVERT(img.current_offset << 2, int) + x * CONVERT(img.stride_x, int) + y * CONVERT(img.stride_y, int), uint) >> 2;
-}
-
-/** Get the pointer position of a Image
- *
- * @param[in] img Pointer to the starting position of the buffer
- * @param[in] x   Relative X position
- * @param[in] y   Relative Y position
- */
-uint offset_fp16(Image img, int x, int y)
-{
-    return CONVERT(CONVERT(img.current_offset, int) + x * CONVERT(img.stride_x, int) + y * CONVERT(img.stride_y, int), uint);
-}
-
-/** Get the pointer position of a Tensor3D
- *
- * @param[in] tensor Pointer to the starting postion of the buffer
- * @param[in] x      Relative X position
- * @param[in] y      Relative Y position
- * @param[in] z      Relative Z position
- */
-uint tensor3D_offset(Tensor3D tensor, int x, int y, int z)
-{
-    return CONVERT(CONVERT(tensor.current_offset << 2, int) + x * CONVERT(tensor.stride_x, int) + y * CONVERT(tensor.stride_y, int) + z * CONVERT(tensor.stride_z, int), uint) >> 2;
-}
-
-/** Get the pointer position of a Tensor3D
- *
- * @param[in] tensor Pointer to the starting postion of the buffer
- * @param[in] x      Relative X position
- * @param[in] y      Relative Y position
- * @param[in] z      Relative Z position
- */
-uint tensor3D_offset_fp16(Tensor3D tensor, int x, int y, int z)
-{
-    return CONVERT(CONVERT(tensor.current_offset, int) + x * CONVERT(tensor.stride_x, int) + y * CONVERT(tensor.stride_y, int) + z * CONVERT(tensor.stride_z, int), uint);
-}
-
-/////////////////////////////////////////////////////////////
-// new one
-
-#define GC_CONVERT_TO_VECTOR_STRUCT(name) \
-    gc_update_vector_workitem_offset(name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x)
-
-#define GC_CONVERT_TO_VECTOR_STRUCT_NO_STEP(name) \
-    gc_update_vector_workitem_offset(name##_offset_first_element_in_bytes, name##_stride_x, uint(0))
-
-#define GC_CONVERT_TO_IMAGE_STRUCT(name) \
-    gc_update_image_workitem_offset(name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x, name##_stride_y, name##_step_y)
-
-#define GC_CONVERT_TO_IMAGE_STRUCT_NO_STEP(name) \
-    gc_update_image_workitem_offset(name##_offset_first_element_in_bytes, name##_stride_x, uint(0), name##_stride_y, uint(0))
-
-#define GC_CONVERT_TO_TENSOR3D_STRUCT(name)                                                                                                  \
-    gc_update_tensor3D_workitem_offset(name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x, name##_stride_y, name##_step_y, \
-                                       name##_stride_z, name##_step_z)
-
-#define GC_CONVERT_TO_TENSOR3D_STRUCT_NO_STEP(name) \
-    gc_update_tensor3D_workitem_offset(name##_offset_first_element_in_bytes, name##_stride_x, uint(0), name##_stride_y, uint(0), name##_stride_z, uint(0))
-
-#define GC_CONVERT_TENSOR3D_TO_IMAGE_STRUCT(name) \
-    gc_update_image_from_tensor3D_workitem_offset(name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x, name##_stride_y, name##_step_y, name##_stride_z, name##_step_z)
-
-#define GC_CONVERT_TENSOR3D_TO_IMAGE_STRUCT_NO_STEP(name) \
-    gc_update_image_from_tensor3D_workitem_offset(name##_offset_first_element_in_bytes, name##_stride_x, uint(0), name##_stride_y, uint(0), name##_stride_z, name##_step_z)
-
-Vector gc_update_vector_workitem_offset(uint offset_first_element_in_bytes, uint stride_x, uint step_x)
-{
-    Vector vector;
-    vector.offset_first_element_in_bytes = offset_first_element_in_bytes;
-    vector.stride_x                      = stride_x;
-    vector.current_offset                = vector.offset_first_element_in_bytes + gl_GlobalInvocationID.x * step_x;
-
-    return vector;
-}
-
-Image gc_update_image_workitem_offset(uint offset_first_element_in_bytes, uint stride_x, uint step_x, uint stride_y, uint step_y)
-{
-    Image img;
-    img.offset_first_element_in_bytes = offset_first_element_in_bytes;
-    img.stride_x                      = stride_x;
-    img.stride_y                      = stride_y;
-    img.current_offset                = img.offset_first_element_in_bytes + gl_GlobalInvocationID.x * step_x + gl_GlobalInvocationID.y * step_y;
-
-    return img;
-}
-
-Tensor3D gc_update_tensor3D_workitem_offset(uint offset_first_element_in_bytes, uint stride_x, uint step_x, uint stride_y, uint step_y, uint stride_z, uint step_z)
-{
-    Tensor3D tensor;
-    tensor.offset_first_element_in_bytes = offset_first_element_in_bytes;
-    tensor.stride_x                      = stride_x;
-    tensor.stride_y                      = stride_y;
-    tensor.stride_z                      = stride_z;
-    tensor.current_offset                = tensor.offset_first_element_in_bytes + gl_GlobalInvocationID.x * step_x + gl_GlobalInvocationID.y * step_y + gl_GlobalInvocationID.z * step_z;
-
-    return tensor;
-}
-
-Image gc_update_image_from_tensor3D_workitem_offset(uint offset_first_element_in_bytes, uint stride_x, uint step_x, uint stride_y, uint step_y, uint stride_z, uint step_z)
-{
-    Image img;
-    img.offset_first_element_in_bytes = offset_first_element_in_bytes;
-    img.stride_x                      = stride_x;
-    img.stride_y                      = stride_y;
-    img.current_offset                = img.offset_first_element_in_bytes + gl_GlobalInvocationID.x * step_x + gl_GlobalInvocationID.y * step_y + gl_GlobalInvocationID.z * step_z;
-
-    return img;
-}
-
-#define GC_CURRENT_OFFSET(name) \
-    name.current_offset
-
-uint gc_vector_offset(Vector vec, int x)
-{
-    return CONVERT(CONVERT(vec.current_offset, int) + x * CONVERT(vec.stride_x, int), uint);
-}
-
-uint gc_image_offset(Image img, int x, int y)
-{
-    return CONVERT(CONVERT(img.current_offset, int) + x * CONVERT(img.stride_x, int) + y * CONVERT(img.stride_y, int), uint);
-}
-
-uint gc_tensor3D_offset(Tensor3D tensor, int x, int y, int z)
-{
-    return CONVERT(CONVERT(tensor.current_offset, int) + x * CONVERT(tensor.stride_x, int) + y * CONVERT(tensor.stride_y, int) + z * CONVERT(tensor.stride_z, int), uint);
-}
-
-// load/store number of element depends on buffer type
-#define GC_LOAD1(r, name, offset) \
-    r = name##_ptr[offset]
-
-#define GC_LOAD2(r, name, offset) \
-    GC_LOAD1(r[0], name, offset); \
-    GC_LOAD1(r[1], name, (offset) + uint(1))
-
-#define GC_LOAD3(r, name, offset)             \
-    GC_LOAD1(r[0], name, offset);             \
-    GC_LOAD1(r[1], name, (offset) + uint(1)); \
-    GC_LOAD1(r[2], name, (offset) + uint(2))
-
-#define GC_STORE1(value, name, offset) \
-    name##_ptr[offset] = value
-
-#define GC_STORE2(value, name, offset) \
-    GC_STORE1(value[0], name, offset); \
-    GC_STORE1(value[1], name, (offset) + uint(1))
-
-#define GC_STORE3(value, name, offset)             \
-    GC_STORE1(value[0], name, offset);             \
-    GC_STORE1(value[1], name, (offset) + uint(1)); \
-    GC_STORE1(value[2], name, (offset) + uint(2))
-
-// has to manually expand them since not supported by compiler
-#define GC_LOAD1_1D_OFFSET(r, name, x) \
-    GC_LOAD1(r, name, gc_vector_offset(name, int(x)) >> name##_buffer_data_type_size)
-
-#define GC_LOAD1_2D_OFFSET(r, name, x, y) \
-    GC_LOAD1(r, name, gc_image_offset(name, int(x), int(y)) >> name##_buffer_data_type_size)
-
-#define GC_LOAD1_3D_OFFSET(r, name, x, y, z) \
-    GC_LOAD1(r, name, gc_tensor3D_offset(name, int(x), int(y), int(z)) >> name##_buffer_data_type_size)
-
-#define GC_STORE1_1D_OFFSET(value, name, x) \
-    GC_STORE1(value, name, gc_vector_offset(name, int(x)) >> name##_buffer_data_type_size)
-
-#define GC_STORE1_2D_OFFSET(value, name, x, y) \
-    GC_STORE1(value, name, gc_image_offset(name, int(x), int(y)) >> name##_buffer_data_type_size)
-
-#define GC_STORE1_3D_OFFSET(value, name, x, y, z) \
-    GC_STORE1(value, name, gc_tensor3D_offset(name, int(x), int(y), int(z)) >> name##_buffer_data_type_size)
-
-#define GC_LOAD2_1D_OFFSET(r, name, x) \
-    GC_LOAD2(r, name, gc_vector_offset(name, int(x)) >> name##_buffer_data_type_size)
-
-#define GC_LOAD2_2D_OFFSET(r, name, x, y) \
-    GC_LOAD2(r, name, gc_image_offset(name, int(x), int(y)) >> name##_buffer_data_type_size)
-
-#define GC_LOAD2_3D_OFFSET(r, name, x, y, z) \
-    GC_LOAD2(r, name, gc_tensor3D_offset(name, int(x), int(y), int(z)) >> name##_buffer_data_type_size)
-
-#define GC_STORE2_1D_OFFSET(value, name, x) \
-    GC_STORE2(value, name, gc_vector_offset(name, int(x)) >> name##_buffer_data_type_size)
-
-#define GC_STORE2_2D_OFFSET(value, name, x, y) \
-    GC_STORE2(value, name, gc_image_offset(name, int(x), int(y)) >> name##_buffer_data_type_size)
-
-#define GC_STORE2_3D_OFFSET(value, name, x, y, z) \
-    GC_STORE2(value, name, gc_tensor3D_offset(name, int(x), int(y), int(z)) >> name##_buffer_data_type_size)
-
-#define GC_LOAD3_1D_OFFSET(r, name, x) \
-    GC_LOAD3(r, name, gc_vector_offset(name, int(x)) >> name##_buffer_data_type_size)
-
-#define GC_LOAD3_2D_OFFSET(r, name, x, y) \
-    GC_LOAD3(r, name, gc_image_offset(name, int(x), int(y)) >> name##_buffer_data_type_size)
-
-#define GC_LOAD3_3D_OFFSET(r, name, x, y, z) \
-    GC_LOAD3(r, name, gc_tensor3D_offset(name, int(x), int(y), int(z)) >> name##_buffer_data_type_size)
-
-/////////////////////////////////////////////////////////////
-
-#endif // _HELPER_H
diff --git a/src/core/GLES_COMPUTE/kernels/GCDepthConcatenateLayerKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCDepthConcatenateLayerKernel.cpp
index a6111782fd..1aac2502e7 100644
--- a/src/core/GLES_COMPUTE/kernels/GCDepthConcatenateLayerKernel.cpp
+++ b/src/core/GLES_COMPUTE/kernels/GCDepthConcatenateLayerKernel.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017, 2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -122,18 +122,9 @@ void GCDepthConcatenateLayerKernel::run(const Window &window)
 
     do
     {
-        if(_input->info()->data_type() == DataType::F32)
-        {
-            unsigned int idx = 0;
-            add_3D_tensor_argument(idx, _input, 1, slice);
-            add_3D_tensor_argument(idx, _output, 2, slice);
-        }
-        else if(_input->info()->data_type() == DataType::F16)
-        {
-            unsigned int idx = 0;
-            add_3D_tensor_argument(idx, _input, BufferParam(1, 3), slice);
-            add_3D_tensor_argument(idx, _output, BufferParam(2, 3), slice);
-        }
+        unsigned int idx = 0;
+        add_3D_tensor_argument(idx, _input, 1, slice);
+        add_3D_tensor_argument(idx, _output, 2, slice);
 
         _kernel.update_shader_params();
 
diff --git a/src/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.cpp
index 06f9bce498..77423fd8bc 100644
--- a/src/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.cpp
+++ b/src/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017, 2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -397,69 +397,21 @@ void GCDirectConvolutionLayerKernel<kernel_size>::run(const Window &window)
     Window slice_in = win_in.first_slice_window_3D();
 
     unsigned int idx1 = 2 * num_arguments_per_3D_tensor();
-    add_3D_tensor_argument(idx1, _weights, BufferParam(3, 2), slice);
+    add_3D_tensor_argument(idx1, _weights, 3, slice);
 
     if(_bias != nullptr)
     {
         Window slice_bias;
         slice_bias.use_tensor_dimensions(_bias->info()->tensor_shape());
-        add_1D_tensor_argument(idx1, _bias, BufferParam(4, 2), slice_bias);
+        add_1D_tensor_argument(idx1, _bias, 4, slice_bias);
     }
 
     do
     {
         unsigned int idx = 0;
 
-        switch(_input->info()->data_type())
-        {
-            case DataType::F16:
-                switch(kernel_size)
-                {
-                    case 1:
-                        add_3D_tensor_argument(idx, _input, BufferParam(1, 4), slice_in);
-                        add_3D_tensor_argument(idx, _output, BufferParam(2, 4), slice);
-                        break;
-
-                    case 3:
-                        add_3D_tensor_argument(idx, _input, BufferParam(1, 3), slice_in);
-                        add_3D_tensor_argument(idx, _output, BufferParam(2, 3), slice);
-                        break;
-
-                    case 5:
-                        add_3D_tensor_argument(idx, _input, BufferParam(1, 3), slice_in);
-                        add_3D_tensor_argument(idx, _output, BufferParam(2, 3), slice);
-                        break;
-
-                    default:
-                        ARM_COMPUTE_ERROR("Current kernel size %d is not supported", kernel_size);
-                        break;
-                }
-                break;
-
-            case DataType::F32:
-                switch(kernel_size)
-                {
-                    case 1:
-                    case 5:
-                        add_3D_tensor_argument(idx, _input, BufferParam(1, 2), slice_in);
-                        add_3D_tensor_argument(idx, _output, BufferParam(2, 2), slice);
-                        break;
-
-                    case 3:
-                        add_3D_tensor_argument(idx, _input, BufferParam(1, 4), slice_in);
-                        add_3D_tensor_argument(idx, _output, BufferParam(2, 4), slice);
-                        break;
-
-                    default:
-                        ARM_COMPUTE_ERROR("Current kernel size %d is not supported", kernel_size);
-                        break;
-                }
-                break;
-
-            default:
-                ARM_COMPUTE_ERROR("Current data type is not supported");
-                break;
-        }
+        add_3D_tensor_argument(idx, _input, 1, slice_in);
+        add_3D_tensor_argument(idx, _output, 2, slice);
 
         _kernel.update_shader_params();
         enqueue(*this, slice, _lws);
diff --git a/src/core/GLES_COMPUTE/kernels/GCDropoutLayerKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCDropoutLayerKernel.cpp
index e87c902281..cd06be2585 100644
--- a/src/core/GLES_COMPUTE/kernels/GCDropoutLayerKernel.cpp
+++ b/src/core/GLES_COMPUTE/kernels/GCDropoutLayerKernel.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017, 2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -96,9 +96,9 @@ void GCDropoutLayerKernel::run(const Window &window)
     {
         unsigned int idx = 0;
 
-        add_3D_tensor_argument(idx, _input, BufferParam(1, 2), slice);
-        add_3D_tensor_argument(idx, _mask, BufferParam(2, 2), slice);
-        add_3D_tensor_argument(idx, _output, BufferParam(3, 2), slice);
+        add_3D_tensor_argument(idx, _input, 1, slice);
+        add_3D_tensor_argument(idx, _mask, 2, slice);
+        add_3D_tensor_argument(idx, _output, 3, slice);
 
         _kernel.update_shader_params();
         enqueue(*this, slice);
diff --git a/src/core/GLES_COMPUTE/kernels/GCGEMMMatrixAccumulateBiasesKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCGEMMMatrixAccumulateBiasesKernel.cpp
index 944585daff..7ee39346f8 100644
--- a/src/core/GLES_COMPUTE/kernels/GCGEMMMatrixAccumulateBiasesKernel.cpp
+++ b/src/core/GLES_COMPUTE/kernels/GCGEMMMatrixAccumulateBiasesKernel.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017, 2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -117,26 +117,9 @@ void GCGEMMMatrixAccumulateBiasesKernel::run(const Window &window)
     {
         // Set arguments
         unsigned int idx = 0;
-        if(_accum->info()->data_type() == DataType::F32)
-        {
-            add_2D_tensor_argument(idx, _accum, 1, accum_slice);
-            add_1D_tensor_argument(idx, _biases, 2, biases_slice);
-        }
-        else if(_accum->info()->data_type() == DataType::F16)
-        {
-#if defined(ACCUM_PROCESS_4X)
-            BufferParam param = { 1, 3 };
-            add_2D_tensor_argument(idx, _accum, param, accum_slice);
-            param.binding_point = 2;
-            add_1D_tensor_argument(idx, _biases, param, biases_slice);
-#elif defined(ACCUM_PROCESS_8X) /* ACCUM_PROCESS_4X */
-            BufferParam param             = { 1, 4 };
-            add_2D_tensor_argument(idx, _accum, param, accum_slice);
-            param.binding_point = 2;
-            add_1D_tensor_argument(idx, _biases, param, biases_slice);
-#endif                          /* ACCUM_PROCESS_4X */
-        }
 
+        add_2D_tensor_argument(idx, _accum, 1, accum_slice);
+        add_1D_tensor_argument(idx, _biases, 2, biases_slice);
         _kernel.update_shader_params();
 
         enqueue(*this, accum_slice, _lws);
diff --git a/src/core/GLES_COMPUTE/kernels/GCGEMMMatrixMultiplyKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCGEMMMatrixMultiplyKernel.cpp
index baf1a9b85e..32fbbfeefb 100644
--- a/src/core/GLES_COMPUTE/kernels/GCGEMMMatrixMultiplyKernel.cpp
+++ b/src/core/GLES_COMPUTE/kernels/GCGEMMMatrixMultiplyKernel.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017, 2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -201,35 +201,10 @@ void GCGEMMMatrixMultiplyKernel::run(const Window &window)
         }
 
         unsigned int idx = 0;
-        switch(_input0->info()->data_type())
-        {
-            case DataType::F16:
-#if defined(MM_PROCESS_4X)
-                add_2D_tensor_argument(idx, _input0, BufferParam(1, 2), slice);
-                add_2D_tensor_argument(idx, _input1, BufferParam(2, 3), slice_b);
-                add_2D_tensor_argument(idx, _output, BufferParam(3, 3), slice);
-#elif defined(MM_PROCESS_4X_OPTIMIZED) /* MM_PROCESS_4X */
-                add_2D_tensor_argument(idx, _input0, BufferParam(1, 4), slice);
-                add_2D_tensor_argument(idx, _input1, BufferParam(2, 3), slice_b);
-                add_2D_tensor_argument(idx, _output, BufferParam(3, 3), slice);
-#elif defined(MM_PROCESS_8X)           /* MM_PROCESS_4X */
-                add_2D_tensor_argument(idx, _input0, BufferParam(1, 4), slice);
-                add_2D_tensor_argument(idx, _input1, BufferParam(2, 4), slice_b);
-                add_2D_tensor_argument(idx, _output, BufferParam(3, 4), slice);
-#endif                                 /* MM_PROCESS_4X */
-                break;
-
-            case DataType::F32:
-                add_2D_tensor_argument(idx, _input0, BufferParam(1, 2), slice);
-                add_2D_tensor_argument(idx, _input1, BufferParam(2, 2), slice_b);
-                add_2D_tensor_argument(idx, _output, BufferParam(3, 2), slice);
-                break;
-
-            default:
-                ARM_COMPUTE_ERROR("Current data type is not supported");
-                break;
-        }
 
+        add_2D_tensor_argument(idx, _input0, 1, slice);
+        add_2D_tensor_argument(idx, _input1, 2, slice_b);
+        add_2D_tensor_argument(idx, _output, 3, slice);
         _kernel.update_shader_params();
         enqueue(*this, slice);
     }
diff --git a/src/core/GLES_COMPUTE/kernels/GCTransposeKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCTransposeKernel.cpp
index 5edc23b95e..b3a7a90931 100644
--- a/src/core/GLES_COMPUTE/kernels/GCTransposeKernel.cpp
+++ b/src/core/GLES_COMPUTE/kernels/GCTransposeKernel.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017, 2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -132,26 +132,9 @@ void GCTransposeKernel::run(const Window &window)
     do
     {
         unsigned int idx = 0;
-        if(_input->info()->data_type() == DataType::F32)
-        {
-            add_2D_tensor_argument(idx, _input, 1, slice);
-            add_2D_tensor_argument(idx, _output, 2, slice);
-        }
-        else if(_input->info()->data_type() == DataType::F16)
-        {
-#if defined(TRANSPOSE_4X4)
-            BufferParam param = { 1, 3 };
-            add_2D_tensor_argument(idx, _input, param, slice);
-            param.binding_point = 2;
-            add_2D_tensor_argument(idx, _output, param, slice);
-#elif defined(TRANSPOSE_8X8) /* TRANSPOSE_4X4 */
-            BufferParam param = { 1, 4 };
-            add_2D_tensor_argument(idx, _input, param, slice);
-            param.binding_point = 2;
-            add_2D_tensor_argument(idx, _output, param, slice);
-#endif                       /* TRANSPOSE_4X4 */
-        }
 
+        add_2D_tensor_argument(idx, _input, 1, slice);
+        add_2D_tensor_argument(idx, _output, 2, slice);
         _kernel.update_shader_params();
         enqueue(*this, slice, _lws_hint);
     }
author	Joel Liang <joel.liang@arm.com>	2018-01-08 15:20:48 +0800
committer	Anthony Barbier <anthony.barbier@arm.com>	2018-11-02 16:42:33 +0000
commit	abd03cfc7ba96462bc5a1ca6f4faa9ca22792158 (patch)
tree	fef70cbe7b3e2ee18f69e880a0586189bd00a37b
parent	088cc7ff4623b268275f38c1ffb6d373584335ed (diff)
download	ComputeLibrary-abd03cfc7ba96462bc5a1ca6f4faa9ca22792158.tar.gz