aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJoel Liang <joel.liang@arm.com>2018-01-08 15:20:48 +0800
committerAnthony Barbier <anthony.barbier@arm.com>2018-11-02 16:42:33 +0000
commitabd03cfc7ba96462bc5a1ca6f4faa9ca22792158 (patch)
treefef70cbe7b3e2ee18f69e880a0586189bd00a37b
parent088cc7ff4623b268275f38c1ffb6d373584335ed (diff)
downloadComputeLibrary-abd03cfc7ba96462bc5a1ca6f4faa9ca22792158.tar.gz
APPBROWSER-298: Remove the old shader common code
Remove token pasting operator support for GLES shader. Remove cs_shaders/helpers.h (the old GLES shader common code). Remove class BufferParam. We don't need to pass the buffer_data_type_shift to GLES shader. Change-Id: Ic4fa6b2fb7647b8f69759f6077ae4a5b483cc04d Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/115448 Tested-by: Jenkins <bsgcomp@arm.com> Reviewed-by: Frank Lei <frank.lei@arm.com> Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
-rw-r--r--arm_compute/core/GLES_COMPUTE/IGCKernel.h58
-rw-r--r--src/core/GLES_COMPUTE/GCKernelLibrary.cpp169
-rw-r--r--src/core/GLES_COMPUTE/IGCKernel.cpp28
-rw-r--r--src/core/GLES_COMPUTE/cs_shaders/helpers.h584
-rw-r--r--src/core/GLES_COMPUTE/kernels/GCDepthConcatenateLayerKernel.cpp17
-rw-r--r--src/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.cpp58
-rw-r--r--src/core/GLES_COMPUTE/kernels/GCDropoutLayerKernel.cpp8
-rw-r--r--src/core/GLES_COMPUTE/kernels/GCGEMMMatrixAccumulateBiasesKernel.cpp23
-rw-r--r--src/core/GLES_COMPUTE/kernels/GCGEMMMatrixMultiplyKernel.cpp33
-rw-r--r--src/core/GLES_COMPUTE/kernels/GCTransposeKernel.cpp23
10 files changed, 37 insertions, 964 deletions
diff --git a/arm_compute/core/GLES_COMPUTE/IGCKernel.h b/arm_compute/core/GLES_COMPUTE/IGCKernel.h
index ee1e166424..ce7717e8ea 100644
--- a/arm_compute/core/GLES_COMPUTE/IGCKernel.h
+++ b/arm_compute/core/GLES_COMPUTE/IGCKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017, 2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -46,25 +46,6 @@ public:
*/
GCKernel &kernel();
- class BufferParam
- {
- public:
- /** Tensor's binding point in this kernel. */
- unsigned int binding_point = 0;
- /** The base 2 logarithm of SSBO buffer data type size (Number of bits to be shift for offset calculation) */
- unsigned int buffer_data_type_shift = 0;
-
- /** Constructor
- *
- * @param[in] binding Tensor's binding point.
- * @param[in] shift Number of bits to be shift for offset calculation
- */
- BufferParam(const unsigned int binding, const unsigned int shift)
- : binding_point(binding), buffer_data_type_shift(shift)
- {
- }
- };
-
/** Add the passed 1D tensor's parameters to the object's kernel's arguments starting from the index idx.
*
* @param[in] idx Index at which to start adding the tensor's arguments.Input and output tensor will have sperated index, multiple indices start from 1, single index have to be set to 0.
@@ -74,15 +55,6 @@ public:
*/
void add_1D_tensor_argument(unsigned int &idx, const IGCTensor *tensor, const unsigned int binding_point, const Window &window);
- /** Add the passed 1D tensor's parameters to the object's kernel's arguments starting from the index idx.
- *
- * @param[in] idx Index at which to start adding the tensor's arguments.Input and output tensor will have sperated index, multiple indices start from 1, single index have to be set to 0.
- * @param[in] tensor Tensor to set as an argument of the object's kernel.
- * @param[in] param Additional parameter for GLES SSBO buffer.
- * @param[in] window Window the kernel will be executed on.
- */
- void add_1D_tensor_argument(unsigned int &idx, const IGCTensor *tensor, const BufferParam &param, const Window &window);
-
/** Add the passed 2D tensor's parameters to the object's kernel's arguments starting from the index idx.
*
* @param[in] idx Index at which to start adding the tensor's arguments.Input and output tensor will have sperated index, multiple indices start from 1, single index have to be set to 0.
@@ -92,15 +64,6 @@ public:
*/
void add_2D_tensor_argument(unsigned int &idx, const IGCTensor *tensor, const unsigned int binding_point, const Window &window);
- /** Add the passed 2D tensor's parameters to the object's kernel's arguments starting from the index idx.
- *
- * @param[in] idx Index at which to start adding the tensor's arguments.Input and output tensor will have sperated index, multiple indices start from 1, single index have to be set to 0.
- * @param[in] tensor Tensor to set as an argument of the object's kernel.
- * @param[in] param Additional parameter for GLES SSBO buffer.
- * @param[in] window Window the kernel will be executed on.
- */
- void add_2D_tensor_argument(unsigned int &idx, const IGCTensor *tensor, const BufferParam &param, const Window &window);
-
/** Add the passed 3D tensor's parameters to the object's kernel's arguments starting from the index idx.
*
* @param[in] idx Index at which to start adding the tensor's arguments.Input and output tensor will have sperated index, multiple indices start from 1, single index have to be set to 0.
@@ -110,15 +73,6 @@ public:
*/
void add_3D_tensor_argument(unsigned int &idx, const IGCTensor *tensor, const unsigned int binding_point, const Window &window);
- /** Add the passed 3D tensor's parameters to the object's kernel's arguments starting from the index idx.
- *
- * @param[in] idx Index at which to start adding the tensor's arguments.Input and output tensor will have sperated index, multiple indices start from 1, single index have to be set to 0.
- * @param[in] tensor Tensor to set as an argument of the object's kernel.
- * @param[in] param Additional parameter for GLES SSBO buffer.
- * @param[in] window Window the kernel will be executed on.
- */
- void add_3D_tensor_argument(unsigned int &idx, const IGCTensor *tensor, const BufferParam &param, const Window &window);
-
/** Returns the number of arguments enqueued per 1D tensor object.
*
* @return The number of arguments enqueues per 1D tensor object.
@@ -154,13 +108,13 @@ public:
private:
/** Add the passed tensor's parameters to the object's kernel's arguments starting from the index idx.
*
- * @param[in] idx Index at which to start adding the tensor's arguments.Input and output tensor will have sperated index, multiple indices start from 1, single index have to be set to 0.
- * @param[in] tensor Tensor to set as an argument of the object's kernel.
- * @param[in] param Additional parameter for GLES SSBO buffer.
- * @param[in] window Window the kernel will be executed on.
+ * @param[in] idx Index at which to start adding the tensor's arguments.Input and output tensor will have sperated index, multiple indices start from 1, single index have to be set to 0.
+ * @param[in] tensor Tensor to set as an argument of the object's kernel.
+ * @param[in] binding_point Tensor's binding point in this kernel.
+ * @param[in] window Window the kernel will be executed on.
*/
template <unsigned int dimension_size>
- void add_tensor_argument(unsigned int &idx, const IGCTensor *tensor, const BufferParam &param, const Window &window);
+ void add_tensor_argument(unsigned int &idx, const IGCTensor *tensor, const unsigned int binding_point, const Window &window);
/** Returns the number of arguments enqueued per tensor object.
*
diff --git a/src/core/GLES_COMPUTE/GCKernelLibrary.cpp b/src/core/GLES_COMPUTE/GCKernelLibrary.cpp
index 7766f95bcc..4c84c674fc 100644
--- a/src/core/GLES_COMPUTE/GCKernelLibrary.cpp
+++ b/src/core/GLES_COMPUTE/GCKernelLibrary.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017, 2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -380,21 +380,9 @@ const std::string GCKernelLibrary::preprocess_shader(const std::string &shader_s
FIRST,
SKIP_COMMENTS = FIRST,
RESOLVE_INCLUDES,
- SKIP_PREPROCESSOR_DIRECTIVES,
- SEARCH_MACRO_DEFINITIONS,
- EXPAND_MACRO_USES,
LAST
};
- struct MacroDefinitionInfo
- {
- const std::vector<std::string> param_list;
- const std::string content;
- };
-
- // Found macro definitions so far
- std::map<const std::string, const MacroDefinitionInfo> macro_definitions;
-
// Define a GLES compute shader parser function
std::function<std::string(const std::string &, ParserStage, int)> cs_parser;
cs_parser = [&](const std::string & src, ParserStage stage, int nested_level) -> std::string
@@ -416,35 +404,6 @@ const std::string GCKernelLibrary::preprocess_shader(const std::string &shader_s
case ParserStage::RESOLVE_INCLUDES:
search_pattern = R"rgx((?:^|\n)[ \t]*#include "(.*)")rgx";
break;
- case ParserStage::SKIP_PREPROCESSOR_DIRECTIVES:
- search_pattern = R"((^|\n)[ \t]*(#ifdef|#ifndef|#if)[^\n]+)";
- break;
- case ParserStage::SEARCH_MACRO_DEFINITIONS:
- search_pattern = R"((?:^|\n)[ \t]*#define[ \t]+(\w+)(?:\((\w+(?:[ \t]*,[ \t]*\w+)*)\))?(?: |\t|\\\n)*((?:(?:[^\\\n]|\\[^\n])*\\+\n)*(?:[ \t]*[^ \t\n]+)*)[ \t]*)";
- break;
- case ParserStage::EXPAND_MACRO_USES:
- {
- if(macro_definitions.empty())
- {
- // Nothing to expand
- return src;
- }
- int i = 0;
- for(auto &def : macro_definitions)
- {
- if(i == 0)
- {
- search_pattern = R"((\b)" + def.first;
- }
- else
- {
- search_pattern += R"(\b|\b)" + def.first;
- }
- i++;
- }
- search_pattern += R"(\b))";
- break;
- }
default:
break;
}
@@ -469,126 +428,7 @@ const std::string GCKernelLibrary::preprocess_shader(const std::string &shader_s
dst.append(cs_parser(read_file(source_name, false), ParserStage::FIRST, 0));
break;
}
- case ParserStage::SEARCH_MACRO_DEFINITIONS:
- {
- std::regex params_regex(R"(\b\w+\b)");
- const std::string macro_param_str = match.str(2);
- const std::vector<std::string> macro_param_list(
- std::sregex_token_iterator(macro_param_str.begin(),
- macro_param_str.end(),
- params_regex),
- std::sregex_token_iterator());
-
- const MacroDefinitionInfo info =
- {
- macro_param_list,
- match.str(3)
- };
- // Collect the macro definition data and not change the shader source
- macro_definitions.insert(std::pair<const std::string, const MacroDefinitionInfo>(match.str(1), info));
- dst.append(match.str());
- break;
- }
- case ParserStage::EXPAND_MACRO_USES:
- {
- ptrdiff_t args_str_length = 0;
- std::vector<std::string> args_list;
-
- // Walk through argument list, because the regular expression does NOT support nested parentheses
- size_t cur_args_str_pos = match.position() + match.length();
- if(src[cur_args_str_pos++] == '(')
- {
- int nested_parentheses = 0;
- ptrdiff_t cur_arg_pos = cur_args_str_pos;
- ptrdiff_t cur_arg_length = 0;
-
- args_str_length++;
- while(src[cur_args_str_pos] != ')' || nested_parentheses != 0)
- {
- switch(src[cur_args_str_pos++])
- {
- case '(':
- nested_parentheses++;
- cur_arg_length++;
- break;
- case ',':
- if(nested_parentheses == 0)
- {
- args_list.push_back(src.substr(cur_arg_pos, cur_arg_length));
- cur_arg_pos = cur_args_str_pos;
- cur_arg_length = 0;
- }
- else
- {
- cur_arg_length++;
- }
- break;
- case ' ':
- case '\t':
- if(cur_arg_length == 0)
- {
- cur_arg_pos++;
- }
- else
- {
- cur_arg_length++;
- }
- break;
- case ')':
- nested_parentheses--;
- // no break here!
- default:
- cur_arg_length++;
- break;
- }
- args_str_length++;
- }
- if(src[cur_args_str_pos] == ')' && nested_parentheses == 0)
- {
- args_list.push_back(src.substr(cur_arg_pos, cur_arg_length));
- }
- args_str_length++;
- }
-
- std::string expanded_content = match.str();
- const std::vector<std::string> macro_param_list = macro_definitions.at(match.str()).param_list;
-
- if((nested_level != 0 || !macro_param_list.empty()) && macro_param_list.size() == args_list.size())
- {
- parsed_pos += args_str_length;
- expanded_content = macro_definitions.at(match.str()).content;
- size_t i = 0;
- for(auto &param_name : macro_param_list)
- {
- std::regex params_regex(R"(\b)" + param_name + R"(\b)");
- expanded_content.assign(std::regex_replace(expanded_content, params_regex, args_list[i]));
- ++i;
- }
- // Expand macro recursively
- expanded_content = cs_parser(expanded_content, stage, nested_level + 1);
-
- if(nested_level == 0)
- {
- const std::regex token_pasting_rgx = std::regex(R"(\b##\b)");
- if(std::regex_search(expanded_content, token_pasting_rgx))
- {
- // Remove token pasting operator "##"
- expanded_content.assign(std::regex_replace(expanded_content, std::regex(token_pasting_rgx), ""));
- // Trim trailing whitespace
- expanded_content.assign(std::regex_replace(expanded_content, std::regex(R"([ \t]*\\\n)"), "\n"));
- }
- else
- {
- // Do not expand the macro if the result does not have token pasting operator "##"
- expanded_content = src.substr(match.position(), match.length() + args_str_length);
- }
- }
- }
- dst.append(expanded_content);
- break;
- }
case ParserStage::SKIP_COMMENTS:
- case ParserStage::SKIP_PREPROCESSOR_DIRECTIVES:
default:
dst.append(match.str());
break;
@@ -622,12 +462,7 @@ const GCProgram &GCKernelLibrary::load_program(const std::string &program_name)
ARM_COMPUTE_ERROR("Embedded program for %s does not exist.", program_name.c_str());
}
- // TODO(APPBROWSER-298): Do not call shader preprocessor here
- // We should do the preprocess at compile time
- // The preprocess_shader function is used for support "#include" directive and token pasting operator "##".
- // This job could be done at compile time by using a python script in order to get better performance at runtime.
- // BTW: We usually defined EMBEDDED_KERNELS in release build.
- program = GCProgram(program_name, preprocess_shader(program_source_it->second));
+ program = GCProgram(program_name, program_source_it->second);
#else /* EMBEDDED_KERNELS */
// Check for binary
std::string source_name = _shader_path + program_name;
diff --git a/src/core/GLES_COMPUTE/IGCKernel.cpp b/src/core/GLES_COMPUTE/IGCKernel.cpp
index df9c798e42..6666c0f3ae 100644
--- a/src/core/GLES_COMPUTE/IGCKernel.cpp
+++ b/src/core/GLES_COMPUTE/IGCKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017, 2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -79,7 +79,7 @@ unsigned int IGCKernel::num_arguments_per_tensor() const
}
template <unsigned int dimension_size>
-void IGCKernel::add_tensor_argument(unsigned int &idx, const IGCTensor *tensor, const BufferParam &param, const Window &window)
+void IGCKernel::add_tensor_argument(unsigned int &idx, const IGCTensor *tensor, const unsigned int binding_point, const Window &window)
{
ARM_COMPUTE_ERROR_ON(tensor == nullptr);
@@ -103,7 +103,6 @@ void IGCKernel::add_tensor_argument(unsigned int &idx, const IGCTensor *tensor,
}
_kernel.set_argument(idx++, offset_first_element);
- _kernel.set_argument(idx++, param.buffer_data_type_shift);
// Rounding up the tensor attributes structure in compute shader to a multiple of a vec4
unsigned int idx_end = ceil_to_multiple(idx, 4);
@@ -113,7 +112,7 @@ void IGCKernel::add_tensor_argument(unsigned int &idx, const IGCTensor *tensor,
}
idx = idx_end;
- ARM_COMPUTE_GL_CHECK(glBindBufferBase(GL_SHADER_STORAGE_BUFFER, param.binding_point, tensor->gc_buffer()));
+ ARM_COMPUTE_GL_CHECK(glBindBufferBase(GL_SHADER_STORAGE_BUFFER, binding_point, tensor->gc_buffer()));
ARM_COMPUTE_ERROR_ON_MSG(idx_start + num_arguments_per_tensor<dimension_size>() != idx,
"add_%dD_tensor_argument() is supposed to add exactly %d arguments to the kernel", dimension_size, num_arguments_per_tensor<dimension_size>());
@@ -122,32 +121,17 @@ void IGCKernel::add_tensor_argument(unsigned int &idx, const IGCTensor *tensor,
void IGCKernel::add_1D_tensor_argument(unsigned int &idx, const IGCTensor *tensor, const unsigned int binding_point, const Window &window)
{
- add_tensor_argument<1>(idx, tensor, BufferParam(binding_point, 0), window);
-}
-
-void IGCKernel::add_1D_tensor_argument(unsigned int &idx, const IGCTensor *tensor, const BufferParam &param, const Window &window)
-{
- add_tensor_argument<1>(idx, tensor, param, window);
+ add_tensor_argument<1>(idx, tensor, binding_point, window);
}
void IGCKernel::add_2D_tensor_argument(unsigned int &idx, const IGCTensor *tensor, const unsigned int binding_point, const Window &window)
{
- add_tensor_argument<2>(idx, tensor, BufferParam(binding_point, 0), window);
-}
-
-void IGCKernel::add_2D_tensor_argument(unsigned int &idx, const IGCTensor *tensor, const BufferParam &param, const Window &window)
-{
- add_tensor_argument<2>(idx, tensor, param, window);
+ add_tensor_argument<2>(idx, tensor, binding_point, window);
}
void IGCKernel::add_3D_tensor_argument(unsigned int &idx, const IGCTensor *tensor, const unsigned int binding_point, const Window &window)
{
- add_tensor_argument<3>(idx, tensor, BufferParam(binding_point, 0), window);
-}
-
-void IGCKernel::add_3D_tensor_argument(unsigned int &idx, const IGCTensor *tensor, const BufferParam &param, const Window &window)
-{
- add_tensor_argument<3>(idx, tensor, param, window);
+ add_tensor_argument<3>(idx, tensor, binding_point, window);
}
unsigned int IGCKernel::num_arguments_per_1D_tensor() const
diff --git a/src/core/GLES_COMPUTE/cs_shaders/helpers.h b/src/core/GLES_COMPUTE/cs_shaders/helpers.h
deleted file mode 100644
index ba27eec716..0000000000
--- a/src/core/GLES_COMPUTE/cs_shaders/helpers.h
+++ /dev/null
@@ -1,584 +0,0 @@
-/*
- * Copyright (c) 2017 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef ARM_COMPUTE_HELPER_H
-#define ARM_COMPUTE_HELPER_H
-
-#define CLAMP(x, min_val, max_val) min(max(x, min_val), max_val)
-
-#define VEC_DATA_TYPE_STR(type, size) type##size
-#define VEC_DATA_TYPE(type, size) VEC_DATA_TYPE_STR(type, size)
-
-#define CONVERT(x, type) type(x)
-
-#define PACK(value, stype, dtype) \
- pack_##stype##_##dtype(value)
-
-#define UNPACK(value, stype, dtype) \
- unpack_##stype##_##dtype(value)
-
-#define BUFFER_DECLARATION(name, location, type, access) \
- layout(std430, binding = location) access buffer name##Buffer \
- { \
- type name##_ptr[]; \
- }
-
-#define VECTOR_PARAM_DECLARATION(name) \
- uint name##_stride_x; \
- uint name##_step_x; \
- uint name##_offset_first_element_in_bytes; \
- uint name##_buffer_data_type_size
-
-#define IMAGE_PARAM_DECLARATION(name) \
- uint name##_stride_x; \
- uint name##_step_x; \
- uint name##_stride_y; \
- uint name##_step_y; \
- uint name##_offset_first_element_in_bytes; \
- uint name##_buffer_data_type_size; \
- uint name##_padding1; \
- uint name##_padding2
-
-#define TENSOR3D_PARAM_DECLARATION(name) \
- uint name##_stride_x; \
- uint name##_step_x; \
- uint name##_stride_y; \
- uint name##_step_y; \
- uint name##_stride_z; \
- uint name##_step_z; \
- uint name##_offset_first_element_in_bytes; \
- uint name##_buffer_data_type_size
-
-/** Structure to hold Vector information */
-struct Vector
-{
- uint current_offset; /**< Current offset of vector */
- uint offset_first_element_in_bytes; /**< The offset of the first element in the source image */
- uint stride_x; /**< Stride of the image in X dimension (in bytes) */
-};
-
-/** Structure to hold Image information */
-struct Image
-{
- uint current_offset; /**< Current offset of image */
- uint offset_first_element_in_bytes; /**< The offset of the first element in the source image */
- uint stride_x; /**< Stride of the image in X dimension (in bytes) */
- uint stride_y; /**< Stride of the image in Y dimension (in bytes) */
-};
-
-/** Structure to hold 3D tensor information */
-struct Tensor3D
-{
- uint current_offset; /**< Current offset of tensor */
- uint offset_first_element_in_bytes; /**< The offset of the first element in the source image */
- uint stride_x; /**< Stride of the image in X dimension (in bytes) */
- uint stride_y; /**< Stride of the image in Y dimension (in bytes) */
- uint stride_z; /**< Stride of the image in Z dimension (in bytes) */
-};
-
-/////////////////////////////////////////////////////////////
-// TODO: old to be removed
-
-#define CONVERT_TO_VECTOR_STRUCT(name) \
- update_vector_workitem_offset(name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x)
-
-#define CONVERT_TO_VECTOR_STRUCT_FP16(name) \
- update_vector_workitem_offset_fp16(name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x)
-
-#define CONVERT_TO_VECTOR_STRUCT_NO_STEP(name) \
- update_vector_workitem_offset(name##_offset_first_element_in_bytes, name##_stride_x, uint(0))
-
-#define CONVERT_TO_VECTOR_STRUCT_NO_STEP_FP16(name) \
- update_vector_workitem_offset_fp16(name##_offset_first_element_in_bytes, name##_stride_x, uint(0))
-
-#define CONVERT_TO_IMAGE_STRUCT(name) \
- update_image_workitem_offset(name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x, name##_stride_y, name##_step_y)
-
-#define CONVERT_TO_IMAGE_STRUCT_FP16(name) \
- update_image_workitem_offset_fp16(name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x, name##_stride_y, name##_step_y)
-
-#define CONVERT_TO_IMAGE_STRUCT_NO_STEP(name) \
- update_image_workitem_offset(name##_offset_first_element_in_bytes, name##_stride_x, uint(0), name##_stride_y, uint(0))
-
-#define CONVERT_TO_IMAGE_STRUCT_NO_STEP_FP16(name) \
- update_image_workitem_offset_fp16(name##_offset_first_element_in_bytes, name##_stride_x, uint(0), name##_stride_y, uint(0))
-
-#define CONVERT_TENSOR3D_TO_IMAGE_STRUCT_NO_STEP(name) \
- update_image_from_tensor3D_workitem_offset(name##_offset_first_element_in_bytes, name##_stride_x, uint(0), name##_stride_y, uint(0), name##_stride_z, name##_step_z)
-
-#define CONVERT_TENSOR3D_TO_IMAGE_STRUCT_NO_STEP_FP16(name) \
- update_image_from_tensor3D_workitem_offset_fp16(name##_offset_first_element_in_bytes, name##_stride_x, uint(0), name##_stride_y, uint(0), name##_stride_z, name##_step_z)
-
-#define CONVERT_TENSOR3D_TO_IMAGE_STRUCT(name) \
- update_image_from_tensor3D_workitem_offset(name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x, name##_stride_y, name##_step_y, name##_stride_z, name##_step_z)
-
-#define CONVERT_TENSOR3D_TO_IMAGE_STRUCT_FP16(name) \
- update_image_from_tensor3D_workitem_offset_fp16(name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x, name##_stride_y, name##_step_y, name##_stride_z, name##_step_z)
-
-#define CONVERT_TO_TENSOR3D_STRUCT(name) \
- update_tensor3D_workitem_offset(name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x, name##_stride_y, name##_step_y, \
- name##_stride_z, name##_step_z)
-
-#define CONVERT_TO_TENSOR3D_STRUCT_FP16(name) \
- update_tensor3D_workitem_offset_fp16(name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x, name##_stride_y, name##_step_y, \
- name##_stride_z, name##_step_z)
-
-#define CONVERT_TO_TENSOR3D_STRUCT_NO_STEP(name) \
- update_tensor3D_workitem_offset(name##_offset_first_element_in_bytes, name##_stride_x, uint(0), name##_stride_y, uint(0), name##_stride_z, uint(0))
-
-#define CONVERT_TO_TENSOR3D_STRUCT_NO_STEP_FP16(name) \
- update_tensor3D_workitem_offset_fp16(name##_offset_first_element_in_bytes, name##_stride_x, uint(0), name##_stride_y, uint(0), name##_stride_z, uint(0))
-
-// FIXME: Redesign the macros if different data types are supported.
-#define LOAD4(name, offset) \
- name##_ptr[offset]
-
-#define STORE4(name, offset, value) \
- name##_ptr[offset] = value
-
-// Load 1 element, which size is determined by ssbo type.
-#define LOAD1(r, name, offset) \
- r = name##_ptr[offset]
-
-#define STORE1(name, offset, value) \
- name##_ptr[offset] = value
-
-#define LOAD2(r, name, offset) \
- LOAD1(r[0], name, offset); \
- LOAD1(r[1], name, (offset) + uint(1))
-
-#define STORE2(name, offset, value) \
- name##_ptr[offset] = value[0]; \
- name##_ptr[(offset) + uint(1)] = value[1]
-
-#define LOAD3(r, name, offset) \
- LOAD1(r[0], name, offset); \
- LOAD1(r[1], name, (offset) + uint(1)); \
- LOAD1(r[2], name, (offset) + uint(2))
-
-#define CURRENT_OFFSET(name) \
- name.current_offset
-
-/** Wrap vector information into an Vector structure, and make the offset to be this workitem's position.
- *
- * @param[in] offset_first_element_in_bytes The offset of the first element in the source vector
- * @param[in] stride_x Stride of the vector in X dimension (in bytes)
- * @param[in] step_x stride_x * number of elements along X processed per workitem(in bytes)
- *
- * @return An vector object
- */
-Vector update_vector_workitem_offset(uint offset_first_element_in_bytes, uint stride_x, uint step_x)
-{
- Vector vector;
- vector.offset_first_element_in_bytes = offset_first_element_in_bytes;
- vector.stride_x = stride_x;
- vector.current_offset = (vector.offset_first_element_in_bytes + gl_GlobalInvocationID.x * step_x) >> 2;
-
- return vector;
-}
-
-/** Wrap vector information into an Vector structure, and make the offset to be this workitem's position.
- *
- * @param[in] offset_first_element_in_bytes The offset of the first element in the source vector
- * @param[in] stride_x Stride of the vector in X dimension (in bytes)
- * @param[in] step_x stride_x * number of elements along X processed per workitem(in bytes)
- *
- * @return An vector object
- */
-Vector update_vector_workitem_offset_fp16(uint offset_first_element_in_bytes, uint stride_x, uint step_x)
-{
- Vector vector;
- vector.offset_first_element_in_bytes = offset_first_element_in_bytes;
- vector.stride_x = stride_x;
- vector.current_offset = vector.offset_first_element_in_bytes + gl_GlobalInvocationID.x * step_x;
-
- return vector;
-}
-
-/** Wrap image information into an Image structure, and make the offset to be this workitem's position.
- *
- * @param[in] offset_first_element_in_bytes The offset of the first element in the source image
- * @param[in] stride_x Stride of the image in X dimension (in bytes)
- * @param[in] step_x stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] stride_y Stride of the image in Y dimension (in bytes)
- * @param[in] step_y stride_y * number of elements along Y processed per workitem(in bytes)
- *
- * @return An image object
- */
-Image update_image_workitem_offset(uint offset_first_element_in_bytes, uint stride_x, uint step_x, uint stride_y, uint step_y)
-{
- Image img;
- img.offset_first_element_in_bytes = offset_first_element_in_bytes;
- img.stride_x = stride_x;
- img.stride_y = stride_y;
- img.current_offset = (img.offset_first_element_in_bytes + gl_GlobalInvocationID.x * step_x + gl_GlobalInvocationID.y * step_y) >> 2;
-
- return img;
-}
-
-/** Wrap image information into an Image structure, and make the offset to be this workitem's position.
- *
- * @param[in] offset_first_element_in_bytes The offset of the first element in the source image
- * @param[in] stride_x Stride of the image in X dimension (in bytes)
- * @param[in] step_x stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] stride_y Stride of the image in Y dimension (in bytes)
- * @param[in] step_y stride_y * number of elements along Y processed per workitem(in bytes)
- *
- * @return An image object
- */
-Image update_image_workitem_offset_fp16(uint offset_first_element_in_bytes, uint stride_x, uint step_x, uint stride_y, uint step_y)
-{
- Image img;
- img.offset_first_element_in_bytes = offset_first_element_in_bytes;
- img.stride_x = stride_x;
- img.stride_y = stride_y;
- img.current_offset = img.offset_first_element_in_bytes + gl_GlobalInvocationID.x * step_x + gl_GlobalInvocationID.y * step_y;
-
- return img;
-}
-
-/** Wrap 3D tensor information into an image structure, and make the offset to be this workitem's position.
- *
- * @param[in] offset_first_element_in_bytes The offset of the first element in the source image
- * @param[in] stride_x Stride of the image in X dimension (in bytes)
- * @param[in] step_x stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] stride_y Stride of the image in Y dimension (in bytes)
- * @param[in] step_y stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] stride_z Stride of the image in Z dimension (in bytes)
- * @param[in] step_z stride_z * number of elements along Z processed per workitem(in bytes)
- *
- * @return A 2D Image object
- */
-Image update_image_from_tensor3D_workitem_offset(uint offset_first_element_in_bytes, uint stride_x, uint step_x, uint stride_y, uint step_y, uint stride_z, uint step_z)
-{
- Image img;
- img.offset_first_element_in_bytes = offset_first_element_in_bytes;
- img.stride_x = stride_x;
- img.stride_y = stride_y;
- img.current_offset = (img.offset_first_element_in_bytes + gl_GlobalInvocationID.x * step_x + gl_GlobalInvocationID.y * step_y + gl_GlobalInvocationID.z * step_z) >> 2;
-
- return img;
-}
-
-/** Wrap 3D tensor information into an image structure, and make the offset to be this workitem's position.
- *
- * @param[in] offset_first_element_in_bytes The offset of the first element in the source image
- * @param[in] stride_x Stride of the image in X dimension (in bytes)
- * @param[in] step_x stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] stride_y Stride of the image in Y dimension (in bytes)
- * @param[in] step_y stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] stride_z Stride of the image in Z dimension (in bytes)
- * @param[in] step_z stride_z * number of elements along Z processed per workitem(in bytes)
- *
- * @return A 2D Image object
- */
-Image update_image_from_tensor3D_workitem_offset_fp16(uint offset_first_element_in_bytes, uint stride_x, uint step_x, uint stride_y, uint step_y, uint stride_z, uint step_z)
-{
- Image img;
- img.offset_first_element_in_bytes = offset_first_element_in_bytes;
- img.stride_x = stride_x;
- img.stride_y = stride_y;
- img.current_offset = img.offset_first_element_in_bytes + gl_GlobalInvocationID.x * step_x + gl_GlobalInvocationID.y * step_y + gl_GlobalInvocationID.z * step_z;
-
- return img;
-}
-
-/** Wrap 3D tensor information into an tensor structure, and make the offset to be this workitem's position.
- *
- * @param[in] offset_first_element_in_bytes The offset of the first element in the source image
- * @param[in] stride_x Stride of the image in X dimension (in bytes)
- * @param[in] step_x stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] stride_y Stride of the image in Y dimension (in bytes)
- * @param[in] step_y stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] stride_z Stride of the image in Z dimension (in bytes)
- * @param[in] step_z stride_z * number of elements along Z processed per workitem(in bytes)
- *
- * @return A 3D tensor object
- */
-Tensor3D update_tensor3D_workitem_offset(uint offset_first_element_in_bytes, uint stride_x, uint step_x, uint stride_y, uint step_y, uint stride_z, uint step_z)
-{
- Tensor3D tensor;
- tensor.offset_first_element_in_bytes = offset_first_element_in_bytes;
- tensor.stride_x = stride_x;
- tensor.stride_y = stride_y;
- tensor.stride_z = stride_z;
- tensor.current_offset = (tensor.offset_first_element_in_bytes + gl_GlobalInvocationID.x * step_x + gl_GlobalInvocationID.y * step_y + gl_GlobalInvocationID.z * step_z) >> 2;
-
- return tensor;
-}
-
-/** Wrap 3D tensor information into an tensor structure, and make the offset to be this workitem's position.
- *
- * @param[in] offset_first_element_in_bytes The offset of the first element in the source image
- * @param[in] stride_x Stride of the image in X dimension (in bytes)
- * @param[in] step_x stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] stride_y Stride of the image in Y dimension (in bytes)
- * @param[in] step_y stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] stride_z Stride of the image in Z dimension (in bytes)
- * @param[in] step_z stride_z * number of elements along Z processed per workitem(in bytes)
- *
- * @return A 3D tensor object
- */
-Tensor3D update_tensor3D_workitem_offset_fp16(uint offset_first_element_in_bytes, uint stride_x, uint step_x, uint stride_y, uint step_y, uint stride_z, uint step_z)
-{
- Tensor3D tensor;
- tensor.offset_first_element_in_bytes = offset_first_element_in_bytes;
- tensor.stride_x = stride_x;
- tensor.stride_y = stride_y;
- tensor.stride_z = stride_z;
- tensor.current_offset = tensor.offset_first_element_in_bytes + gl_GlobalInvocationID.x * step_x + gl_GlobalInvocationID.y * step_y + gl_GlobalInvocationID.z * step_z;
-
- return tensor;
-}
-
-/** Get the pointer position of a Vector
- *
- * @param[in] vec Pointer to the starting position of the buffer
- * @param[in] x Relative X position
- */
-uint vector_offset(Vector vec, int x)
-{
- return CONVERT(CONVERT(vec.current_offset << 2, int) + x * CONVERT(vec.stride_x, int), uint) >> 2;
-}
-
-/** Get the pointer position of a Vector
- *
- * @param[in] vec Pointer to the starting position of the buffer
- * @param[in] x Relative X position
- */
-uint vector_offset_fp16(Vector vec, int x)
-{
- return CONVERT(CONVERT(vec.current_offset, int) + x * CONVERT(vec.stride_x, int), uint);
-}
-
-/** Get the pointer position of a Image
- *
- * @param[in] img Pointer to the starting position of the buffer
- * @param[in] x Relative X position
- * @param[in] y Relative Y position
- */
-uint offset(Image img, int x, int y)
-{
- return CONVERT(CONVERT(img.current_offset << 2, int) + x * CONVERT(img.stride_x, int) + y * CONVERT(img.stride_y, int), uint) >> 2;
-}
-
-/** Get the pointer position of a Image
- *
- * @param[in] img Pointer to the starting position of the buffer
- * @param[in] x Relative X position
- * @param[in] y Relative Y position
- */
-uint offset_fp16(Image img, int x, int y)
-{
- return CONVERT(CONVERT(img.current_offset, int) + x * CONVERT(img.stride_x, int) + y * CONVERT(img.stride_y, int), uint);
-}
-
-/** Get the pointer position of a Tensor3D
- *
- * @param[in] tensor Pointer to the starting postion of the buffer
- * @param[in] x Relative X position
- * @param[in] y Relative Y position
- * @param[in] z Relative Z position
- */
-uint tensor3D_offset(Tensor3D tensor, int x, int y, int z)
-{
- return CONVERT(CONVERT(tensor.current_offset << 2, int) + x * CONVERT(tensor.stride_x, int) + y * CONVERT(tensor.stride_y, int) + z * CONVERT(tensor.stride_z, int), uint) >> 2;
-}
-
-/** Get the pointer position of a Tensor3D
- *
- * @param[in] tensor Pointer to the starting postion of the buffer
- * @param[in] x Relative X position
- * @param[in] y Relative Y position
- * @param[in] z Relative Z position
- */
-uint tensor3D_offset_fp16(Tensor3D tensor, int x, int y, int z)
-{
- return CONVERT(CONVERT(tensor.current_offset, int) + x * CONVERT(tensor.stride_x, int) + y * CONVERT(tensor.stride_y, int) + z * CONVERT(tensor.stride_z, int), uint);
-}
-
-/////////////////////////////////////////////////////////////
-// new one
-
-#define GC_CONVERT_TO_VECTOR_STRUCT(name) \
- gc_update_vector_workitem_offset(name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x)
-
-#define GC_CONVERT_TO_VECTOR_STRUCT_NO_STEP(name) \
- gc_update_vector_workitem_offset(name##_offset_first_element_in_bytes, name##_stride_x, uint(0))
-
-#define GC_CONVERT_TO_IMAGE_STRUCT(name) \
- gc_update_image_workitem_offset(name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x, name##_stride_y, name##_step_y)
-
-#define GC_CONVERT_TO_IMAGE_STRUCT_NO_STEP(name) \
- gc_update_image_workitem_offset(name##_offset_first_element_in_bytes, name##_stride_x, uint(0), name##_stride_y, uint(0))
-
-#define GC_CONVERT_TO_TENSOR3D_STRUCT(name) \
- gc_update_tensor3D_workitem_offset(name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x, name##_stride_y, name##_step_y, \
- name##_stride_z, name##_step_z)
-
-#define GC_CONVERT_TO_TENSOR3D_STRUCT_NO_STEP(name) \
- gc_update_tensor3D_workitem_offset(name##_offset_first_element_in_bytes, name##_stride_x, uint(0), name##_stride_y, uint(0), name##_stride_z, uint(0))
-
-#define GC_CONVERT_TENSOR3D_TO_IMAGE_STRUCT(name) \
- gc_update_image_from_tensor3D_workitem_offset(name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x, name##_stride_y, name##_step_y, name##_stride_z, name##_step_z)
-
-#define GC_CONVERT_TENSOR3D_TO_IMAGE_STRUCT_NO_STEP(name) \
- gc_update_image_from_tensor3D_workitem_offset(name##_offset_first_element_in_bytes, name##_stride_x, uint(0), name##_stride_y, uint(0), name##_stride_z, name##_step_z)
-
-Vector gc_update_vector_workitem_offset(uint offset_first_element_in_bytes, uint stride_x, uint step_x)
-{
- Vector vector;
- vector.offset_first_element_in_bytes = offset_first_element_in_bytes;
- vector.stride_x = stride_x;
- vector.current_offset = vector.offset_first_element_in_bytes + gl_GlobalInvocationID.x * step_x;
-
- return vector;
-}
-
-Image gc_update_image_workitem_offset(uint offset_first_element_in_bytes, uint stride_x, uint step_x, uint stride_y, uint step_y)
-{
- Image img;
- img.offset_first_element_in_bytes = offset_first_element_in_bytes;
- img.stride_x = stride_x;
- img.stride_y = stride_y;
- img.current_offset = img.offset_first_element_in_bytes + gl_GlobalInvocationID.x * step_x + gl_GlobalInvocationID.y * step_y;
-
- return img;
-}
-
-Tensor3D gc_update_tensor3D_workitem_offset(uint offset_first_element_in_bytes, uint stride_x, uint step_x, uint stride_y, uint step_y, uint stride_z, uint step_z)
-{
- Tensor3D tensor;
- tensor.offset_first_element_in_bytes = offset_first_element_in_bytes;
- tensor.stride_x = stride_x;
- tensor.stride_y = stride_y;
- tensor.stride_z = stride_z;
- tensor.current_offset = tensor.offset_first_element_in_bytes + gl_GlobalInvocationID.x * step_x + gl_GlobalInvocationID.y * step_y + gl_GlobalInvocationID.z * step_z;
-
- return tensor;
-}
-
-Image gc_update_image_from_tensor3D_workitem_offset(uint offset_first_element_in_bytes, uint stride_x, uint step_x, uint stride_y, uint step_y, uint stride_z, uint step_z)
-{
- Image img;
- img.offset_first_element_in_bytes = offset_first_element_in_bytes;
- img.stride_x = stride_x;
- img.stride_y = stride_y;
- img.current_offset = img.offset_first_element_in_bytes + gl_GlobalInvocationID.x * step_x + gl_GlobalInvocationID.y * step_y + gl_GlobalInvocationID.z * step_z;
-
- return img;
-}
-
-#define GC_CURRENT_OFFSET(name) \
- name.current_offset
-
-uint gc_vector_offset(Vector vec, int x)
-{
- return CONVERT(CONVERT(vec.current_offset, int) + x * CONVERT(vec.stride_x, int), uint);
-}
-
-uint gc_image_offset(Image img, int x, int y)
-{
- return CONVERT(CONVERT(img.current_offset, int) + x * CONVERT(img.stride_x, int) + y * CONVERT(img.stride_y, int), uint);
-}
-
-uint gc_tensor3D_offset(Tensor3D tensor, int x, int y, int z)
-{
- return CONVERT(CONVERT(tensor.current_offset, int) + x * CONVERT(tensor.stride_x, int) + y * CONVERT(tensor.stride_y, int) + z * CONVERT(tensor.stride_z, int), uint);
-}
-
-// load/store number of element depends on buffer type
-#define GC_LOAD1(r, name, offset) \
- r = name##_ptr[offset]
-
-#define GC_LOAD2(r, name, offset) \
- GC_LOAD1(r[0], name, offset); \
- GC_LOAD1(r[1], name, (offset) + uint(1))
-
-#define GC_LOAD3(r, name, offset) \
- GC_LOAD1(r[0], name, offset); \
- GC_LOAD1(r[1], name, (offset) + uint(1)); \
- GC_LOAD1(r[2], name, (offset) + uint(2))
-
-#define GC_STORE1(value, name, offset) \
- name##_ptr[offset] = value
-
-#define GC_STORE2(value, name, offset) \
- GC_STORE1(value[0], name, offset); \
- GC_STORE1(value[1], name, (offset) + uint(1))
-
-#define GC_STORE3(value, name, offset) \
- GC_STORE1(value[0], name, offset); \
- GC_STORE1(value[1], name, (offset) + uint(1)); \
- GC_STORE1(value[2], name, (offset) + uint(2))
-
-// has to manually expand them since not supported by compiler
-#define GC_LOAD1_1D_OFFSET(r, name, x) \
- GC_LOAD1(r, name, gc_vector_offset(name, int(x)) >> name##_buffer_data_type_size)
-
-#define GC_LOAD1_2D_OFFSET(r, name, x, y) \
- GC_LOAD1(r, name, gc_image_offset(name, int(x), int(y)) >> name##_buffer_data_type_size)
-
-#define GC_LOAD1_3D_OFFSET(r, name, x, y, z) \
- GC_LOAD1(r, name, gc_tensor3D_offset(name, int(x), int(y), int(z)) >> name##_buffer_data_type_size)
-
-#define GC_STORE1_1D_OFFSET(value, name, x) \
- GC_STORE1(value, name, gc_vector_offset(name, int(x)) >> name##_buffer_data_type_size)
-
-#define GC_STORE1_2D_OFFSET(value, name, x, y) \
- GC_STORE1(value, name, gc_image_offset(name, int(x), int(y)) >> name##_buffer_data_type_size)
-
-#define GC_STORE1_3D_OFFSET(value, name, x, y, z) \
- GC_STORE1(value, name, gc_tensor3D_offset(name, int(x), int(y), int(z)) >> name##_buffer_data_type_size)
-
-#define GC_LOAD2_1D_OFFSET(r, name, x) \
- GC_LOAD2(r, name, gc_vector_offset(name, int(x)) >> name##_buffer_data_type_size)
-
-#define GC_LOAD2_2D_OFFSET(r, name, x, y) \
- GC_LOAD2(r, name, gc_image_offset(name, int(x), int(y)) >> name##_buffer_data_type_size)
-
-#define GC_LOAD2_3D_OFFSET(r, name, x, y, z) \
- GC_LOAD2(r, name, gc_tensor3D_offset(name, int(x), int(y), int(z)) >> name##_buffer_data_type_size)
-
-#define GC_STORE2_1D_OFFSET(value, name, x) \
- GC_STORE2(value, name, gc_vector_offset(name, int(x)) >> name##_buffer_data_type_size)
-
-#define GC_STORE2_2D_OFFSET(value, name, x, y) \
- GC_STORE2(value, name, gc_image_offset(name, int(x), int(y)) >> name##_buffer_data_type_size)
-
-#define GC_STORE2_3D_OFFSET(value, name, x, y, z) \
- GC_STORE2(value, name, gc_tensor3D_offset(name, int(x), int(y), int(z)) >> name##_buffer_data_type_size)
-
-#define GC_LOAD3_1D_OFFSET(r, name, x) \
- GC_LOAD3(r, name, gc_vector_offset(name, int(x)) >> name##_buffer_data_type_size)
-
-#define GC_LOAD3_2D_OFFSET(r, name, x, y) \
- GC_LOAD3(r, name, gc_image_offset(name, int(x), int(y)) >> name##_buffer_data_type_size)
-
-#define GC_LOAD3_3D_OFFSET(r, name, x, y, z) \
- GC_LOAD3(r, name, gc_tensor3D_offset(name, int(x), int(y), int(z)) >> name##_buffer_data_type_size)
-
-/////////////////////////////////////////////////////////////
-
-#endif // _HELPER_H
diff --git a/src/core/GLES_COMPUTE/kernels/GCDepthConcatenateLayerKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCDepthConcatenateLayerKernel.cpp
index a6111782fd..1aac2502e7 100644
--- a/src/core/GLES_COMPUTE/kernels/GCDepthConcatenateLayerKernel.cpp
+++ b/src/core/GLES_COMPUTE/kernels/GCDepthConcatenateLayerKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017, 2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -122,18 +122,9 @@ void GCDepthConcatenateLayerKernel::run(const Window &window)
do
{
- if(_input->info()->data_type() == DataType::F32)
- {
- unsigned int idx = 0;
- add_3D_tensor_argument(idx, _input, 1, slice);
- add_3D_tensor_argument(idx, _output, 2, slice);
- }
- else if(_input->info()->data_type() == DataType::F16)
- {
- unsigned int idx = 0;
- add_3D_tensor_argument(idx, _input, BufferParam(1, 3), slice);
- add_3D_tensor_argument(idx, _output, BufferParam(2, 3), slice);
- }
+ unsigned int idx = 0;
+ add_3D_tensor_argument(idx, _input, 1, slice);
+ add_3D_tensor_argument(idx, _output, 2, slice);
_kernel.update_shader_params();
diff --git a/src/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.cpp
index 06f9bce498..77423fd8bc 100644
--- a/src/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.cpp
+++ b/src/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017, 2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -397,69 +397,21 @@ void GCDirectConvolutionLayerKernel<kernel_size>::run(const Window &window)
Window slice_in = win_in.first_slice_window_3D();
unsigned int idx1 = 2 * num_arguments_per_3D_tensor();
- add_3D_tensor_argument(idx1, _weights, BufferParam(3, 2), slice);
+ add_3D_tensor_argument(idx1, _weights, 3, slice);
if(_bias != nullptr)
{
Window slice_bias;
slice_bias.use_tensor_dimensions(_bias->info()->tensor_shape());
- add_1D_tensor_argument(idx1, _bias, BufferParam(4, 2), slice_bias);
+ add_1D_tensor_argument(idx1, _bias, 4, slice_bias);
}
do
{
unsigned int idx = 0;
- switch(_input->info()->data_type())
- {
- case DataType::F16:
- switch(kernel_size)
- {
- case 1:
- add_3D_tensor_argument(idx, _input, BufferParam(1, 4), slice_in);
- add_3D_tensor_argument(idx, _output, BufferParam(2, 4), slice);
- break;
-
- case 3:
- add_3D_tensor_argument(idx, _input, BufferParam(1, 3), slice_in);
- add_3D_tensor_argument(idx, _output, BufferParam(2, 3), slice);
- break;
-
- case 5:
- add_3D_tensor_argument(idx, _input, BufferParam(1, 3), slice_in);
- add_3D_tensor_argument(idx, _output, BufferParam(2, 3), slice);
- break;
-
- default:
- ARM_COMPUTE_ERROR("Current kernel size %d is not supported", kernel_size);
- break;
- }
- break;
-
- case DataType::F32:
- switch(kernel_size)
- {
- case 1:
- case 5:
- add_3D_tensor_argument(idx, _input, BufferParam(1, 2), slice_in);
- add_3D_tensor_argument(idx, _output, BufferParam(2, 2), slice);
- break;
-
- case 3:
- add_3D_tensor_argument(idx, _input, BufferParam(1, 4), slice_in);
- add_3D_tensor_argument(idx, _output, BufferParam(2, 4), slice);
- break;
-
- default:
- ARM_COMPUTE_ERROR("Current kernel size %d is not supported", kernel_size);
- break;
- }
- break;
-
- default:
- ARM_COMPUTE_ERROR("Current data type is not supported");
- break;
- }
+ add_3D_tensor_argument(idx, _input, 1, slice_in);
+ add_3D_tensor_argument(idx, _output, 2, slice);
_kernel.update_shader_params();
enqueue(*this, slice, _lws);
diff --git a/src/core/GLES_COMPUTE/kernels/GCDropoutLayerKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCDropoutLayerKernel.cpp
index e87c902281..cd06be2585 100644
--- a/src/core/GLES_COMPUTE/kernels/GCDropoutLayerKernel.cpp
+++ b/src/core/GLES_COMPUTE/kernels/GCDropoutLayerKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017, 2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -96,9 +96,9 @@ void GCDropoutLayerKernel::run(const Window &window)
{
unsigned int idx = 0;
- add_3D_tensor_argument(idx, _input, BufferParam(1, 2), slice);
- add_3D_tensor_argument(idx, _mask, BufferParam(2, 2), slice);
- add_3D_tensor_argument(idx, _output, BufferParam(3, 2), slice);
+ add_3D_tensor_argument(idx, _input, 1, slice);
+ add_3D_tensor_argument(idx, _mask, 2, slice);
+ add_3D_tensor_argument(idx, _output, 3, slice);
_kernel.update_shader_params();
enqueue(*this, slice);
diff --git a/src/core/GLES_COMPUTE/kernels/GCGEMMMatrixAccumulateBiasesKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCGEMMMatrixAccumulateBiasesKernel.cpp
index 944585daff..7ee39346f8 100644
--- a/src/core/GLES_COMPUTE/kernels/GCGEMMMatrixAccumulateBiasesKernel.cpp
+++ b/src/core/GLES_COMPUTE/kernels/GCGEMMMatrixAccumulateBiasesKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017, 2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -117,26 +117,9 @@ void GCGEMMMatrixAccumulateBiasesKernel::run(const Window &window)
{
// Set arguments
unsigned int idx = 0;
- if(_accum->info()->data_type() == DataType::F32)
- {
- add_2D_tensor_argument(idx, _accum, 1, accum_slice);
- add_1D_tensor_argument(idx, _biases, 2, biases_slice);
- }
- else if(_accum->info()->data_type() == DataType::F16)
- {
-#if defined(ACCUM_PROCESS_4X)
- BufferParam param = { 1, 3 };
- add_2D_tensor_argument(idx, _accum, param, accum_slice);
- param.binding_point = 2;
- add_1D_tensor_argument(idx, _biases, param, biases_slice);
-#elif defined(ACCUM_PROCESS_8X) /* ACCUM_PROCESS_4X */
- BufferParam param = { 1, 4 };
- add_2D_tensor_argument(idx, _accum, param, accum_slice);
- param.binding_point = 2;
- add_1D_tensor_argument(idx, _biases, param, biases_slice);
-#endif /* ACCUM_PROCESS_4X */
- }
+ add_2D_tensor_argument(idx, _accum, 1, accum_slice);
+ add_1D_tensor_argument(idx, _biases, 2, biases_slice);
_kernel.update_shader_params();
enqueue(*this, accum_slice, _lws);
diff --git a/src/core/GLES_COMPUTE/kernels/GCGEMMMatrixMultiplyKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCGEMMMatrixMultiplyKernel.cpp
index baf1a9b85e..32fbbfeefb 100644
--- a/src/core/GLES_COMPUTE/kernels/GCGEMMMatrixMultiplyKernel.cpp
+++ b/src/core/GLES_COMPUTE/kernels/GCGEMMMatrixMultiplyKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017, 2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -201,35 +201,10 @@ void GCGEMMMatrixMultiplyKernel::run(const Window &window)
}
unsigned int idx = 0;
- switch(_input0->info()->data_type())
- {
- case DataType::F16:
-#if defined(MM_PROCESS_4X)
- add_2D_tensor_argument(idx, _input0, BufferParam(1, 2), slice);
- add_2D_tensor_argument(idx, _input1, BufferParam(2, 3), slice_b);
- add_2D_tensor_argument(idx, _output, BufferParam(3, 3), slice);
-#elif defined(MM_PROCESS_4X_OPTIMIZED) /* MM_PROCESS_4X */
- add_2D_tensor_argument(idx, _input0, BufferParam(1, 4), slice);
- add_2D_tensor_argument(idx, _input1, BufferParam(2, 3), slice_b);
- add_2D_tensor_argument(idx, _output, BufferParam(3, 3), slice);
-#elif defined(MM_PROCESS_8X) /* MM_PROCESS_4X */
- add_2D_tensor_argument(idx, _input0, BufferParam(1, 4), slice);
- add_2D_tensor_argument(idx, _input1, BufferParam(2, 4), slice_b);
- add_2D_tensor_argument(idx, _output, BufferParam(3, 4), slice);
-#endif /* MM_PROCESS_4X */
- break;
-
- case DataType::F32:
- add_2D_tensor_argument(idx, _input0, BufferParam(1, 2), slice);
- add_2D_tensor_argument(idx, _input1, BufferParam(2, 2), slice_b);
- add_2D_tensor_argument(idx, _output, BufferParam(3, 2), slice);
- break;
-
- default:
- ARM_COMPUTE_ERROR("Current data type is not supported");
- break;
- }
+ add_2D_tensor_argument(idx, _input0, 1, slice);
+ add_2D_tensor_argument(idx, _input1, 2, slice_b);
+ add_2D_tensor_argument(idx, _output, 3, slice);
_kernel.update_shader_params();
enqueue(*this, slice);
}
diff --git a/src/core/GLES_COMPUTE/kernels/GCTransposeKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCTransposeKernel.cpp
index 5edc23b95e..b3a7a90931 100644
--- a/src/core/GLES_COMPUTE/kernels/GCTransposeKernel.cpp
+++ b/src/core/GLES_COMPUTE/kernels/GCTransposeKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017, 2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -132,26 +132,9 @@ void GCTransposeKernel::run(const Window &window)
do
{
unsigned int idx = 0;
- if(_input->info()->data_type() == DataType::F32)
- {
- add_2D_tensor_argument(idx, _input, 1, slice);
- add_2D_tensor_argument(idx, _output, 2, slice);
- }
- else if(_input->info()->data_type() == DataType::F16)
- {
-#if defined(TRANSPOSE_4X4)
- BufferParam param = { 1, 3 };
- add_2D_tensor_argument(idx, _input, param, slice);
- param.binding_point = 2;
- add_2D_tensor_argument(idx, _output, param, slice);
-#elif defined(TRANSPOSE_8X8) /* TRANSPOSE_4X4 */
- BufferParam param = { 1, 4 };
- add_2D_tensor_argument(idx, _input, param, slice);
- param.binding_point = 2;
- add_2D_tensor_argument(idx, _output, param, slice);
-#endif /* TRANSPOSE_4X4 */
- }
+ add_2D_tensor_argument(idx, _input, 1, slice);
+ add_2D_tensor_argument(idx, _output, 2, slice);
_kernel.update_shader_params();
enqueue(*this, slice, _lws_hint);
}