From abd03cfc7ba96462bc5a1ca6f4faa9ca22792158 Mon Sep 17 00:00:00 2001 From: Joel Liang Date: Mon, 8 Jan 2018 15:20:48 +0800 Subject: APPBROWSER-298: Remove the old shader common code Remove token pasting operator support for GLES shader Remove cs_shaders/helpers.h (The old GLES shader common code) Remove class BufferParam. We don't need to pass the buffer_data_type_shift to GLES shader. Change-Id: Ic4fa6b2fb7647b8f69759f6077ae4a5b483cc04d Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/115448 Tested-by: Jenkins Reviewed-by: Frank Lei Reviewed-by: Anthony Barbier --- src/core/GLES_COMPUTE/GCKernelLibrary.cpp | 169 +----- src/core/GLES_COMPUTE/IGCKernel.cpp | 28 +- src/core/GLES_COMPUTE/cs_shaders/helpers.h | 584 --------------------- .../kernels/GCDepthConcatenateLayerKernel.cpp | 17 +- .../kernels/GCDirectConvolutionLayerKernel.cpp | 58 +- .../GLES_COMPUTE/kernels/GCDropoutLayerKernel.cpp | 8 +- .../kernels/GCGEMMMatrixAccumulateBiasesKernel.cpp | 23 +- .../kernels/GCGEMMMatrixMultiplyKernel.cpp | 33 +- .../GLES_COMPUTE/kernels/GCTransposeKernel.cpp | 23 +- 9 files changed, 31 insertions(+), 912 deletions(-) delete mode 100644 src/core/GLES_COMPUTE/cs_shaders/helpers.h (limited to 'src/core/GLES_COMPUTE') diff --git a/src/core/GLES_COMPUTE/GCKernelLibrary.cpp b/src/core/GLES_COMPUTE/GCKernelLibrary.cpp index 7766f95bcc..4c84c674fc 100644 --- a/src/core/GLES_COMPUTE/GCKernelLibrary.cpp +++ b/src/core/GLES_COMPUTE/GCKernelLibrary.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017, 2018 ARM Limited. 
* * SPDX-License-Identifier: MIT * @@ -380,21 +380,9 @@ const std::string GCKernelLibrary::preprocess_shader(const std::string &shader_s FIRST, SKIP_COMMENTS = FIRST, RESOLVE_INCLUDES, - SKIP_PREPROCESSOR_DIRECTIVES, - SEARCH_MACRO_DEFINITIONS, - EXPAND_MACRO_USES, LAST }; - struct MacroDefinitionInfo - { - const std::vector param_list; - const std::string content; - }; - - // Found macro definitions so far - std::map macro_definitions; - // Define a GLES compute shader parser function std::function cs_parser; cs_parser = [&](const std::string & src, ParserStage stage, int nested_level) -> std::string @@ -416,35 +404,6 @@ const std::string GCKernelLibrary::preprocess_shader(const std::string &shader_s case ParserStage::RESOLVE_INCLUDES: search_pattern = R"rgx((?:^|\n)[ \t]*#include "(.*)")rgx"; break; - case ParserStage::SKIP_PREPROCESSOR_DIRECTIVES: - search_pattern = R"((^|\n)[ \t]*(#ifdef|#ifndef|#if)[^\n]+)"; - break; - case ParserStage::SEARCH_MACRO_DEFINITIONS: - search_pattern = R"((?:^|\n)[ \t]*#define[ \t]+(\w+)(?:\((\w+(?:[ \t]*,[ \t]*\w+)*)\))?(?: |\t|\\\n)*((?:(?:[^\\\n]|\\[^\n])*\\+\n)*(?:[ \t]*[^ \t\n]+)*)[ \t]*)"; - break; - case ParserStage::EXPAND_MACRO_USES: - { - if(macro_definitions.empty()) - { - // Nothing to expand - return src; - } - int i = 0; - for(auto &def : macro_definitions) - { - if(i == 0) - { - search_pattern = R"((\b)" + def.first; - } - else - { - search_pattern += R"(\b|\b)" + def.first; - } - i++; - } - search_pattern += R"(\b))"; - break; - } default: break; } @@ -469,126 +428,7 @@ const std::string GCKernelLibrary::preprocess_shader(const std::string &shader_s dst.append(cs_parser(read_file(source_name, false), ParserStage::FIRST, 0)); break; } - case ParserStage::SEARCH_MACRO_DEFINITIONS: - { - std::regex params_regex(R"(\b\w+\b)"); - const std::string macro_param_str = match.str(2); - const std::vector macro_param_list( - std::sregex_token_iterator(macro_param_str.begin(), - macro_param_str.end(), - params_regex), - 
std::sregex_token_iterator()); - - const MacroDefinitionInfo info = - { - macro_param_list, - match.str(3) - }; - // Collect the macro definition data and not change the shader source - macro_definitions.insert(std::pair(match.str(1), info)); - dst.append(match.str()); - break; - } - case ParserStage::EXPAND_MACRO_USES: - { - ptrdiff_t args_str_length = 0; - std::vector args_list; - - // Walk through argument list, because the regular expression does NOT support nested parentheses - size_t cur_args_str_pos = match.position() + match.length(); - if(src[cur_args_str_pos++] == '(') - { - int nested_parentheses = 0; - ptrdiff_t cur_arg_pos = cur_args_str_pos; - ptrdiff_t cur_arg_length = 0; - - args_str_length++; - while(src[cur_args_str_pos] != ')' || nested_parentheses != 0) - { - switch(src[cur_args_str_pos++]) - { - case '(': - nested_parentheses++; - cur_arg_length++; - break; - case ',': - if(nested_parentheses == 0) - { - args_list.push_back(src.substr(cur_arg_pos, cur_arg_length)); - cur_arg_pos = cur_args_str_pos; - cur_arg_length = 0; - } - else - { - cur_arg_length++; - } - break; - case ' ': - case '\t': - if(cur_arg_length == 0) - { - cur_arg_pos++; - } - else - { - cur_arg_length++; - } - break; - case ')': - nested_parentheses--; - // no break here! 
- default: - cur_arg_length++; - break; - } - args_str_length++; - } - if(src[cur_args_str_pos] == ')' && nested_parentheses == 0) - { - args_list.push_back(src.substr(cur_arg_pos, cur_arg_length)); - } - args_str_length++; - } - - std::string expanded_content = match.str(); - const std::vector macro_param_list = macro_definitions.at(match.str()).param_list; - - if((nested_level != 0 || !macro_param_list.empty()) && macro_param_list.size() == args_list.size()) - { - parsed_pos += args_str_length; - expanded_content = macro_definitions.at(match.str()).content; - size_t i = 0; - for(auto &param_name : macro_param_list) - { - std::regex params_regex(R"(\b)" + param_name + R"(\b)"); - expanded_content.assign(std::regex_replace(expanded_content, params_regex, args_list[i])); - ++i; - } - // Expand macro recursively - expanded_content = cs_parser(expanded_content, stage, nested_level + 1); - - if(nested_level == 0) - { - const std::regex token_pasting_rgx = std::regex(R"(\b##\b)"); - if(std::regex_search(expanded_content, token_pasting_rgx)) - { - // Remove token pasting operator "##" - expanded_content.assign(std::regex_replace(expanded_content, std::regex(token_pasting_rgx), "")); - // Trim trailing whitespace - expanded_content.assign(std::regex_replace(expanded_content, std::regex(R"([ \t]*\\\n)"), "\n")); - } - else - { - // Do not expand the macro if the result does not have token pasting operator "##" - expanded_content = src.substr(match.position(), match.length() + args_str_length); - } - } - } - dst.append(expanded_content); - break; - } case ParserStage::SKIP_COMMENTS: - case ParserStage::SKIP_PREPROCESSOR_DIRECTIVES: default: dst.append(match.str()); break; @@ -622,12 +462,7 @@ const GCProgram &GCKernelLibrary::load_program(const std::string &program_name) ARM_COMPUTE_ERROR("Embedded program for %s does not exist.", program_name.c_str()); } - // TODO(APPBROWSER-298): Do not call shader preprocessor here - // We should do the preprocess at compile time - // The 
preprocess_shader function is used for support "#include" directive and token pasting operator "##". - // This job could be done at compile time by using a python script in order to get better performance at runtime. - // BTW: We usually defined EMBEDDED_KERNELS in release build. - program = GCProgram(program_name, preprocess_shader(program_source_it->second)); + program = GCProgram(program_name, program_source_it->second); #else /* EMBEDDED_KERNELS */ // Check for binary std::string source_name = _shader_path + program_name; diff --git a/src/core/GLES_COMPUTE/IGCKernel.cpp b/src/core/GLES_COMPUTE/IGCKernel.cpp index df9c798e42..6666c0f3ae 100644 --- a/src/core/GLES_COMPUTE/IGCKernel.cpp +++ b/src/core/GLES_COMPUTE/IGCKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017, 2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -79,7 +79,7 @@ unsigned int IGCKernel::num_arguments_per_tensor() const } template -void IGCKernel::add_tensor_argument(unsigned int &idx, const IGCTensor *tensor, const BufferParam &param, const Window &window) +void IGCKernel::add_tensor_argument(unsigned int &idx, const IGCTensor *tensor, const unsigned int binding_point, const Window &window) { ARM_COMPUTE_ERROR_ON(tensor == nullptr); @@ -103,7 +103,6 @@ void IGCKernel::add_tensor_argument(unsigned int &idx, const IGCTensor *tensor, } _kernel.set_argument(idx++, offset_first_element); - _kernel.set_argument(idx++, param.buffer_data_type_shift); // Rounding up the tensor attributes structure in compute shader to a multiple of a vec4 unsigned int idx_end = ceil_to_multiple(idx, 4); @@ -113,7 +112,7 @@ void IGCKernel::add_tensor_argument(unsigned int &idx, const IGCTensor *tensor, } idx = idx_end; - ARM_COMPUTE_GL_CHECK(glBindBufferBase(GL_SHADER_STORAGE_BUFFER, param.binding_point, tensor->gc_buffer())); + ARM_COMPUTE_GL_CHECK(glBindBufferBase(GL_SHADER_STORAGE_BUFFER, binding_point, tensor->gc_buffer())); ARM_COMPUTE_ERROR_ON_MSG(idx_start + 
num_arguments_per_tensor() != idx, "add_%dD_tensor_argument() is supposed to add exactly %d arguments to the kernel", dimension_size, num_arguments_per_tensor()); @@ -122,32 +121,17 @@ void IGCKernel::add_tensor_argument(unsigned int &idx, const IGCTensor *tensor, void IGCKernel::add_1D_tensor_argument(unsigned int &idx, const IGCTensor *tensor, const unsigned int binding_point, const Window &window) { - add_tensor_argument<1>(idx, tensor, BufferParam(binding_point, 0), window); -} - -void IGCKernel::add_1D_tensor_argument(unsigned int &idx, const IGCTensor *tensor, const BufferParam &param, const Window &window) -{ - add_tensor_argument<1>(idx, tensor, param, window); + add_tensor_argument<1>(idx, tensor, binding_point, window); } void IGCKernel::add_2D_tensor_argument(unsigned int &idx, const IGCTensor *tensor, const unsigned int binding_point, const Window &window) { - add_tensor_argument<2>(idx, tensor, BufferParam(binding_point, 0), window); -} - -void IGCKernel::add_2D_tensor_argument(unsigned int &idx, const IGCTensor *tensor, const BufferParam &param, const Window &window) -{ - add_tensor_argument<2>(idx, tensor, param, window); + add_tensor_argument<2>(idx, tensor, binding_point, window); } void IGCKernel::add_3D_tensor_argument(unsigned int &idx, const IGCTensor *tensor, const unsigned int binding_point, const Window &window) { - add_tensor_argument<3>(idx, tensor, BufferParam(binding_point, 0), window); -} - -void IGCKernel::add_3D_tensor_argument(unsigned int &idx, const IGCTensor *tensor, const BufferParam &param, const Window &window) -{ - add_tensor_argument<3>(idx, tensor, param, window); + add_tensor_argument<3>(idx, tensor, binding_point, window); } unsigned int IGCKernel::num_arguments_per_1D_tensor() const diff --git a/src/core/GLES_COMPUTE/cs_shaders/helpers.h b/src/core/GLES_COMPUTE/cs_shaders/helpers.h deleted file mode 100644 index ba27eec716..0000000000 --- a/src/core/GLES_COMPUTE/cs_shaders/helpers.h +++ /dev/null @@ -1,584 +0,0 @@ -/* - * Copyright 
(c) 2017 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#ifndef ARM_COMPUTE_HELPER_H -#define ARM_COMPUTE_HELPER_H - -#define CLAMP(x, min_val, max_val) min(max(x, min_val), max_val) - -#define VEC_DATA_TYPE_STR(type, size) type##size -#define VEC_DATA_TYPE(type, size) VEC_DATA_TYPE_STR(type, size) - -#define CONVERT(x, type) type(x) - -#define PACK(value, stype, dtype) \ - pack_##stype##_##dtype(value) - -#define UNPACK(value, stype, dtype) \ - unpack_##stype##_##dtype(value) - -#define BUFFER_DECLARATION(name, location, type, access) \ - layout(std430, binding = location) access buffer name##Buffer \ - { \ - type name##_ptr[]; \ - } - -#define VECTOR_PARAM_DECLARATION(name) \ - uint name##_stride_x; \ - uint name##_step_x; \ - uint name##_offset_first_element_in_bytes; \ - uint name##_buffer_data_type_size - -#define IMAGE_PARAM_DECLARATION(name) \ - uint name##_stride_x; \ - uint name##_step_x; \ - uint name##_stride_y; \ - uint name##_step_y; \ - uint name##_offset_first_element_in_bytes; \ - uint name##_buffer_data_type_size; \ - uint name##_padding1; \ - uint name##_padding2 - -#define TENSOR3D_PARAM_DECLARATION(name) \ - uint name##_stride_x; \ - uint name##_step_x; \ - uint name##_stride_y; \ - uint name##_step_y; \ - uint name##_stride_z; \ - uint name##_step_z; \ - uint name##_offset_first_element_in_bytes; \ - uint name##_buffer_data_type_size - -/** Structure to hold Vector information */ -struct Vector -{ - uint current_offset; /**< Current offset of vector */ - uint offset_first_element_in_bytes; /**< The offset of the first element in the source image */ - uint stride_x; /**< Stride of the image in X dimension (in bytes) */ -}; - -/** Structure to hold Image information */ -struct Image -{ - uint current_offset; /**< Current offset of image */ - uint offset_first_element_in_bytes; /**< The offset of the first element in the source image */ - uint stride_x; /**< Stride of the image in X dimension (in bytes) */ - uint stride_y; /**< Stride of the image in Y dimension (in bytes) */ -}; - -/** 
Structure to hold 3D tensor information */ -struct Tensor3D -{ - uint current_offset; /**< Current offset of tensor */ - uint offset_first_element_in_bytes; /**< The offset of the first element in the source image */ - uint stride_x; /**< Stride of the image in X dimension (in bytes) */ - uint stride_y; /**< Stride of the image in Y dimension (in bytes) */ - uint stride_z; /**< Stride of the image in Z dimension (in bytes) */ -}; - -///////////////////////////////////////////////////////////// -// TODO: old to be removed - -#define CONVERT_TO_VECTOR_STRUCT(name) \ - update_vector_workitem_offset(name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x) - -#define CONVERT_TO_VECTOR_STRUCT_FP16(name) \ - update_vector_workitem_offset_fp16(name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x) - -#define CONVERT_TO_VECTOR_STRUCT_NO_STEP(name) \ - update_vector_workitem_offset(name##_offset_first_element_in_bytes, name##_stride_x, uint(0)) - -#define CONVERT_TO_VECTOR_STRUCT_NO_STEP_FP16(name) \ - update_vector_workitem_offset_fp16(name##_offset_first_element_in_bytes, name##_stride_x, uint(0)) - -#define CONVERT_TO_IMAGE_STRUCT(name) \ - update_image_workitem_offset(name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x, name##_stride_y, name##_step_y) - -#define CONVERT_TO_IMAGE_STRUCT_FP16(name) \ - update_image_workitem_offset_fp16(name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x, name##_stride_y, name##_step_y) - -#define CONVERT_TO_IMAGE_STRUCT_NO_STEP(name) \ - update_image_workitem_offset(name##_offset_first_element_in_bytes, name##_stride_x, uint(0), name##_stride_y, uint(0)) - -#define CONVERT_TO_IMAGE_STRUCT_NO_STEP_FP16(name) \ - update_image_workitem_offset_fp16(name##_offset_first_element_in_bytes, name##_stride_x, uint(0), name##_stride_y, uint(0)) - -#define CONVERT_TENSOR3D_TO_IMAGE_STRUCT_NO_STEP(name) \ - update_image_from_tensor3D_workitem_offset(name##_offset_first_element_in_bytes, 
name##_stride_x, uint(0), name##_stride_y, uint(0), name##_stride_z, name##_step_z) - -#define CONVERT_TENSOR3D_TO_IMAGE_STRUCT_NO_STEP_FP16(name) \ - update_image_from_tensor3D_workitem_offset_fp16(name##_offset_first_element_in_bytes, name##_stride_x, uint(0), name##_stride_y, uint(0), name##_stride_z, name##_step_z) - -#define CONVERT_TENSOR3D_TO_IMAGE_STRUCT(name) \ - update_image_from_tensor3D_workitem_offset(name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x, name##_stride_y, name##_step_y, name##_stride_z, name##_step_z) - -#define CONVERT_TENSOR3D_TO_IMAGE_STRUCT_FP16(name) \ - update_image_from_tensor3D_workitem_offset_fp16(name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x, name##_stride_y, name##_step_y, name##_stride_z, name##_step_z) - -#define CONVERT_TO_TENSOR3D_STRUCT(name) \ - update_tensor3D_workitem_offset(name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x, name##_stride_y, name##_step_y, \ - name##_stride_z, name##_step_z) - -#define CONVERT_TO_TENSOR3D_STRUCT_FP16(name) \ - update_tensor3D_workitem_offset_fp16(name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x, name##_stride_y, name##_step_y, \ - name##_stride_z, name##_step_z) - -#define CONVERT_TO_TENSOR3D_STRUCT_NO_STEP(name) \ - update_tensor3D_workitem_offset(name##_offset_first_element_in_bytes, name##_stride_x, uint(0), name##_stride_y, uint(0), name##_stride_z, uint(0)) - -#define CONVERT_TO_TENSOR3D_STRUCT_NO_STEP_FP16(name) \ - update_tensor3D_workitem_offset_fp16(name##_offset_first_element_in_bytes, name##_stride_x, uint(0), name##_stride_y, uint(0), name##_stride_z, uint(0)) - -// FIXME: Redesign the macros if different data types are supported. -#define LOAD4(name, offset) \ - name##_ptr[offset] - -#define STORE4(name, offset, value) \ - name##_ptr[offset] = value - -// Load 1 element, which size is determined by ssbo type. 
-#define LOAD1(r, name, offset) \ - r = name##_ptr[offset] - -#define STORE1(name, offset, value) \ - name##_ptr[offset] = value - -#define LOAD2(r, name, offset) \ - LOAD1(r[0], name, offset); \ - LOAD1(r[1], name, (offset) + uint(1)) - -#define STORE2(name, offset, value) \ - name##_ptr[offset] = value[0]; \ - name##_ptr[(offset) + uint(1)] = value[1] - -#define LOAD3(r, name, offset) \ - LOAD1(r[0], name, offset); \ - LOAD1(r[1], name, (offset) + uint(1)); \ - LOAD1(r[2], name, (offset) + uint(2)) - -#define CURRENT_OFFSET(name) \ - name.current_offset - -/** Wrap vector information into an Vector structure, and make the offset to be this workitem's position. - * - * @param[in] offset_first_element_in_bytes The offset of the first element in the source vector - * @param[in] stride_x Stride of the vector in X dimension (in bytes) - * @param[in] step_x stride_x * number of elements along X processed per workitem(in bytes) - * - * @return An vector object - */ -Vector update_vector_workitem_offset(uint offset_first_element_in_bytes, uint stride_x, uint step_x) -{ - Vector vector; - vector.offset_first_element_in_bytes = offset_first_element_in_bytes; - vector.stride_x = stride_x; - vector.current_offset = (vector.offset_first_element_in_bytes + gl_GlobalInvocationID.x * step_x) >> 2; - - return vector; -} - -/** Wrap vector information into an Vector structure, and make the offset to be this workitem's position. 
- * - * @param[in] offset_first_element_in_bytes The offset of the first element in the source vector - * @param[in] stride_x Stride of the vector in X dimension (in bytes) - * @param[in] step_x stride_x * number of elements along X processed per workitem(in bytes) - * - * @return An vector object - */ -Vector update_vector_workitem_offset_fp16(uint offset_first_element_in_bytes, uint stride_x, uint step_x) -{ - Vector vector; - vector.offset_first_element_in_bytes = offset_first_element_in_bytes; - vector.stride_x = stride_x; - vector.current_offset = vector.offset_first_element_in_bytes + gl_GlobalInvocationID.x * step_x; - - return vector; -} - -/** Wrap image information into an Image structure, and make the offset to be this workitem's position. - * - * @param[in] offset_first_element_in_bytes The offset of the first element in the source image - * @param[in] stride_x Stride of the image in X dimension (in bytes) - * @param[in] step_x stride_x * number of elements along X processed per workitem(in bytes) - * @param[in] stride_y Stride of the image in Y dimension (in bytes) - * @param[in] step_y stride_y * number of elements along Y processed per workitem(in bytes) - * - * @return An image object - */ -Image update_image_workitem_offset(uint offset_first_element_in_bytes, uint stride_x, uint step_x, uint stride_y, uint step_y) -{ - Image img; - img.offset_first_element_in_bytes = offset_first_element_in_bytes; - img.stride_x = stride_x; - img.stride_y = stride_y; - img.current_offset = (img.offset_first_element_in_bytes + gl_GlobalInvocationID.x * step_x + gl_GlobalInvocationID.y * step_y) >> 2; - - return img; -} - -/** Wrap image information into an Image structure, and make the offset to be this workitem's position. 
- * - * @param[in] offset_first_element_in_bytes The offset of the first element in the source image - * @param[in] stride_x Stride of the image in X dimension (in bytes) - * @param[in] step_x stride_x * number of elements along X processed per workitem(in bytes) - * @param[in] stride_y Stride of the image in Y dimension (in bytes) - * @param[in] step_y stride_y * number of elements along Y processed per workitem(in bytes) - * - * @return An image object - */ -Image update_image_workitem_offset_fp16(uint offset_first_element_in_bytes, uint stride_x, uint step_x, uint stride_y, uint step_y) -{ - Image img; - img.offset_first_element_in_bytes = offset_first_element_in_bytes; - img.stride_x = stride_x; - img.stride_y = stride_y; - img.current_offset = img.offset_first_element_in_bytes + gl_GlobalInvocationID.x * step_x + gl_GlobalInvocationID.y * step_y; - - return img; -} - -/** Wrap 3D tensor information into an image structure, and make the offset to be this workitem's position. - * - * @param[in] offset_first_element_in_bytes The offset of the first element in the source image - * @param[in] stride_x Stride of the image in X dimension (in bytes) - * @param[in] step_x stride_x * number of elements along X processed per workitem(in bytes) - * @param[in] stride_y Stride of the image in Y dimension (in bytes) - * @param[in] step_y stride_y * number of elements along Y processed per workitem(in bytes) - * @param[in] stride_z Stride of the image in Z dimension (in bytes) - * @param[in] step_z stride_z * number of elements along Z processed per workitem(in bytes) - * - * @return A 2D Image object - */ -Image update_image_from_tensor3D_workitem_offset(uint offset_first_element_in_bytes, uint stride_x, uint step_x, uint stride_y, uint step_y, uint stride_z, uint step_z) -{ - Image img; - img.offset_first_element_in_bytes = offset_first_element_in_bytes; - img.stride_x = stride_x; - img.stride_y = stride_y; - img.current_offset = (img.offset_first_element_in_bytes + 
gl_GlobalInvocationID.x * step_x + gl_GlobalInvocationID.y * step_y + gl_GlobalInvocationID.z * step_z) >> 2; - - return img; -} - -/** Wrap 3D tensor information into an image structure, and make the offset to be this workitem's position. - * - * @param[in] offset_first_element_in_bytes The offset of the first element in the source image - * @param[in] stride_x Stride of the image in X dimension (in bytes) - * @param[in] step_x stride_x * number of elements along X processed per workitem(in bytes) - * @param[in] stride_y Stride of the image in Y dimension (in bytes) - * @param[in] step_y stride_y * number of elements along Y processed per workitem(in bytes) - * @param[in] stride_z Stride of the image in Z dimension (in bytes) - * @param[in] step_z stride_z * number of elements along Z processed per workitem(in bytes) - * - * @return A 2D Image object - */ -Image update_image_from_tensor3D_workitem_offset_fp16(uint offset_first_element_in_bytes, uint stride_x, uint step_x, uint stride_y, uint step_y, uint stride_z, uint step_z) -{ - Image img; - img.offset_first_element_in_bytes = offset_first_element_in_bytes; - img.stride_x = stride_x; - img.stride_y = stride_y; - img.current_offset = img.offset_first_element_in_bytes + gl_GlobalInvocationID.x * step_x + gl_GlobalInvocationID.y * step_y + gl_GlobalInvocationID.z * step_z; - - return img; -} - -/** Wrap 3D tensor information into an tensor structure, and make the offset to be this workitem's position. 
- * - * @param[in] offset_first_element_in_bytes The offset of the first element in the source image - * @param[in] stride_x Stride of the image in X dimension (in bytes) - * @param[in] step_x stride_x * number of elements along X processed per workitem(in bytes) - * @param[in] stride_y Stride of the image in Y dimension (in bytes) - * @param[in] step_y stride_y * number of elements along Y processed per workitem(in bytes) - * @param[in] stride_z Stride of the image in Z dimension (in bytes) - * @param[in] step_z stride_z * number of elements along Z processed per workitem(in bytes) - * - * @return A 3D tensor object - */ -Tensor3D update_tensor3D_workitem_offset(uint offset_first_element_in_bytes, uint stride_x, uint step_x, uint stride_y, uint step_y, uint stride_z, uint step_z) -{ - Tensor3D tensor; - tensor.offset_first_element_in_bytes = offset_first_element_in_bytes; - tensor.stride_x = stride_x; - tensor.stride_y = stride_y; - tensor.stride_z = stride_z; - tensor.current_offset = (tensor.offset_first_element_in_bytes + gl_GlobalInvocationID.x * step_x + gl_GlobalInvocationID.y * step_y + gl_GlobalInvocationID.z * step_z) >> 2; - - return tensor; -} - -/** Wrap 3D tensor information into an tensor structure, and make the offset to be this workitem's position. 
- * - * @param[in] offset_first_element_in_bytes The offset of the first element in the source image - * @param[in] stride_x Stride of the image in X dimension (in bytes) - * @param[in] step_x stride_x * number of elements along X processed per workitem(in bytes) - * @param[in] stride_y Stride of the image in Y dimension (in bytes) - * @param[in] step_y stride_y * number of elements along Y processed per workitem(in bytes) - * @param[in] stride_z Stride of the image in Z dimension (in bytes) - * @param[in] step_z stride_z * number of elements along Z processed per workitem(in bytes) - * - * @return A 3D tensor object - */ -Tensor3D update_tensor3D_workitem_offset_fp16(uint offset_first_element_in_bytes, uint stride_x, uint step_x, uint stride_y, uint step_y, uint stride_z, uint step_z) -{ - Tensor3D tensor; - tensor.offset_first_element_in_bytes = offset_first_element_in_bytes; - tensor.stride_x = stride_x; - tensor.stride_y = stride_y; - tensor.stride_z = stride_z; - tensor.current_offset = tensor.offset_first_element_in_bytes + gl_GlobalInvocationID.x * step_x + gl_GlobalInvocationID.y * step_y + gl_GlobalInvocationID.z * step_z; - - return tensor; -} - -/** Get the pointer position of a Vector - * - * @param[in] vec Pointer to the starting position of the buffer - * @param[in] x Relative X position - */ -uint vector_offset(Vector vec, int x) -{ - return CONVERT(CONVERT(vec.current_offset << 2, int) + x * CONVERT(vec.stride_x, int), uint) >> 2; -} - -/** Get the pointer position of a Vector - * - * @param[in] vec Pointer to the starting position of the buffer - * @param[in] x Relative X position - */ -uint vector_offset_fp16(Vector vec, int x) -{ - return CONVERT(CONVERT(vec.current_offset, int) + x * CONVERT(vec.stride_x, int), uint); -} - -/** Get the pointer position of a Image - * - * @param[in] img Pointer to the starting position of the buffer - * @param[in] x Relative X position - * @param[in] y Relative Y position - */ -uint offset(Image img, int x, int 
y) -{ - return CONVERT(CONVERT(img.current_offset << 2, int) + x * CONVERT(img.stride_x, int) + y * CONVERT(img.stride_y, int), uint) >> 2; -} - -/** Get the pointer position of a Image - * - * @param[in] img Pointer to the starting position of the buffer - * @param[in] x Relative X position - * @param[in] y Relative Y position - */ -uint offset_fp16(Image img, int x, int y) -{ - return CONVERT(CONVERT(img.current_offset, int) + x * CONVERT(img.stride_x, int) + y * CONVERT(img.stride_y, int), uint); -} - -/** Get the pointer position of a Tensor3D - * - * @param[in] tensor Pointer to the starting postion of the buffer - * @param[in] x Relative X position - * @param[in] y Relative Y position - * @param[in] z Relative Z position - */ -uint tensor3D_offset(Tensor3D tensor, int x, int y, int z) -{ - return CONVERT(CONVERT(tensor.current_offset << 2, int) + x * CONVERT(tensor.stride_x, int) + y * CONVERT(tensor.stride_y, int) + z * CONVERT(tensor.stride_z, int), uint) >> 2; -} - -/** Get the pointer position of a Tensor3D - * - * @param[in] tensor Pointer to the starting postion of the buffer - * @param[in] x Relative X position - * @param[in] y Relative Y position - * @param[in] z Relative Z position - */ -uint tensor3D_offset_fp16(Tensor3D tensor, int x, int y, int z) -{ - return CONVERT(CONVERT(tensor.current_offset, int) + x * CONVERT(tensor.stride_x, int) + y * CONVERT(tensor.stride_y, int) + z * CONVERT(tensor.stride_z, int), uint); -} - -///////////////////////////////////////////////////////////// -// new one - -#define GC_CONVERT_TO_VECTOR_STRUCT(name) \ - gc_update_vector_workitem_offset(name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x) - -#define GC_CONVERT_TO_VECTOR_STRUCT_NO_STEP(name) \ - gc_update_vector_workitem_offset(name##_offset_first_element_in_bytes, name##_stride_x, uint(0)) - -#define GC_CONVERT_TO_IMAGE_STRUCT(name) \ - gc_update_image_workitem_offset(name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x, 
name##_stride_y, name##_step_y) - -#define GC_CONVERT_TO_IMAGE_STRUCT_NO_STEP(name) \ - gc_update_image_workitem_offset(name##_offset_first_element_in_bytes, name##_stride_x, uint(0), name##_stride_y, uint(0)) - -#define GC_CONVERT_TO_TENSOR3D_STRUCT(name) \ - gc_update_tensor3D_workitem_offset(name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x, name##_stride_y, name##_step_y, \ - name##_stride_z, name##_step_z) - -#define GC_CONVERT_TO_TENSOR3D_STRUCT_NO_STEP(name) \ - gc_update_tensor3D_workitem_offset(name##_offset_first_element_in_bytes, name##_stride_x, uint(0), name##_stride_y, uint(0), name##_stride_z, uint(0)) - -#define GC_CONVERT_TENSOR3D_TO_IMAGE_STRUCT(name) \ - gc_update_image_from_tensor3D_workitem_offset(name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x, name##_stride_y, name##_step_y, name##_stride_z, name##_step_z) - -#define GC_CONVERT_TENSOR3D_TO_IMAGE_STRUCT_NO_STEP(name) \ - gc_update_image_from_tensor3D_workitem_offset(name##_offset_first_element_in_bytes, name##_stride_x, uint(0), name##_stride_y, uint(0), name##_stride_z, name##_step_z) - -Vector gc_update_vector_workitem_offset(uint offset_first_element_in_bytes, uint stride_x, uint step_x) -{ - Vector vector; - vector.offset_first_element_in_bytes = offset_first_element_in_bytes; - vector.stride_x = stride_x; - vector.current_offset = vector.offset_first_element_in_bytes + gl_GlobalInvocationID.x * step_x; - - return vector; -} - -Image gc_update_image_workitem_offset(uint offset_first_element_in_bytes, uint stride_x, uint step_x, uint stride_y, uint step_y) -{ - Image img; - img.offset_first_element_in_bytes = offset_first_element_in_bytes; - img.stride_x = stride_x; - img.stride_y = stride_y; - img.current_offset = img.offset_first_element_in_bytes + gl_GlobalInvocationID.x * step_x + gl_GlobalInvocationID.y * step_y; - - return img; -} - -Tensor3D gc_update_tensor3D_workitem_offset(uint offset_first_element_in_bytes, uint stride_x, uint step_x, uint 
stride_y, uint step_y, uint stride_z, uint step_z) -{ - Tensor3D tensor; - tensor.offset_first_element_in_bytes = offset_first_element_in_bytes; - tensor.stride_x = stride_x; - tensor.stride_y = stride_y; - tensor.stride_z = stride_z; - tensor.current_offset = tensor.offset_first_element_in_bytes + gl_GlobalInvocationID.x * step_x + gl_GlobalInvocationID.y * step_y + gl_GlobalInvocationID.z * step_z; - - return tensor; -} - -Image gc_update_image_from_tensor3D_workitem_offset(uint offset_first_element_in_bytes, uint stride_x, uint step_x, uint stride_y, uint step_y, uint stride_z, uint step_z) -{ - Image img; - img.offset_first_element_in_bytes = offset_first_element_in_bytes; - img.stride_x = stride_x; - img.stride_y = stride_y; - img.current_offset = img.offset_first_element_in_bytes + gl_GlobalInvocationID.x * step_x + gl_GlobalInvocationID.y * step_y + gl_GlobalInvocationID.z * step_z; - - return img; -} - -#define GC_CURRENT_OFFSET(name) \ - name.current_offset - -uint gc_vector_offset(Vector vec, int x) -{ - return CONVERT(CONVERT(vec.current_offset, int) + x * CONVERT(vec.stride_x, int), uint); -} - -uint gc_image_offset(Image img, int x, int y) -{ - return CONVERT(CONVERT(img.current_offset, int) + x * CONVERT(img.stride_x, int) + y * CONVERT(img.stride_y, int), uint); -} - -uint gc_tensor3D_offset(Tensor3D tensor, int x, int y, int z) -{ - return CONVERT(CONVERT(tensor.current_offset, int) + x * CONVERT(tensor.stride_x, int) + y * CONVERT(tensor.stride_y, int) + z * CONVERT(tensor.stride_z, int), uint); -} - -// load/store number of element depends on buffer type -#define GC_LOAD1(r, name, offset) \ - r = name##_ptr[offset] - -#define GC_LOAD2(r, name, offset) \ - GC_LOAD1(r[0], name, offset); \ - GC_LOAD1(r[1], name, (offset) + uint(1)) - -#define GC_LOAD3(r, name, offset) \ - GC_LOAD1(r[0], name, offset); \ - GC_LOAD1(r[1], name, (offset) + uint(1)); \ - GC_LOAD1(r[2], name, (offset) + uint(2)) - -#define GC_STORE1(value, name, offset) \ - 
name##_ptr[offset] = value - -#define GC_STORE2(value, name, offset) \ - GC_STORE1(value[0], name, offset); \ - GC_STORE1(value[1], name, (offset) + uint(1)) - -#define GC_STORE3(value, name, offset) \ - GC_STORE1(value[0], name, offset); \ - GC_STORE1(value[1], name, (offset) + uint(1)); \ - GC_STORE1(value[2], name, (offset) + uint(2)) - -// has to manually expand them since not supported by compiler -#define GC_LOAD1_1D_OFFSET(r, name, x) \ - GC_LOAD1(r, name, gc_vector_offset(name, int(x)) >> name##_buffer_data_type_size) - -#define GC_LOAD1_2D_OFFSET(r, name, x, y) \ - GC_LOAD1(r, name, gc_image_offset(name, int(x), int(y)) >> name##_buffer_data_type_size) - -#define GC_LOAD1_3D_OFFSET(r, name, x, y, z) \ - GC_LOAD1(r, name, gc_tensor3D_offset(name, int(x), int(y), int(z)) >> name##_buffer_data_type_size) - -#define GC_STORE1_1D_OFFSET(value, name, x) \ - GC_STORE1(value, name, gc_vector_offset(name, int(x)) >> name##_buffer_data_type_size) - -#define GC_STORE1_2D_OFFSET(value, name, x, y) \ - GC_STORE1(value, name, gc_image_offset(name, int(x), int(y)) >> name##_buffer_data_type_size) - -#define GC_STORE1_3D_OFFSET(value, name, x, y, z) \ - GC_STORE1(value, name, gc_tensor3D_offset(name, int(x), int(y), int(z)) >> name##_buffer_data_type_size) - -#define GC_LOAD2_1D_OFFSET(r, name, x) \ - GC_LOAD2(r, name, gc_vector_offset(name, int(x)) >> name##_buffer_data_type_size) - -#define GC_LOAD2_2D_OFFSET(r, name, x, y) \ - GC_LOAD2(r, name, gc_image_offset(name, int(x), int(y)) >> name##_buffer_data_type_size) - -#define GC_LOAD2_3D_OFFSET(r, name, x, y, z) \ - GC_LOAD2(r, name, gc_tensor3D_offset(name, int(x), int(y), int(z)) >> name##_buffer_data_type_size) - -#define GC_STORE2_1D_OFFSET(value, name, x) \ - GC_STORE2(value, name, gc_vector_offset(name, int(x)) >> name##_buffer_data_type_size) - -#define GC_STORE2_2D_OFFSET(value, name, x, y) \ - GC_STORE2(value, name, gc_image_offset(name, int(x), int(y)) >> name##_buffer_data_type_size) - -#define 
GC_STORE2_3D_OFFSET(value, name, x, y, z) \ - GC_STORE2(value, name, gc_tensor3D_offset(name, int(x), int(y), int(z)) >> name##_buffer_data_type_size) - -#define GC_LOAD3_1D_OFFSET(r, name, x) \ - GC_LOAD3(r, name, gc_vector_offset(name, int(x)) >> name##_buffer_data_type_size) - -#define GC_LOAD3_2D_OFFSET(r, name, x, y) \ - GC_LOAD3(r, name, gc_image_offset(name, int(x), int(y)) >> name##_buffer_data_type_size) - -#define GC_LOAD3_3D_OFFSET(r, name, x, y, z) \ - GC_LOAD3(r, name, gc_tensor3D_offset(name, int(x), int(y), int(z)) >> name##_buffer_data_type_size) - -///////////////////////////////////////////////////////////// - -#endif // _HELPER_H diff --git a/src/core/GLES_COMPUTE/kernels/GCDepthConcatenateLayerKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCDepthConcatenateLayerKernel.cpp index a6111782fd..1aac2502e7 100644 --- a/src/core/GLES_COMPUTE/kernels/GCDepthConcatenateLayerKernel.cpp +++ b/src/core/GLES_COMPUTE/kernels/GCDepthConcatenateLayerKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017, 2018 ARM Limited. 
* * SPDX-License-Identifier: MIT * @@ -122,18 +122,9 @@ void GCDepthConcatenateLayerKernel::run(const Window &window) do { - if(_input->info()->data_type() == DataType::F32) - { - unsigned int idx = 0; - add_3D_tensor_argument(idx, _input, 1, slice); - add_3D_tensor_argument(idx, _output, 2, slice); - } - else if(_input->info()->data_type() == DataType::F16) - { - unsigned int idx = 0; - add_3D_tensor_argument(idx, _input, BufferParam(1, 3), slice); - add_3D_tensor_argument(idx, _output, BufferParam(2, 3), slice); - } + unsigned int idx = 0; + add_3D_tensor_argument(idx, _input, 1, slice); + add_3D_tensor_argument(idx, _output, 2, slice); _kernel.update_shader_params(); diff --git a/src/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.cpp index 06f9bce498..77423fd8bc 100644 --- a/src/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.cpp +++ b/src/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017, 2018 ARM Limited. 
* * SPDX-License-Identifier: MIT * @@ -397,69 +397,21 @@ void GCDirectConvolutionLayerKernel::run(const Window &window) Window slice_in = win_in.first_slice_window_3D(); unsigned int idx1 = 2 * num_arguments_per_3D_tensor(); - add_3D_tensor_argument(idx1, _weights, BufferParam(3, 2), slice); + add_3D_tensor_argument(idx1, _weights, 3, slice); if(_bias != nullptr) { Window slice_bias; slice_bias.use_tensor_dimensions(_bias->info()->tensor_shape()); - add_1D_tensor_argument(idx1, _bias, BufferParam(4, 2), slice_bias); + add_1D_tensor_argument(idx1, _bias, 4, slice_bias); } do { unsigned int idx = 0; - switch(_input->info()->data_type()) - { - case DataType::F16: - switch(kernel_size) - { - case 1: - add_3D_tensor_argument(idx, _input, BufferParam(1, 4), slice_in); - add_3D_tensor_argument(idx, _output, BufferParam(2, 4), slice); - break; - - case 3: - add_3D_tensor_argument(idx, _input, BufferParam(1, 3), slice_in); - add_3D_tensor_argument(idx, _output, BufferParam(2, 3), slice); - break; - - case 5: - add_3D_tensor_argument(idx, _input, BufferParam(1, 3), slice_in); - add_3D_tensor_argument(idx, _output, BufferParam(2, 3), slice); - break; - - default: - ARM_COMPUTE_ERROR("Current kernel size %d is not supported", kernel_size); - break; - } - break; - - case DataType::F32: - switch(kernel_size) - { - case 1: - case 5: - add_3D_tensor_argument(idx, _input, BufferParam(1, 2), slice_in); - add_3D_tensor_argument(idx, _output, BufferParam(2, 2), slice); - break; - - case 3: - add_3D_tensor_argument(idx, _input, BufferParam(1, 4), slice_in); - add_3D_tensor_argument(idx, _output, BufferParam(2, 4), slice); - break; - - default: - ARM_COMPUTE_ERROR("Current kernel size %d is not supported", kernel_size); - break; - } - break; - - default: - ARM_COMPUTE_ERROR("Current data type is not supported"); - break; - } + add_3D_tensor_argument(idx, _input, 1, slice_in); + add_3D_tensor_argument(idx, _output, 2, slice); _kernel.update_shader_params(); enqueue(*this, slice, _lws); 
diff --git a/src/core/GLES_COMPUTE/kernels/GCDropoutLayerKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCDropoutLayerKernel.cpp index e87c902281..cd06be2585 100644 --- a/src/core/GLES_COMPUTE/kernels/GCDropoutLayerKernel.cpp +++ b/src/core/GLES_COMPUTE/kernels/GCDropoutLayerKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017, 2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -96,9 +96,9 @@ void GCDropoutLayerKernel::run(const Window &window) { unsigned int idx = 0; - add_3D_tensor_argument(idx, _input, BufferParam(1, 2), slice); - add_3D_tensor_argument(idx, _mask, BufferParam(2, 2), slice); - add_3D_tensor_argument(idx, _output, BufferParam(3, 2), slice); + add_3D_tensor_argument(idx, _input, 1, slice); + add_3D_tensor_argument(idx, _mask, 2, slice); + add_3D_tensor_argument(idx, _output, 3, slice); _kernel.update_shader_params(); enqueue(*this, slice); diff --git a/src/core/GLES_COMPUTE/kernels/GCGEMMMatrixAccumulateBiasesKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCGEMMMatrixAccumulateBiasesKernel.cpp index 944585daff..7ee39346f8 100644 --- a/src/core/GLES_COMPUTE/kernels/GCGEMMMatrixAccumulateBiasesKernel.cpp +++ b/src/core/GLES_COMPUTE/kernels/GCGEMMMatrixAccumulateBiasesKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017, 2018 ARM Limited. 
* * SPDX-License-Identifier: MIT * @@ -117,26 +117,9 @@ void GCGEMMMatrixAccumulateBiasesKernel::run(const Window &window) { // Set arguments unsigned int idx = 0; - if(_accum->info()->data_type() == DataType::F32) - { - add_2D_tensor_argument(idx, _accum, 1, accum_slice); - add_1D_tensor_argument(idx, _biases, 2, biases_slice); - } - else if(_accum->info()->data_type() == DataType::F16) - { -#if defined(ACCUM_PROCESS_4X) - BufferParam param = { 1, 3 }; - add_2D_tensor_argument(idx, _accum, param, accum_slice); - param.binding_point = 2; - add_1D_tensor_argument(idx, _biases, param, biases_slice); -#elif defined(ACCUM_PROCESS_8X) /* ACCUM_PROCESS_4X */ - BufferParam param = { 1, 4 }; - add_2D_tensor_argument(idx, _accum, param, accum_slice); - param.binding_point = 2; - add_1D_tensor_argument(idx, _biases, param, biases_slice); -#endif /* ACCUM_PROCESS_4X */ - } + add_2D_tensor_argument(idx, _accum, 1, accum_slice); + add_1D_tensor_argument(idx, _biases, 2, biases_slice); _kernel.update_shader_params(); enqueue(*this, accum_slice, _lws); diff --git a/src/core/GLES_COMPUTE/kernels/GCGEMMMatrixMultiplyKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCGEMMMatrixMultiplyKernel.cpp index baf1a9b85e..32fbbfeefb 100644 --- a/src/core/GLES_COMPUTE/kernels/GCGEMMMatrixMultiplyKernel.cpp +++ b/src/core/GLES_COMPUTE/kernels/GCGEMMMatrixMultiplyKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017, 2018 ARM Limited. 
* * SPDX-License-Identifier: MIT * @@ -201,35 +201,10 @@ void GCGEMMMatrixMultiplyKernel::run(const Window &window) } unsigned int idx = 0; - switch(_input0->info()->data_type()) - { - case DataType::F16: -#if defined(MM_PROCESS_4X) - add_2D_tensor_argument(idx, _input0, BufferParam(1, 2), slice); - add_2D_tensor_argument(idx, _input1, BufferParam(2, 3), slice_b); - add_2D_tensor_argument(idx, _output, BufferParam(3, 3), slice); -#elif defined(MM_PROCESS_4X_OPTIMIZED) /* MM_PROCESS_4X */ - add_2D_tensor_argument(idx, _input0, BufferParam(1, 4), slice); - add_2D_tensor_argument(idx, _input1, BufferParam(2, 3), slice_b); - add_2D_tensor_argument(idx, _output, BufferParam(3, 3), slice); -#elif defined(MM_PROCESS_8X) /* MM_PROCESS_4X */ - add_2D_tensor_argument(idx, _input0, BufferParam(1, 4), slice); - add_2D_tensor_argument(idx, _input1, BufferParam(2, 4), slice_b); - add_2D_tensor_argument(idx, _output, BufferParam(3, 4), slice); -#endif /* MM_PROCESS_4X */ - break; - - case DataType::F32: - add_2D_tensor_argument(idx, _input0, BufferParam(1, 2), slice); - add_2D_tensor_argument(idx, _input1, BufferParam(2, 2), slice_b); - add_2D_tensor_argument(idx, _output, BufferParam(3, 2), slice); - break; - - default: - ARM_COMPUTE_ERROR("Current data type is not supported"); - break; - } + add_2D_tensor_argument(idx, _input0, 1, slice); + add_2D_tensor_argument(idx, _input1, 2, slice_b); + add_2D_tensor_argument(idx, _output, 3, slice); _kernel.update_shader_params(); enqueue(*this, slice); } diff --git a/src/core/GLES_COMPUTE/kernels/GCTransposeKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCTransposeKernel.cpp index 5edc23b95e..b3a7a90931 100644 --- a/src/core/GLES_COMPUTE/kernels/GCTransposeKernel.cpp +++ b/src/core/GLES_COMPUTE/kernels/GCTransposeKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017, 2018 ARM Limited. 
* * SPDX-License-Identifier: MIT * @@ -132,26 +132,9 @@ void GCTransposeKernel::run(const Window &window) do { unsigned int idx = 0; - if(_input->info()->data_type() == DataType::F32) - { - add_2D_tensor_argument(idx, _input, 1, slice); - add_2D_tensor_argument(idx, _output, 2, slice); - } - else if(_input->info()->data_type() == DataType::F16) - { -#if defined(TRANSPOSE_4X4) - BufferParam param = { 1, 3 }; - add_2D_tensor_argument(idx, _input, param, slice); - param.binding_point = 2; - add_2D_tensor_argument(idx, _output, param, slice); -#elif defined(TRANSPOSE_8X8) /* TRANSPOSE_4X4 */ - BufferParam param = { 1, 4 }; - add_2D_tensor_argument(idx, _input, param, slice); - param.binding_point = 2; - add_2D_tensor_argument(idx, _output, param, slice); -#endif /* TRANSPOSE_4X4 */ - } + add_2D_tensor_argument(idx, _input, 1, slice); + add_2D_tensor_argument(idx, _output, 2, slice); _kernel.update_shader_params(); enqueue(*this, slice, _lws_hint); } -- cgit v1.2.1