From b8fc60fc9fe9ad6c48d9618fc65753fbe5813216 Mon Sep 17 00:00:00 2001 From: Michele Di Giorgio Date: Wed, 25 Apr 2018 11:58:07 +0100 Subject: COMPMID-1086: Optimizing GCGEMM - Add helpers to get target GPU information This patch moves some of the helper functions in CLHelpers.[h,cpp] to GPUTarget.[h,cpp] in order to make them available to both CL and GLES backends. Change-Id: Ib89b05a2084b73eb643b32fac13b8367cc80b6a5 Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/128988 Tested-by: Jenkins Reviewed-by: Anthony Barbier --- src/core/CL/CLHelpers.cpp | 126 ----------------- src/core/GLES_COMPUTE/GCHelpers.cpp | 34 +++++ src/core/GLES_COMPUTE/IGCKernel.cpp | 2 +- src/core/GLES_COMPUTE/OpenGLES.cpp | 18 ++- src/core/GLES_COMPUTE/gl_entries.in | 1 + .../kernels/GCGEMMInterleave4x4Kernel.cpp | 1 - .../kernels/GCGEMMMatrixAdditionKernel.cpp | 3 +- .../kernels/GCGEMMMatrixMultiplyKernel.cpp | 6 +- .../GLES_COMPUTE/kernels/GCTensorShiftKernel.cpp | 1 - .../kernels/GCWeightsReshapeKernel.cpp | 1 - src/core/GPUTarget.cpp | 154 +++++++++++++++++++++ src/runtime/GLES_COMPUTE/GCScheduler.cpp | 9 +- .../GLES_COMPUTE/functions/GCConvolutionLayer.cpp | 4 + src/runtime/GLES_COMPUTE/functions/GCGEMM.cpp | 8 +- 14 files changed, 228 insertions(+), 140 deletions(-) create mode 100644 src/core/GLES_COMPUTE/GCHelpers.cpp create mode 100644 src/core/GPUTarget.cpp (limited to 'src') diff --git a/src/core/CL/CLHelpers.cpp b/src/core/CL/CLHelpers.cpp index 9842d4d0ab..cda29d69d1 100644 --- a/src/core/CL/CLHelpers.cpp +++ b/src/core/CL/CLHelpers.cpp @@ -27,74 +27,8 @@ #include "arm_compute/core/Log.h" #include "arm_compute/core/Types.h" -#include -#include #include -namespace -{ -arm_compute::GPUTarget get_bifrost_target(const std::string &version) -{ - if(version == "G71") - { - return arm_compute::GPUTarget::G71; - } - else if(version == "G72") - { - return arm_compute::GPUTarget::G72; - } - else if(version == "G51") - { - return arm_compute::GPUTarget::G51; - } - else if(version ==
"G51BIG") - { - return arm_compute::GPUTarget::G51BIG; - } - else if(version == "G51LIT") - { - return arm_compute::GPUTarget::G51LIT; - } - else if(version == "TNOX") - { - return arm_compute::GPUTarget::TNOX; - } - else if(version == "TTRX") - { - return arm_compute::GPUTarget::TTRX; - } - else if(version == "TBOX") - { - return arm_compute::GPUTarget::TBOX; - } - else - { - return arm_compute::GPUTarget::BIFROST; - } -} - -arm_compute::GPUTarget get_midgard_target(const std::string &version) -{ - if(version == "T600") - { - return arm_compute::GPUTarget::T600; - } - else if(version == "T700") - { - return arm_compute::GPUTarget::T700; - } - else if(version == "T800") - { - return arm_compute::GPUTarget::T800; - } - else - { - return arm_compute::GPUTarget::MIDGARD; - } -} - -} // namespace - namespace arm_compute { std::string get_cl_type_from_data_type(const DataType &dt) @@ -177,61 +111,6 @@ std::string get_underlying_cl_type_from_data_type(const DataType &dt) } } -const std::string &string_from_target(GPUTarget target) -{ - static std::map gpu_target_map = - { - { GPUTarget::MIDGARD, "midgard" }, - { GPUTarget::BIFROST, "bifrost" }, - { GPUTarget::T600, "t600" }, - { GPUTarget::T700, "t700" }, - { GPUTarget::T800, "t800" }, - { GPUTarget::G71, "g71" }, - { GPUTarget::G72, "g72" }, - { GPUTarget::G51, "g51" }, - { GPUTarget::G51BIG, "g51big" }, - { GPUTarget::G51LIT, "g51lit" }, - { GPUTarget::TNOX, "tnox" }, - { GPUTarget::TTRX, "ttrx" }, - { GPUTarget::TBOX, "tbox" } - }; - - return gpu_target_map[target]; -} - -GPUTarget get_target_from_name(const std::string &device_name) -{ - std::regex mali_regex(R"(Mali-(.*))"); - std::smatch name_parts; - const bool found_mali = std::regex_search(device_name, name_parts, mali_regex); - - if(!found_mali) - { - ARM_COMPUTE_LOG_INFO_MSG_CORE("Can't find valid Mali GPU. 
Target is set to UNKNOWN."); - return GPUTarget::UNKNOWN; - } - - const char target = name_parts.str(1)[0]; - const std::string &version = name_parts.str(1); - - std::regex future_regex(R"(.*X)"); - const bool is_future_bifrost = std::regex_search(version, future_regex); - - if(target == 'G' || is_future_bifrost) - { - return get_bifrost_target(version); - } - else if(target == 'T') - { - return get_midgard_target(version); - } - else - { - ARM_COMPUTE_LOG_INFO_MSG_CORE("Mali GPU unknown. Target is set to the default one. (BIFROST)"); - return GPUTarget::BIFROST; - } -} - GPUTarget get_target_from_device(cl::Device &device) { // Query device name size @@ -240,11 +119,6 @@ GPUTarget get_target_from_device(cl::Device &device) return get_target_from_name(device_name); } -GPUTarget get_arch_from_target(GPUTarget target) -{ - return (target & GPUTarget::GPU_ARCH_MASK); -} - bool arm_non_uniform_workgroup_supported(const cl::Device &device) { return device_supports_extension(device, "cl_arm_non_uniform_work_group_size"); diff --git a/src/core/GLES_COMPUTE/GCHelpers.cpp b/src/core/GLES_COMPUTE/GCHelpers.cpp new file mode 100644 index 0000000000..8970688ca7 --- /dev/null +++ b/src/core/GLES_COMPUTE/GCHelpers.cpp @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2018 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h" + +namespace arm_compute +{ +GPUTarget get_target_from_device() +{ + const std::string device_name = reinterpret_cast(glGetString(GL_RENDERER)); + + return get_target_from_name(device_name); +} +} // namespace arm_compute diff --git a/src/core/GLES_COMPUTE/IGCKernel.cpp b/src/core/GLES_COMPUTE/IGCKernel.cpp index 55b7f0da4a..ecd63b54a4 100644 --- a/src/core/GLES_COMPUTE/IGCKernel.cpp +++ b/src/core/GLES_COMPUTE/IGCKernel.cpp @@ -62,7 +62,7 @@ void arm_compute::enqueue(IGCKernel &kernel, const Window &window, const gles::N } IGCKernel::IGCKernel() - : _kernel(), _lws_hint(gles::NDRange(1U, 1U, 1U)) + : _kernel(), _lws_hint(gles::NDRange(1U, 1U, 1U)), _target(GPUTarget::MIDGARD) { } diff --git a/src/core/GLES_COMPUTE/OpenGLES.cpp b/src/core/GLES_COMPUTE/OpenGLES.cpp index d2539d0775..e93b360125 100644 --- a/src/core/GLES_COMPUTE/OpenGLES.cpp +++ b/src/core/GLES_COMPUTE/OpenGLES.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. 
* * SPDX-License-Identifier: MIT * @@ -66,7 +66,8 @@ using glMapBufferRange_func = void *GL_APIENTRY (*)(GLenum target, GLin using glMemoryBarrier_func = void GL_APIENTRY (*)(GLbitfield barriers); using glUniform1ui_func = void GL_APIENTRY (*)(GLint location, GLuint v0); using glUnmapBuffer_func = GLboolean GL_APIENTRY (*)(GLenum target); -using glGetError_func = GLenum GL_APIENTRY (*)(); +using glGetError_func = GLenum GL_APIENTRY (*)(); +using glGetString_func = const GLubyte * GL_APIENTRY (*)(GLenum name); using glGetActiveUniformBlockiv_func = void GL_APIENTRY (*)(GLuint program, GLuint uniformBlockIndex, GLenum pname, GLint *params); using glUniformBlockBinding_func = void GL_APIENTRY (*)(GLuint program, GLuint uniformBlockIndex, GLuint uniformBlockBinding); using glGetUniformBlockIndex_func = GLuint GL_APIENTRY (*)(GLuint program, const GLchar *uniformBlockName); @@ -668,6 +669,19 @@ GLenum GL_APIENTRY glGetError(void) } } +const GLubyte *GL_APIENTRY glGetString(GLenum name) +{ + auto func = GLESSymbols::get().glGetString; + if(func != nullptr) + { + return func(name); + } + else + { + return nullptr; + } +} + void GL_APIENTRY glGetActiveUniformBlockiv(GLuint program, GLuint uniformBlockIndex, GLenum pname, GLint *params) { auto func = GLESSymbols::get().glGetActiveUniformBlockiv; diff --git a/src/core/GLES_COMPUTE/gl_entries.in b/src/core/GLES_COMPUTE/gl_entries.in index 15ce8ee819..17e3aeead7 100644 --- a/src/core/GLES_COMPUTE/gl_entries.in +++ b/src/core/GLES_COMPUTE/gl_entries.in @@ -61,3 +61,4 @@ GL_ENTRY(glGenFramebuffers) GL_ENTRY(glDeleteFramebuffers) GL_ENTRY(glBindFramebuffer) GL_ENTRY(glFramebufferTexture2D) +GL_ENTRY(glGetString) diff --git a/src/core/GLES_COMPUTE/kernels/GCGEMMInterleave4x4Kernel.cpp b/src/core/GLES_COMPUTE/kernels/GCGEMMInterleave4x4Kernel.cpp index dc86bfb2cc..171fbad702 100644 --- a/src/core/GLES_COMPUTE/kernels/GCGEMMInterleave4x4Kernel.cpp +++ b/src/core/GLES_COMPUTE/kernels/GCGEMMInterleave4x4Kernel.cpp @@ -35,7 +35,6 @@ 
#include "arm_compute/core/Window.h" using namespace arm_compute; -using namespace arm_compute::gles_compute; GCGEMMInterleave4x4Kernel::GCGEMMInterleave4x4Kernel() : _input(nullptr), _output(nullptr) diff --git a/src/core/GLES_COMPUTE/kernels/GCGEMMMatrixAdditionKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCGEMMMatrixAdditionKernel.cpp index 43846dcf32..1a68a62ea5 100644 --- a/src/core/GLES_COMPUTE/kernels/GCGEMMMatrixAdditionKernel.cpp +++ b/src/core/GLES_COMPUTE/kernels/GCGEMMMatrixAdditionKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -34,7 +34,6 @@ #include "arm_compute/core/Window.h" using namespace arm_compute; -using namespace arm_compute::gles_compute; GCGEMMMatrixAdditionKernel::GCGEMMMatrixAdditionKernel() : _input(nullptr), _output(nullptr) diff --git a/src/core/GLES_COMPUTE/kernels/GCGEMMMatrixMultiplyKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCGEMMMatrixMultiplyKernel.cpp index b4bb5470ad..2bd769cac4 100644 --- a/src/core/GLES_COMPUTE/kernels/GCGEMMMatrixMultiplyKernel.cpp +++ b/src/core/GLES_COMPUTE/kernels/GCGEMMMatrixMultiplyKernel.cpp @@ -42,7 +42,6 @@ #include using namespace arm_compute; -using namespace arm_compute::gles_compute; using namespace arm_compute::misc::shape_calculator; namespace @@ -195,10 +194,13 @@ void GCGEMMMatrixMultiplyKernel::configure(const IGCTensor *input0, const IGCTen _input1 = input1; _output = output; + // Get target architecture + GPUTarget gpu_target = get_target(); + ElementsProcessed num_elements_processed{}; // Configure kernel window - auto win_config = validate_and_configure_window(input0->info(), input1->info(), output->info(), is_interleaved_transposed, reshape_info, GPUTarget::UNKNOWN, num_elements_processed); + auto win_config = validate_and_configure_window(input0->info(), input1->info(), output->info(), is_interleaved_transposed, reshape_info, gpu_target, num_elements_processed); 
ARM_COMPUTE_ERROR_THROW_ON(win_config.first); IGCKernel::configure(win_config.second); diff --git a/src/core/GLES_COMPUTE/kernels/GCTensorShiftKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCTensorShiftKernel.cpp index 21946b7f8d..f0057df769 100644 --- a/src/core/GLES_COMPUTE/kernels/GCTensorShiftKernel.cpp +++ b/src/core/GLES_COMPUTE/kernels/GCTensorShiftKernel.cpp @@ -36,7 +36,6 @@ #include "support/ToolchainSupport.h" using namespace arm_compute; -using namespace arm_compute::gles_compute; GCTensorShiftKernel::GCTensorShiftKernel() : _input(nullptr), _lws(gles::NDRange(1U, 1U, 1U)), _left_padding(0) diff --git a/src/core/GLES_COMPUTE/kernels/GCWeightsReshapeKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCWeightsReshapeKernel.cpp index 55bf9b754b..ccbfaf8410 100644 --- a/src/core/GLES_COMPUTE/kernels/GCWeightsReshapeKernel.cpp +++ b/src/core/GLES_COMPUTE/kernels/GCWeightsReshapeKernel.cpp @@ -36,7 +36,6 @@ #include "arm_compute/core/GLES_COMPUTE/GCHelpers.h" using namespace arm_compute; -using namespace arm_compute::gles_compute; using namespace arm_compute::misc::shape_calculator; GCWeightsReshapeKernel::GCWeightsReshapeKernel() diff --git a/src/core/GPUTarget.cpp b/src/core/GPUTarget.cpp new file mode 100644 index 0000000000..575d858589 --- /dev/null +++ b/src/core/GPUTarget.cpp @@ -0,0 +1,154 @@ +/* + * Copyright (c) 2018 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/core/GPUTarget.h" +#include "arm_compute/core/Log.h" + +#include +#include + +namespace +{ +arm_compute::GPUTarget get_bifrost_target(const std::string &version) +{ + if(version == "G71") + { + return arm_compute::GPUTarget::G71; + } + else if(version == "G72") + { + return arm_compute::GPUTarget::G72; + } + else if(version == "G51") + { + return arm_compute::GPUTarget::G51; + } + else if(version == "G51BIG") + { + return arm_compute::GPUTarget::G51BIG; + } + else if(version == "G51LIT") + { + return arm_compute::GPUTarget::G51LIT; + } + else if(version == "TNOX") + { + return arm_compute::GPUTarget::TNOX; + } + else if(version == "TTRX") + { + return arm_compute::GPUTarget::TTRX; + } + else if(version == "TBOX") + { + return arm_compute::GPUTarget::TBOX; + } + else + { + return arm_compute::GPUTarget::BIFROST; + } +} + +arm_compute::GPUTarget get_midgard_target(const std::string &version) +{ + if(version == "T600") + { + return arm_compute::GPUTarget::T600; + } + else if(version == "T700") + { + return arm_compute::GPUTarget::T700; + } + else if(version == "T800") + { + return arm_compute::GPUTarget::T800; + } + else + { + return arm_compute::GPUTarget::MIDGARD; + } +} +} // namespace + +namespace arm_compute +{ +const std::string &string_from_target(GPUTarget target) +{ + static std::map gpu_target_map = + { + { GPUTarget::MIDGARD, "midgard" }, + { GPUTarget::BIFROST, "bifrost" }, + { GPUTarget::T600, "t600" }, + { GPUTarget::T700, "t700" }, + { 
GPUTarget::T800, "t800" }, + { GPUTarget::G71, "g71" }, + { GPUTarget::G72, "g72" }, + { GPUTarget::G51, "g51" }, + { GPUTarget::G51BIG, "g51big" }, + { GPUTarget::G51LIT, "g51lit" }, + { GPUTarget::TNOX, "tnox" }, + { GPUTarget::TTRX, "ttrx" }, + { GPUTarget::TBOX, "tbox" } + }; + + return gpu_target_map[target]; +} + +GPUTarget get_target_from_name(const std::string &device_name) +{ + std::regex mali_regex(R"(Mali-(.*))"); + std::smatch name_parts; + const bool found_mali = std::regex_search(device_name, name_parts, mali_regex); + + if(!found_mali) + { + ARM_COMPUTE_LOG_INFO_MSG_CORE("Can't find valid Mali GPU. Target is set to UNKNOWN."); + return GPUTarget::UNKNOWN; + } + + const char target = name_parts.str(1)[0]; + const std::string &version = name_parts.str(1); + + std::regex future_regex(R"(.*X)"); + const bool is_future_bifrost = std::regex_search(version, future_regex); + + if(target == 'G' || is_future_bifrost) + { + return get_bifrost_target(version); + } + else if(target == 'T') + { + return get_midgard_target(version); + } + else + { + ARM_COMPUTE_LOG_INFO_MSG_CORE("Mali GPU unknown. Target is set to the default one. (BIFROST)"); + return GPUTarget::BIFROST; + } +} + +GPUTarget get_arch_from_target(GPUTarget target) +{ + return (target & GPUTarget::GPU_ARCH_MASK); +} +} // namespace arm_compute diff --git a/src/runtime/GLES_COMPUTE/GCScheduler.cpp b/src/runtime/GLES_COMPUTE/GCScheduler.cpp index fcc855957f..f7812730fc 100644 --- a/src/runtime/GLES_COMPUTE/GCScheduler.cpp +++ b/src/runtime/GLES_COMPUTE/GCScheduler.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. 
* * SPDX-License-Identifier: MIT * @@ -24,6 +24,7 @@ #include "arm_compute/runtime/GLES_COMPUTE/GCScheduler.h" +#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h" #include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h" using namespace arm_compute; @@ -31,7 +32,7 @@ using namespace arm_compute; std::once_flag GCScheduler::_initialize_symbols; GCScheduler::GCScheduler() - : _display(EGL_NO_DISPLAY), _context(EGL_NO_CONTEXT) + : _display(EGL_NO_DISPLAY), _context(EGL_NO_CONTEXT), _target(GPUTarget::MIDGARD) { } @@ -48,11 +49,13 @@ void GCScheduler::default_init() { setup_context(); - GCKernelLibrary::get().init("./cs_shaders/", _display, _context); + init(_display, _context); } void GCScheduler::init(EGLDisplay dpy, EGLContext ctx) { + _target = get_target_from_device(); + GCKernelLibrary::get().init("./cs_shaders/", dpy, ctx); } diff --git a/src/runtime/GLES_COMPUTE/functions/GCConvolutionLayer.cpp b/src/runtime/GLES_COMPUTE/functions/GCConvolutionLayer.cpp index dc73eb85e6..d1ef87d32c 100644 --- a/src/runtime/GLES_COMPUTE/functions/GCConvolutionLayer.cpp +++ b/src/runtime/GLES_COMPUTE/functions/GCConvolutionLayer.cpp @@ -109,6 +109,10 @@ void GCConvolutionLayer::configure(const IGCTensor *input, const IGCTensor *weig const DataType dt = input->info()->data_type(); + // Set the GPU target for im2col and col2im + _input_im2col_kernel.set_target(GCScheduler::get().get_target()); + _output_col2im_kernel.set_target(GCScheduler::get().get_target()); + const bool append_bias = (biases != nullptr); const unsigned bias_element = (append_bias) ? 1 : 0; const IGCTensor *biases_to_use = (append_bias) ? 
biases : nullptr; diff --git a/src/runtime/GLES_COMPUTE/functions/GCGEMM.cpp b/src/runtime/GLES_COMPUTE/functions/GCGEMM.cpp index 0a75a38c50..79f8f71713 100644 --- a/src/runtime/GLES_COMPUTE/functions/GCGEMM.cpp +++ b/src/runtime/GLES_COMPUTE/functions/GCGEMM.cpp @@ -38,7 +38,6 @@ #include "arm_compute/runtime/ITensorAllocator.h" using namespace arm_compute; -using namespace arm_compute::gles_compute; namespace { @@ -92,6 +91,13 @@ void GCGEMM::configure(const IGCTensor *a, const IGCTensor *b, const IGCTensor * const IGCTensor *matrix_a = a; const IGCTensor *matrix_b = b; + // Get the GPU target + const GPUTarget gpu_target = GCScheduler::get().get_target(); + + // Set the target for the kernels + _interleave_kernel.set_target(gpu_target); + _mm_kernel.set_target(gpu_target); + // Arguments used by GEMMReshapeInfo // If we pass the matrix A and matrix B reshaped to GCGEMMMatrixMultiplyKernel, we need to pass m, n, k, mult_transpose1xW_width and mult_interleave4x4_height to GCGEMMReshapeInfo // in order to know how the matrices have been reshaped -- cgit v1.2.1