From 13ef1763d6eef9606afaed90fb258d1a4577f15b Mon Sep 17 00:00:00 2001 From: Georgios Pinitas Date: Wed, 14 Jul 2021 17:14:43 +0100 Subject: Improve filelist for GPU Resolves: COMPMID-4653 Signed-off-by: Georgios Pinitas Change-Id: I4f69d42369bf8ab91cd027acf1c97e92ec1ef554 Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5948 Reviewed-by: Michalis Spyrou Comments-Addressed: Arm Jenkins Tested-by: Arm Jenkins --- Android.bp | 2 - SConscript | 23 +- filelist.json | 69 ++- .../ClGemmDefaultReshapedRhsOnlyBifrost.cpp | 518 ------------------- .../ClGemmDefaultReshapedRhsOnlyValhall.cpp | 570 --------------------- 5 files changed, 59 insertions(+), 1123 deletions(-) delete mode 100644 src/core/gpu/cl/kernels/gemm/reshaped_only_rhs/ClGemmDefaultReshapedRhsOnlyBifrost.cpp delete mode 100644 src/core/gpu/cl/kernels/gemm/reshaped_only_rhs/ClGemmDefaultReshapedRhsOnlyValhall.cpp diff --git a/Android.bp b/Android.bp index 83e1e0a338..b32a9da895 100644 --- a/Android.bp +++ b/Android.bp @@ -378,8 +378,6 @@ cc_library_static { "src/core/gpu/cl/kernels/gemm/reshaped/ClGemmDefaultConfigReshapedValhall.cpp", "src/core/gpu/cl/kernels/gemm/reshaped_only_rhs/ClGemmDefaultConfigReshapedRhsOnlyBifrost.cpp", "src/core/gpu/cl/kernels/gemm/reshaped_only_rhs/ClGemmDefaultConfigReshapedRhsOnlyValhall.cpp", - "src/core/gpu/cl/kernels/gemm/reshaped_only_rhs/ClGemmDefaultReshapedRhsOnlyBifrost.cpp", - "src/core/gpu/cl/kernels/gemm/reshaped_only_rhs/ClGemmDefaultReshapedRhsOnlyValhall.cpp", "src/core/helpers/SoftmaxHelpers.cpp", "src/core/helpers/WindowHelpers.cpp", "src/core/utils/AssemblyUtils.cpp", diff --git a/SConscript b/SConscript index 03b87e737f..886ad083ad 100644 --- a/SConscript +++ b/SConscript @@ -272,7 +272,7 @@ runtime_files_hp += Glob('src/runtime/CPP/ICPPSimpleFunction.cpp') runtime_files = Glob('src/runtime/CPP/functions/*.cpp') # C API files -runtime_files_hp += filelist['c_api']['cpu'] +runtime_files_hp += filelist['c_api']['common'] if env['opencl']: runtime_files_hp += filelist['c_api']['gpu'] @@ -297,19 +297,8 @@ if env['openmp']: runtime_files_hp += Glob('src/runtime/OMP/OMPScheduler.cpp') if env['opencl']: - core_files += Glob('src/core/CL/*.cpp') - core_files += Glob('src/core/gpu/cl/*.cpp') - - runtime_files += Glob('src/runtime/CL/*.cpp') + runtime_files_hp += filelist['gpu']['common'] runtime_files += Glob('src/runtime/CL/functions/*.cpp') - runtime_files += Glob('src/runtime/CL/gemm/*.cpp') - runtime_files += Glob('src/runtime/CL/tuners/*.cpp') - runtime_files += Glob('src/runtime/gpu/cl/*.cpp') - runtime_files += Glob('src/runtime/CL/mlgo/*.cpp') - runtime_files += Glob('src/runtime/CL/gemm_auto_heuristics/*.cpp') - - runtime_files += Glob('src/gpu/cl/*.cpp') - graph_files += Glob('src/graph/backends/CL/*.cpp') operators = filelist['gpu']['operators'] for operator in operators: @@ -317,6 +306,8 @@ if env['opencl']: if "kernel" in operators[operator]["files"]: core_files += operators[operator]["files"]["kernel"] + graph_files += Glob('src/graph/backends/CL/*.cpp') + sve_o = [] core_files_sve = [] if env['neon']: @@ -330,8 +321,6 @@ if env['neon']: "arm_compute/core/NEON/kernels/assembly/", "src/core/cpu/kernels/assembly/",]) - graph_files += Glob('src/graph/backends/NEON/*.cpp') - # Load files based on user's options operators = filelist['cpu']['operators'] for operator in operators: @@ -346,9 +335,11 @@ if env['neon']: core_files += file_list core_files_sve += file_list_sve + runtime_files_hp += filelist['cpu']['common'] runtime_files_hp += Glob('src/runtime/NEON/*.cpp') runtime_files += Glob('src/runtime/NEON/functions/*.cpp') - runtime_files_hp += filelist['cpu']['all'] + + graph_files += Glob('src/graph/backends/NEON/*.cpp') bootcode_o = [] if env['os'] == 'bare_metal': diff --git a/filelist.json b/filelist.json index 6babf70eab..d520f8e1dd 100644 --- a/filelist.json +++ b/filelist.json @@ -9,7 +9,7 @@ "src/common/TensorPack.cpp" ], "c_api": { - "cpu": [ + "common": [ "src/c/AclContext.cpp", "src/c/AclOperator.cpp", "src/c/AclQueue.cpp", @@ -17,11 +17,61 @@ "src/c/AclTensorPack.cpp", "src/c/AclVersion.cpp" ], + "cpu": [ + ], "gpu": [ "src/c/cl/AclOpenClExt.cpp" ] }, "gpu": { + "common": [ + "src/core/CL/CLCompileContext.cpp", + "src/core/CL/CLHelpers.cpp", + "src/core/CL/CLKernelLibrary.cpp", + "src/core/CL/CLUtils.cpp", + "src/core/CL/ICLKernel.cpp", + "src/core/CL/ICLSimple2DKernel.cpp", + "src/core/CL/ICLSimple3DKernel.cpp", + "src/core/CL/ICLSimpleKernel.cpp", + "src/core/CL/ICLTensor.cpp", + "src/core/CL/OpenCL.cpp", + "src/core/gpu/cl/ClKernelLibrary.cpp", + "src/core/gpu/cl/kernels/gemm/ClGemmHelpers.cpp", + "src/core/gpu/cl/kernels/gemm/native/ClGemmDefaultConfigNativeBifrost.cpp", + "src/core/gpu/cl/kernels/gemm/native/ClGemmDefaultConfigNativeMidgard.cpp", + "src/core/gpu/cl/kernels/gemm/native/ClGemmDefaultConfigNativeValhall.cpp", + "src/core/gpu/cl/kernels/gemm/reshaped/ClGemmDefaultConfigReshapedBifrost.cpp", + "src/core/gpu/cl/kernels/gemm/reshaped/ClGemmDefaultConfigReshapedValhall.cpp", + "src/core/gpu/cl/kernels/gemm/reshaped_only_rhs/ClGemmDefaultConfigReshapedRhsOnlyBifrost.cpp", + "src/core/gpu/cl/kernels/gemm/reshaped_only_rhs/ClGemmDefaultConfigReshapedRhsOnlyValhall.cpp", + "src/core/CL/kernels/CLFillBorderKernel.cpp", + "src/gpu/cl/ClContext.cpp", + "src/gpu/cl/ClQueue.cpp", + "src/gpu/cl/ClTensor.cpp", + "src/runtime/CL/CLBufferAllocator.cpp", + "src/runtime/CL/CLGEMMHeuristicsHandle.cpp", + "src/runtime/CL/CLHelpers.cpp", + "src/runtime/CL/CLMemory.cpp", + "src/runtime/CL/CLMemoryRegion.cpp", + "src/runtime/CL/CLOperator.cpp", + "src/runtime/CL/CLRuntimeContext.cpp", + "src/runtime/CL/CLScheduler.cpp", + "src/runtime/CL/CLSubTensor.cpp", + "src/runtime/CL/CLTensor.cpp", + "src/runtime/CL/CLTensorAllocator.cpp", + "src/runtime/CL/CLTuner.cpp", + "src/runtime/CL/ICLSimpleFunction.cpp", + "src/runtime/CL/Utils.cpp", + "src/runtime/CL/gemm/CLGEMMDefaultTypeBifrost.cpp", + "src/runtime/CL/gemm/CLGEMMDefaultTypeMidgard.cpp", + "src/runtime/CL/gemm/CLGEMMDefaultTypeValhall.cpp", + "src/runtime/CL/gemm_auto_heuristics/CLGEMMAutoHeuristics.cpp", + "src/runtime/CL/mlgo/HeuristicTree.cpp", + "src/runtime/CL/mlgo/MLGOHeuristics.cpp", + "src/runtime/CL/mlgo/MLGOParser.cpp", + "src/runtime/CL/mlgo/Utils.cpp", + "src/runtime/CL/tuners/CLTuningParametersList.cpp" + ], "high_priority": [ "Activation", "DepthwiseConv2d", @@ -214,16 +264,8 @@ ], "kernel": [ "src/core/gpu/cl/kernels/ClGemmMatrixMultiplyNativeKernel.cpp", - "src/core/gpu/cl/kernels/gemm/ClGemmHelpers.cpp", - "src/core/gpu/cl/kernels/gemm/native/ClGemmDefaultConfigNativeBifrost.cpp", - "src/core/gpu/cl/kernels/gemm/native/ClGemmDefaultConfigNativeMidgard.cpp", - "src/core/gpu/cl/kernels/gemm/native/ClGemmDefaultConfigNativeValhall.cpp", "src/core/gpu/cl/kernels/ClGemmMatrixMultiplyReshapedKernel.cpp", - "src/core/gpu/cl/kernels/gemm/reshaped/ClGemmDefaultConfigReshapedBifrost.cpp", - "src/core/gpu/cl/kernels/gemm/reshaped/ClGemmDefaultConfigReshapedValhall.cpp", "src/core/gpu/cl/kernels/ClGemmMatrixMultiplyReshapedOnlyRhsKernel.cpp", - "src/core/gpu/cl/kernels/gemm/reshaped_only_rhs/ClGemmDefaultConfigReshapedRhsOnlyBifrost.cpp", - "src/core/gpu/cl/kernels/gemm/reshaped_only_rhs/ClGemmDefaultConfigReshapedRhsOnlyValhall.cpp", "src/core/gpu/cl/kernels/ClGemmMatrixMultiplyKernel.cpp", "src/core/gpu/cl/kernels/ClGemmReshapeRhsMatrixKernel.cpp", "src/core/gpu/cl/kernels/ClGemmReshapeLhsMatrixKernel.cpp" @@ -418,13 +460,6 @@ ] } }, - "FillBorder": { - "files": { - "kernel": [ - "src/core/CL/kernels/CLFillBorderKernel.cpp" - ] - } - }, "FuseBatchNormalization": { "files": { "kernel": [ @@ -629,7 +664,7 @@ } }, "cpu": { - "all": [ + "common": [ "src/cpu/CpuContext.cpp", "src/cpu/CpuQueue.cpp", "src/cpu/CpuTensor.cpp" diff --git a/src/core/gpu/cl/kernels/gemm/reshaped_only_rhs/ClGemmDefaultReshapedRhsOnlyBifrost.cpp b/src/core/gpu/cl/kernels/gemm/reshaped_only_rhs/ClGemmDefaultReshapedRhsOnlyBifrost.cpp deleted file mode 100644 index 7ed6b39f3e..0000000000 --- a/src/core/gpu/cl/kernels/gemm/reshaped_only_rhs/ClGemmDefaultReshapedRhsOnlyBifrost.cpp +++ /dev/null @@ -1,518 +0,0 @@ -/* - * Copyright (c) 2019-2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "src/core/gpu/cl/kernels/gemm/reshaped_only_rhs/ClGemmDefaultConfigReshapedRhsOnlyBifrost.h" - -#include "arm_compute/core/CL/CLHelpers.h" -#include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/GPUTarget.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/TensorShape.h" -#include "arm_compute/core/utils/misc/ShapeCalculator.h" -#include "src/core/gpu/cl/kernels/gemm/ClGemmHelpers.h" - -#include - -namespace arm_compute -{ -namespace opencl -{ -namespace kernels -{ -namespace gemm -{ -using namespace arm_compute::misc::shape_calculator; - -ClGemmDefaultConfigReshapedRhsOnlyBifrost::ClGemmDefaultConfigReshapedRhsOnlyBifrost(GPUTarget gpu) - : IClGemmKernelConfig(gpu) -{ -} - -std::pair ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure(unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type) -{ - using ConfigurationFunctionExecutorPtr = std::pair (ClGemmDefaultConfigReshapedRhsOnlyBifrost::*)(unsigned int m, unsigned int n, unsigned int k, - unsigned int b); - - CLGEMMConfigArray configs_G51(&ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G51_f32, - &ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G51_f16, - &ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G51_u8); - - CLGEMMConfigArray configs_G52(&ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G52_f32, - &ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G52_f16, - &ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G7x_u8); - - CLGEMMConfigArray configs_G76(&ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G76_f32, - &ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G76_f16, - &ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G76_u8); - - CLGEMMConfigArray configs_G7x(&ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G7x_f32, - &ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G7x_f16, - &ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G7x_u8); - - ConfigurationFunctionExecutorPtr func = nullptr; - - switch(_target) - { - case GPUTarget::G76: - func = configs_G76.get_function(data_type); - break; - case GPUTarget::G51: - func = configs_G51.get_function(data_type); - break; - case GPUTarget::G52: - func = configs_G52.get_function(data_type); - break; - default: - func = configs_G7x.get_function(data_type); - break; - } - - ARM_COMPUTE_ERROR_ON_MSG(func == nullptr, "Data type not support for GEMM"); - return (this->*func)(m, n, k, b); -} - -std::pair ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G7x_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b) -{ - ARM_COMPUTE_UNUSED(k); - ARM_COMPUTE_UNUSED(b); - - if(m == 1) - { - if(n <= 2548) - { - return configure_lhs_rhs_info(m, n, 1, 2, 16, 1, 4, false, true, false, true, false); - } - else - { - return configure_lhs_rhs_info(m, n, 1, 4, 16, 1, 8, false, true, false, true, false); - } - } - else - { - return configure_lhs_rhs_info(m, n, 4, 4, 4, 1, 4, false, true, false, true); - } -} - -std::pair ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G76_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b) -{ - ARM_COMPUTE_UNUSED(k); - ARM_COMPUTE_UNUSED(b); - - GEMMLHSMatrixInfo lhs_info_buf; - GEMMRHSMatrixInfo rhs_info_buf; - GEMMLHSMatrixInfo lhs_info_img; - GEMMRHSMatrixInfo rhs_info_img; - - const bool is_workload_big = ((m * n * b) / 16) >= 2048; - - if(m == 1) - { - if(n >= 8192) - { - const unsigned int h0 = std::max(n / 4, 1U); - return configure_lhs_rhs_info(m, n, 1, 4, 8, 1, h0, false, true, false, true, false); - } - else - { - const unsigned int h0 = std::max(n / 2, 1U); - if(n <= 204) - { - return configure_lhs_rhs_info(m, n, 1, 2, 16, 1, h0, false, true, false, true, false); - } - else - { - return configure_lhs_rhs_info(m, n, 1, 2, 8, 1, h0, false, true, false, true, false); - } - } - } - else - { - const int h0 = std::max(std::min(static_cast(n / 4), static_cast(16)), static_cast(1)); - if(is_workload_big) - { - std::tie(lhs_info_buf, rhs_info_buf) = configure_lhs_rhs_info(m, n, 4, 4, 4, 1, h0, false, true, false, true); - } - else - { - std::tie(lhs_info_buf, rhs_info_buf) = configure_lhs_rhs_info(m, n, 2, 4, 8, 1, h0, false, true, false, true); - } - } - - // Get lhs_info/rhs_info in case of OpenCL image - const int h0 = std::max(std::min(static_cast(n / 4), static_cast(16)), static_cast(1)); - if(is_workload_big) - { - std::tie(lhs_info_img, rhs_info_img) = configure_lhs_rhs_info(m, n, 4, 4, 4, 1, h0, false, true, false, false, true); - } - else - { - std::tie(lhs_info_img, rhs_info_img) = configure_lhs_rhs_info(m, n, 2, 4, 8, 1, h0, false, true, false, true, true); - } - - const TensorInfo tensor_rhs_info(TensorShape(n, k, b), 1, DataType::F32); - const TensorShape shape = compute_rhs_reshaped_shape(tensor_rhs_info, rhs_info_img); - const TensorInfo tensor_reshaped_info(shape, 1, DataType::F32); - - // In case of vector by matrix or small workloads, we use the OpenCL buffer rather than the OpenCL image2d - const bool use_cl_image2d = ((m == 1) || ((((m * n * b) / 16) < 2048) && n < 128)) ? false : true; - - if(bool(validate_image2d_support_on_rhs(tensor_reshaped_info, rhs_info_img)) && use_cl_image2d) - { - return std::make_pair(lhs_info_img, rhs_info_img); - } - else - { - return std::make_pair(lhs_info_buf, rhs_info_buf); - } -} - -std::pair ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G52_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b) -{ - const float workload = (static_cast(m) * static_cast(n) * static_cast(b)) / 20.0f; - const float r_nk = static_cast(n) / static_cast(k); - - GEMMLHSMatrixInfo lhs_info_buf; - GEMMRHSMatrixInfo rhs_info_buf; - GEMMLHSMatrixInfo lhs_info_img; - GEMMRHSMatrixInfo rhs_info_img; - - if(m == 1) - { - if(r_nk <= 0.4664f) - { - return configure_lhs_rhs_info(m, n, 1, 2, 16, 1, 16, false, true, false, true, false); - } - else - { - std::tie(lhs_info_img, rhs_info_img) = configure_lhs_rhs_info(m, n, 1, 4, 8, 1, 16, false, true, false, true, true); - std::tie(lhs_info_buf, rhs_info_buf) = configure_lhs_rhs_info(m, n, 1, 4, 8, 1, 16, false, true, false, true, false); - - return select_lhs_rhs_info(std::make_pair(lhs_info_img, rhs_info_img), - std::make_pair(lhs_info_buf, rhs_info_buf), - n, k, b, DataType::F32); - } - } - else - { - if(workload <= 274.4000f) - { - return configure_lhs_rhs_info(m, n, 2, 2, 4, 1, 16, false, false, false, true, false); - } - else - { - std::tie(lhs_info_img, rhs_info_img) = configure_lhs_rhs_info(m, n, 4, 4, 4, 1, 2, false, false, false, true, true); - std::tie(lhs_info_buf, rhs_info_buf) = configure_lhs_rhs_info(m, n, 4, 4, 4, 1, 2, false, false, false, true, false); - - return select_lhs_rhs_info(std::make_pair(lhs_info_img, rhs_info_img), - std::make_pair(lhs_info_buf, rhs_info_buf), - n, k, b, DataType::F32); - } - } -} - -std::pair ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G51_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b) -{ - ARM_COMPUTE_UNUSED(k); - ARM_COMPUTE_UNUSED(b); - - if(m == 1) - { - const unsigned int n0 = n < 1280 ? 2 : 4; - const unsigned int h0 = std::max(n / n0, 1U); - return configure_lhs_rhs_info(m, n, 1, n0, 4, 1, h0, false, true, false, true); - } - else - { - return configure_lhs_rhs_info(m, n, 4, 4, 4, 1, 2, false, true, false, true); - } -} - -std::pair ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G7x_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b) -{ - ARM_COMPUTE_UNUSED(k); - ARM_COMPUTE_UNUSED(b); - - if(m == 1) - { - if(n > 2048) - { - const unsigned int h0 = std::max(n / 4, 1U); - return configure_lhs_rhs_info(m, n, 1, 4, 4, 1, h0, false, true, false, true); - } - else - { - const unsigned int h0 = std::max(n / 2, 1U); - return configure_lhs_rhs_info(m, n, 1, 2, 8, 1, h0, false, true, false, true); - } - } - else - { - return configure_lhs_rhs_info(m, n, 4, 4, 4, 1, 4, false, true, false, true); - } -} - -std::pair ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G52_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b) -{ - const float r_mn = static_cast(m) / static_cast(n); - const float workload = (static_cast(m) * static_cast(n) * static_cast(b)) / 20.0f; - const float r_mk = static_cast(m) / static_cast(k); - const float r_nk = static_cast(n) / static_cast(k); - - GEMMLHSMatrixInfo lhs_info_buf; - GEMMRHSMatrixInfo rhs_info_buf; - GEMMLHSMatrixInfo lhs_info_img; - GEMMRHSMatrixInfo rhs_info_img; - - if(m == 1) - { - std::tie(lhs_info_buf, rhs_info_buf) = configure_lhs_rhs_info(m, n, 1, 4, 16, 1, 16, false, true, false, false, false); - - if(r_mk <= 0.0026f) - { - if(r_nk <= 0.4664f) - { - return configure_lhs_rhs_info(m, n, 1, 2, 16, 1, 32, false, true, false, true, false); - } - else - { - std::tie(lhs_info_img, rhs_info_img) = configure_lhs_rhs_info(m, n, 1, 4, 16, 1, 16, false, true, false, false, true); - return select_lhs_rhs_info(std::make_pair(lhs_info_img, rhs_info_img), - std::make_pair(lhs_info_buf, rhs_info_buf), - n, k, b, DataType::F16); - } - } - else - { - if(r_mk <= 0.0148f) - { - return configure_lhs_rhs_info(m, n, 1, 2, 16, 1, 32, false, true, false, true, false); - } - else - { - std::tie(lhs_info_img, rhs_info_img) = configure_lhs_rhs_info(m, n, 1, 4, 16, 1, 16, false, true, false, false, true); - return select_lhs_rhs_info(std::make_pair(lhs_info_img, rhs_info_img), - std::make_pair(lhs_info_buf, rhs_info_buf), - n, k, b, DataType::F16); - } - } - } - else - { - std::tie(lhs_info_buf, rhs_info_buf) = configure_lhs_rhs_info(m, n, 5, 8, 4, 1, 2, false, false, false, false, false); - - if(workload <= 362.6000f) - { - return configure_lhs_rhs_info(m, n, 2, 2, 8, 1, 16, false, false, false, true, false); - } - else - { - if(r_mn <= 22.6067f) - { - if(workload <= 708.8000f) - { - std::tie(lhs_info_img, rhs_info_img) = configure_lhs_rhs_info(m, n, 5, 4, 4, 1, 2, false, false, false, false, true); - return select_lhs_rhs_info(std::make_pair(lhs_info_img, rhs_info_img), - std::make_pair(lhs_info_buf, rhs_info_buf), - n, k, b, DataType::F16); - } - else - { - return configure_lhs_rhs_info(m, n, 5, 8, 2, 1, 16, false, false, false, false, false); - } - } - else - { - if(r_nk <= 0.0917f) - { - return configure_lhs_rhs_info(m, n, 2, 2, 8, 1, 16, false, false, false, true, false); - } - else - { - std::tie(lhs_info_img, rhs_info_img) = configure_lhs_rhs_info(m, n, 5, 4, 4, 1, 2, false, false, false, false, true); - return select_lhs_rhs_info(std::make_pair(lhs_info_img, rhs_info_img), - std::make_pair(lhs_info_buf, rhs_info_buf), - n, k, b, DataType::F16); - } - } - } - } -} - -std::pair ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G76_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b) -{ - ARM_COMPUTE_UNUSED(k); - - if(m == 1) - { - return configure_lhs_rhs_info(m, n, 1, 2, 16, 1, 32, false, true, false, true, false); - } - else - { - const float r_mn = static_cast(m) / static_cast(n); - const float workload = (static_cast(m) * static_cast(n) * static_cast(b)) / 20.0f; - - if(workload <= 7449.60f) - { - if(workload <= 691.60f) - { - return configure_lhs_rhs_info(m, n, 2, 2, 8, 1, 8, false, false, false, false, false); - } - else - { - if(workload <= 4155.20f) - { - return configure_lhs_rhs_info(m, n, 5, 2, 8, 1, 16, false, false, false, false, false); - } - else - { - return configure_lhs_rhs_info(m, n, 5, 8, 2, 1, 32, false, false, false, false, false); - } - } - } - else - { - if(workload <= 16300.80f) - { - if(r_mn <= 44.56f) - { - GEMMLHSMatrixInfo lhs_info_buf; - GEMMRHSMatrixInfo rhs_info_buf; - GEMMLHSMatrixInfo lhs_info_img; - GEMMRHSMatrixInfo rhs_info_img; - - std::tie(lhs_info_img, rhs_info_img) = configure_lhs_rhs_info(m, n, 8, 4, 4, 1, 1, false, true, false, false, true); - std::tie(lhs_info_buf, rhs_info_buf) = configure_lhs_rhs_info(m, n, 5, 2, 8, 1, 16, false, false, false, false, false); - - return select_lhs_rhs_info(std::make_pair(lhs_info_img, rhs_info_img), - std::make_pair(lhs_info_buf, rhs_info_buf), - n, k, b, DataType::F16); - } - else - { - return configure_lhs_rhs_info(m, n, 5, 2, 8, 1, 16, false, false, false, false, false); - } - } - else - { - GEMMLHSMatrixInfo lhs_info_buf; - GEMMRHSMatrixInfo rhs_info_buf; - GEMMLHSMatrixInfo lhs_info_img; - GEMMRHSMatrixInfo rhs_info_img; - - std::tie(lhs_info_img, rhs_info_img) = configure_lhs_rhs_info(m, n, 5, 4, 4, 1, 2, false, true, false, false, true); - std::tie(lhs_info_buf, rhs_info_buf) = configure_lhs_rhs_info(m, n, 5, 2, 8, 1, 16, false, false, false, false, false); - - return select_lhs_rhs_info(std::make_pair(lhs_info_img, rhs_info_img), - std::make_pair(lhs_info_buf, rhs_info_buf), - n, k, b, DataType::F16); - } - } - } -} - -std::pair ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G51_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b) -{ - ARM_COMPUTE_UNUSED(k); - ARM_COMPUTE_UNUSED(b); - - if(m == 1) - { - const unsigned int n0 = n < 1280 ? 2 : 4; - const unsigned int h0 = std::max(n / n0, 1U); - return configure_lhs_rhs_info(m, n, 1, n0, 8, 1, h0, false, true, false, true); - } - else - { - return configure_lhs_rhs_info(m, n, 4, 4, 4, 1, 2, false, true, false, true); - } -} - -std::pair ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G7x_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b) -{ - ARM_COMPUTE_UNUSED(k); - ARM_COMPUTE_UNUSED(b); - - if(dot8_supported(CLKernelLibrary::get().get_device())) - { - if(m == 1) - { - const unsigned int h0 = std::max(n / 2, 1U); - return configure_lhs_rhs_info(m, n, 1, 2, 16, 1, h0, false, true, false, true); - } - else - { - const unsigned int h0 = std::max(n / 4, 1U); - return configure_lhs_rhs_info(m, n, 4, 4, 16, 1, h0, false, true, false, true); - } - } - else - { - const int h0 = std::max(std::min(static_cast(n / 2), static_cast(128)), static_cast(1)); - if(m == 1) - { - return configure_lhs_rhs_info(m, n, 1, 2, 4, 1, h0, false, true, false, true); - } - else - { - return configure_lhs_rhs_info(m, n, 4, 2, 16, 1, h0, false, true, false, true); - } - } -} - -std::pair ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G76_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b) -{ - ARM_COMPUTE_UNUSED(k); - ARM_COMPUTE_UNUSED(b); - - if(m == 1) - { - const unsigned int h0 = std::max(n / 2, 1U); - return configure_lhs_rhs_info(m, n, 1, 2, 16, 1, h0, false, true, false, true); - } - else - { - return configure_lhs_rhs_info(m, n, 4, 4, 16, 1, 2, false, true, false, true); - } -} - -std::pair ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G51_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b) -{ - ARM_COMPUTE_UNUSED(k); - ARM_COMPUTE_UNUSED(b); - - if(m == 1) - { - const unsigned int h0 = std::max(n / 2, 1U); - return configure_lhs_rhs_info(m, n, 1, 4, 16, 1, h0, false, true, false, true); - } - else - { - const unsigned int h0 = std::max(n / 2, 1U); - return configure_lhs_rhs_info(m, n, 4, 2, 16, 1, h0, false, true, false, true); - } -} - -} // namespace gemm -} // namespace kernels -} // namespace opencl -} // namespace arm_compute diff --git a/src/core/gpu/cl/kernels/gemm/reshaped_only_rhs/ClGemmDefaultReshapedRhsOnlyValhall.cpp b/src/core/gpu/cl/kernels/gemm/reshaped_only_rhs/ClGemmDefaultReshapedRhsOnlyValhall.cpp deleted file mode 100644 index 4c6e633896..0000000000 --- a/src/core/gpu/cl/kernels/gemm/reshaped_only_rhs/ClGemmDefaultReshapedRhsOnlyValhall.cpp +++ /dev/null @@ -1,570 +0,0 @@ -/* - * Copyright (c) 2020-2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "src/core/gpu/cl/kernels/gemm/reshaped_only_rhs/ClGemmDefaultConfigReshapedRhsOnlyValhall.h" - -#include "arm_compute/core/CL/CLHelpers.h" -#include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/GPUTarget.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/TensorShape.h" -#include "arm_compute/core/utils/misc/ShapeCalculator.h" -#include "src/core/gpu/cl/kernels/gemm/ClGemmHelpers.h" - -#include - -namespace arm_compute -{ -namespace opencl -{ -namespace kernels -{ -namespace gemm -{ -using namespace arm_compute::misc::shape_calculator; - -ClGemmDefaultConfigReshapedRhsOnlyValhall::ClGemmDefaultConfigReshapedRhsOnlyValhall(GPUTarget gpu) - : IClGemmKernelConfig(gpu) -{ -} - -std::pair ClGemmDefaultConfigReshapedRhsOnlyValhall::configure(unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type) -{ - using ConfigurationFunctionExecutorPtr = std::pair (ClGemmDefaultConfigReshapedRhsOnlyValhall::*)(unsigned int m, unsigned int n, unsigned int k, - unsigned int b); - - CLGEMMConfigArray configs_G77(&ClGemmDefaultConfigReshapedRhsOnlyValhall::configure_G77_f32, - &ClGemmDefaultConfigReshapedRhsOnlyValhall::configure_G77_f16, - &ClGemmDefaultConfigReshapedRhsOnlyValhall::configure_G77_u8); - - CLGEMMConfigArray configs_G78(&ClGemmDefaultConfigReshapedRhsOnlyValhall::configure_G78_f32, - &ClGemmDefaultConfigReshapedRhsOnlyValhall::configure_G78_f16, - &ClGemmDefaultConfigReshapedRhsOnlyValhall::configure_G77_u8); - - ConfigurationFunctionExecutorPtr func = nullptr; - - switch(_target) - { - case GPUTarget::G78: - func = configs_G78.get_function(data_type); - break; - case GPUTarget::G77: - default: - func = configs_G77.get_function(data_type); - break; - } - - ARM_COMPUTE_ERROR_ON_MSG(func == nullptr, "Data type not support for GEMM"); - return (this->*func)(m, n, k, b); -} - -std::pair ClGemmDefaultConfigReshapedRhsOnlyValhall::configure_G77_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b) -{ - if(m == 1) - { - const float r_mn = static_cast(m) / static_cast(n); - const float r_mk = static_cast(m) / static_cast(k); - - if(r_mk <= 0.0064484127797186375) - { - if(r_mn <= 0.0028273810748942196) - { - GEMMLHSMatrixInfo lhs_info_buf; - GEMMRHSMatrixInfo rhs_info_buf; - GEMMLHSMatrixInfo lhs_info_img; - GEMMRHSMatrixInfo rhs_info_img; - - const unsigned int h0 = std::max(n / 4, 1U); - std::tie(lhs_info_img, rhs_info_img) = configure_lhs_rhs_info(m, n, 1, 4, 8, 1, 16, 0, 1, 0, 0, 1); - std::tie(lhs_info_buf, rhs_info_buf) = configure_lhs_rhs_info(m, n, 1, 4, 4, 1, h0, 0, 1, 0, 1, 0); - - return select_lhs_rhs_info(std::make_pair(lhs_info_img, rhs_info_img), - std::make_pair(lhs_info_buf, rhs_info_buf), - n, k, b, DataType::F32); - } - else - { - return configure_lhs_rhs_info(m, n, 1, 2, 16, 1, 8, 0, 1, 0, 0, 0); - } - } - else - { - if(r_mk <= 0.020312500186264515) - { - return configure_lhs_rhs_info(m, n, 1, 2, 16, 1, 4, 0, 1, 0, 0, 0); - } - else - { - return configure_lhs_rhs_info(m, n, 1, 4, 16, 1, 16, 0, 1, 0, 1, 0); - } - } - } - else - { - const float r_mn = static_cast(m) / static_cast(n); - const float workload = (static_cast(m) * static_cast(n) * static_cast(b)) / 20.0f; - const float r_mk = static_cast(m) / static_cast(k); - - if(workload <= 1999.2000122070312) - { - if(workload <= 747.1999816894531) - { - return configure_lhs_rhs_info(m, n, 2, 2, 4, 1, 8, 0, 1, 0, 1, 0); - } - else - { - GEMMLHSMatrixInfo lhs_info_buf; - GEMMRHSMatrixInfo rhs_info_buf; - GEMMLHSMatrixInfo lhs_info_img; - GEMMRHSMatrixInfo rhs_info_img; - std::tie(lhs_info_img, rhs_info_img) = configure_lhs_rhs_info(m, n, 2, 4, 8, 1, 2, 0, 0, 0, 1, 1); - std::tie(lhs_info_buf, rhs_info_buf) = configure_lhs_rhs_info(m, n, 2, 2, 4, 1, 8, 0, 1, 0, 1, 0); - - return select_lhs_rhs_info(std::make_pair(lhs_info_img, rhs_info_img), - std::make_pair(lhs_info_buf, rhs_info_buf), - n, k, b, DataType::F32); - } - } - else - { - if(r_mn <= 0.03348214365541935) - { - if(r_mk <= 0.028125000186264515) - { - return configure_lhs_rhs_info(m, n, 2, 2, 4, 1, 8, 0, 1, 0, 1, 0); - } - else - { - GEMMLHSMatrixInfo lhs_info_buf; - GEMMRHSMatrixInfo rhs_info_buf; - GEMMLHSMatrixInfo lhs_info_img; - GEMMRHSMatrixInfo rhs_info_img; - std::tie(lhs_info_img, rhs_info_img) = configure_lhs_rhs_info(m, n, 2, 4, 8, 1, 2, 0, 0, 0, 1, 1); - std::tie(lhs_info_buf, rhs_info_buf) = configure_lhs_rhs_info(m, n, 2, 2, 4, 1, 8, 0, 1, 0, 1, 0); - - return select_lhs_rhs_info(std::make_pair(lhs_info_img, rhs_info_img), - std::make_pair(lhs_info_buf, rhs_info_buf), - n, k, b, DataType::F32); - } - } - else - { - GEMMLHSMatrixInfo lhs_info_buf; - GEMMRHSMatrixInfo rhs_info_buf; - GEMMLHSMatrixInfo lhs_info_img; - GEMMRHSMatrixInfo rhs_info_img; - std::tie(lhs_info_img, rhs_info_img) = configure_lhs_rhs_info(m, n, 4, 4, 4, 1, 2, 0, 1, 0, 0, 1); - std::tie(lhs_info_buf, rhs_info_buf) = configure_lhs_rhs_info(m, n, 4, 4, 4, 1, 16, 0, 1, 0, 1, 0); - - return select_lhs_rhs_info(std::make_pair(lhs_info_img, rhs_info_img), - std::make_pair(lhs_info_buf, rhs_info_buf), - n, k, b, DataType::F32); - } - } - } -} - -std::pair ClGemmDefaultConfigReshapedRhsOnlyValhall::configure_G77_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b) -{ - ARM_COMPUTE_UNUSED(k); - ARM_COMPUTE_UNUSED(b); - - if(m == 1) - { - const unsigned int h0 = std::max(n / 2, 1U); - if(n <= 836.0) - { - return configure_lhs_rhs_info(m, n, 1, 2, 16, 1, h0, 0, 1, 0, 1, 0); - } - else - { - return configure_lhs_rhs_info(m, n, 1, 2, 8, 1, h0, 0, 1, 0, 1, 0); - } - } - else if(m < 128) - { - const int h0 = std::max(std::min(static_cast(n / 4), static_cast(256)), static_cast(1)); - if(k >= 512) - { - return configure_lhs_rhs_info(m, n, 2, 4, 16, 1, h0, 0, 1, 0, 0); - } - else - { - return configure_lhs_rhs_info(m, n, 2, 4, 8, 1, h0, 0, 1, 0, 0); - } - } - else - { - const int h0 = std::max(std::min(static_cast(n / 4), static_cast(256)), static_cast(1)); - if(n >= 64) - { - return configure_lhs_rhs_info(m, n, 4, 8, 4, 1, h0, 0, 1, 0, 0); - } - else - { - if(k >= 512) - { - return configure_lhs_rhs_info(m, n, 2, 4, 16, 1, h0, 0, 1, 0, 0); - } - else - { - return configure_lhs_rhs_info(m, n, 2, 4, 8, 1, h0, 0, 1, 0, 0); - } - } - } -} - -std::pair ClGemmDefaultConfigReshapedRhsOnlyValhall::configure_G77_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b) -{ - ARM_COMPUTE_UNUSED(k); - ARM_COMPUTE_UNUSED(b); - - if(m == 1) - { - const unsigned int h0 = std::max(n / 2, 1U); - return configure_lhs_rhs_info(m, n, 1, 4, 16, 1, h0, 0, 1, 0, 1); - } - else - { - const int h0 = std::max(std::min(static_cast(n / 4), static_cast(256)), static_cast(1)); - if(m >= 28) - { - return configure_lhs_rhs_info(m, n, 4, 4, 16, 1, h0, 0, 1, 0, 1); - } - else - { - return configure_lhs_rhs_info(m, n, 2, 4, 16, 1, h0, 0, 1, 0, 1); - } - } -} - -std::pair ClGemmDefaultConfigReshapedRhsOnlyValhall::configure_G78_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b) -{ - const float r_mn = static_cast(m) / static_cast(n); - const float r_mk = static_cast(m) / static_cast(k); - const float r_nk = static_cast(n) / static_cast(k); - const float workload = (static_cast(m) * static_cast(n) * static_cast(b)) / 20.0f; - - if(m == 1) - { - if(workload <= 278.7000f) - { - if(workload <= 7.5000f) - { - return configure_lhs_rhs_info(m, n, 1, 2, 8, 1, 2, 0, 1, 1, 0, 0); - } - else - { - if(r_mn <= 0.0031f) - { - if(workload <= 256.6000f) - { - if(workload <= 16.7500f) - { - if(r_nk <= 1.6671f) - { - return configure_lhs_rhs_info(m, n, 1, 2, 2, 1, 32, 0, 0, 0, 1, 0); - } - else - { - return configure_lhs_rhs_info(m, n, 1, 2, 8, 1, 2, 0, 1, 1, 0, 0); - } - } - else - { - return configure_lhs_rhs_info(m, n, 1, 2, 2, 1, 32, 0, 0, 0, 1, 0); - } - } - else - { - return configure_lhs_rhs_info(m, n, 1, 2, 2, 1, 32, 0, 0, 0, 1, 0); - } - } - else - { - if(r_mk <= 0.0027f) - { - if(r_mk <= 0.0014f) - { - return configure_lhs_rhs_info(m, n, 1, 2, 2, 1, 32, 0, 0, 0, 1, 0); - } - else - { - if(workload <= 8.9500f) - { - return configure_lhs_rhs_info(m, n, 1, 2, 8, 1, 2, 0, 1, 1, 0, 0); - } - else - { - return configure_lhs_rhs_info(m, n, 1, 2, 2, 1, 32, 0, 0, 0, 1, 0); - } - } - } - else - { - if(workload <= 14.1500f) - { - return configure_lhs_rhs_info(m, n, 1, 2, 8, 1, 2, 0, 1, 1, 0, 0); - } - else - { - if(r_mk <= 0.0041f) - { - return configure_lhs_rhs_info(m, n, 1, 2, 2, 1, 32, 0, 0, 0, 1, 0); - } - else - { - return configure_lhs_rhs_info(m, n, 1, 2, 8, 1, 2, 0, 1, 1, 0, 0); - } - } - } - } - } - } - else - { - if(workload <= 363.7000f) - { - if(r_mk <= 0.0031f) - { - return configure_lhs_rhs_info(m, n, 1, 4, 2, 1, 32, 0, 1, 0, 1, 0); - } - else - { - return configure_lhs_rhs_info(m, n, 1, 4, 4, 1, 32, 0, 1, 0, 1, 0); - } - } - else - { - return configure_lhs_rhs_info(m, n, 1, 4, 2, 1, 32, 0, 1, 0, 1, 0); - } - } - } - else - { - if(workload <= 1384.8000f) - { - if(workload <= 704.0000f) - { - return configure_lhs_rhs_info(m, n, 2, 2, 4, 1, 32, 0, 1, 0, 1, 0); - } - else - { - return configure_lhs_rhs_info(m, n, 2, 4, 8, 1, 4, 0, 0, 0, 1, 1); - } - } - else - { - if(workload <= 16761.6006f) - { - if(r_mn <= 187.1250f) - { - return configure_lhs_rhs_info(m, n, 4, 4, 4, 1, 16, 0, 0, 0, 1, 1); - } - else - { - return configure_lhs_rhs_info(m, n, 2, 4, 8, 1, 4, 0, 0, 0, 1, 1); - } - } - else - { - if(r_mk <= 432.4630f) - { - return configure_lhs_rhs_info(m, n, 5, 4, 4, 1, 16, 0, 0, 0, 1, 1); - } - else - { - return configure_lhs_rhs_info(m, n, 2, 4, 4, 1, 16, 0, 1, 0, 1, 1); - } - } - } - } -} - -std::pair ClGemmDefaultConfigReshapedRhsOnlyValhall::configure_G78_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b) -{ - const float r_mn = static_cast(m) / static_cast(n); - const float r_mk = static_cast(m) / static_cast(k); - const float r_nk = static_cast(n) / static_cast(k); - const float workload = (static_cast(m) * static_cast(n) * static_cast(b)) / 20.0f; - - if(m == 1) - { - if(r_mn <= 0.0038f) - { - if(workload <= 353.9000f) - { - if(workload <= 278.7000f) - { - return configure_lhs_rhs_info(m, n, 1, 2, 4, 1, 32, 0, 0, 1, 0, 0); - } - else - { - if(r_mk <= 0.0004f) - { - return configure_lhs_rhs_info(m, n, 1, 2, 4, 1, 32, 0, 0, 1, 0, 0); - } - else - { - if(r_mk <= 0.0030f) - { - return configure_lhs_rhs_info(m, n, 1, 8, 4, 1, 8, 0, 1, 1, 0, 1); - } - else - { - return configure_lhs_rhs_info(m, n, 1, 2, 4, 1, 32, 0, 0, 1, 0, 0); - } - } - } - } - else - { - if(r_nk <= 1.9384f) - { - return configure_lhs_rhs_info(m, n, 1, 2, 4, 1, 32, 0, 0, 1, 0, 0); - } - else - { - return configure_lhs_rhs_info(m, n, 1, 8, 4, 1, 8, 0, 1, 1, 0, 1); - } - } - } - else - { - if(r_nk <= 1.0368f) - { - return configure_lhs_rhs_info(m, n, 1, 2, 16, 1, 32, 0, 0, 1, 0, 0); - } - else - { - return configure_lhs_rhs_info(m, n, 1, 2, 4, 1, 32, 0, 0, 1, 0, 0); - } - } - } - else - { - if(workload <= 1422.4000f) - { - if(workload <= 704.0000f) - { - return configure_lhs_rhs_info(m, n, 2, 2, 8, 1, 32, 0, 0, 1, 0, 0); - } - else - { - if(workload <= 1197.6000f) - { - return configure_lhs_rhs_info(m, n, 2, 4, 8, 1, 8, 0, 1, 1, 0, 1); - } - else - { - if(workload <= 1241.6000f) - { - return configure_lhs_rhs_info(m, n, 2, 8, 8, 1, 16, 0, 1, 1, 0, 0); - } - else - { - return configure_lhs_rhs_info(m, n, 2, 4, 8, 1, 8, 0, 1, 1, 0, 1); - } - } - } - } - else - { - if(workload <= 2769.6000f) - { - if(workload <= 1846.4000f) - { - if(r_mn <= 2.4927f) - { - return configure_lhs_rhs_info(m, n, 2, 8, 8, 1, 16, 0, 1, 1, 0, 0); - } - else - { - return configure_lhs_rhs_info(m, n, 4, 4, 8, 1, 32, 0, 1, 1, 0, 0); - } - } - else - { - if(r_mn <= 0.6261f) - { - return configure_lhs_rhs_info(m, n, 4, 4, 8, 1, 32, 0, 1, 1, 0, 0); - } - else - { - if(r_mk <= 3.4453f) - { - if(r_mn <= 1.4135f) - { - return configure_lhs_rhs_info(m, n, 2, 8, 8, 1, 16, 0, 1, 1, 0, 0); - } - else - { - return configure_lhs_rhs_info(m, n, 4, 4, 8, 1, 32, 0, 1, 1, 0, 0); - } - } - else - { - return configure_lhs_rhs_info(m, n, 2, 8, 8, 1, 16, 0, 1, 1, 0, 0); - } - } - } - } - else - { - if(r_nk <= 0.0302f) - { - return configure_lhs_rhs_info(m, n, 2, 4, 8, 1, 8, 0, 1, 1, 0, 1); - } - else - { - if(r_mk <= 181.3750f) - { - return configure_lhs_rhs_info(m, n, 4, 4, 8, 1, 32, 0, 1, 1, 0, 0); - } - else - { - if(workload <= 28035.2002f) - { - return configure_lhs_rhs_info(m, n, 2, 8, 8, 1, 16, 0, 1, 1, 0, 0); - } - else - { - if(r_mk <= 808.6667f) - { - return configure_lhs_rhs_info(m, n, 4, 4, 8, 1, 32, 0, 1, 1, 0, 0); - } - else - { - return configure_lhs_rhs_info(m, n, 2, 8, 8, 1, 16, 0, 1, 1, 0, 0); - } - } - } - } - } - } - } -} -} // namespace gemm -} // namespace kernels -} // namespace opencl -} // namespace arm_compute -- cgit v1.2.1