aboutsummaryrefslogtreecommitdiff
path: root/src/runtime/CL/functions/CLGEMM.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/runtime/CL/functions/CLGEMM.cpp')
-rw-r--r--src/runtime/CL/functions/CLGEMM.cpp10
1 files changed, 7 insertions, 3 deletions
diff --git a/src/runtime/CL/functions/CLGEMM.cpp b/src/runtime/CL/functions/CLGEMM.cpp
index f81da6c0a5..7f37520f10 100644
--- a/src/runtime/CL/functions/CLGEMM.cpp
+++ b/src/runtime/CL/functions/CLGEMM.cpp
@@ -48,13 +48,17 @@ inline bool is_interleaved_transposed(int m, int n, int k, DataType data_type, b
{
bool flag = true;
- if(gpu_target_is_in(gpu_target, GPUTarget::G71, GPUTarget::G72, GPUTarget::G51, GPUTarget::G51BIG, GPUTarget::G51LIT, GPUTarget::TNOX))
+ if(gpu_target_is_in(gpu_target, GPUTarget::G71, GPUTarget::G72))
{
// COMPMID-852
if(k > 256 && m > 4 && is_data_type_float(data_type) && reshape_b_only_on_first_run)
{
- const float scale = k < 1024 ? 2.0f : 2.5f;
- flag = (scale * n) > ((1.66f * n) + 38.4f);
+ constexpr float alpha = 3.2f;
+ constexpr float fact0 = 1.51f;
+ constexpr float fact1 = 1.66f;
+ constexpr float ops = 12.0f;
+ const float scale = k > 1024 ? 1.07f : 1.0f;
+ flag = alpha + ((n * fact0) / ops) < ((fact1 * n * scale) / ops);
}
else
{