diff options
Diffstat (limited to 'src/gpu')
-rw-r--r-- | src/gpu/cl/kernels/ClDirectConv2dKernel.cpp | 6 | ||||
-rw-r--r-- | src/gpu/cl/kernels/direct_conv/ClDirectConvDefaultConfigValhall.cpp | 64 |
2 files changed, 59 insertions, 11 deletions
diff --git a/src/gpu/cl/kernels/ClDirectConv2dKernel.cpp b/src/gpu/cl/kernels/ClDirectConv2dKernel.cpp index fd14f009e1..781627117a 100644 --- a/src/gpu/cl/kernels/ClDirectConv2dKernel.cpp +++ b/src/gpu/cl/kernels/ClDirectConv2dKernel.cpp @@ -242,6 +242,12 @@ void ClDirectConv2dKernel::configure(const CLCompileContext &compile_context, IT build_options.add_option("-DSRC_TENSOR_TYPE=BUFFER"); build_options.add_option("-DSRC_DATA_TYPE=" + get_cl_type_from_data_type(src->data_type())); + build_options.add_option("-DSRC_CHANNELS=" + support::cpp11::to_string(src->dimension(0))); + build_options.add_option("-DSRC_WIDTH=" + support::cpp11::to_string(src->dimension(1))); + build_options.add_option("-DSRC_HEIGHT=" + support::cpp11::to_string(src->dimension(2))); + build_options.add_option("-DDST_CHANNELS=" + support::cpp11::to_string(dst->dimension(0))); + build_options.add_option("-DDST_WIDTH=" + support::cpp11::to_string(dst->dimension(1))); + build_options.add_option("-DDST_HEIGHT=" + support::cpp11::to_string(dst->dimension(2))); build_options.add_option("-DDST_TENSOR_TYPE=BUFFER"); build_options.add_option("-DDST_DATA_TYPE=" + get_cl_type_from_data_type(dst_data_type)); build_options.add_option_if_else(_export_to_cl_image, "-DWEI_TENSOR_TYPE=IMAGE", "-DWEI_TENSOR_TYPE=BUFFER"); diff --git a/src/gpu/cl/kernels/direct_conv/ClDirectConvDefaultConfigValhall.cpp b/src/gpu/cl/kernels/direct_conv/ClDirectConvDefaultConfigValhall.cpp index ad94678335..b693568c67 100644 --- a/src/gpu/cl/kernels/direct_conv/ClDirectConvDefaultConfigValhall.cpp +++ b/src/gpu/cl/kernels/direct_conv/ClDirectConvDefaultConfigValhall.cpp @@ -144,32 +144,45 @@ DirectConvComputeKernelInfo ClDirectConvDefaultConfigValhall::configure_G78_f16( const int32_t ofm = dst_shape[0]; const int32_t m = dst_shape[1] * dst_shape[2]; + const int32_t k = wei_shape[0]; const bool is_pointwise = (wei_shape[1] == wei_shape[2]) && wei_shape[1] == 1; desc.export_weights_to_cl_image = export_weights_to_cl_image; 
if(dst_shape[0] <= 4) { + // k0 should be as large as possible. However, we should avoid + // having left-over for loops that make the implementation slower. + if((k % 16) == 0) + { + desc.k0 = 16; + } + else if((k % 8) == 0) + { + desc.k0 = 8; + } + else + { + desc.k0 = 4; + } + if(is_pointwise) { if(ofm == 4) { desc.m0 = 1; desc.n0 = 4; - desc.k0 = 16; } else { desc.m0 = 1; desc.n0 = 1; - desc.k0 = 16; } } else { desc.m0 = 1; desc.n0 = dst_shape[0]; - desc.k0 = 16; } } else @@ -178,21 +191,50 @@ DirectConvComputeKernelInfo ClDirectConvDefaultConfigValhall::configure_G78_f16( { desc.m0 = 1; desc.n0 = 1; - desc.k0 = 16; + if((k % 16) == 0) + { + desc.k0 = 16; + } + else if((k % 8) == 0) + { + desc.k0 = 8; + } + else + { + desc.k0 = 4; + } } else { - if(ofm > 16) + if(ofm >= 16) { - desc.m0 = 4; - desc.n0 = 4; - desc.k0 = 8; + if(m / 6 > 24000) + { + desc.m0 = 6; + } + else + { + desc.m0 = 5; + } + desc.n0 = 8; + desc.k0 = 4; } else { - desc.m0 = 4; - desc.n0 = 4; - desc.k0 = 16; + desc.m0 = 2; + desc.n0 = 8; + if((k % 16) == 0) + { + desc.k0 = 16; + } + else if((k % 8) == 0) + { + desc.k0 = 8; + } + else + { + desc.k0 = 4; + } } } } |