diff options
Diffstat (limited to 'src/runtime/heuristics/dwc_native/ClDWCNativeDefaultConfigValhall.cpp')
-rw-r--r-- | src/runtime/heuristics/dwc_native/ClDWCNativeDefaultConfigValhall.cpp | 127 |
1 files changed, 73 insertions, 54 deletions
diff --git a/src/runtime/heuristics/dwc_native/ClDWCNativeDefaultConfigValhall.cpp b/src/runtime/heuristics/dwc_native/ClDWCNativeDefaultConfigValhall.cpp index 51f3787875..ef1bb3858c 100644 --- a/src/runtime/heuristics/dwc_native/ClDWCNativeDefaultConfigValhall.cpp +++ b/src/runtime/heuristics/dwc_native/ClDWCNativeDefaultConfigValhall.cpp @@ -22,7 +22,6 @@ * SOFTWARE. */ #include "src/runtime/heuristics/dwc_native/ClDWCNativeDefaultConfigValhall.h" -#include "src/runtime/heuristics/dwc_native/ClDWCNativeHeuristicsHelpers.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/GPUTarget.h" @@ -30,31 +29,36 @@ #include "arm_compute/core/TensorShape.h" #include "arm_compute/core/utils/helpers/AdjustVecSize.h" +#include "src/runtime/heuristics/dwc_native/ClDWCNativeHeuristicsHelpers.h" + namespace arm_compute { namespace cl_dwc { -ClDWCNativeDefaultConfigValhall::ClDWCNativeDefaultConfigValhall(GPUTarget gpu) - : IClDWCNativeKernelConfig(gpu) +ClDWCNativeDefaultConfigValhall::ClDWCNativeDefaultConfigValhall(GPUTarget gpu) : IClDWCNativeKernelConfig(gpu) { } -DWCComputeKernelInfo ClDWCNativeDefaultConfigValhall::configure(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info, const Size2D &dilation, - unsigned int depth_multiplier) +DWCComputeKernelInfo ClDWCNativeDefaultConfigValhall::configure(const ITensorInfo *src, + const ITensorInfo *wei, + const PadStrideInfo &conv_info, + const Size2D &dilation, + unsigned int depth_multiplier) { - using ConfigurationFunctionExecutorPtr = DWCComputeKernelInfo (ClDWCNativeDefaultConfigValhall::*)(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info, const Size2D &dilation, - unsigned int depth_multiplier); + using ConfigurationFunctionExecutorPtr = DWCComputeKernelInfo (ClDWCNativeDefaultConfigValhall::*)( + const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info, const Size2D &dilation, + unsigned int depth_multiplier); - ClDWCNativeConfigArray<ConfigurationFunctionExecutorPtr> configs_G78(&ClDWCNativeDefaultConfigValhall::configure_G78_f32, - &ClDWCNativeDefaultConfigValhall::configure_G78_f16, - &ClDWCNativeDefaultConfigValhall::configure_G78_u8); + ClDWCNativeConfigArray<ConfigurationFunctionExecutorPtr> configs_G78( + &ClDWCNativeDefaultConfigValhall::configure_G78_f32, &ClDWCNativeDefaultConfigValhall::configure_G78_f16, + &ClDWCNativeDefaultConfigValhall::configure_G78_u8); - ClDWCNativeConfigArray<ConfigurationFunctionExecutorPtr> configs_G77(&ClDWCNativeDefaultConfigValhall::configure_G78_f32, - &ClDWCNativeDefaultConfigValhall::configure_G77_f16, - &ClDWCNativeDefaultConfigValhall::configure_G78_u8); + ClDWCNativeConfigArray<ConfigurationFunctionExecutorPtr> configs_G77( + &ClDWCNativeDefaultConfigValhall::configure_G78_f32, &ClDWCNativeDefaultConfigValhall::configure_G77_f16, + &ClDWCNativeDefaultConfigValhall::configure_G78_u8); ConfigurationFunctionExecutorPtr func = nullptr; - switch(_target) + switch (_target) { case GPUTarget::G77: func = configs_G77.get_function(src->data_type()); @@ -69,15 +73,18 @@ DWCComputeKernelInfo ClDWCNativeDefaultConfigValhall::configure(const ITensorInf return (this->*func)(src, wei, conv_info, dilation, depth_multiplier); } -DWCComputeKernelInfo ClDWCNativeDefaultConfigValhall::configure_G78_f32(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info, const Size2D &dilation, - unsigned int depth_multiplier) +DWCComputeKernelInfo ClDWCNativeDefaultConfigValhall::configure_G78_f32(const ITensorInfo *src, + const ITensorInfo *wei, + const PadStrideInfo &conv_info, + const Size2D &dilation, + unsigned int depth_multiplier) { DWCComputeKernelInfo desc; - if(src->data_layout() == DataLayout::NHWC) + if (src->data_layout() == DataLayout::NHWC) { - const size_t idx_c = get_data_layout_dimension_index(wei->data_layout(), DataLayoutDimension::CHANNEL); - const size_t idx_w = get_data_layout_dimension_index(wei->data_layout(), DataLayoutDimension::WIDTH); + const size_t idx_c = get_data_layout_dimension_index(wei->data_layout(), DataLayoutDimension::CHANNEL); + const size_t idx_w = get_data_layout_dimension_index(wei->data_layout(), DataLayoutDimension::WIDTH); const TensorShape wei_shape = wei->tensor_shape(); const size_t kernel_c = wei_shape[idx_c]; const size_t kernel_w = wei_shape[idx_w]; @@ -85,17 +92,17 @@ DWCComputeKernelInfo ClDWCNativeDefaultConfigValhall::configure_G78_f32(const IT desc.export_input_to_cl_image = false; desc.export_weights_to_cl_image = use_cl_image_for_weights(wei, depth_multiplier); - if(depth_multiplier == 1) + if (depth_multiplier == 1) { desc.n0 = 4; } else { - if((depth_multiplier % 4) == 0) + if ((depth_multiplier % 4) == 0) { desc.n0 = 4; } - else if((depth_multiplier % 2) == 0) + else if ((depth_multiplier % 2) == 0) { desc.n0 = 2; } @@ -106,14 +113,15 @@ DWCComputeKernelInfo ClDWCNativeDefaultConfigValhall::configure_G78_f32(const IT } // Note: If we reduce n0, export to cl_image must be false - ARM_COMPUTE_ERROR_ON((adjust_vec_size(desc.n0, kernel_c) != desc.n0) && (desc.export_weights_to_cl_image == true)); + ARM_COMPUTE_ERROR_ON((adjust_vec_size(desc.n0, kernel_c) != desc.n0) && + (desc.export_weights_to_cl_image == true)); desc.n0 = adjust_vec_size(desc.n0, kernel_c); // Set m0 only if stride_x == 1 and dilation_x == 1 - if(conv_info.stride().first == 1 && dilation.x() == 1) + if (conv_info.stride().first == 1 && dilation.x() == 1) { - if((kernel_w >= 9) || (kernel_w == 1)) + if ((kernel_w >= 9) || (kernel_w == 1)) { desc.m0 = 1; } @@ -131,16 +139,19 @@ DWCComputeKernelInfo ClDWCNativeDefaultConfigValhall::configure_G78_f32(const IT return desc; } -DWCComputeKernelInfo ClDWCNativeDefaultConfigValhall::configure_G78_f16(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info, const Size2D &dilation, - unsigned int depth_multiplier) +DWCComputeKernelInfo ClDWCNativeDefaultConfigValhall::configure_G78_f16(const ITensorInfo *src, + const ITensorInfo *wei, + const PadStrideInfo &conv_info, + const Size2D &dilation, + unsigned int depth_multiplier) { DWCComputeKernelInfo desc; - if(src->data_layout() == DataLayout::NHWC) + if (src->data_layout() == DataLayout::NHWC) { // Src and weights have the same dimension indices - const size_t idx_c = get_data_layout_dimension_index(wei->data_layout(), DataLayoutDimension::CHANNEL); - const size_t idx_w = get_data_layout_dimension_index(wei->data_layout(), DataLayoutDimension::WIDTH); + const size_t idx_c = get_data_layout_dimension_index(wei->data_layout(), DataLayoutDimension::CHANNEL); + const size_t idx_w = get_data_layout_dimension_index(wei->data_layout(), DataLayoutDimension::WIDTH); const TensorShape src_shape = src->tensor_shape(); const TensorShape wei_shape = wei->tensor_shape(); const size_t src_w = src_shape[idx_w]; @@ -150,9 +161,9 @@ DWCComputeKernelInfo ClDWCNativeDefaultConfigValhall::configure_G78_f16(const IT desc.export_input_to_cl_image = false; desc.export_weights_to_cl_image = use_cl_image_for_weights(wei, depth_multiplier); - if(depth_multiplier == 1) + if (depth_multiplier == 1) { - if(desc.export_weights_to_cl_image == false) + if (desc.export_weights_to_cl_image == false) { desc.n0 = 8; } @@ -163,11 +174,11 @@ DWCComputeKernelInfo ClDWCNativeDefaultConfigValhall::configure_G78_f16(const IT } else { - if((depth_multiplier % 4) == 0) + if ((depth_multiplier % 4) == 0) { desc.n0 = 4; } - else if((depth_multiplier % 2) == 0) + else if ((depth_multiplier % 2) == 0) { desc.n0 = 2; } @@ -178,20 +189,21 @@ DWCComputeKernelInfo ClDWCNativeDefaultConfigValhall::configure_G78_f16(const IT } // Note: If we reduce n0, export to cl_image must be false - ARM_COMPUTE_ERROR_ON((adjust_vec_size(desc.n0, kernel_c) != desc.n0) && (desc.export_weights_to_cl_image == true)); + ARM_COMPUTE_ERROR_ON((adjust_vec_size(desc.n0, kernel_c) != desc.n0) && + (desc.export_weights_to_cl_image == true)); desc.n0 = adjust_vec_size(desc.n0, kernel_c); // Set m0 only if stride_x == 1 and dilation_x == 1 - if(conv_info.stride().first == 1 && dilation.x() == 1) + if (conv_info.stride().first == 1 && dilation.x() == 1) { - if((kernel_w >= 9) || (kernel_w == 1)) + if ((kernel_w >= 9) || (kernel_w == 1)) { desc.m0 = 1; } else { - if((src_w % 5) == 0) + if ((src_w % 5) == 0) { desc.m0 = 5; } @@ -210,19 +222,22 @@ DWCComputeKernelInfo ClDWCNativeDefaultConfigValhall::configure_G78_f16(const IT return desc; } -DWCComputeKernelInfo ClDWCNativeDefaultConfigValhall::configure_G78_u8(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info, const Size2D &dilation, - unsigned int depth_multiplier) +DWCComputeKernelInfo ClDWCNativeDefaultConfigValhall::configure_G78_u8(const ITensorInfo *src, + const ITensorInfo *wei, + const PadStrideInfo &conv_info, + const Size2D &dilation, + unsigned int depth_multiplier) { ARM_COMPUTE_UNUSED(wei); DWCComputeKernelInfo desc; - if(src->data_layout() == DataLayout::NHWC) + if (src->data_layout() == DataLayout::NHWC) { desc.export_input_to_cl_image = false; desc.export_weights_to_cl_image = false; desc.n0 = (depth_multiplier == 1) ? 4 : 1; - if(conv_info.stride().first == 1 && dilation.x() == 1 && depth_multiplier == 1) + if (conv_info.stride().first == 1 && dilation.x() == 1 && depth_multiplier == 1) { desc.m0 = 2; } @@ -235,15 +250,18 @@ DWCComputeKernelInfo ClDWCNativeDefaultConfigValhall::configure_G78_u8(const ITe return desc; } -DWCComputeKernelInfo ClDWCNativeDefaultConfigValhall::configure_G77_f16(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info, const Size2D &dilation, - unsigned int depth_multiplier) +DWCComputeKernelInfo ClDWCNativeDefaultConfigValhall::configure_G77_f16(const ITensorInfo *src, + const ITensorInfo *wei, + const PadStrideInfo &conv_info, + const Size2D &dilation, + unsigned int depth_multiplier) { DWCComputeKernelInfo desc; - if(src->data_layout() == DataLayout::NHWC) + if (src->data_layout() == DataLayout::NHWC) { - const size_t idx_c = get_data_layout_dimension_index(wei->data_layout(), DataLayoutDimension::CHANNEL); - const size_t idx_w = get_data_layout_dimension_index(wei->data_layout(), DataLayoutDimension::WIDTH); + const size_t idx_c = get_data_layout_dimension_index(wei->data_layout(), DataLayoutDimension::CHANNEL); + const size_t idx_w = get_data_layout_dimension_index(wei->data_layout(), DataLayoutDimension::WIDTH); const TensorShape wei_shape = wei->tensor_shape(); const size_t kernel_c = wei_shape[idx_c]; const size_t kernel_w = wei_shape[idx_w]; @@ -251,9 +269,9 @@ DWCComputeKernelInfo ClDWCNativeDefaultConfigValhall::configure_G77_f16(const IT desc.export_input_to_cl_image = false; desc.export_weights_to_cl_image = use_cl_image_for_weights(wei, depth_multiplier); - if(depth_multiplier == 1) + if (depth_multiplier == 1) { - if(desc.export_weights_to_cl_image == false) + if (desc.export_weights_to_cl_image == false) { desc.n0 = 8; } @@ -264,11 +282,11 @@ DWCComputeKernelInfo ClDWCNativeDefaultConfigValhall::configure_G77_f16(const IT } else { - if((depth_multiplier % 4) == 0) + if ((depth_multiplier % 4) == 0) { desc.n0 = 4; } - else if((depth_multiplier % 2) == 0) + else if ((depth_multiplier % 2) == 0) { desc.n0 = 2; } @@ -279,14 +297,15 @@ DWCComputeKernelInfo ClDWCNativeDefaultConfigValhall::configure_G77_f16(const IT } // Note: If we reduce n0, export to cl_image must be false - ARM_COMPUTE_ERROR_ON((adjust_vec_size(desc.n0, kernel_c) != desc.n0) && (desc.export_weights_to_cl_image == true)); + ARM_COMPUTE_ERROR_ON((adjust_vec_size(desc.n0, kernel_c) != desc.n0) && + (desc.export_weights_to_cl_image == true)); desc.n0 = adjust_vec_size(desc.n0, kernel_c); // Set m0 only if stride_x == 1 and dilation_x == 1 - if(conv_info.stride().first == 1 && dilation.x() == 1) + if (conv_info.stride().first == 1 && dilation.x() == 1) { - if((kernel_w >= 9) || (kernel_w == 1)) + if ((kernel_w >= 9) || (kernel_w == 1)) { desc.m0 = 1; } |