aboutsummaryrefslogtreecommitdiff
path: root/src/dynamic_fusion/sketch/gpu/operators/GpuDepthwiseConv2d.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/dynamic_fusion/sketch/gpu/operators/GpuDepthwiseConv2d.cpp')
-rw-r--r--src/dynamic_fusion/sketch/gpu/operators/GpuDepthwiseConv2d.cpp129
1 files changed, 16 insertions, 113 deletions
diff --git a/src/dynamic_fusion/sketch/gpu/operators/GpuDepthwiseConv2d.cpp b/src/dynamic_fusion/sketch/gpu/operators/GpuDepthwiseConv2d.cpp
index caccbb1830..b08af61d8f 100644
--- a/src/dynamic_fusion/sketch/gpu/operators/GpuDepthwiseConv2d.cpp
+++ b/src/dynamic_fusion/sketch/gpu/operators/GpuDepthwiseConv2d.cpp
@@ -31,6 +31,8 @@
#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h"
#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDepthwiseConv2d.h"
#include "src/gpu/cl/kernels/gemm/ClGemmHelpers.h"
+#include "src/runtime/heuristics/dwc_native/ClDWCNativeKernelConfig.h"
+#include "src/runtime/heuristics/dwc_native/IClDWCNativeKernelConfig.h"
namespace arm_compute
{
@@ -40,115 +42,6 @@ namespace dynamic_fusion
{
namespace
{
-bool export_weights_to_cl_image_heuristic(const ITensorInfo *weights, unsigned int depth_multiplier, GPUTarget gpu_target)
-{
- if(!export_to_cl_image(weights))
- {
- return false;
- }
-
- const size_t idx_w = get_data_layout_dimension_index(weights->data_layout(), DataLayoutDimension::WIDTH);
- const size_t idx_h = get_data_layout_dimension_index(weights->data_layout(), DataLayoutDimension::HEIGHT);
- const size_t kernel_w = weights->tensor_shape()[idx_w];
- const size_t kernel_h = weights->tensor_shape()[idx_h];
-
- if(gpu_target == GPUTarget::G71 || get_arch_from_target(gpu_target) == GPUTarget::MIDGARD)
- {
- return false;
- }
-
- if((kernel_w == 1) && (kernel_h == 1))
- {
- return false;
- }
-
- if(depth_multiplier > 1)
- {
- if((depth_multiplier % 4) != 0)
- {
- return false;
- }
- }
-
- return true;
-}
-
-void initialize_dwc_native_compute_info(DWCComputeKernelInfo &dwc_compute_info, const ITensorInfo *input, const ITensorInfo *weights,
- const DepthwiseConv2dAttributes &attributes, const GPUTarget gpu_target)
-{
- const unsigned int depth_multiplier = attributes.depth_multiplier();
-
- // Floating point path
- // First check if we can export to cl_image.
- dwc_compute_info.export_input_to_cl_image = false;
- dwc_compute_info.export_weights_to_cl_image = export_weights_to_cl_image_heuristic(weights, depth_multiplier, gpu_target);
-
- // Set n0
- if(depth_multiplier == 1)
- {
- if(dwc_compute_info.export_weights_to_cl_image == false && weights->data_type() == DataType::F16)
- {
- dwc_compute_info.n0 = 8;
- }
- else
- {
- dwc_compute_info.n0 = 4;
- }
- }
- else
- {
- if((depth_multiplier % 4) == 0)
- {
- dwc_compute_info.n0 = 4;
- }
- else if((depth_multiplier % 2) == 0)
- {
- dwc_compute_info.n0 = 2;
- }
- else
- {
- dwc_compute_info.n0 = 1;
- }
- }
-
- dwc_compute_info.n0 = adjust_vec_size(dwc_compute_info.n0, weights->dimension(0));
-
- // Set m0 only if stride_x == 1 and dilation_x == 1
- if(attributes.stride().x() == 1 && attributes.dilation().x() == 1)
- {
- const size_t idx_w = get_data_layout_dimension_index(weights->data_layout(), DataLayoutDimension::WIDTH);
- const size_t kernel_w = weights->tensor_shape()[idx_w];
-
- if((kernel_w >= 9) || (kernel_w == 1))
- {
- dwc_compute_info.m0 = 1;
- }
- else
- {
- if(weights->data_type() == DataType::F16)
- {
- if((input->dimension(1) % 5) == 0)
- {
- dwc_compute_info.m0 = 5;
- }
- else
- {
- dwc_compute_info.m0 = 4;
- }
- }
- else
- {
- dwc_compute_info.m0 = 2;
- }
- }
- }
- else
- {
- dwc_compute_info.m0 = 1;
- }
- return;
-}
-
void calculate_and_init_dst_if_empty(ITensorInfo *dst, const ITensorInfo *src, const ITensorInfo *wei, const DepthwiseConv2dAttributes &attributes)
{
if(dst->total_size() == 0U)
@@ -202,8 +95,13 @@ Status GpuDepthwiseConv2d::is_supported_op(const GpuWorkloadContext &cont
const auto properties = IGpuKernelComponent::Properties().stage(UnitWorkloadStage{ UnitWorkloadStage::Stage::Run });
auto settings = ClComponentDepthwiseConv2d::Settings();
- DWCComputeKernelInfo dwc_info;
- initialize_dwc_native_compute_info(dwc_info, src, wei, attributes, gpu_target);
+ const PadStrideInfo legacy_conv_info(attributes.stride().x(), attributes.stride().y(), attributes.pad().left,
+ attributes.pad().right,
+ attributes.pad().top, attributes.pad().bottom, DimensionRoundingType::FLOOR);
+
+ // Get the depthwise convolution compute parameters
+ auto t = arm_compute::cl_dwc::ClDWCNativeKernelConfigurationFactory::create(gpu_target);
+ const DWCComputeKernelInfo dwc_info = t->configure(src, wei, legacy_conv_info, attributes.dilation(), attributes.depth_multiplier());
settings.fast_relaxed_math(
(gpu_target != GPUTarget::G71 && (gpu_target & GPUTarget::GPU_ARCH_MASK) == GPUTarget::BIFROST)
@@ -294,8 +192,13 @@ void GpuDepthwiseConv2d::create_op(GpuWorkloadSketch &sketch,
const auto properties = IGpuKernelComponent::Properties().stage(UnitWorkloadStage{ UnitWorkloadStage::Stage::Run });
auto settings = ClComponentDepthwiseConv2d::Settings();
- DWCComputeKernelInfo dwc_info;
- initialize_dwc_native_compute_info(dwc_info, src, wei, attributes, gpu_target);
+ const PadStrideInfo legacy_conv_info(attributes.stride().x(), attributes.stride().y(), attributes.pad().left,
+ attributes.pad().right,
+ attributes.pad().top, attributes.pad().bottom, DimensionRoundingType::FLOOR);
+
+ // Get the depthwise convolution compute parameters
+ auto t = arm_compute::cl_dwc::ClDWCNativeKernelConfigurationFactory::create(gpu_target);
+ const DWCComputeKernelInfo dwc_info = t->configure(src, wei, legacy_conv_info, attributes.dilation(), attributes.depth_multiplier());
settings.is_fma_available(get_arch_from_target(gpu_target) != GPUTarget::MIDGARD)
.m0(dwc_info.m0)