aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Viet-Hoa Do <viet-hoa.do@arm.com> 2023-03-01 15:46:10 +0000
committer Viet-Hoa Do <viet-hoa.do@arm.com> 2023-03-02 09:46:28 +0000
commit e2e6d745c940cdfd8c3340fd1227dbef1badfb3c (patch)
tree 9af6bc878aa397025524d2eca303e0d4a5a1693a
parent bbf2e7477be984702e1a51f2a23910ee8349b867 (diff)
download ComputeLibrary-e2e6d745c940cdfd8c3340fd1227dbef1badfb3c.tar.gz
Fix direct conv2d in dynamic fusion
* Put the input and output tensor shape values directly into the CL code.
* Use a texture for the weights when possible.

Resolves: COMPMID-5938
Signed-off-by: Viet-Hoa Do <viet-hoa.do@arm.com>
Change-Id: Ib53b310a80ce857eac36564b352136fdde55b131
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/9249
Reviewed-by: SiCong Li <sicong.li@arm.com>
Reviewed-by: Jakub Sujak <jakub.sujak@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
-rw-r--r-- src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.cpp 8
-rw-r--r-- src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.h 5
-rw-r--r-- src/dynamic_fusion/sketch/gpu/operators/GpuConv2d.cpp 55
-rw-r--r-- src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.cpp 26
4 files changed, 22 insertions, 72 deletions
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.cpp b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.cpp
index c8e682f34a..3965deced1 100644
--- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.cpp
+++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.cpp
@@ -35,15 +35,9 @@ namespace experimental
{
namespace dynamic_fusion
{
-ClComponentDirectConv2dSettings &ClComponentDirectConv2dSettings::export_to_cl_image(bool cl_image)
-{
- _export_to_cl_image = cl_image;
- return *this;
-}
-
bool ClComponentDirectConv2dSettings::export_to_cl_image() const
{
- return _export_to_cl_image;
+ return _desc.export_weights_to_cl_image;
}
ClComponentDirectConv2dSettings &ClComponentDirectConv2dSettings::fast_relaxed_math(bool fast_relaxed_math)
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.h b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.h
index c3a70ef3ae..8e555dce57 100644
--- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.h
+++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2022 Arm Limited.
+ * Copyright (c) 2022-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -47,8 +47,6 @@ class Conv2dAttributes;
class ClComponentDirectConv2dSettings
{
public:
- /** Set export_to_cl_image flag */
- ClComponentDirectConv2dSettings &export_to_cl_image(bool cl_image);
/** Get export_to_cl_image flag */
bool export_to_cl_image() const;
@@ -63,7 +61,6 @@ public:
DirectConvComputeKernelInfo direct_conv_descriptor() const;
private:
- bool _export_to_cl_image{ false };
bool _fast_relaxed_math{ true };
DirectConvComputeKernelInfo _desc{}; // Direct convolution descriptor
};
diff --git a/src/dynamic_fusion/sketch/gpu/operators/GpuConv2d.cpp b/src/dynamic_fusion/sketch/gpu/operators/GpuConv2d.cpp
index 690371f910..e00f09563f 100644
--- a/src/dynamic_fusion/sketch/gpu/operators/GpuConv2d.cpp
+++ b/src/dynamic_fusion/sketch/gpu/operators/GpuConv2d.cpp
@@ -45,49 +45,6 @@ namespace dynamic_fusion
{
namespace
{
-bool export_to_cl_image_support(const ITensorInfo *tensor, GPUTarget gpu_target, const cl::Device &device, DataLayout data_layout)
-{
- if(tensor->tensor_shape()[0] % 4 || (data_layout != DataLayout::NHWC))
- {
- return false;
- }
-
- // If not floating point
- if(!is_data_type_float(tensor->data_type()))
- {
- return false;
- }
-
- if(gpu_target == GPUTarget::G71 || get_arch_from_target(gpu_target) == GPUTarget::MIDGARD)
- {
- return false;
- }
-
- // Check if the cl_khr_image2d_from_buffer extension is supported on the target platform
- if(!image2d_from_buffer_supported(device))
- {
- return false;
- }
-
- // Check cl image pitch alignment
- if(get_cl_image_pitch_alignment(device) == 0)
- {
- return false;
- }
-
- const size_t image_w = tensor->tensor_shape()[0] / 4;
- const size_t image_h = tensor->tensor_shape()[1] * tensor->tensor_shape()[2] * tensor->tensor_shape()[3];
- const size_t max_image_w = device.getInfo<CL_DEVICE_IMAGE2D_MAX_WIDTH>();
- const size_t max_image_h = device.getInfo<CL_DEVICE_IMAGE2D_MAX_HEIGHT>();
-
- if(image_w > max_image_w || image_h > max_image_h)
- {
- return false;
- }
-
- return true;
-}
-
DirectConvComputeKernelInfo config_direct_convolution_nhwc(const ITensorInfo *src, const ITensorInfo *weights, const PadStrideInfo &conv_info)
{
// Get GPU target
@@ -126,7 +83,6 @@ Status is_supported_op_helper(const GpuWorkloadContext &context,
TensorInfo dst_info_to_validate;
const ITensorInfo *dst_info_to_validate_ptr = &dst_info_to_validate;
- const DataLayout data_layout = src->data_layout();
if(dst != nullptr)
{
dst_info_to_validate_ptr = dst;
@@ -151,9 +107,6 @@ Status is_supported_op_helper(const GpuWorkloadContext &context,
const auto properties = IGpuKernelComponent::Properties().stage(UnitWorkloadStage{ UnitWorkloadStage::Stage::Run });
auto settings = ClComponentDirectConv2d::Settings();
- settings.export_to_cl_image(
- export_to_cl_image_support(src, gpu_target, cl_compile_ctx->get_device(), data_layout));
-
settings.fast_relaxed_math(
(gpu_target != GPUTarget::G71 && (gpu_target & GPUTarget::GPU_ARCH_MASK) == GPUTarget::BIFROST)
&& (dst_info_to_validate_ptr->data_type() == DataType::F32 || dst_info_to_validate_ptr->data_type() == DataType::F16));
@@ -251,7 +204,6 @@ ITensorInfo *GpuConv2d::create_op(GpuWorkloadSketch &sketch,
const auto sketch_ctx = sketch.implementation().context();
- const auto data_layout = src->data_layout();
const auto gpu_target = sketch_ctx->gpu_target();
if(sketch_ctx->gpu_language() == GpuLanguage::OpenCL)
@@ -266,20 +218,17 @@ ITensorInfo *GpuConv2d::create_op(GpuWorkloadSketch &sketch,
auto settings = ClComponentDirectConv2d::Settings();
- settings.export_to_cl_image(
- export_to_cl_image_support(src, gpu_target, cl_compile_ctx->get_device(), data_layout));
-
settings.fast_relaxed_math(
(gpu_target != GPUTarget::G71 && (gpu_target & GPUTarget::GPU_ARCH_MASK) == GPUTarget::BIFROST)
&& (dst->data_type() == DataType::F32 || dst->data_type() == DataType::F16));
+ settings.direct_conv_descriptor(desc);
+
if(settings.export_to_cl_image())
{
arm_compute::opencl::kernels::gemm::update_padding_for_cl_image(wei);
}
- settings.direct_conv_descriptor(desc);
-
ArgumentPack<ITensorInfo> arguments;
arguments.add_const_tensor(ACL_SRC_0, src);
arguments.add_const_tensor(ACL_SRC_1, wei);
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.cpp
index e69103e263..ca531fe28e 100644
--- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.cpp
+++ b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.cpp
@@ -91,12 +91,12 @@ TILE(uint, M0, 1, g_dst_indirect_y);
{
#define _IWEI_WIDTH {{WEI_WIDTH}}
#define _IWEI_HEIGHT {{WEI_HEIGHT}}
-#define _ISRC_WIDTH {{src}}_w
-#define _ISRC_HEIGHT {{src}}_h
-#define _ISRC_CHANNELS {{src}}_c
-#define _IDST_WIDTH {{arg_dst}}_w
-#define _IDST_HEIGHT {{arg_dst}}_h
-#define _IDST_CHANNELS {{arg_dst}}_c
+#define _ISRC_WIDTH {{SRC_WIDTH}}
+#define _ISRC_HEIGHT {{SRC_HEIGHT}}
+#define _ISRC_CHANNELS {{SRC_CHANNELS}}
+#define _IDST_WIDTH {{DST_WIDTH}}
+#define _IDST_HEIGHT {{DST_HEIGHT}}
+#define _IDST_CHANNELS {{DST_CHANNELS}}
#define _IY_MULTIPLIER (_IWEI_WIDTH * _IWEI_HEIGHT)
TILE(int, M0, 1, xi);
@@ -214,8 +214,8 @@ code += R"_(
code += R"_(
LOOP_UNROLLING(int, i, 0, 1, M0,
{
- g_dst_indirect_y[i].v = (uint)min(g_ind_1 + i, (int)({{arg_dst}}_w * {{arg_dst}}_h) - 1);
- g_dst_indirect_y[i].v += g_ind_2 * (int)({{arg_dst}}_w * {{arg_dst}}_h);
+ g_dst_indirect_y[i].v = (uint)min(g_ind_1 + i, (int)({{DST_WIDTH}} * {{DST_HEIGHT}}) - 1);
+ g_dst_indirect_y[i].v += g_ind_2 * (int)({{DST_WIDTH}} * {{DST_HEIGHT}});
})
}
//------------------ END KERNEL {{meta_kernel_id}} ---------------------
@@ -294,9 +294,19 @@ TagLUT ClTemplateDirectConv2d::get_tag_lut(const GpuKernelVariableTable &vtable,
}
const auto width_idx = 1;
const auto height_idx = 2;
+ const auto channel_idx = 0;
+
+ lut["SRC_WIDTH"] = _src->dimension(width_idx);
+ lut["SRC_HEIGHT"] = _src->dimension(height_idx);
+ lut["SRC_CHANNELS"] = _src->dimension(channel_idx);
+
lut["WEI_WIDTH"] = _weight->dimension(width_idx);
lut["WEI_HEIGHT"] = _weight->dimension(height_idx);
+ lut["DST_WIDTH"] = _dst->dimension(width_idx);
+ lut["DST_HEIGHT"] = _dst->dimension(height_idx);
+ lut["DST_CHANNELS"] = _dst->dimension(channel_idx);
+
lut["STRIDE_X"] = _attributes.stride().x();
lut["STRIDE_Y"] = _attributes.stride().y();