Improve start-up time for depthwise convolution

- Pass source and destination tensor dimension info at runtime

Resolves: COMPMID-4887
Signed-off-by: Sheri Zhang <sheri.zhang@arm.com>
Change-Id: Ib7c9f3ce6fb7cef600f7b0cd0fadafa4fa6888a1
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/6635
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
author: Sheri Zhang <sheri.zhang@arm.com> 2021-11-10 13:08:40 +0000
committer: Sheri Zhang <sheri.zhang@arm.com> 2021-11-17 10:31:59 +0000
commit: e3b197410842652f0a78d04fe7b2c333cbeabab6 (patch)
tree: 128d7b40239886d71a84605a8eb8f17c1d36e561 /src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.cpp
parent: d7154dbf0f4a347f2f35f2475a893f1631c5ee1a (diff)
download: ComputeLibrary-e3b197410842652f0a78d04fe7b2c333cbeabab6.tar.gz
1 files changed, 3 insertions, 7 deletions
diff --git a/src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.cpp b/src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.cpp
index 2b74f91a05..61c8d90f78 100644
--- a/src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.cpp
+++ b/src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.cpp
@@ -215,15 +215,11 @@ void CLDepthwiseConvolutionLayerNativeKernel::configure(const CLCompileContext &
     build_opts.add_option("-DACTIVATION_TYPE=" + lower_string(string_from_activation_func(conv_info.act_info.activation())));
     build_opts.add_option("-DDEPTH_MULTIPLIER=" + support::cpp11::to_string(conv_info.depth_multiplier));
     build_opts.add_option("-DSRC_TENSOR_TYPE=BUFFER");
-    build_opts.add_option("-DSRC_WIDTH=" + support::cpp11::to_string(_input->info()->dimension(1)));
-    build_opts.add_option("-DSRC_HEIGHT=" + support::cpp11::to_string(_input->info()->dimension(2)));
     // Note: SRC_DATA_TYPE must have the same data type of WEI_DATA_TYPE. In quantized, we could
     // have a case where the data types for the activation and weights are different. However, since the implementation
     // only works when both have same data type, we have to change the offset to take into account this aspect
     build_opts.add_option("-DSRC_DATA_TYPE=" + get_cl_type_from_data_type(_input->info()->data_type()));
     build_opts.add_option("-DDST_TENSOR_TYPE=BUFFER");
-    build_opts.add_option("-DDST_WIDTH=" + support::cpp11::to_string(_output->info()->dimension(1)));
-    build_opts.add_option("-DDST_HEIGHT=" + support::cpp11::to_string(_output->info()->dimension(2)));
     build_opts.add_option("-DDST_DATA_TYPE=" + get_cl_type_from_data_type(_output->info()->data_type()));
     build_opts.add_option_if_else(_export_to_cl_image, "-DWEI_TENSOR_TYPE=IMAGE", "-DWEI_TENSOR_TYPE=BUFFER");
     build_opts.add_option("-DWEI_WIDTH=" + support::cpp11::to_string(_weights->info()->dimension(1)));
@@ -290,7 +286,6 @@ void CLDepthwiseConvolutionLayerNativeKernel::configure(const CLCompileContext &
     {
         kernel_name = "dwc_native_fp_nhwc";
         build_opts.add_option("-DACC_DATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type()));
-        build_opts.add_option("-DZERO_VALUE=" + support::cpp11::to_string(0));
         build_opts.add_option_if(conv_info.act_info.enabled(), "-DA_VAL=" + float_to_string_with_full_precision(conv_info.act_info.a()));
         build_opts.add_option_if(conv_info.act_info.enabled(), "-DB_VAL=" + float_to_string_with_full_precision(conv_info.act_info.b()));
     }
@@ -358,8 +353,9 @@ void CLDepthwiseConvolutionLayerNativeKernel::run(const Window &window, cl::Comm
     }
 
     unsigned int idx = 0;
-    add_4D_tensor_argument(idx, _input, slice);
-    add_4D_tensor_argument(idx, _output, slice);
+    add_4d_tensor_nhwc_argument(idx, _input);
+    add_4d_tensor_nhwc_argument(idx, _output);
+
     if(_export_to_cl_image)
     {
         _kernel.setArg(idx++, weights_cl_image);
author	Sheri Zhang <sheri.zhang@arm.com>	2021-11-10 13:08:40 +0000
committer	Sheri Zhang <sheri.zhang@arm.com>	2021-11-17 10:31:59 +0000
commit	e3b197410842652f0a78d04fe7b2c333cbeabab6 (patch)
tree	128d7b40239886d71a84605a8eb8f17c1d36e561 /src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.cpp
parent	d7154dbf0f4a347f2f35f2475a893f1631c5ee1a (diff)
download	ComputeLibrary-e3b197410842652f0a78d04fe7b2c333cbeabab6.tar.gz