about | summary | refs | log | tree | commit | diff
path: root/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3Kernel.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3Kernel.cpp')
-rw-r--r-- src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3Kernel.cpp | 26
1 files changed, 23 insertions, 3 deletions
diff --git a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3Kernel.cpp b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3Kernel.cpp
index d50e4d695e..9e5585cba4 100644
--- a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3Kernel.cpp
+++ b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3Kernel.cpp
@@ -147,6 +147,7 @@ void CLDepthwiseConvolutionLayer3x3Kernel::configure(const ICLTensor *input, con
// Configure the local work size for Bifrost with a value obtained
// via exhaustive autotuning for the MobileNets tensor shapes.
const GPUTarget gpu_target = get_target();
+ const bool is_bifrost = gpu_target_is_in(gpu_target, GPUTarget::G71, GPUTarget::G72);
// Configure kernel window
unsigned int num_elems_read_per_iteration_x = 0;
@@ -178,12 +179,31 @@ void CLDepthwiseConvolutionLayer3x3Kernel::configure(const ICLTensor *input, con
num_elems_read_per_iteration_x = 3 + (num_elems_written_per_iteration_x - 1) * _conv_stride_x;
break;
}
+ if(is_bifrost)
+ {
+ if(_conv_stride_x == 1 && _conv_stride_y == 1)
+ {
+ kernel_name = "depthwise_convolution_3x3_stridex1_stridey1_bifrost_f16";
+ num_elems_read_per_iteration_x = 8;
+ num_elems_written_per_iteration_x = 4;
+ num_elems_read_per_iteration_y = 6;
+ num_elems_written_per_iteration_y = 4;
+ }
+ else if(_conv_stride_x == 2 && _conv_stride_y == 2)
+ {
+ kernel_name = "depthwise_convolution_3x3_stridex2_stridey2_bifrost_f16";
+ num_elems_read_per_iteration_x = 10;
+ num_elems_written_per_iteration_x = 4;
+ num_elems_read_per_iteration_y = 5;
+ num_elems_written_per_iteration_y = 2;
+ }
+ }
}
- else if(input->info()->data_type() == DataType::F32 && gpu_target_is_in(gpu_target, GPUTarget::G71, GPUTarget::G72))
+ else if(input->info()->data_type() == DataType::F32 && is_bifrost)
{
if(_conv_stride_x == 1 && _conv_stride_y == 1)
{
- kernel_name = "depthwise_convolution_3x3_stridex1_stridey1_bifrost";
+ kernel_name = "depthwise_convolution_3x3_stridex1_stridey1_bifrost_f32";
num_elems_read_per_iteration_x = 4;
num_elems_read_per_iteration_y = 6;
num_elems_written_per_iteration_x = 2;
@@ -191,7 +211,7 @@ void CLDepthwiseConvolutionLayer3x3Kernel::configure(const ICLTensor *input, con
}
else if(_conv_stride_x == 2 && _conv_stride_y == 2)
{
- kernel_name = "depthwise_convolution_3x3_stridex2_stridey2_bifrost";
+ kernel_name = "depthwise_convolution_3x3_stridex2_stridey2_bifrost_f32";
num_elems_read_per_iteration_x = 6;
num_elems_read_per_iteration_y = 5;
num_elems_written_per_iteration_x = 2;