aboutsummaryrefslogtreecommitdiff
path: root/src/gpu/cl/kernels/ClTransposedConvolutionKernel.cpp
diff options
context:
space:
mode:
author: Gunes Bayir <gunes.bayir@arm.com> 2022-12-28 10:28:20 +0000
committer: Gunes Bayir <gunes.bayir@arm.com> 2022-12-29 13:14:10 +0000
commit: 8a2d7cecea194ac8eafa91721fb5b09ae01e5971 (patch)
tree: 03c2cf553751504bf575450dc3b22deea911af7f /src/gpu/cl/kernels/ClTransposedConvolutionKernel.cpp
parent: 939b21ad4b9ed15d43b4ee8b17484e57ed55a01f (diff)
download: ComputeLibrary-8a2d7cecea194ac8eafa91721fb5b09ae01e5971.tar.gz
Extend Transposed Conv. for tiles with N0>1
Partially Resolves: COMPMID-5724
Signed-off-by: Gunes Bayir <gunes.bayir@arm.com>
Change-Id: I0aeddddcdd87c8c79f6dae9a76ffdc2ba0c08e17
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/8883
Benchmark: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/gpu/cl/kernels/ClTransposedConvolutionKernel.cpp')
-rw-r--r-- src/gpu/cl/kernels/ClTransposedConvolutionKernel.cpp 12
1 files changed, 6 insertions, 6 deletions
diff --git a/src/gpu/cl/kernels/ClTransposedConvolutionKernel.cpp b/src/gpu/cl/kernels/ClTransposedConvolutionKernel.cpp
index 714ca8e6d1..1390451ed8 100644
--- a/src/gpu/cl/kernels/ClTransposedConvolutionKernel.cpp
+++ b/src/gpu/cl/kernels/ClTransposedConvolutionKernel.cpp
@@ -130,8 +130,13 @@ void ClTransposedConvolutionKernel::configure(const CLCompileContext &compile_co
Window win;
output_shape.collapse(2U, 1U); // Collapse width and height into single dimension
+ const unsigned int n0 = adjust_vec_size(16 / output->element_size(), output_channels);
+ const unsigned int m0 = 1;
+ const unsigned int k0 = adjust_vec_size(16 / input->element_size(), input_channels);
+ const unsigned int partial_store_n0 = output_channels % n0;
+
// Create window and update padding
- win = calculate_max_window(output_shape, Steps(1, 1));
+ win = calculate_max_window(output_shape, Steps(n0, m0));
ICLKernel::configure_internal(win);
const std::string kernel_name = "transposed_convolution_nhwc";
@@ -140,11 +145,6 @@ void ClTransposedConvolutionKernel::configure(const CLCompileContext &compile_co
const DataType input_data_type = input->data_type();
const PaddingInfo strides = deconv_info.stride();
- const unsigned int n0 = 1;
- const unsigned int m0 = 1;
- const unsigned int k0 = adjust_vec_size(16 / input->element_size(), input_channels);
- const unsigned int partial_store_n0 = output_channels % n0;
-
if(biases != nullptr)
{
build_options.add_option(std::string("-DHAS_BIAS"));