diff options
author | Gunes Bayir <gunes.bayir@arm.com> | 2022-12-28 10:28:20 +0000 |
---|---|---|
committer | Gunes Bayir <gunes.bayir@arm.com> | 2022-12-29 13:14:10 +0000 |
commit | 8a2d7cecea194ac8eafa91721fb5b09ae01e5971 (patch) | |
tree | 03c2cf553751504bf575450dc3b22deea911af7f | |
parent | 939b21ad4b9ed15d43b4ee8b17484e57ed55a01f (diff) | |
download | ComputeLibrary-8a2d7cecea194ac8eafa91721fb5b09ae01e5971.tar.gz |
Extend Transposed Conv. for tiles with N0>1
Partially Resolves: COMPMID-5724
Signed-off-by: Gunes Bayir <gunes.bayir@arm.com>
Change-Id: I0aeddddcdd87c8c79f6dae9a76ffdc2ba0c08e17
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/8883
Benchmark: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
-rw-r--r-- | src/core/CL/cl_kernels/nhwc/transposed_convolution.cl | 4 | ||||
-rw-r--r-- | src/gpu/cl/kernels/ClTransposedConvolutionKernel.cpp | 12 |
2 files changed, 8 insertions, 8 deletions
diff --git a/src/core/CL/cl_kernels/nhwc/transposed_convolution.cl b/src/core/CL/cl_kernels/nhwc/transposed_convolution.cl
index 1ca282caea..fe6182fc95 100644
--- a/src/core/CL/cl_kernels/nhwc/transposed_convolution.cl
+++ b/src/core/CL/cl_kernels/nhwc/transposed_convolution.cl
@@ -52,8 +52,8 @@
  * @note If bias exists, the compile time argument -DHAS_BIAS should be passed
  * @note Only the following configurations of M0, N0 and K0 are currently supported:
  * - M0 = 1
- * - N0 = 1
- * - K0 = 2, 3, 4, 8, 16
+ * - N0 = 1, 2, 3, 4, 8, 16
+ * - K0 = 1, 2, 3, 4, 8, 16
  *
  * @note In case of QASYMM8/QASYMM8_SIGNED, the following extra information must be passed at compile time:
  * - -DIS_QUANTIZED
diff --git a/src/gpu/cl/kernels/ClTransposedConvolutionKernel.cpp b/src/gpu/cl/kernels/ClTransposedConvolutionKernel.cpp
index 714ca8e6d1..1390451ed8 100644
--- a/src/gpu/cl/kernels/ClTransposedConvolutionKernel.cpp
+++ b/src/gpu/cl/kernels/ClTransposedConvolutionKernel.cpp
@@ -130,8 +130,13 @@ void ClTransposedConvolutionKernel::configure(const CLCompileContext &compile_co
     Window win;
     output_shape.collapse(2U, 1U); // Collapse width and height into single dimension
 
+    const unsigned int n0 = adjust_vec_size(16 / output->element_size(), output_channels);
+    const unsigned int m0 = 1;
+    const unsigned int k0 = adjust_vec_size(16 / input->element_size(), input_channels);
+    const unsigned int partial_store_n0 = output_channels % n0;
+
     // Create window and update padding
-    win = calculate_max_window(output_shape, Steps(1, 1));
+    win = calculate_max_window(output_shape, Steps(n0, m0));
     ICLKernel::configure_internal(win);
 
     const std::string kernel_name = "transposed_convolution_nhwc";
@@ -140,11 +145,6 @@ void ClTransposedConvolutionKernel::configure(const CLCompileContext &compile_co
     const DataType input_data_type = input->data_type();
     const PaddingInfo strides = deconv_info.stride();
 
-    const unsigned int n0 = 1;
-    const unsigned int m0 = 1;
-    const unsigned int k0 = adjust_vec_size(16 / input->element_size(), input_channels);
-    const unsigned int partial_store_n0 = output_channels % n0;
-
     if(biases != nullptr)
     {
         build_options.add_option(std::string("-DHAS_BIAS"));