diff options
Diffstat (limited to 'src/gpu/cl/kernels/ClWinogradFilterTransformKernel.cpp')
-rw-r--r-- | src/gpu/cl/kernels/ClWinogradFilterTransformKernel.cpp | 46 |
1 files changed, 31 insertions, 15 deletions
diff --git a/src/gpu/cl/kernels/ClWinogradFilterTransformKernel.cpp b/src/gpu/cl/kernels/ClWinogradFilterTransformKernel.cpp index 7148a4c85c..58c01d4da5 100644 --- a/src/gpu/cl/kernels/ClWinogradFilterTransformKernel.cpp +++ b/src/gpu/cl/kernels/ClWinogradFilterTransformKernel.cpp @@ -29,10 +29,11 @@ #include "arm_compute/core/Helpers.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Types.h" +#include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/core/utils/StringUtils.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" -#include "arm_compute/core/utils/misc/ShapeCalculator.h" + #include "src/core/CL/CLValidate.h" #include "src/core/helpers/AutoConfiguration.h" #include "src/core/helpers/WindowHelpers.h" @@ -60,14 +61,18 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, c const size_t idx_w = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::WIDTH); const size_t idx_h = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::HEIGHT); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(!cl_winograd_convolution_layer_supported(output_tile_size, kernel_size, input->data_layout()), "Winograd filter transform not supported"); - ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(idx_w) != kernel_size.width || input->dimension(idx_h) != kernel_size.height); + ARM_COMPUTE_RETURN_ERROR_ON_MSG( + !cl_winograd_convolution_layer_supported(output_tile_size, kernel_size, input->data_layout()), + "Winograd filter transform not supported"); + ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(idx_w) != kernel_size.width || + input->dimension(idx_h) != kernel_size.height); ARM_COMPUTE_RETURN_ERROR_ON(input->num_dimensions() > 4); // Checks performed when output is configured - if(output->total_size() != 0) + if (output->total_size() != 0) { - const TensorInfo tensor_info_output = input->clone()->set_tensor_shape(compute_winograd_filter_transform_shape(*input, winograd_info)); + const TensorInfo tensor_info_output = + input->clone()->set_tensor_shape(compute_winograd_filter_transform_shape(*input, winograd_info)); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(output, &tensor_info_output); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); @@ -81,11 +86,15 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITen ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); ARM_COMPUTE_UNUSED(output); - const unsigned int num_elems_processed_per_iteration_x = input->data_layout() == DataLayout::NCHW ? input->dimension(0) : 1; + const unsigned int num_elems_processed_per_iteration_x = + input->data_layout() == DataLayout::NCHW ? input->dimension(0) : 1; const unsigned int num_elems_processed_per_iteration_y = input->dimension(1); - const unsigned int num_elems_read_per_iteration_z = input->data_layout() == DataLayout::NCHW ? 1 : input->dimension(2); + const unsigned int num_elems_read_per_iteration_z = + input->data_layout() == DataLayout::NCHW ? 1 : input->dimension(2); - Window win = calculate_max_window(*input, Steps(num_elems_processed_per_iteration_x, num_elems_processed_per_iteration_y, num_elems_read_per_iteration_z)); + Window win = + calculate_max_window(*input, Steps(num_elems_processed_per_iteration_x, num_elems_processed_per_iteration_y, + num_elems_read_per_iteration_z)); Window win_collapsed = win.collapse(win, Window::DimZ); return std::make_pair(Status{}, win_collapsed); } @@ -96,21 +105,25 @@ ClWinogradFilterTransformKernel::ClWinogradFilterTransformKernel() _type = CLKernelType::WINOGRAD; } -void ClWinogradFilterTransformKernel::configure(const ClCompileContext &compile_context, ITensorInfo *src, ITensorInfo *dst, const WinogradInfo &winograd_info) +void ClWinogradFilterTransformKernel::configure(const ClCompileContext &compile_context, + ITensorInfo *src, + ITensorInfo *dst, + const WinogradInfo &winograd_info) { ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst); // Output auto initialization if not yet initialized - auto_init_if_empty(*dst, src->clone()->set_tensor_shape(compute_winograd_filter_transform_shape(*src, winograd_info))); + auto_init_if_empty(*dst, + src->clone()->set_tensor_shape(compute_winograd_filter_transform_shape(*src, winograd_info))); ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(src, dst, winograd_info)); - auto padding_info = get_padding_info({ src, dst }); + auto padding_info = get_padding_info({src, dst}); // Set build options CLBuildOptions build_opts; // For NHWC layouts pass tensor dimesions at runtime - if(src->data_layout() == DataLayout::NHWC) + if (src->data_layout() == DataLayout::NHWC) { _src_dim_z = src->dimension(2); } @@ -125,7 +138,8 @@ void ClWinogradFilterTransformKernel::configure(const ClCompileContext &compile_ const Size2D output_tile_size = winograd_info.output_tile_size; // Create kernel - std::string kernel_name = "winograd_filter_transform_" + output_tile_size.to_string() + "_" + kernel_size.to_string() + "_" + lower_string(string_from_data_layout(src->data_layout())); + std::string kernel_name = "winograd_filter_transform_" + output_tile_size.to_string() + "_" + + kernel_size.to_string() + "_" + lower_string(string_from_data_layout(src->data_layout())); // A macro guard to compile ONLY the kernel of interest build_opts.add_option("-D" + upper_string(kernel_name)); @@ -138,7 +152,9 @@ void ClWinogradFilterTransformKernel::configure(const ClCompileContext &compile_ ARM_COMPUTE_ERROR_ON(has_padding_changed(padding_info)); } -Status ClWinogradFilterTransformKernel::validate(const ITensorInfo *src, const ITensorInfo *dst, const WinogradInfo &winograd_info) +Status ClWinogradFilterTransformKernel::validate(const ITensorInfo *src, + const ITensorInfo *dst, + const WinogradInfo &winograd_info) { ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(src, dst, winograd_info)); ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(src->clone().get(), dst->clone().get()).first); @@ -161,7 +177,7 @@ void ClWinogradFilterTransformKernel::run_op(ITensorPack &tensors, const Window unsigned int idx = 0; add_4D_tensor_argument(idx, src, window); add_3D_tensor_argument(idx, dst, window_out); - if(src->info()->data_layout() == DataLayout::NHWC) + if (src->info()->data_layout() == DataLayout::NHWC) { _kernel.setArg<cl_uint>(idx++, _src_dim_z); } |