Diffstat (limited to 'src/gpu/cl/kernels/ClWinogradInputTransformKernel.cpp')
-rw-r--r--  src/gpu/cl/kernels/ClWinogradInputTransformKernel.cpp  63
1 file changed, 38 insertions(+), 25 deletions(-)
diff --git a/src/gpu/cl/kernels/ClWinogradInputTransformKernel.cpp b/src/gpu/cl/kernels/ClWinogradInputTransformKernel.cpp
index fab6c36032..54c48986fc 100644
--- a/src/gpu/cl/kernels/ClWinogradInputTransformKernel.cpp
+++ b/src/gpu/cl/kernels/ClWinogradInputTransformKernel.cpp
@@ -32,6 +32,7 @@
#include "arm_compute/core/Types.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/core/utils/StringUtils.h"
+
#include "src/core/AccessWindowStatic.h"
#include "src/core/CL/CLValidate.h"
#include "src/core/helpers/AutoConfiguration.h"
@@ -55,17 +56,21 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, c
const PadStrideInfo conv_info = winograd_info.convolution_info;
const Size2D output_tile_size = winograd_info.output_tile_size;
const Size2D kernel_size = winograd_info.kernel_size;
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv_info.stride().first != 1 || conv_info.stride().second != 1, "Winograd input transform only supports unit strides");
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(!cl_winograd_convolution_layer_supported(output_tile_size, kernel_size, input->data_layout()), "Winograd input transform not supported");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv_info.stride().first != 1 || conv_info.stride().second != 1,
+ "Winograd input transform only supports unit strides");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(
+ !cl_winograd_convolution_layer_supported(output_tile_size, kernel_size, input->data_layout()),
+ "Winograd input transform not supported");
ARM_COMPUTE_UNUSED(conv_info);
ARM_COMPUTE_UNUSED(output_tile_size);
ARM_COMPUTE_UNUSED(kernel_size);
// Validate configured output
- if(output->total_size() != 0)
+ if (output->total_size() != 0)
{
- const TensorShape output_shape = misc::shape_calculator::compute_winograd_input_transform_shape(*input, winograd_info);
+ const TensorShape output_shape =
+ misc::shape_calculator::compute_winograd_input_transform_shape(*input, winograd_info);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(output->tensor_shape(), output_shape);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
@@ -74,7 +79,8 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, c
return Status{};
}
-std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *output, const WinogradInfo &winograd_info)
+std::pair<Status, Window>
+validate_and_configure_window(ITensorInfo *input, ITensorInfo *output, const WinogradInfo &winograd_info)
{
ARM_COMPUTE_UNUSED(output);
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
@@ -82,7 +88,7 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITen
bool window_changed = false;
int num_elems_processed_per_iteration = 1;
- if(input->data_layout() == DataLayout::NHWC)
+ if (input->data_layout() == DataLayout::NHWC)
{
// In the case of FP16 computation, we can perform more
// output feature maps in a single work-item.
@@ -94,9 +100,9 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITen
const size_t dim0 = input->dimension(0);
const size_t k_sz = winograd_info.kernel_size.area();
const bool cond = dt == DataType::F16 && ((dim0 % 2) == 0);
- if(cond)
+ if (cond)
{
- if(k_sz == 3 || k_sz == 9)
+ if (k_sz == 3 || k_sz == 9)
{
num_elems_processed_per_iteration = 2;
}
@@ -104,7 +110,7 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITen
}
Window win = calculate_max_window(*input, Steps(num_elems_processed_per_iteration));
- if(input->data_layout() == DataLayout::NCHW)
+ if (input->data_layout() == DataLayout::NCHW)
{
const PadStrideInfo conv_info = winograd_info.convolution_info;
const Size2D output_tile_size = winograd_info.output_tile_size;
@@ -113,11 +119,13 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITen
unsigned int num_elems_read_per_iteration_x = output_tile_size.width + kernel_size.width - 1;
unsigned int num_elems_read_per_iteration_y = output_tile_size.height + kernel_size.height - 1;
- AccessWindowRectangle input_access(input, -conv_info.pad_left(), -conv_info.pad_top(), num_elems_read_per_iteration_x, num_elems_read_per_iteration_y);
+ AccessWindowRectangle input_access(input, -conv_info.pad_left(), -conv_info.pad_top(),
+ num_elems_read_per_iteration_x, num_elems_read_per_iteration_y);
window_changed = update_window_and_padding(win, input_access);
}
- Status err = (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{};
+ Status err =
+ (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{};
return std::make_pair(err, win);
}
} // namespace
@@ -132,12 +140,15 @@ BorderSize ClWinogradInputTransformKernel::border_size() const
return _border_size;
}
-void ClWinogradInputTransformKernel::configure(const ClCompileContext &compile_context, ITensorInfo *src, ITensorInfo *dst, const WinogradInfo &winograd_info)
+void ClWinogradInputTransformKernel::configure(const ClCompileContext &compile_context,
+ ITensorInfo *src,
+ ITensorInfo *dst,
+ const WinogradInfo &winograd_info)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst);
ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(src, dst, winograd_info));
- auto padding_info = get_padding_info({ src, dst });
+ auto padding_info = get_padding_info({src, dst});
const PadStrideInfo conv_info = winograd_info.convolution_info;
const Size2D output_tile_size = winograd_info.output_tile_size;
@@ -150,14 +161,13 @@ void ClWinogradInputTransformKernel::configure(const ClCompileContext &compile_c
// Compute the number of output tiles along the x and y direction of size "output_tile_size"
const Size2D num_tiles = compute_winograd_convolution_tiles(Size2D(src->dimension(idx_w), src->dimension(idx_h)),
- kernel_size,
- output_tile_size,
- conv_info);
+ kernel_size, output_tile_size, conv_info);
_num_tiles_x = num_tiles.width;
_num_tiles_y = num_tiles.height;
- const TensorShape output_shape = misc::shape_calculator::compute_winograd_input_transform_shape(*src, winograd_info);
+ const TensorShape output_shape =
+ misc::shape_calculator::compute_winograd_input_transform_shape(*src, winograd_info);
// Output auto initialization if not yet initialized
auto_init_if_empty(*dst, src->clone()->set_tensor_shape(output_shape));
@@ -174,7 +184,7 @@ void ClWinogradInputTransformKernel::configure(const ClCompileContext &compile_c
_src_height = src->dimension(idx_h);
CLBuildOptions build_opts;
- if(_data_layout == DataLayout::NHWC)
+ if (_data_layout == DataLayout::NHWC)
{
build_opts.add_option("-DNHWC");
build_opts.add_option("-DN0=" + support::cpp11::to_string(win_config.second.x().step()));
@@ -201,13 +211,14 @@ void ClWinogradInputTransformKernel::configure(const ClCompileContext &compile_c
}
// Create kernel
- std::string kernel_name = "winograd_input_transform_" + output_tile_size.to_string() + "_" + kernel_size.to_string();
+ std::string kernel_name =
+ "winograd_input_transform_" + output_tile_size.to_string() + "_" + kernel_size.to_string();
// Get the maximum dimension from the tile size
const unsigned int tile_max_dim = std::max(output_tile_size.width, output_tile_size.height);
// Check optimized kernel if output_dims == 2x2
- if((tile_max_dim == 2) && (_data_layout == DataLayout::NCHW))
+ if ((tile_max_dim == 2) && (_data_layout == DataLayout::NCHW))
{
_step_z = (src->dimension(2) % 2) != 0 ? 1 : 2;
}
@@ -239,11 +250,14 @@ void ClWinogradInputTransformKernel::configure(const ClCompileContext &compile_c
_config_id += lower_string(string_from_data_layout(_data_layout));
}
-Status ClWinogradInputTransformKernel::validate(const ITensorInfo *src, const ITensorInfo *dst, const WinogradInfo &winograd_info)
+Status ClWinogradInputTransformKernel::validate(const ITensorInfo *src,
+ const ITensorInfo *dst,
+ const WinogradInfo &winograd_info)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, dst);
ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(src, dst, winograd_info));
- ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(src->clone().get(), dst->clone().get(), winograd_info).first);
+ ARM_COMPUTE_RETURN_ON_ERROR(
+ validate_and_configure_window(src->clone().get(), dst->clone().get(), winograd_info).first);
return Status{};
}
@@ -263,7 +277,7 @@ void ClWinogradInputTransformKernel::run_op(ITensorPack &tensors, const Window &
// Collapse window
Window window_collapsed = window.collapse_if_possible(IClKernel::window(), Window::DimZ);
- if(_data_layout == DataLayout::NHWC)
+ if (_data_layout == DataLayout::NHWC)
{
Window slice = window_collapsed.first_slice_window_3D();
slice.set(1, Window::Dimension(0, _num_tiles_x * _num_tiles_y, 1));
@@ -298,8 +312,7 @@ void ClWinogradInputTransformKernel::run_op(ITensorPack &tensors, const Window &
add_3D_tensor_argument(idx, dst, slice);
enqueue(queue, *this, slice, lws_hint());
- }
- while(window_collapsed.slide_window_slice_3D(slice));
+ } while (window_collapsed.slide_window_slice_3D(slice));
}
}
} // namespace kernels