From 8aaf93e8c12ce93d3d0082d4f4b70376f15536da Mon Sep 17 00:00:00 2001 From: Michalis Spyrou Date: Thu, 11 Oct 2018 17:33:32 +0100 Subject: COMPMID-1632 Add CLL2NormalizationLayer for NHWC and FP32 Change-Id: Iae22554d5fe893fd22a000eab5bfd8275ea06eb3 Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/154102 Reviewed-by: Georgios Pinitas Tested-by: bsgcomp --- src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp | 67 +++++++++++++++++------- 1 file changed, 49 insertions(+), 18 deletions(-) (limited to 'src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp') diff --git a/src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp b/src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp index 54ed51eda2..cfd04ef392 100644 --- a/src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp +++ b/src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp @@ -49,9 +49,8 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *sum, cons ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, sum, output); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, sum); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32); - ARM_COMPUTE_RETURN_ERROR_ON(input->data_layout() != DataLayout::NCHW); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(axis > 0, "Unsupported reduction axis, Supported axis is 0"); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32); + ARM_COMPUTE_RETURN_ERROR_ON_MSG(axis > 3, "Unsupported reduction axis"); ARM_COMPUTE_RETURN_ERROR_ON_MSG(axis >= TensorShape::num_max_dimensions, "Reduction axis greater than max number of dimensions"); // Reduce shape on axis @@ -62,9 +61,9 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *sum, cons if(output->total_size() != 0) { ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output); + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(input, output); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); 
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(input->tensor_shape(), output->tensor_shape()); - ARM_COMPUTE_RETURN_ERROR_ON(output->data_layout() != DataLayout::NCHW); } return Status{}; @@ -110,11 +109,19 @@ void CLL2NormalizeLayerKernel::configure(const ICLTensor *input, const ICLTensor build_opts.emplace(("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration))); // Create kernel - _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("l2_normalize", build_opts)); + const DataLayout data_layout = input->info()->data_layout(); + _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("l2_normalize_" + lower_string(string_from_data_layout(data_layout)), build_opts)); // Set epsilon argument - unsigned int idx = num_arguments_per_1D_tensor() * 3; - _kernel.setArg(idx, _epsilon); + unsigned int idx = data_layout == DataLayout::NCHW ? num_arguments_per_1D_tensor() * 3 : num_arguments_per_2D_tensor() * 3; + if(input->info()->data_type() == DataType::F32) + { + _kernel.setArg(idx, _epsilon); + } + else + { + _kernel.setArg(idx, _epsilon); + } // Configure kernel window auto win_config = validate_and_configure_window(_input->info(), _output->info()); @@ -137,18 +144,42 @@ void CLL2NormalizeLayerKernel::run(const Window &window, cl::CommandQueue &queue ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window); Window window_sum(window); - window_sum.set(Window::DimX, Window::Dimension(0, 0, 0)); - - Window in_slice = window.first_slice_window_1D(); - Window sum_slice = window_sum.first_slice_window_1D(); - do + switch(_input->info()->data_layout()) { - unsigned int idx = 0; - add_1D_tensor_argument(idx, _input, in_slice); - add_1D_tensor_argument(idx, _sum, sum_slice); - add_1D_tensor_argument(idx, _output, in_slice); - enqueue(queue, *this, in_slice); + case DataLayout::NCHW: + { + window_sum.set(Window::DimX, Window::Dimension(0, 0, 0)); + Window in_slice = window.first_slice_window_1D(); + Window sum_slice = 
window_sum.first_slice_window_1D(); + do + { + unsigned int idx = 0; + add_1D_tensor_argument(idx, _input, in_slice); + add_1D_tensor_argument(idx, _sum, sum_slice); + add_1D_tensor_argument(idx, _output, in_slice); + enqueue(queue, *this, in_slice); + } + while(window.slide_window_slice_1D(in_slice) && window.slide_window_slice_1D(sum_slice)); + } + break; + case DataLayout::NHWC: + { + window_sum.set(Window::DimY, Window::Dimension(0, 0, 0)); + Window in_slice = window.first_slice_window_2D(); + Window sum_slice = window_sum.first_slice_window_2D(); + do + { + unsigned int idx = 0; + add_2D_tensor_argument(idx, _input, in_slice); + add_2D_tensor_argument(idx, _sum, sum_slice); + add_2D_tensor_argument(idx, _output, in_slice); + enqueue(queue, *this, in_slice); + } + while(window.slide_window_slice_2D(in_slice) && window.slide_window_slice_2D(sum_slice)); + } + break; + default: + ARM_COMPUTE_ERROR("Not supported"); } - while(window.slide_window_slice_1D(in_slice) && window.slide_window_slice_1D(sum_slice)); } -- cgit v1.2.1