COMPMID-1632 Add CLL2NormalizationLayer for NHWC and FP32

Change-Id: Iae22554d5fe893fd22a000eab5bfd8275ea06eb3 Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/154102 Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com> Tested-by: bsgcomp <bsgcomp@arm.com>
author: Michalis Spyrou <michalis.spyrou@arm.com> 2018-10-11 17:33:32 +0100
committer: Anthony Barbier <anthony.barbier@arm.com> 2018-11-02 16:55:45 +0000
commit: 8aaf93e8c12ce93d3d0082d4f4b70376f15536da (patch)
tree: 0922f3dde6fafae181e101df315ef36007801850 /src/core/CL/kernels
parent: c93691717a6e7ca67e32b4dedd233b8c63b6daf2 (diff)
download: ComputeLibrary-8aaf93e8c12ce93d3d0082d4f4b70376f15536da.tar.gz
2 files changed, 51 insertions, 19 deletions
diff --git a/src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp b/src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp
index 54ed51eda2..cfd04ef392 100644
--- a/src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp
+++ b/src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp
@@ -49,9 +49,8 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *sum, cons
 
     ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, sum, output);
     ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, sum);
-    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32);
-    ARM_COMPUTE_RETURN_ERROR_ON(input->data_layout() != DataLayout::NCHW);
-    ARM_COMPUTE_RETURN_ERROR_ON_MSG(axis > 0, "Unsupported reduction axis, Supported axis is 0");
+    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32);
+    ARM_COMPUTE_RETURN_ERROR_ON_MSG(axis > 3, "Unsupported reduction axis");
     ARM_COMPUTE_RETURN_ERROR_ON_MSG(axis >= TensorShape::num_max_dimensions, "Reduction axis greater than max number of dimensions");
 
     // Reduce shape on axis
@@ -62,9 +61,9 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *sum, cons
     if(output->total_size() != 0)
     {
         ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output);
+        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(input, output);
         ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
         ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(input->tensor_shape(), output->tensor_shape());
-        ARM_COMPUTE_RETURN_ERROR_ON(output->data_layout() != DataLayout::NCHW);
     }
 
     return Status{};
@@ -110,11 +109,19 @@ void CLL2NormalizeLayerKernel::configure(const ICLTensor *input, const ICLTensor
     build_opts.emplace(("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration)));
 
     // Create kernel
-    _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("l2_normalize", build_opts));
+    const DataLayout data_layout = input->info()->data_layout();
+    _kernel                      = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("l2_normalize_" + lower_string(string_from_data_layout(data_layout)), build_opts));
 
     // Set epsilon argument
-    unsigned int idx = num_arguments_per_1D_tensor() * 3;
-    _kernel.setArg<cl_uint>(idx, _epsilon);
+    unsigned int idx = data_layout == DataLayout::NCHW ? num_arguments_per_1D_tensor() * 3 : num_arguments_per_2D_tensor() * 3;
+    if(input->info()->data_type() == DataType::F32)
+    {
+        _kernel.setArg<cl_uint>(idx, _epsilon);
+    }
+    else
+    {
+        _kernel.setArg<cl_ushort>(idx, _epsilon);
+    }
 
     // Configure kernel window
     auto win_config = validate_and_configure_window(_input->info(), _output->info());
@@ -137,18 +144,42 @@ void CLL2NormalizeLayerKernel::run(const Window &window, cl::CommandQueue &queue
     ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window);
 
     Window window_sum(window);
-    window_sum.set(Window::DimX, Window::Dimension(0, 0, 0));
-
-    Window in_slice  = window.first_slice_window_1D();
-    Window sum_slice = window_sum.first_slice_window_1D();
 
-    do
+    switch(_input->info()->data_layout())
     {
-        unsigned int idx = 0;
-        add_1D_tensor_argument(idx, _input, in_slice);
-        add_1D_tensor_argument(idx, _sum, sum_slice);
-        add_1D_tensor_argument(idx, _output, in_slice);
-        enqueue(queue, *this, in_slice);
+        case DataLayout::NCHW:
+        {
+            window_sum.set(Window::DimX, Window::Dimension(0, 0, 0));
+            Window in_slice  = window.first_slice_window_1D();
+            Window sum_slice = window_sum.first_slice_window_1D();
+            do
+            {
+                unsigned int idx = 0;
+                add_1D_tensor_argument(idx, _input, in_slice);
+                add_1D_tensor_argument(idx, _sum, sum_slice);
+                add_1D_tensor_argument(idx, _output, in_slice);
+                enqueue(queue, *this, in_slice);
+            }
+            while(window.slide_window_slice_1D(in_slice) && window.slide_window_slice_1D(sum_slice));
+        }
+        break;
+        case DataLayout::NHWC:
+        {
+            window_sum.set(Window::DimY, Window::Dimension(0, 0, 0));
+            Window in_slice  = window.first_slice_window_2D();
+            Window sum_slice = window_sum.first_slice_window_2D();
+            do
+            {
+                unsigned int idx = 0;
+                add_2D_tensor_argument(idx, _input, in_slice);
+                add_2D_tensor_argument(idx, _sum, sum_slice);
+                add_2D_tensor_argument(idx, _output, in_slice);
+                enqueue(queue, *this, in_slice);
+            }
+            while(window.slide_window_slice_2D(in_slice) && window.slide_window_slice_2D(sum_slice));
+        }
+        break;
+        default:
+            ARM_COMPUTE_ERROR("Not supported");
     }
-    while(window.slide_window_slice_1D(in_slice) && window.slide_window_slice_1D(sum_slice));
 }
diff --git a/src/core/CL/kernels/CLReductionOperationKernel.cpp b/src/core/CL/kernels/CLReductionOperationKernel.cpp
index d4165ccd4e..ef46325e4d 100644
--- a/src/core/CL/kernels/CLReductionOperationKernel.cpp
+++ b/src/core/CL/kernels/CLReductionOperationKernel.cpp
@@ -46,7 +46,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, u
 {
     ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
     ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16, DataType::F32);
-    ARM_COMPUTE_RETURN_ERROR_ON_MSG(op == ReductionOperation::SUM_SQUARE && axis != 0, "Not supported reduction operation for this axis");
+    ARM_COMPUTE_RETURN_ERROR_ON_MSG(op == ReductionOperation::SUM_SQUARE && input->data_type() == DataType::QASYMM8, "Not supported reduction operation for QASYMM8");
     ARM_COMPUTE_RETURN_ERROR_ON_MSG(axis >= TensorShape::num_max_dimensions, "Reduction axis greater than max number of dimensions");
     ARM_COMPUTE_RETURN_ERROR_ON_MSG(axis > 3, "Unsupported reduction axis");
     ARM_COMPUTE_RETURN_ERROR_ON(op == ReductionOperation::MEAN_SUM && axis == 0 && width == 0 && input->data_type() != DataType::QASYMM8);
@@ -142,6 +142,7 @@ void CLReductionOperationKernel::configure(const ICLTensor *input, ICLTensor *ou
     }
     build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type()));
     build_opts.add_option("-DDATA_TYPE_PROMOTED=" + data_type_promoted);
+    build_opts.add_option_if(op == ReductionOperation::SUM_SQUARE, "-DSUM_SQUARE=");
     build_opts.add_option_if(op == ReductionOperation::MEAN_SUM, "-DMEAN");
 
     switch(op)
author	Michalis Spyrou <michalis.spyrou@arm.com>	2018-10-11 17:33:32 +0100
committer	Anthony Barbier <anthony.barbier@arm.com>	2018-11-02 16:55:45 +0000
commit	8aaf93e8c12ce93d3d0082d4f4b70376f15536da (patch)
tree	0922f3dde6fafae181e101df315ef36007801850 /src/core/CL/kernels
parent	c93691717a6e7ca67e32b4dedd233b8c63b6daf2 (diff)
download	ComputeLibrary-8aaf93e8c12ce93d3d0082d4f4b70376f15536da.tar.gz