-rw-r--r--  arm_compute/core/CL/kernels/CLL2NormalizeLayerKernel.h |  8
-rw-r--r--  arm_compute/runtime/CL/functions/CLL2NormalizeLayer.h  |  8
-rw-r--r--  src/core/CL/CLKernelLibrary.cpp                         |  5
-rw-r--r--  src/core/CL/cl_kernels/l2_normalize.cl                  | 59
-rw-r--r--  src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp        | 45
-rw-r--r--  tests/validation/CL/L2NormalizeLayer.cpp                |  4
-rw-r--r--  tests/validation/reference/L2NormalizeLayer.cpp         | 40
7 files changed, 134 insertions(+), 35 deletions(-)
diff --git a/arm_compute/core/CL/kernels/CLL2NormalizeLayerKernel.h b/arm_compute/core/CL/kernels/CLL2NormalizeLayerKernel.h
index ae05bcf879..8dd4609250 100644
--- a/arm_compute/core/CL/kernels/CLL2NormalizeLayerKernel.h
+++ b/arm_compute/core/CL/kernels/CLL2NormalizeLayerKernel.h
@@ -50,24 +50,24 @@ public:
/** Set the input and output tensors.
*
- * @param[in] input Source tensor. Data types supported: F32. Data layouts supported: NCHW/NHWC.
+ * @param[in] input Source tensor. Data types supported: F16/F32. Data layouts supported: NCHW/NHWC.
* @param[in] sum Sum values tensor. Data types supported: same as @p input.
* Sum will have the same number of dimensions as input.
* @param[out] output Destination tensor. Data types and data layouts supported: Same as @p input.
* Output will have the same number of dimensions as input.
- * @param[in] axis Axis along which to reduce. Supported reduction axis : 0
+ * @param[in] axis Axis along which to reduce. Supported reduction axis : 0, 1, 2
* @param[in] epsilon Lower bound value for the normalization.
*/
void configure(const ICLTensor *input, const ICLTensor *sum, ICLTensor *output, unsigned int axis, float epsilon);
/** Static function to check if given info will lead to a valid configuration of @ref CLL2NormalizeLayerKernel.
*
- * @param[in] input Source tensor info. Data types supported: F32. Data layouts supported: NCHW/NHWC.
+ * @param[in] input Source tensor info. Data types supported: F16/F32. Data layouts supported: NCHW/NHWC.
* @param[in] sum Sum values tensor info. Data types supported: same as @p input.
* Sum will have the same number of dimensions as input.
* @param[in] output Destination tensor info. Data types and data layouts supported: Same as @p input.
* Output will have the same number of dimensions as input.
- * @param[in] axis Axis along which to reduce. Supported reduction axis : 0
+ * @param[in] axis Axis along which to reduce. Supported reduction axis : 0, 1, 2
* @param[in] epsilon Lower bound value for the normalization.
*
* @return a status
diff --git a/arm_compute/runtime/CL/functions/CLL2NormalizeLayer.h b/arm_compute/runtime/CL/functions/CLL2NormalizeLayer.h
index a7c47a2327..2cabaee5de 100644
--- a/arm_compute/runtime/CL/functions/CLL2NormalizeLayer.h
+++ b/arm_compute/runtime/CL/functions/CLL2NormalizeLayer.h
@@ -53,18 +53,18 @@ public:
/** Set the input and output tensors.
*
- * @param[in] input Source tensor. Data types supported: F32. Data layouts supported: NCHW/NHWC.
+ * @param[in] input Source tensor. Data types supported: F16/F32. Data layouts supported: NCHW/NHWC.
* @param[out] output Destination tensor. Data types and data layouts supported: Same as @p input.
- * @param[in] axis Axis along which to reduce. Supported reduction axis : 0, 2
+ * @param[in] axis Axis along which to reduce. Supported reduction axis : 0, 1, 2
* @param[in] epsilon (Optional) Lower bound value for the normalization.
*/
void configure(ICLTensor *input, ICLTensor *output, unsigned int axis, float epsilon = 1e-12f);
/** Static function to check if given info will lead to a valid configuration of @ref CLL2NormalizeLayer.
*
- * @param[in] input Source tensor info. Data types supported: F32. Data layouts supported: NCHW/NHWC.
+ * @param[in] input Source tensor info. Data types supported: F16/F32. Data layouts supported: NCHW/NHWC.
* @param[in] output Destination tensor info. Data types and data layouts supported: Same as @p input.
- * @param[in] axis Axis along which to reduce. Supported reduction axis : 0, 2,
+ * @param[in] axis Axis along which to reduce. Supported reduction axis : 0, 1, 2
* @param[in] epsilon (Optional) Lower bound value for the normalization.
*
* @return a status
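
For orientation, a minimal usage sketch of the function-level API documented above; the CLScheduler/CLTensor setup, the tensor shape, and the choice of axis are illustrative assumptions, not part of this patch:

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/core/TensorShape.h"
    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"
    #include "arm_compute/runtime/CL/functions/CLL2NormalizeLayer.h"

    using namespace arm_compute;

    int main()
    {
        CLScheduler::get().default_init();

        // Hypothetical 3D input of shape (8, 16, 4); normalize along axis 1,
        // which this patch newly supports.
        CLTensor input, output;
        input.allocator()->init(TensorInfo(TensorShape(8U, 16U, 4U), 1, DataType::F32));
        output.allocator()->init(TensorInfo(TensorShape(8U, 16U, 4U), 1, DataType::F32));

        CLL2NormalizeLayer l2_norm;
        l2_norm.configure(&input, &output, 1 /* axis */, 1e-12f /* epsilon */);

        input.allocator()->allocate();
        output.allocator()->allocate();

        // ... fill input, then execute and wait for completion ...
        l2_norm.run();
        CLScheduler::get().sync();
        return 0;
    }
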
diff --git a/src/core/CL/CLKernelLibrary.cpp b/src/core/CL/CLKernelLibrary.cpp
index 847236925a..3c2528f358 100644
--- a/src/core/CL/CLKernelLibrary.cpp
+++ b/src/core/CL/CLKernelLibrary.cpp
@@ -303,8 +303,9 @@ const std::map<std::string, std::string> CLKernelLibrary::_kernel_program_map =
{ "IYUV_to_RGB888_bt709", "color_convert.cl" },
{ "IYUV_to_RGBA8888_bt709", "color_convert.cl" },
{ "IYUV_to_YUV444_bt709", "color_convert.cl" },
- { "l2_normalize_nchw", "l2_normalize.cl" },
- { "l2_normalize_nhwc", "l2_normalize.cl" },
+ { "l2_normalize_x", "l2_normalize.cl" },
+ { "l2_normalize_y", "l2_normalize.cl" },
+ { "l2_normalize_z", "l2_normalize.cl" },
{ "lktracker_stage0", "optical_flow_pyramid_lk.cl" },
{ "lktracker_stage1", "optical_flow_pyramid_lk.cl" },
{ "magnitude_phase", "magnitude_phase.cl" },
diff --git a/src/core/CL/cl_kernels/l2_normalize.cl b/src/core/CL/cl_kernels/l2_normalize.cl
index d230487030..5f66efbcc4 100644
--- a/src/core/CL/cl_kernels/l2_normalize.cl
+++ b/src/core/CL/cl_kernels/l2_normalize.cl
@@ -23,7 +23,7 @@
*/
#include "helpers.h"
-/** This kernel performs l2 normalization. (NCHW)
+/** This kernel performs l2 normalization on the x-axis.
*
* @note The data type must be passed at compile time using -DDATA_TYPE: e.g. -DDATA_TYPE=float
* @note The data size must be passed at compile time using -DDATA_SIZE e.g. -DDATA_SIZE=32
@@ -42,7 +42,7 @@
* @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination tensor
* @param[in] epsilon Epsilon value
*/
-__kernel void l2_normalize_nchw(
+__kernel void l2_normalize_x(
VECTOR_DECLARATION(src),
VECTOR_DECLARATION(sum),
VECTOR_DECLARATION(dst),
@@ -60,7 +60,7 @@ __kernel void l2_normalize_nchw(
vstore16(in * normalize_value, 0, (__global DATA_TYPE *)dst.ptr);
}
-/** This kernel performs l2 normalization. (NHWC)
+/** This kernel performs l2 normalization on the y-axis.
*
* @note The data type must be passed at compile time using -DDATA_TYPE: e.g. -DDATA_TYPE=float
* @note The data size must be passed at compile time using -DDATA_SIZE e.g. -DDATA_SIZE=32
@@ -85,7 +85,7 @@ __kernel void l2_normalize_nchw(
* @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination tensor
* @param[in] epsilon Epsilon value
*/
-__kernel void l2_normalize_nhwc(
+__kernel void l2_normalize_y(
IMAGE_DECLARATION(src),
IMAGE_DECLARATION(sum),
IMAGE_DECLARATION(dst),
@@ -104,4 +104,55 @@ __kernel void l2_normalize_nhwc(
normalize_value = (VEC_DATA_TYPE(DATA_TYPE, 16))rsqrt(fmax(sums, epsilon));
vstore16(in * normalize_value, 0, (__global DATA_TYPE *)dst.ptr);
+}
+/** This kernel performs l2 normalization on the z-axis.
+ *
+ * @note The data type must be passed at compile time using -DDATA_TYPE: e.g. -DDATA_TYPE=float
+ * @note The data size must be passed at compile time using -DDATA_SIZE e.g. -DDATA_SIZE=32
+ *
+ * @param[in] src_ptr Pointer to the source tensor. Supported data types: F16/F32
+ * @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes)
+ * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
+ * @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes)
+ * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes)
+ * @param[in] src_stride_z Stride of the source tensor in Z dimension (in bytes)
+ * @param[in] src_step_z src_stride_z * number of elements along Z processed per workitem(in bytes)
+ * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source tensor
+ * @param[in] sum_ptr Pointer to the sum tensor. Supported data types: F16/F32
+ * @param[in] sum_stride_x Stride of the source tensor in X dimension (in bytes)
+ * @param[in] sum_step_x sum_stride_x * number of elements along X processed per workitem(in bytes)
+ * @param[in] sum_stride_y Stride of the source tensor in Y dimension (in bytes)
+ * @param[in] sum_step_y sum_stride_y * number of elements along Y processed per workitem(in bytes)
+ * @param[in] sum_stride_z Stride of the source tensor in Z dimension (in bytes)
+ * @param[in] sum_step_z sum_stride_z * number of elements along Z processed per workitem(in bytes)
+ * @param[in] sum_offset_first_element_in_bytes The offset of the first element in the sum tensor
+ * @param[out] dst_ptr Pointer to the destination tensor. Supported data types: same as @p src_ptr
+ * @param[in] dst_stride_x Stride of the destination tensor in X dimension (in bytes)
+ * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes)
+ * @param[in] dst_stride_y Stride of the destination tensor in Y dimension (in bytes)
+ * @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes)
+ * @param[in] dst_stride_z Stride of the destination tensor in Z dimension (in bytes)
+ * @param[in] dst_step_z dst_stride_z * number of elements along Z processed per workitem(in bytes)
+ * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination tensor
+ * @param[in] epsilon Epsilon value
+ */
+__kernel void l2_normalize_z(
+ TENSOR3D_DECLARATION(src),
+ TENSOR3D_DECLARATION(sum),
+ TENSOR3D_DECLARATION(dst),
+ DATA_TYPE epsilon)
+{
+ Tensor3D src = CONVERT_TO_TENSOR3D_STRUCT(src);
+ Tensor3D sum = CONVERT_TO_TENSOR3D_STRUCT(sum);
+ Tensor3D dst = CONVERT_TO_TENSOR3D_STRUCT(dst);
+
+ VEC_DATA_TYPE(DATA_TYPE, 16)
+ in = vload16(0, (__global DATA_TYPE *)src.ptr);
+ VEC_DATA_TYPE(DATA_TYPE, 16)
+ sums = vload16(0, (__global DATA_TYPE *)sum.ptr);
+
+ VEC_DATA_TYPE(DATA_TYPE, 16)
+ normalize_value = (VEC_DATA_TYPE(DATA_TYPE, 16))rsqrt(fmax(sums, epsilon));
+
+ vstore16(in * normalize_value, 0, (__global DATA_TYPE *)dst.ptr);
+}
\ No newline at end of file
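
All three kernel variants above apply the same element-wise operation; they differ only in the tensor declaration they take and in the axis along which the sum tensor was reduced. A scalar sketch of that operation in plain C++ (illustrative only, not part of the patch):

    #include <algorithm>
    #include <cmath>

    // out = in / sqrt(max(sum_sq, epsilon)), where sum_sq is the sum of squares
    // of the input along the reduced axis (produced by the reduction kernel and
    // passed to l2_normalize_* as the "sum" tensor).
    float l2_normalize_element(float in, float sum_sq, float epsilon)
    {
        return in / std::sqrt(std::max(sum_sq, epsilon));
    }
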
diff --git a/src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp b/src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp
index cfd04ef392..97dd919d08 100644
--- a/src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp
+++ b/src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp
@@ -109,11 +109,28 @@ void CLL2NormalizeLayerKernel::configure(const ICLTensor *input, const ICLTensor
build_opts.emplace(("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration)));
// Create kernel
- const DataLayout data_layout = input->info()->data_layout();
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("l2_normalize_" + lower_string(string_from_data_layout(data_layout)), build_opts));
+ std::string kernel_name;
+ unsigned int idx = 0;
+ switch(axis)
+ {
+ case 0:
+ kernel_name = "x";
+ idx = num_arguments_per_1D_tensor() * 3;
+ break;
+ case 1:
+ kernel_name = "y";
+ idx = num_arguments_per_2D_tensor() * 3;
+ break;
+ case 2:
+ kernel_name = "z";
+ idx = num_arguments_per_3D_tensor() * 3;
+ break;
+ default:
+ ARM_COMPUTE_ERROR("Not supported");
+ }
+ _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("l2_normalize_" + kernel_name, build_opts));
// Set epsilon argument
- unsigned int idx = data_layout == DataLayout::NCHW ? num_arguments_per_1D_tensor() * 3 : num_arguments_per_2D_tensor() * 3;
if(input->info()->data_type() == DataType::F32)
{
_kernel.setArg<cl_uint>(idx, _epsilon);
@@ -145,9 +162,9 @@ void CLL2NormalizeLayerKernel::run(const Window &window, cl::CommandQueue &queue
Window window_sum(window);
- switch(_input->info()->data_layout())
+ switch(_axis)
{
- case DataLayout::NCHW:
+ case 0:
{
window_sum.set(Window::DimX, Window::Dimension(0, 0, 0));
Window in_slice = window.first_slice_window_1D();
@@ -163,7 +180,7 @@ void CLL2NormalizeLayerKernel::run(const Window &window, cl::CommandQueue &queue
while(window.slide_window_slice_1D(in_slice) && window.slide_window_slice_1D(sum_slice));
}
break;
- case DataLayout::NHWC:
+ case 1:
{
window_sum.set(Window::DimY, Window::Dimension(0, 0, 0));
Window in_slice = window.first_slice_window_2D();
@@ -179,6 +196,22 @@ void CLL2NormalizeLayerKernel::run(const Window &window, cl::CommandQueue &queue
while(window.slide_window_slice_2D(in_slice) && window.slide_window_slice_2D(sum_slice));
}
break;
+ case 2:
+ {
+ window_sum.set(Window::DimZ, Window::Dimension(0, 0, 0));
+ Window in_slice = window.first_slice_window_3D();
+ Window sum_slice = window_sum.first_slice_window_3D();
+ do
+ {
+ unsigned int idx = 0;
+ add_3D_tensor_argument(idx, _input, in_slice);
+ add_3D_tensor_argument(idx, _sum, sum_slice);
+ add_3D_tensor_argument(idx, _output, in_slice);
+ enqueue(queue, *this, in_slice);
+ }
+ while(window.slide_window_slice_3D(in_slice) && window.slide_window_slice_3D(sum_slice));
+ }
+ break;
default:
ARM_COMPUTE_ERROR("Not supported");
}
diff --git a/tests/validation/CL/L2NormalizeLayer.cpp b/tests/validation/CL/L2NormalizeLayer.cpp
index 517ba84069..fae7fd66db 100644
--- a/tests/validation/CL/L2NormalizeLayer.cpp
+++ b/tests/validation/CL/L2NormalizeLayer.cpp
@@ -46,8 +46,8 @@ namespace
constexpr AbsoluteTolerance<float> tolerance_f32(0.00001f);
constexpr AbsoluteTolerance<float> tolerance_f16(0.01f);
-auto data = concat(combine(framework::dataset::make("DataLayout", { DataLayout::NCHW }), framework::dataset::make("Axis", { 0 })), combine(framework::dataset::make("DataLayout", { DataLayout::NHWC }),
- framework::dataset::make("Axis", { 1 })));
+auto data = concat(combine(framework::dataset::make("DataLayout", { DataLayout::NCHW }), framework::dataset::make("Axis", { 0, 1, 2 })), combine(framework::dataset::make("DataLayout", { DataLayout::NHWC }),
+ framework::dataset::make("Axis", { 0, 1 })));
} // namespace
diff --git a/tests/validation/reference/L2NormalizeLayer.cpp b/tests/validation/reference/L2NormalizeLayer.cpp
index 26677511e4..fcd6226f07 100644
--- a/tests/validation/reference/L2NormalizeLayer.cpp
+++ b/tests/validation/reference/L2NormalizeLayer.cpp
@@ -57,24 +57,38 @@ SimpleTensor<T> l2_normalize(const SimpleTensor<T> &src, unsigned int axis, floa
SimpleTensor<T> sum = reduction_operation(src, get_output_shape(src.shape(), axis), axis, ReductionOperation::SUM_SQUARE);
// Compute reference
- const int elems = src.shape()[axis];
- const int upper_dims = src.shape().total_size_upper(axis + 1);
+ const int upper_dims = src.shape().total_size_upper(axis + 1);
+ const int lower_dims = src.shape().total_size_lower(axis + 1);
+ const int lower_dims_sum = sum.shape().total_size_lower(axis + 1);
for(int du = 0; du < upper_dims; ++du)
{
- if(axis == 0)
+ const T *src_row_ptr = src.data() + du * lower_dims;
+ T *dst_row_ptr = dst.data() + du * lower_dims;
+ switch(axis)
{
- const T *src_row_ptr = src.data() + du * elems;
- T *dst_row_ptr = dst.data() + du * elems;
- const T normalization_value = sqrt(std::max(sum[du], static_cast<T>(epsilon)));
- std::transform(src_row_ptr, src_row_ptr + elems, dst_row_ptr, [normalization_value](T val)
+ case 0:
{
- return val / normalization_value;
- });
- }
- else
- {
- ARM_COMPUTE_ERROR("Unsupported normalization axis");
+ const int elems = src.shape()[0];
+ const T normalization_value = sqrt(std::max(sum[du], static_cast<T>(epsilon)));
+ std::transform(src_row_ptr, src_row_ptr + elems, dst_row_ptr, [normalization_value](T val)
+ {
+ return val / normalization_value;
+ });
+ }
+ break;
+ case 1:
+ case 2:
+ {
+ for(int ld = 0; ld < lower_dims; ++ld)
+ {
+ const T normalization_value = sqrt(std::max(sum[ld % lower_dims_sum + du * lower_dims_sum], static_cast<T>(epsilon)));
+ dst_row_ptr[ld] = src_row_ptr[ld] / normalization_value;
+ }
+ }
+ break;
+ default:
+ ARM_COMPUTE_ERROR("Axis not supported");
}
}
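
The new axis-1/axis-2 branch of the reference relies on the SUM_SQUARE reduction keeping the reduced dimension at size 1, so the expression ld % lower_dims_sum + du * lower_dims_sum maps every element of a reduced line to the single sum entry for that line. A small standalone sketch of the mapping, using a hypothetical (W, H, C) = (4, 3, 2) shape and axis = 1:

    #include <cstdio>

    int main()
    {
        // src shape (W, H, C) = (4, 3, 2); reducing along axis 1 gives sum shape (4, 1, 2).
        const int W = 4, H = 3, C = 2;
        const int lower_dims     = W * H; // src elements per upper-dims slice
        const int lower_dims_sum = W * 1; // sum elements per upper-dims slice

        for(int du = 0; du < C; ++du) // upper_dims loop
        {
            for(int ld = 0; ld < lower_dims; ++ld)
            {
                // All H elements sharing the same x (and du) use the same sum entry.
                const int sum_index = ld % lower_dims_sum + du * lower_dims_sum;
                std::printf("src[du=%d, ld=%2d] -> sum[%d]\n", du, ld, sum_index);
            }
        }
        return 0;
    }
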