From 6c928343b0fa2bf60ffdfe21aea28b598d742ed4 Mon Sep 17 00:00:00 2001
From: Michele Di Giorgio
Date: Thu, 22 Jun 2017 16:55:57 +0100
Subject: COMPMID-413: Add support for QS8 and QS16 CLNormalizationLayer.

Change-Id: I1aaa9fb8d05796bbca9cfae584e084646552bb71
Reviewed-on: http://mpd-gerrit.cambridge.arm.com/80155
Reviewed-by: Anthony Barbier
Tested-by: Kaizen
---
 .../core/CL/kernels/CLNormalizationLayerKernel.h   |   8 +-
 src/core/CL/cl_kernels/fill_border.cl              |   1 +
 src/core/CL/cl_kernels/fixed_point.h               |   1 +
 src/core/CL/cl_kernels/normalization_layer.cl      | 104 ++++++++++-----
 src/core/CL/kernels/CLNormalizationLayerKernel.cpp |  37 ++++--
 .../NEON/kernels/NENormalizationLayerKernel.cpp    |  11 +-
 src/runtime/CL/functions/CLNormalizationLayer.cpp  |   3 +-
 tests/benchmark_new/CL/NormalizationLayer.cpp      |   2 +-
 tests/validation_new/CL/NormalizationLayer.cpp     | 145 +++++++++++++++++++++
 9 files changed, 257 insertions(+), 55 deletions(-)
 create mode 100644 tests/validation_new/CL/NormalizationLayer.cpp

diff --git a/arm_compute/core/CL/kernels/CLNormalizationLayerKernel.h b/arm_compute/core/CL/kernels/CLNormalizationLayerKernel.h
index 132fcc4a12..5eedc31486 100644
--- a/arm_compute/core/CL/kernels/CLNormalizationLayerKernel.h
+++ b/arm_compute/core/CL/kernels/CLNormalizationLayerKernel.h
@@ -49,10 +49,10 @@ public:
     /** Set the input and output tensors.
      *
      * @param[in]  input         Source tensor. 3 lower dims represent a single input with dimensions [width, height, IFM],
-     *                           and an optional 4th dimension for batch of inputs. Data types supported: F16, F32.
-     * @param[in]  squared_input Source with each element has been squared. 3 lower dims represent a single input with dimensions [width, height, IFM],
-     *                           Data types should match the input type.
-     * @param[out] output        Destination tensor. Output will have the same number of dimensions as input. Data types should match the input type.
+     *                           and an optional 4th dimension for batch of inputs. Data types supported: QS8/QS16/F16/F32.
+     * @param[in]  squared_input Source with each element has been squared. 3 lower dims represent a single input with dimensions [width, height, IFM].
+     *                           Data types supported: same as @p input.
+     * @param[out] output        Destination tensor. Output will have the same number of dimensions as input. Data types supported: same as @p input.
      * @param[in]  norm_info     Normalization layer information like the normalization type, normalization size and other parameters.
      */
     void configure(const ICLTensor *input, const ICLTensor *squared_input, ICLTensor *output, NormalizationLayerInfo norm_info);
diff --git a/src/core/CL/cl_kernels/fill_border.cl b/src/core/CL/cl_kernels/fill_border.cl
index 5fbe3ea070..f511613a37 100644
--- a/src/core/CL/cl_kernels/fill_border.cl
+++ b/src/core/CL/cl_kernels/fill_border.cl
@@ -21,6 +21,7 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
+#include "fixed_point.h"
 #include "helpers.h"

 /** Fill N pixel of the padding edge of a single channel image by replicating the closest valid pixel.
diff --git a/src/core/CL/cl_kernels/fixed_point.h b/src/core/CL/cl_kernels/fixed_point.h
index 509e9d01c2..7038d40e16 100644
--- a/src/core/CL/cl_kernels/fixed_point.h
+++ b/src/core/CL/cl_kernels/fixed_point.h
@@ -378,6 +378,7 @@ EXPQ_IMPL(qs16, qs16x16, 16)

 LOGQ_IMPL(qs8, qs8x16, 16)
 LOGQ_IMPL(qs16, qs16x8, 8)
+LOGQ_IMPL(qs16, qs16x16, 16)

 #define LOG_OP_EXPAND_STR(a, type, size, position) log_sat_##type##x##size((a), (position))
 #define LOG_OP_EXPAND(a, type, size, position) LOG_OP_EXPAND_STR(a, type, size, position)
diff --git a/src/core/CL/cl_kernels/normalization_layer.cl b/src/core/CL/cl_kernels/normalization_layer.cl
index 2305ae0d15..598b734c26 100644
--- a/src/core/CL/cl_kernels/normalization_layer.cl
+++ b/src/core/CL/cl_kernels/normalization_layer.cl
@@ -23,11 +23,41 @@
  */
 #include "helpers.h"

+#if defined(FIXED_POINT_POSITION)
+
+#include "fixed_point.h"
+#define MUL_OP(x, y) MUL_SAT_OP_EXPAND((x), (y), DATA_TYPE, VEC_SIZE, FIXED_POINT_POSITION)
+#define ADD_OP(x, y) ADD_SAT_OP_EXPAND((x), (y), DATA_TYPE, VEC_SIZE)
+#define DIV_OP(x, y) DIV_SAT_OP_EXPAND((x), (y), DATA_TYPE, VEC_SIZE, FIXED_POINT_POSITION)
+#define EXP_OP(x) EXP_OP_EXPAND((x), DATA_TYPE, VEC_SIZE, FIXED_POINT_POSITION)
+#define LOG_OP(x) LOG_OP_EXPAND((x), DATA_TYPE, VEC_SIZE, FIXED_POINT_POSITION)
+#define POW_OP(x, y) EXP_OP(MUL_OP(LOG_OP((x)), (y)))
+#define SQCVT_SAT(a) SQCVT_SAT_OP_EXPAND((a), DATA_TYPE, FIXED_POINT_POSITION)
+
+#define LOAD_OP(offset, ptr) vload16(offset, ptr)
+#define STORE_OP(data, offset, ptr) vstore16(data, offset, ptr)
+
+#else // FIXED_POINT_POSITION
+
+#define MUL_OP(x, y) ((x) * (y))
+#define ADD_OP(x, y) ((x) + (y))
+#define DIV_OP(x, y) ((x) / (y))
+#define POW_OP(x, y) pow((x), (y))
+#define SQCVT_SAT(a) (a)
+
+#define LOAD_OP(offset, ptr) vload4(offset, ptr)
+#define STORE_OP(data, offset, ptr) vstore4(data, offset, ptr)
+
+#endif // FIXED_POINT_POSITION
+
 /** Apply cross map normalization.
  *
  * @note Datatype should be given as a preprocessor argument using -DDATA_TYPE=type. e.g. -DDATA_TYPE=short
+ * @note Vector size should be given as a preprocessor argument using -DVEC_SIZE=size, e.g. -DVEC_SIZE=16
+ * @note In case of fixed-point operation -DFIXED_POINT_POSITION=fixed_point_position must be provided: e.g. -DFIXED_POINT_POSITION=3
+ * @note Scaling coefficient (= alpha/norm_size), beta and kappa need to be passed at compile time using -DCOEFF, -DBETA and -DKAPPA
  *
- * @param[in] input_ptr Pointer to the first source tensor. Supported data types: F16, F32
+ * @param[in] input_ptr Pointer to the first source tensor. Supported data types: QS8/QS16/F16/F32
 * @param[in] input_stride_x Stride of the first source tensor in X dimension (in bytes)
 * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
 * @param[in] input_stride_y Stride of the first source tensor in Y dimension (in bytes)
@@ -35,7 +65,7 @@
 * @param[in] input_stride_z Stride of the first source tensor in Z dimension (in bytes)
 * @param[in] input_step_z input_stride_z * number of elements along Z processed per workitem(in bytes)
 * @param[in] input_offset_first_element_in_bytes The offset of the first element in the first source tensor
- * @param[in] squared_input_ptr Pointer to the second source tensor. Supported data types: F16, F32
+ * @param[in] squared_input_ptr Pointer to the second source tensor. Supported data types: same as @p input_ptr
 * @param[in] squared_input_stride_x Stride of the second source tensor in X dimension (in bytes)
 * @param[in] squared_input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
 * @param[in] squared_input_stride_y Stride of the second source tensor in Y dimension (in bytes)
@@ -43,7 +73,7 @@
 * @param[in] squared_input_stride_z Stride of the second source tensor in Z dimension (in bytes)
 * @param[in] squared_input_step_z input_stride_z * number of elements along Z processed per workitem(in bytes)
 * @param[in] squared_input_offset_first_element_in_bytes The offset of the second element in the second source tensor
- * @param[out] output_ptr Pointer to the destination tensor. Supported data types: F16, F32
+ * @param[out] output_ptr Pointer to the destination tensor. Supported data types: same as @p input_ptr
 * @param[in] output_stride_x Stride of the destination tensor in X dimension (in bytes)
 * @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes)
 * @param[in] output_stride_y Stride of the destination tensor in Y dimension (in bytes)
@@ -51,24 +81,25 @@
 * @param[in] output_stride_z Stride of the destination tensor in Z dimension (in bytes)
 * @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes)
 * @param[in] output_offset_first_element_in_bytes The offset of the first element in the destination tensor
- * @param[in] coeff Alpha parameter / norm_size
- * @param[in] beta Beta parameter in the normalization equation
- * @param[in] kappa Kappa parameter in the normalization equation
 * @param[in] radius Number of elements on the right or left side to normalize across
 */
 __kernel void normalization_layer_cross_map(TENSOR3D_DECLARATION(input),
                                             TENSOR3D_DECLARATION(squared_input),
                                             TENSOR3D_DECLARATION(output),
-                                            float coeff,
-                                            float beta,
-                                            float kappa,
-                                            uint radius)
+                                            uint radius)
 {
     Tensor3D in = CONVERT_TO_TENSOR3D_STRUCT(input);
     Tensor3D squared_in = CONVERT_TO_TENSOR3D_STRUCT(squared_input);
     Tensor3D out = CONVERT_TO_TENSOR3D_STRUCT(output);

-    DATA_TYPE acc = 0;
+    VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)
+    acc = (VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE))0;
+    const VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)
+    coeff_v = (VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE))SQCVT_SAT(COEFF);
+    const VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)
+    beta_v = (VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE))SQCVT_SAT(BETA);
+    const VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)
+    kappa_v = (VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE))SQCVT_SAT(KAPPA);

     const int num_of_slices = get_global_size(2);
     const int current_slice = get_global_id(2);
@@ -78,21 +109,26 @@ __kernel void normalization_layer_cross_map(TENSOR3D_DECLARATION(input),

     for(int i = left_slice; i <= right_slice; i++)
     {
-        acc += *(__global DATA_TYPE *)tensor3D_offset(&squared_in, 0, 0, i - current_slice);
+        acc = ADD_OP(acc, LOAD_OP(0, (__global DATA_TYPE *)tensor3D_offset(&squared_in, 0, 0, i - current_slice)));
     }

-    const float normalized = pow(kappa + coeff * (float)acc, beta);
+    acc = ADD_OP(MUL_OP(acc, coeff_v), kappa_v);
+    const VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)
+    normalized = POW_OP(acc, beta_v);
+    const VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)
+    normalized_pixel = DIV_OP(LOAD_OP(0, (__global DATA_TYPE *)in.ptr), normalized);

-    const float normalized_pixel = (float) * ((__global DATA_TYPE *)in.ptr) / normalized;
-
-    *(__global DATA_TYPE *)out.ptr = CONVERT(normalized_pixel, DATA_TYPE);
+    STORE_OP(normalized_pixel, 0, (__global DATA_TYPE *)out.ptr);
 }

 /** Apply in map normalization.
 *
 * @note Datatype should be given as a preprocessor argument using -DDATA_TYPE=type. e.g. -DDATA_TYPE=short
+ * @note Vector size should be given as a preprocessor argument using -DVEC_SIZE=size, e.g. -DVEC_SIZE=16
+ * @note In case of fixed-point operation -DFIXED_POINT_POSITION=fixed_point_position must be provided: e.g. -DFIXED_POINT_POSITION=3
+ * @note Scaling coefficient (= alpha/norm_size), beta and kappa need to be passed at compile time using -DCOEFF, -DBETA and -DKAPPA
 *
- * @param[in] input_ptr Pointer to the first source tensor. Supported data types: F16, F32
+ * @param[in] input_ptr Pointer to the first source tensor. Supported data types: QS8/QS16/F16/F32
 * @param[in] input_stride_x Stride of the first source tensor in X dimension (in bytes)
 * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
 * @param[in] input_stride_y Stride of the first source tensor in Y dimension (in bytes)
@@ -100,7 +136,7 @@ __kernel void normalization_layer_cross_map(TENSOR3D_DECLARATION(input),
 * @param[in] input_stride_z Stride of the first source tensor in Z dimension (in bytes)
 * @param[in] input_step_z input_stride_z * number of elements along Z processed per workitem(in bytes)
 * @param[in] input_offset_first_element_in_bytes The offset of the first element in the first source tensor
- * @param[in] squared_input_ptr Pointer to the second source tensor. Supported data types: F16, F32
+ * @param[in] squared_input_ptr Pointer to the second source tensor. Supported data types: same as @p input_ptr
 * @param[in] squared_input_stride_x Stride of the second source tensor in X dimension (in bytes)
 * @param[in] squared_input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
 * @param[in] squared_input_stride_y Stride of the second source tensor in Y dimension (in bytes)
@@ -108,7 +144,7 @@ __kernel void normalization_layer_cross_map(TENSOR3D_DECLARATION(input),
 * @param[in] squared_input_stride_z Stride of the second source tensor in Z dimension (in bytes)
 * @param[in] squared_input_step_z input_stride_z * number of elements along Z processed per workitem(in bytes)
 * @param[in] squared_input_offset_first_element_in_bytes The offset of the second element in the second source tensor
- * @param[out] output_ptr Pointer to the destination tensor. Supported data types: F16, F32
+ * @param[out] output_ptr Pointer to the destination tensor. Supported data types: same as @p input_ptr
 * @param[in] output_stride_x Stride of the destination tensor in X dimension (in bytes)
 * @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes)
 * @param[in] output_stride_y Stride of the first destination tensor in Y dimension (in bytes)
@@ -116,25 +152,25 @@ __kernel void normalization_layer_cross_map(TENSOR3D_DECLARATION(input),
 * @param[in] output_stride_z Stride of the first source tensor in Z dimension (in bytes)
 * @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes)
 * @param[in] output_offset_first_element_in_bytes The offset of the first element in the destination tensor
- * @param[in] coeff Alpha parameter / norm_size
- * @param[in] beta Beta parameter in the normalization equation
- * @param[in] kappa Kappa parameter in the normalization equation
 * @param[in] radius Number of elements on the right or left side to normalize across
 */
 __kernel void normalization_layer_in_map_1D(TENSOR3D_DECLARATION(input),
                                             TENSOR3D_DECLARATION(squared_input),
                                             TENSOR3D_DECLARATION(output),
-                                            float coeff,
-                                            float beta,
-                                            float kappa,
-                                            uint radius)
+                                            uint radius)
 {
     Tensor3D in = CONVERT_TO_TENSOR3D_STRUCT(input);
     Tensor3D squared_in = CONVERT_TO_TENSOR3D_STRUCT(squared_input);
     Tensor3D out = CONVERT_TO_TENSOR3D_STRUCT(output);

-    VEC_DATA_TYPE(DATA_TYPE, 4)
-    acc_vec = 0;
+    VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)
+    acc = (VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE))0;
+    const VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)
+    coeff_v = (VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE))SQCVT_SAT(COEFF);
+    const VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)
+    beta_v = (VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE))SQCVT_SAT(BETA);
+    const VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)
+    kappa_v = (VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE))SQCVT_SAT(KAPPA);

     const int current_pos = get_global_id(0) << 2;

@@ -143,12 +179,14 @@ __kernel void normalization_layer_in_map_1D(TENSOR3D_DECLARATION(input),

     for(int i = left_pos; i <= right_pos; i += 1)
     {
-        acc_vec += vload4(0, (__global DATA_TYPE *)tensor3D_offset(&squared_in, i - current_pos, 0, 0));
+        acc = ADD_OP(acc, LOAD_OP(0, (__global DATA_TYPE *)tensor3D_offset(&squared_in, i - current_pos, 0, 0)));
     }

-    const float4 normalized = pow((float4)kappa + coeff * CONVERT(acc_vec, float4), beta);
-
-    const float4 normalized_pixel = CONVERT(vload4(0, (__global DATA_TYPE *)in.ptr), float4) / normalized;
+    acc = ADD_OP(MUL_OP(acc, coeff_v), kappa_v);
+    const VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)
+    normalized = POW_OP(acc, beta_v);
+    const VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)
+    normalized_pixel = DIV_OP(LOAD_OP(0, (__global DATA_TYPE *)in.ptr), normalized);

-    vstore4(CONVERT(normalized_pixel, VEC_DATA_TYPE(DATA_TYPE, 4)), 0, (__global DATA_TYPE *)out.ptr);
+    STORE_OP(normalized_pixel, 0, (__global DATA_TYPE *)out.ptr);
 }
diff --git a/src/core/CL/kernels/CLNormalizationLayerKernel.cpp b/src/core/CL/kernels/CLNormalizationLayerKernel.cpp
index 1afd76a375..a0607c2ba0 100644
--- a/src/core/CL/kernels/CLNormalizationLayerKernel.cpp
+++ b/src/core/CL/kernels/CLNormalizationLayerKernel.cpp
@@ -26,6 +26,7 @@
 #include "arm_compute/core/CL/CLHelpers.h"
 #include "arm_compute/core/CL/CLKernelLibrary.h"
 #include "arm_compute/core/CL/ICLTensor.h"
+#include "arm_compute/core/FixedPoint.h"
 #include "arm_compute/core/Helpers.h"
 #include "arm_compute/core/TensorInfo.h"
 #include "arm_compute/core/Utils.h"
@@ -46,7 +47,7 @@ BorderSize CLNormalizationLayerKernel::border_size() const

 void CLNormalizationLayerKernel::configure(const ICLTensor *input, const ICLTensor *squared_input, ICLTensor *output, NormalizationLayerInfo norm_info)
 {
-    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32);
+    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QS16, DataType::F16, DataType::F32);
     ARM_COMPUTE_ERROR_ON_NULLPTR(output);

     // Output tensor auto initialization if not yet initialized
@@ -56,34 +57,46 @@ void CLNormalizationLayerKernel::configure(const ICLTens
     ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(input, squared_input, output);
     ARM_COMPUTE_ERROR_ON_MSG(!(norm_info.norm_size() % 2), "Normalization size should be odd");
     ARM_COMPUTE_ERROR_ON_MSG(norm_info.type() == NormType::IN_MAP_2D, "2D In-Map Normalization not implemented");
-
-    // Set build options
-    std::set<std::string> build_opts;
-    build_opts.emplace(("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type())));
+    if(is_data_type_fixed_point(input->info()->data_type()))
+    {
+        ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT(input, squared_input, output);
+        ARM_COMPUTE_ERROR_ON_VALUE_NOT_REPRESENTABLE_IN_FIXED_POINT(norm_info.beta(), input);
+        ARM_COMPUTE_ERROR_ON_VALUE_NOT_REPRESENTABLE_IN_FIXED_POINT(norm_info.kappa(), input);
+        ARM_COMPUTE_ERROR_ON_VALUE_NOT_REPRESENTABLE_IN_FIXED_POINT(norm_info.scale_coeff(), input);
+    }

     _input = input;
     _squared_input = squared_input;
     _output = output;
-    _is_in_map = (norm_info.type() == NormType::IN_MAP_1D);
+    _is_in_map = (norm_info.type() != NormType::CROSS_MAP);

     const unsigned int border_width = _is_in_map ? std::min(norm_info.norm_size() / 2, 3U) : 0;
     _border_size = BorderSize(0, border_width);

+    const unsigned int num_elems_processed_per_iteration = (is_data_type_fixed_point(input->info()->data_type())) ? 16 : 4;
+    const unsigned int num_elems_read_per_iteration = num_elems_processed_per_iteration + 2 * (norm_info.norm_size() / 2);
+
+    // Set build options
+    std::set<std::string> build_opts;
+    build_opts.emplace(("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type())));
+    if(is_data_type_fixed_point(input->info()->data_type()))
+    {
+        build_opts.emplace(("-DFIXED_POINT_POSITION=" + support::cpp11::to_string(input->info()->fixed_point_position())));
+    }
+    build_opts.emplace(("-DCOEFF=" + support::cpp11::to_string(norm_info.scale_coeff())));
+    build_opts.emplace(("-DBETA=" + support::cpp11::to_string(norm_info.beta())));
+    build_opts.emplace(("-DKAPPA=" + support::cpp11::to_string(norm_info.kappa())));
+    build_opts.emplace(("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration)));
+
     // Create kernel
     std::string kernel_name = (norm_info.type() == NormType::IN_MAP_1D) ? "normalization_layer_in_map_1D" : "normalization_layer_cross_map";
     _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name, build_opts));

     // Set kernel static arguments
     unsigned int idx = 3 * num_arguments_per_3D_tensor(); // Skip the input and output parameters
-    _kernel.setArg(idx++, norm_info.scale_coeff());
-    _kernel.setArg(idx++, norm_info.beta());
-    _kernel.setArg(idx++, norm_info.kappa());
     _kernel.setArg(idx++, norm_info.norm_size() / 2);

     // Configure kernel window
-    const unsigned int num_elems_processed_per_iteration = (_is_in_map) ? 4 : 1;
-    const unsigned int num_elems_read_per_iteration = num_elems_processed_per_iteration + 2 * (norm_info.norm_size() / 2);
-
     Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration));

     AccessWindowHorizontal input_access(input->info(), -_border_size.left, num_elems_read_per_iteration);
diff --git a/src/core/NEON/kernels/NENormalizationLayerKernel.cpp b/src/core/NEON/kernels/NENormalizationLayerKernel.cpp
index 085d412558..0e15244f0e 100644
--- a/src/core/NEON/kernels/NENormalizationLayerKernel.cpp
+++ b/src/core/NEON/kernels/NENormalizationLayerKernel.cpp
@@ -51,12 +51,15 @@ void NENormalizationLayerKernel::configure(const ITensor *input, const ITensor *
     // Output tensor auto initialization if not yet initialized
     auto_init_if_empty(*output->info(), input->info()->tensor_shape(), 1, input->info()->data_type(), input->info()->fixed_point_position());
     ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, input_squared, output);
-    ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT(input, input_squared, output);
     ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(input, input_squared, output);
     ARM_COMPUTE_ERROR_ON_MSG(!(norm_info.norm_size() % 2), "Normalization size should be odd");
-    ARM_COMPUTE_ERROR_ON_VALUE_NOT_REPRESENTABLE_IN_FIXED_POINT(norm_info.beta(), input);
-    ARM_COMPUTE_ERROR_ON_VALUE_NOT_REPRESENTABLE_IN_FIXED_POINT(norm_info.kappa(), input);
-    ARM_COMPUTE_ERROR_ON_VALUE_NOT_REPRESENTABLE_IN_FIXED_POINT(norm_info.scale_coeff(), input);
+    if(is_data_type_fixed_point(input->info()->data_type()))
+    {
+        ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT(input, input_squared, output);
+        ARM_COMPUTE_ERROR_ON_VALUE_NOT_REPRESENTABLE_IN_FIXED_POINT(norm_info.beta(), input);
+        ARM_COMPUTE_ERROR_ON_VALUE_NOT_REPRESENTABLE_IN_FIXED_POINT(norm_info.kappa(), input);
+        ARM_COMPUTE_ERROR_ON_VALUE_NOT_REPRESENTABLE_IN_FIXED_POINT(norm_info.scale_coeff(), input);
+    }

     const unsigned int border_width = (norm_info.type() == NormType::CROSS_MAP) ? 0 : std::min(norm_info.norm_size() / 2, 3U);

diff --git a/src/runtime/CL/functions/CLNormalizationLayer.cpp b/src/runtime/CL/functions/CLNormalizationLayer.cpp
index 2d89ebd676..69cef334e8 100644
--- a/src/runtime/CL/functions/CLNormalizationLayer.cpp
+++ b/src/runtime/CL/functions/CLNormalizationLayer.cpp
@@ -41,7 +41,8 @@
 {
     ARM_COMPUTE_ERROR_ON(input == nullptr);

-    _squared_input.allocator()->init(TensorInfo(input->info()->tensor_shape(), 1, input->info()->data_type()));
+    TensorInfo tensor_info(input->info()->tensor_shape(), 1, input->info()->data_type(), input->info()->fixed_point_position());
+    _squared_input.allocator()->init(tensor_info);
     _norm_kernel.configure(input, &_squared_input, output, norm_info);
     _multiply_kernel.configure(input, input, &_squared_input, 1.0f, ConvertPolicy::SATURATE, RoundingPolicy::TO_NEAREST_EVEN);

diff --git a/tests/benchmark_new/CL/NormalizationLayer.cpp b/tests/benchmark_new/CL/NormalizationLayer.cpp
index 7ba78a5e99..7e2380ccc8 100644
--- a/tests/benchmark_new/CL/NormalizationLayer.cpp
+++ b/tests/benchmark_new/CL/NormalizationLayer.cpp
@@ -40,7 +40,7 @@ namespace test
 {
 namespace
 {
-const auto normalization_layer_data_types = framework::dataset::make("DataType", { DataType::F16, DataType::F32 });
+const auto normalization_layer_data_types = framework::dataset::make("DataType", { DataType::QS8, DataType::QS16, DataType::F16, DataType::F32 });
 } // namespace

 using CLNormalizationLayerFixture = NormalizationLayerFixture;
diff --git a/tests/validation_new/CL/NormalizationLayer.cpp b/tests/validation_new/CL/NormalizationLayer.cpp
new file mode 100644
index 0000000000..22ca96423a
--- /dev/null
+++ b/tests/validation_new/CL/NormalizationLayer.cpp
@@ -0,0 +1,145 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/CL/CLTensorAllocator.h"
+#include "arm_compute/runtime/CL/functions/CLNormalizationLayer.h"
+#include "framework/Asserts.h"
+#include "framework/Macros.h"
+#include "framework/datasets/Datasets.h"
+#include "tests/CL/CLAccessor.h"
+#include "tests/PaddingCalculator.h"
+#include "tests/datasets_new/NormalizationTypesDataset.h"
+#include "tests/datasets_new/ShapeDatasets.h"
+#include "tests/validation_new/Validation.h"
+#include "tests/validation_new/fixtures/NormalizationLayerFixture.h"
+#include "tests/validation_new/half.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace
+{
+/** Tolerance for float operations */
+#ifdef ARM_COMPUTE_ENABLE_FP16
+constexpr float tolerance_f16 = 0.001f;
+#endif /* ARM_COMPUTE_ENABLE_FP16 */
+constexpr float tolerance_f32 = 0.00001f;
+/** Tolerance for fixed point operations */
+constexpr int8_t tolerance_qs8 = 2;
+constexpr int16_t tolerance_qs16 = 2;
+
+/** Input data set. */
+const auto NormalizationDataset = combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("NormType", { NormType::IN_MAP_1D, NormType::CROSS_MAP })),
+                                                  framework::dataset::make("NormalizationSize", 3, 9, 2)),
+                                          framework::dataset::make("Beta", { 0.5f, 1.f, 2.f }));
+} // namespace
+
+TEST_SUITE(CL)
+TEST_SUITE(NormalizationLayer)
+
+//TODO(COMPMID-415): Missing configuration?
+
+template <typename T>
+using CLNormalizationLayerFixture = NormalizationValidationFixture<CLTensor, CLAccessor, CLNormalizationLayer, T>;
+
+TEST_SUITE(Float)
+#ifdef ARM_COMPUTE_ENABLE_FP16
+TEST_SUITE(FP16)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLNormalizationLayerFixture<half_float::half>, framework::DatasetMode::PRECOMMIT, combine(NormalizationDataset, framework::dataset::make("DataType", DataType::F16)))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_f16);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, CLNormalizationLayerFixture<half_float::half>, framework::DatasetMode::NIGHTLY, combine(NormalizationDataset, framework::dataset::make("DataType", DataType::F16)))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_f16);
+}
+TEST_SUITE_END()
+#endif /* ARM_COMPUTE_ENABLE_FP16 */
+
+TEST_SUITE(FP32)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLNormalizationLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(NormalizationDataset, framework::dataset::make("DataType", DataType::F32)))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_f32);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, CLNormalizationLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(NormalizationDataset, framework::dataset::make("DataType", DataType::F32)))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_f32);
+}
+TEST_SUITE_END()
+TEST_SUITE_END()
+
+template <typename T>
+using CLNormalizationLayerFixedPointFixture = NormalizationValidationFixedPointFixture<CLTensor, CLAccessor, CLNormalizationLayer, T>;
+
+TEST_SUITE(Quantized)
+TEST_SUITE(QS8)
+// Testing for fixed point position [1,6) as reciprocal limits the maximum fixed point position to 5
+FIXTURE_DATA_TEST_CASE(RunSmall, CLNormalizationLayerFixedPointFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(NormalizationDataset, framework::dataset::make("DataType",
+                                                                                   DataType::QS8)),
+                                                                                   framework::dataset::make("FractionalBits", 1, 6)))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_qs8);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, CLNormalizationLayerFixedPointFixture<int8_t>, framework::DatasetMode::NIGHTLY, combine(combine(NormalizationDataset, framework::dataset::make("DataType",
framework::dataset::make("DataType", + DataType::QS8)), + framework::dataset::make("FractionalBits", 1, 6))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_qs8); +} +TEST_SUITE_END() + +TEST_SUITE(QS16) +// Testing for fixed point position [1,14) as reciprocal limits the maximum fixed point position to 5 +FIXTURE_DATA_TEST_CASE(RunSmall, CLNormalizationLayerFixedPointFixture, framework::DatasetMode::PRECOMMIT, combine(combine(NormalizationDataset, framework::dataset::make("DataType", + DataType::QS16)), + framework::dataset::make("FractionalBits", 1, 14))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_qs16); +} +FIXTURE_DATA_TEST_CASE(RunLarge, CLNormalizationLayerFixedPointFixture, framework::DatasetMode::NIGHTLY, combine(combine(NormalizationDataset, framework::dataset::make("DataType", + DataType::QS16)), + framework::dataset::make("FractionalBits", 1, 14))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_qs16); +} +TEST_SUITE_END() +TEST_SUITE_END() + +TEST_SUITE_END() +TEST_SUITE_END() +} // namespace validation +} // namespace test +} // namespace arm_compute -- cgit v1.2.1