/*
 * Copyright (c) 2017-2023 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include "tests/validation/Helpers.h"
#include "tests/framework/Asserts.h"

#include <cmath>
#include <random>

namespace arm_compute
{
namespace test
{
namespace validation
{
template <>
SimpleTensor<float> convert_from_asymmetric(const SimpleTensor<uint8_t> &src)
{
    const UniformQuantizationInfo &quantization_info = src.quantization_info().uniform();
    SimpleTensor<float>            dst{ src.shape(), DataType::F32, 1, QuantizationInfo(), src.data_layout() };
#if defined(_OPENMP)
    #pragma omp parallel for
#endif /* _OPENMP */
    for(int i = 0; i < src.num_elements(); ++i)
    {
        dst[i] = dequantize_qasymm8(src[i], quantization_info);
    }
    return dst;
}

template <>
SimpleTensor<float> convert_from_asymmetric(const SimpleTensor<int8_t> &src)
{
    const UniformQuantizationInfo &quantization_info = src.quantization_info().uniform();
    SimpleTensor<float>            dst{ src.shape(), DataType::F32, 1, QuantizationInfo(), src.data_layout() };
#if defined(_OPENMP)
    #pragma omp parallel for
#endif /* _OPENMP */
    for(int i = 0; i < src.num_elements(); ++i)
    {
        dst[i] = dequantize_qasymm8_signed(src[i], quantization_info);
    }
    return dst;
}

template <>
SimpleTensor<float> convert_from_asymmetric(const SimpleTensor<uint16_t> &src)
{
    const UniformQuantizationInfo &quantization_info = src.quantization_info().uniform();
    SimpleTensor<float>            dst{ src.shape(), DataType::F32, 1, QuantizationInfo(), src.data_layout() };
#if defined(_OPENMP)
    #pragma omp parallel for
#endif /* _OPENMP */
    for(int i = 0; i < src.num_elements(); ++i)
    {
        dst[i] = dequantize_qasymm16(src[i], quantization_info);
    }
    return dst;
}

template <>
SimpleTensor<uint8_t> convert_to_asymmetric(const SimpleTensor<float> &src, const QuantizationInfo &quantization_info)
{
    SimpleTensor<uint8_t>          dst{ src.shape(), DataType::QASYMM8, 1, quantization_info };
    const UniformQuantizationInfo &qinfo = quantization_info.uniform();
#if defined(_OPENMP)
    #pragma omp parallel for
#endif /* _OPENMP */
    for(int i = 0; i < src.num_elements(); ++i)
    {
        dst[i] = quantize_qasymm8(src[i], qinfo);
    }
    return dst;
}

template <>
SimpleTensor<int8_t> convert_to_asymmetric(const SimpleTensor<float> &src, const QuantizationInfo &quantization_info)
{
    SimpleTensor<int8_t>           dst{ src.shape(), DataType::QASYMM8_SIGNED, 1, quantization_info };
    const UniformQuantizationInfo &qinfo = quantization_info.uniform();
#if defined(_OPENMP)
    #pragma omp parallel for
#endif /* _OPENMP */
    for(int i = 0; i < src.num_elements(); ++i)
    {
        dst[i] = quantize_qasymm8_signed(src[i], qinfo);
    }
    return dst;
}
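// Illustrative usage sketch (not part of the library): a typical validation flow round-trips a
// reference tensor through these helpers. The shape and quantization parameters below are
// arbitrary example values.
//
//     SimpleTensor<float> ref{ TensorShape(16U, 16U), DataType::F32 };
//     // ... fill `ref` with the reference data ...
//     const QuantizationInfo qinfo(0.5f, 10);
//     SimpleTensor<uint8_t> q   = convert_to_asymmetric<uint8_t>(ref, qinfo);
//     SimpleTensor<float>   deq = convert_from_asymmetric(q);
//     // Each deq[i] is approximately ref[i] rounded to the nearest multiple of the 0.5f scale and
//     // clamped to the representable range [0.5f * (0 - 10), 0.5f * (255 - 10)] = [-5.f, 122.5f].
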
template <>
SimpleTensor<uint16_t> convert_to_asymmetric(const SimpleTensor<float> &src, const QuantizationInfo &quantization_info)
{
    SimpleTensor<uint16_t>         dst{ src.shape(), DataType::QASYMM16, 1, quantization_info };
    const UniformQuantizationInfo &qinfo = quantization_info.uniform();
#if defined(_OPENMP)
    #pragma omp parallel for
#endif /* _OPENMP */
    for(int i = 0; i < src.num_elements(); ++i)
    {
        dst[i] = quantize_qasymm16(src[i], qinfo);
    }
    return dst;
}

template <>
SimpleTensor<int16_t> convert_to_symmetric(const SimpleTensor<float> &src, const QuantizationInfo &quantization_info)
{
    SimpleTensor<int16_t>          dst{ src.shape(), DataType::QSYMM16, 1, quantization_info };
    const UniformQuantizationInfo &qinfo = quantization_info.uniform();
#if defined(_OPENMP)
    #pragma omp parallel for
#endif /* _OPENMP */
    for(int i = 0; i < src.num_elements(); ++i)
    {
        dst[i] = quantize_qsymm16(src[i], qinfo);
    }
    return dst;
}

template <>
SimpleTensor<float> convert_from_symmetric(const SimpleTensor<int16_t> &src)
{
    const UniformQuantizationInfo &quantization_info = src.quantization_info().uniform();
    SimpleTensor<float>            dst{ src.shape(), DataType::F32, 1, QuantizationInfo(), src.data_layout() };
#if defined(_OPENMP)
    #pragma omp parallel for
#endif /* _OPENMP */
    for(int i = 0; i < src.num_elements(); ++i)
    {
        dst[i] = dequantize_qsymm16(src[i], quantization_info);
    }
    return dst;
}

template <typename T>
void matrix_multiply(const SimpleTensor<T> &a, const SimpleTensor<T> &b, SimpleTensor<T> &out)
{
    ARM_COMPUTE_ERROR_ON(a.shape()[0] != b.shape()[1]);
    ARM_COMPUTE_ERROR_ON(a.shape()[1] != out.shape()[1]);
    ARM_COMPUTE_ERROR_ON(b.shape()[0] != out.shape()[0]);

    const int M = a.shape()[1]; // Rows
    const int N = b.shape()[0]; // Cols
    const int K = b.shape()[1];

#if defined(_OPENMP)
    #pragma omp parallel for collapse(2)
#endif /* _OPENMP */
    for(int y = 0; y < M; ++y)
    {
        for(int x = 0; x < N; ++x)
        {
            float acc = 0.0f;
            for(int k = 0; k < K; ++k)
            {
                acc += a[y * K + k] * b[x + k * N];
            }

            out[x + y * N] = acc;
        }
    }
}

template <typename T>
void transpose_matrix(const SimpleTensor<T> &in, SimpleTensor<T> &out)
{
    ARM_COMPUTE_ERROR_ON((in.shape()[0] != out.shape()[1]) || (in.shape()[1] != out.shape()[0]));

    const int width  = in.shape()[0];
    const int height = in.shape()[1];

#if defined(_OPENMP)
    #pragma omp parallel for collapse(2)
#endif /* _OPENMP */
    for(int y = 0; y < height; ++y)
    {
        for(int x = 0; x < width; ++x)
        {
            const T val = in[x + y * width];

            out[x * height + y] = val;
        }
    }
}
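// Illustrative usage sketch (not part of the library): TensorShape stores the width (number of
// columns) first, so an [M x K] * [K x N] = [M x N] product uses row-major tensors of shape
// (K, M), (N, K) and (N, M). Example with M = 2, K = 3, N = 4:
//
//     SimpleTensor<float> a{ TensorShape(3U, 2U), DataType::F32 }; // 2 rows x 3 cols
//     SimpleTensor<float> b{ TensorShape(4U, 3U), DataType::F32 }; // 3 rows x 4 cols
//     SimpleTensor<float> c{ TensorShape(4U, 2U), DataType::F32 }; // 2 rows x 4 cols
//     // ... fill a and b ...
//     matrix_multiply(a, b, c); // c[x + y * 4] = sum_k a[y * 3 + k] * b[x + k * 4]
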
template <typename T>
void get_tile(const SimpleTensor<T> &in, SimpleTensor<T> &tile, const Coordinates &coord)
{
    ARM_COMPUTE_ERROR_ON(tile.shape().num_dimensions() > 2);

    const int w_tile = tile.shape()[0];
    const int h_tile = tile.shape()[1];

    // Fill the tile with zeros
    std::fill(tile.data() + 0, (tile.data() + (w_tile * h_tile)), static_cast<T>(0));

    // Check if with the dimensions greater than 2 we could have out-of-bound reads
    for(size_t d = 2; d < Coordinates::num_max_dimensions; ++d)
    {
        if(coord[d] < 0 || coord[d] >= static_cast<int>(in.shape()[d]))
        {
            ARM_COMPUTE_ERROR("coord[d] < 0 || coord[d] >= in.shape()[d] with d >= 2");
        }
    }

    // Since we could have out-of-bound reads along the X and Y dimensions,
    // we start calculating the input address with x = 0 and y = 0
    Coordinates start_coord = coord;
    start_coord[0]          = 0;
    start_coord[1]          = 0;

    // Get input and roi pointers
    auto in_ptr  = static_cast<const T *>(in(start_coord));
    auto roi_ptr = static_cast<T *>(tile.data());

    const int x_in_start = std::max(0, coord[0]);
    const int y_in_start = std::max(0, coord[1]);
    const int x_in_end   = std::min(static_cast<int>(in.shape()[0]), coord[0] + w_tile);
    const int y_in_end   = std::min(static_cast<int>(in.shape()[1]), coord[1] + h_tile);

    // Number of elements to copy per row
    const int n = x_in_end - x_in_start;

    // Starting coordinates for the ROI
    const int x_tile_start = coord[0] > 0 ? 0 : std::abs(coord[0]);
    const int y_tile_start = coord[1] > 0 ? 0 : std::abs(coord[1]);

    // Update input pointer
    in_ptr += x_in_start;
    in_ptr += (y_in_start * in.shape()[0]);

    // Update ROI pointer
    roi_ptr += x_tile_start;
    roi_ptr += (y_tile_start * tile.shape()[0]);

    for(int y = y_in_start; y < y_in_end; ++y)
    {
        // Copy per row
        std::copy(in_ptr, in_ptr + n, roi_ptr);

        in_ptr += in.shape()[0];
        roi_ptr += tile.shape()[0];
    }
}

template <typename T>
void zeros(SimpleTensor<T> &in, const Coordinates &anchor, const TensorShape &shape)
{
    ARM_COMPUTE_ERROR_ON(anchor.num_dimensions() != shape.num_dimensions());
    ARM_COMPUTE_ERROR_ON(in.shape().num_dimensions() > 2);
    ARM_COMPUTE_ERROR_ON(shape.num_dimensions() > 2);

    // Check if the region to zero out would cause out-of-bound writes
    for(size_t d = 0; d < Coordinates::num_max_dimensions; ++d)
    {
        if(anchor[d] < 0 || ((anchor[d] + shape[d]) > in.shape()[d]))
        {
            ARM_COMPUTE_ERROR("anchor[d] < 0 || (anchor[d] + shape[d]) > in.shape()[d]");
        }
    }

    // Get input pointer
    auto in_ptr = static_cast<T *>(in(anchor[0] + anchor[1] * in.shape()[0]));

    const unsigned int n = in.shape()[0];

    for(unsigned int y = 0; y < shape[1]; ++y)
    {
        std::fill(in_ptr, in_ptr + shape[0], 0);
        in_ptr += n;
    }
}

std::pair<int, int> get_quantized_bounds(const QuantizationInfo &quant_info, float min, float max)
{
    ARM_COMPUTE_ERROR_ON_MSG(min > max, "min must be lower than or equal to max");

    const int min_bound = quantize_qasymm8(min, quant_info.uniform());
    const int max_bound = quantize_qasymm8(max, quant_info.uniform());
    return std::pair<int, int> { min_bound, max_bound };
}

std::pair<int, int> get_quantized_qasymm8_signed_bounds(const QuantizationInfo &quant_info, float min, float max)
{
    ARM_COMPUTE_ERROR_ON_MSG(min > max, "min must be lower than or equal to max");

    const int min_bound = quantize_qasymm8_signed(min, quant_info.uniform());
    const int max_bound = quantize_qasymm8_signed(max, quant_info.uniform());
    return std::pair<int, int> { min_bound, max_bound };
}

std::pair<int, int> get_symm_quantized_per_channel_bounds(const QuantizationInfo &quant_info, float min, float max, size_t channel_id)
{
    ARM_COMPUTE_ERROR_ON_MSG(min > max, "min must be lower than or equal to max");

    const int min_bound = quantize_qsymm8_per_channel(min, quant_info, channel_id);
    const int max_bound = quantize_qsymm8_per_channel(max, quant_info, channel_id);
    return std::pair<int, int> { min_bound, max_bound };
}
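// Illustrative usage sketch (not part of the library): the *_bounds helpers are typically used to
// restrict random fills so that the dequantized input stays inside a known float range. Example
// values only; with scale 1/127 and zero offset, [-1.f, 1.f] maps to the integer bounds below.
//
//     const QuantizationInfo    qinfo(1.f / 127.f, 0);
//     const std::pair<int, int> bounds = get_quantized_qasymm8_signed_bounds(qinfo, -1.f, 1.f);
//     // bounds == { -127, 127 }: filling a QASYMM8_SIGNED tensor with values drawn from
//     // [bounds.first, bounds.second] yields only elements that dequantize into [-1.f, 1.f].
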
void add_padding_x(std::initializer_list<ITensor *> tensors, const DataLayout &data_layout, bool only_right_pad)
{
    if(data_layout == DataLayout::NHWC)
    {
        constexpr unsigned int lower = 1U;
        constexpr unsigned int upper = 16U;

        std::uniform_int_distribution<unsigned int> distribution(lower, upper);

        size_t seed_offset = 0;
        for(ITensor *tensor : tensors)
        {
            ARM_COMPUTE_ERROR_ON(!tensor->info()->is_resizable());

            std::mt19937 gen(library->seed() + seed_offset++);

            const unsigned int right = distribution(gen);
            const unsigned int left  = only_right_pad ? 0 : distribution(gen);

            tensor->info()->extend_padding(PaddingSize(0U, right, 0U, left));
        }
    }
}

void add_padding_y(std::initializer_list<ITensor *> tensors, const DataLayout &data_layout)
{
    if(data_layout == DataLayout::NHWC)
    {
        constexpr unsigned int lower = 1U;
        constexpr unsigned int upper = 4U;

        std::uniform_int_distribution<unsigned int> distribution(lower, upper);

        size_t seed_offset = 0;
        for(ITensor *tensor : tensors)
        {
            ARM_COMPUTE_ERROR_ON(!tensor->info()->is_resizable());

            std::mt19937 gen(library->seed() + seed_offset++);

            const unsigned int top    = distribution(gen);
            const unsigned int bottom = distribution(gen);

            tensor->info()->extend_padding(PaddingSize(top, 0U, bottom, 0U));
        }
    }
}

QuantizationInfo calculate_mat_mul_dst_q_info(const QuantizationInfo &a_q_info, const QuantizationInfo &b_q_info, int m, int n, int k, DataType data_type)
{
    ARM_COMPUTE_UNUSED(m, n);
    QuantizationInfo c_q_info;

    ARM_COMPUTE_ASSERT(data_type == DataType::QASYMM8 || data_type == DataType::QASYMM8_SIGNED);

    const int32_t t_max = static_cast<int32_t>(data_type == DataType::QASYMM8 ? std::numeric_limits<uint8_t>::max() : std::numeric_limits<int8_t>::max());
    const int32_t t_min = static_cast<int32_t>(data_type == DataType::QASYMM8 ? std::numeric_limits<uint8_t>::min() : std::numeric_limits<int8_t>::min());

    /** Quantization setup of matrix multiplication
     *
     *  We have a matrix multiplication of the form C = A * B,
     *  where A is (M x K), B is (K x N) and C is therefore (M x N).
     *
     *  If we have some distribution statistics of A and B, i.e. mean and variance,
     *  we can estimate the mean and variance of a single value in the C matrix and
     *  pick good scale and offset values for the output, so that the tests do not saturate.
     *
     *  Each element in the output matrix can be calculated as follows:
     *      C_ij = sum_k(A_ik * B_kj)
     *
     *  All values above are floats.
     *
     *  Note: All possible A_ik, B_kj random variables are assumed mutually independent.
     *
     *  Terminology:
     *      E[X]: Mean of the random variable X (sometimes referred to as mu_x)
     *      var(X): Variance of the random variable X (sometimes referred to as sigma^2_x)
     *      std(X): sqrt(var(X)), standard deviation of X
     *
     *  1) Calculate the mean:
     *      E[C_ij] = sum_k( E[A_ik] * E[B_kj] ) = K * mean_a * mean_b
     *
     *      Since the elements of A and B are uniformly distributed random variables, we have
     *          mean_a = (max_a + min_a) / 2, mean_b = (max_b + min_b) / 2
     *      max_a/b and min_a/b can be calculated from scale_a/b and offset_a/b by substituting
     *      the data type minimum and maximum into the dequantization equation.
     *
     *  2) Calculate the variance:
     *      var(C_ij) = sum_k( var(A_ik * B_kj) )
     *                = sum_k( E[A_ik^2 * B_kj^2] - E[A_ik]^2 * E[B_kj]^2 )
     *                = ...
     *                = K * (var_a * var_b + var_a * mean^2_b + var_b * mean^2_a)
     *
     *      Similarly, due to the properties of uniform random variables, we have
     *          var_a = (max_a - min_a)^2 / 12
     *          var_b = (max_b - min_b)^2 / 12
     *
     *  3) Now we have an idea of what an average C_ij looks like and how much deviation is
     *     present around it. The exact distribution of C is not easy to derive and depends on K.
     *     However, as K increases, the Central Limit Theorem tells us that the distribution
     *     approaches a bell-shaped, i.e. normal, distribution.
     *
     *     This is useful because, for a normal distribution, the values within +/- 2 standard
     *     deviations of the mean constitute roughly 95% of the values. Therefore, a plausible range is:
     *         C_range = [C_min, C_max] = [mean_c - 2 * std_c, mean_c + 2 * std_c]
     *
     *  4) If we map this [C_min, C_max] to [0, 255] or [-128, 127], depending on the signedness of
     *     the data type, we can find a suitable scale and offset for the output. On average, it's
     *     expected that 5% of the output values will saturate and 95% will remain in the range.
     *
     *     The equations to be solved for offset_c and scale_c are:
     *         C_min = scale_c * (type_min - offset_c)
     *         C_max = scale_c * (type_max - offset_c)
     */

    const int32_t a_offset = a_q_info.uniform().offset;
    const float   a_scale  = a_q_info.uniform().scale;
    const int32_t b_offset = b_q_info.uniform().offset;
    const float   b_scale  = b_q_info.uniform().scale;

    // Lhs/A stats
    const float max_a  = (t_max - a_offset) * a_scale;
    const float min_a  = (t_min - a_offset) * a_scale;
    const float mean_a = (max_a + min_a) / 2;
    const float var_a  = (max_a - min_a) * (max_a - min_a) / 12;

    // Rhs/B stats
    const float max_b  = (t_max - b_offset) * b_scale;
    const float min_b  = (t_min - b_offset) * b_scale;
    const float mean_b = (max_b + min_b) / 2;
    const float var_b  = (max_b - min_b) * (max_b - min_b) / 12;

    // Output stats
    const float mean_out = k * mean_a * mean_b;
    const float var_out  = k * (var_a * var_b + var_a * mean_b * mean_b + var_b * mean_a * mean_a);
    const float std_out  = std::sqrt(var_out);

    // Output quantization setup
    const float   scale_out  = 4 * std_out / 255;
    const int32_t offset_out = static_cast<int32_t>(t_min - (mean_out - 2.f * std_out) / scale_out);

    c_q_info = QuantizationInfo(scale_out, offset_out);

    return c_q_info;
}
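// Illustrative usage sketch (not part of the library): a quantized MatMul test case can derive a
// destination QuantizationInfo from the LHS/RHS infos so that outputs rarely saturate. The
// quantization parameters and M/N/K values below are hypothetical.
//
//     const QuantizationInfo lhs_q_info(0.02f, 10);
//     const QuantizationInfo rhs_q_info(0.05f, -5);
//     const QuantizationInfo dst_q_info =
//         calculate_mat_mul_dst_q_info(lhs_q_info, rhs_q_info, 16, 8, 32, DataType::QASYMM8_SIGNED);
//     // dst_q_info maps [mean_c - 2 * std_c, mean_c + 2 * std_c] of the accumulator distribution
//     // onto [-128, 127], so roughly 95% of the outputs are expected to stay in range.
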
template void get_tile(const SimpleTensor<float> &in, SimpleTensor<float> &roi, const Coordinates &coord);
template void get_tile(const SimpleTensor<half> &in, SimpleTensor<half> &roi, const Coordinates &coord);
template void get_tile(const SimpleTensor<int> &in, SimpleTensor<int> &roi, const Coordinates &coord);
template void get_tile(const SimpleTensor<short> &in, SimpleTensor<short> &roi, const Coordinates &coord);
template void get_tile(const SimpleTensor<char> &in, SimpleTensor<char> &roi, const Coordinates &coord);
template void zeros(SimpleTensor<float> &in, const Coordinates &anchor, const TensorShape &shape);
template void zeros(SimpleTensor<half> &in, const Coordinates &anchor, const TensorShape &shape);
template void transpose_matrix(const SimpleTensor<float> &in, SimpleTensor<float> &out);
template void transpose_matrix(const SimpleTensor<half> &in, SimpleTensor<half> &out);
template void transpose_matrix(const SimpleTensor<int> &in, SimpleTensor<int> &out);
template void transpose_matrix(const SimpleTensor<short> &in, SimpleTensor<short> &out);
template void transpose_matrix(const SimpleTensor<char> &in, SimpleTensor<char> &out);
template void transpose_matrix(const SimpleTensor<int8_t> &in, SimpleTensor<int8_t> &out);
template void transpose_matrix(const SimpleTensor<uint8_t> &in, SimpleTensor<uint8_t> &out);
template void matrix_multiply(const SimpleTensor<float> &a, const SimpleTensor<float> &b, SimpleTensor<float> &out);
template void matrix_multiply(const SimpleTensor<half> &a, const SimpleTensor<half> &b, SimpleTensor<half> &out);
} // namespace validation
} // namespace test
} // namespace arm_compute