From 68dd25fbe6e4d3c3513fa5993863419769aa08fc Mon Sep 17 00:00:00 2001 From: Sang-Hoon Park Date: Mon, 19 Oct 2020 16:00:11 +0100 Subject: COMPMID-3637: Move utility headers from arm_compute to src Signed-off-by: Georgios Pinitas Change-Id: If9d6fa8c900b68c4b6fd373f2fc1f9abb83ea917 Signed-off-by: Michalis Spyrou Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4145 Tested-by: Arm Jenkins Reviewed-by: Sang-Hoon Park Comments-Addressed: Arm Jenkins --- .../NEON/kernels/convolution/common/activation.hpp | 37 - .../core/NEON/kernels/convolution/common/alloc.hpp | 31 - .../core/NEON/kernels/convolution/common/arm.hpp | 39 -- .../kernels/convolution/common/convolution.hpp | 29 - .../NEON/kernels/convolution/common/padding.hpp | 91 --- .../core/NEON/kernels/convolution/common/perf.h | 32 - .../NEON/kernels/convolution/common/qasymm8.hpp | 54 -- .../NEON/kernels/convolution/common/qsymm8.hpp | 76 --- .../core/NEON/kernels/convolution/common/shims.hpp | 749 --------------------- .../NEON/kernels/convolution/common/tensor.hpp | 178 ----- .../kernels/convolution/common/tensor_utils.hpp | 46 -- .../core/NEON/kernels/convolution/common/utils.hpp | 60 -- 12 files changed, 1422 deletions(-) delete mode 100644 arm_compute/core/NEON/kernels/convolution/common/activation.hpp delete mode 100644 arm_compute/core/NEON/kernels/convolution/common/alloc.hpp delete mode 100644 arm_compute/core/NEON/kernels/convolution/common/arm.hpp delete mode 100644 arm_compute/core/NEON/kernels/convolution/common/convolution.hpp delete mode 100644 arm_compute/core/NEON/kernels/convolution/common/padding.hpp delete mode 100644 arm_compute/core/NEON/kernels/convolution/common/perf.h delete mode 100644 arm_compute/core/NEON/kernels/convolution/common/qasymm8.hpp delete mode 100644 arm_compute/core/NEON/kernels/convolution/common/qsymm8.hpp delete mode 100644 arm_compute/core/NEON/kernels/convolution/common/shims.hpp delete mode 100644 arm_compute/core/NEON/kernels/convolution/common/tensor.hpp delete mode 100644 arm_compute/core/NEON/kernels/convolution/common/tensor_utils.hpp delete mode 100644 arm_compute/core/NEON/kernels/convolution/common/utils.hpp (limited to 'arm_compute/core/NEON/kernels/convolution/common') diff --git a/arm_compute/core/NEON/kernels/convolution/common/activation.hpp b/arm_compute/core/NEON/kernels/convolution/common/activation.hpp deleted file mode 100644 index 0c9b7c1368..0000000000 --- a/arm_compute/core/NEON/kernels/convolution/common/activation.hpp +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Copyright (c) 2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#pragma once - -namespace neon_convolution_kernels -{ - -enum class ActivationFunction -{ - None, - ReLU, - ReLU6, -}; - -} diff --git a/arm_compute/core/NEON/kernels/convolution/common/alloc.hpp b/arm_compute/core/NEON/kernels/convolution/common/alloc.hpp deleted file mode 100644 index 7be3cdaaf5..0000000000 --- a/arm_compute/core/NEON/kernels/convolution/common/alloc.hpp +++ /dev/null @@ -1,31 +0,0 @@ -/* - * Copyright (c) 2017 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#pragma once - -#ifdef ALLOC_ALIGN -#define ALLOCATE(x) aligned_alloc(ALLOC_ALIGN, x) -#else -#define ALLOCATE(x) malloc(x) -#endif diff --git a/arm_compute/core/NEON/kernels/convolution/common/arm.hpp b/arm_compute/core/NEON/kernels/convolution/common/arm.hpp deleted file mode 100644 index b19bf98252..0000000000 --- a/arm_compute/core/NEON/kernels/convolution/common/arm.hpp +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright (c) 2017 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -/** Sets the macro __arm_any__ if compiling for Aarch32 or Aarch64. - * Includes `arm_neon.h` if compiling for either architecture. - */ - -#ifdef __arm__ -#define __arm_any__ -#endif // __arm__ - -#ifdef __aarch64__ -#define __arm_any__ -#endif // __aarch64__ - -#ifdef __arm_any__ -#include -#endif // __arm_any__ diff --git a/arm_compute/core/NEON/kernels/convolution/common/convolution.hpp b/arm_compute/core/NEON/kernels/convolution/common/convolution.hpp deleted file mode 100644 index b1413527c3..0000000000 --- a/arm_compute/core/NEON/kernels/convolution/common/convolution.hpp +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Copyright (c) 2017 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#pragma once - -enum PaddingType { - PADDING_SAME, PADDING_VALID -}; diff --git a/arm_compute/core/NEON/kernels/convolution/common/padding.hpp b/arm_compute/core/NEON/kernels/convolution/common/padding.hpp deleted file mode 100644 index b6f95872c0..0000000000 --- a/arm_compute/core/NEON/kernels/convolution/common/padding.hpp +++ /dev/null @@ -1,91 +0,0 @@ -/* - * Copyright (c) 2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#pragma once - -#include - -// Utilities for copying tensor tiles and adding/removing padding. -namespace padding -{ - -/* Copy a tile and apply padding to the output copy. - */ -template -void copy_and_pad_tile( - unsigned int tile_rows, - unsigned int tile_cols, - unsigned int n_channels, - const T *inptr, - unsigned int in_row_stride, - unsigned int in_col_stride, - T* outptr, - unsigned int out_row_stride, - unsigned int out_col_stride, - unsigned int pad_top, - unsigned int pad_left, - unsigned int pad_bottom, - unsigned int pad_right, - T pad_value=static_cast(0) -); - -/** Copy a tile and remove padding elements in the output. - */ -template -class CopyCropped -{ - public: - static void execute( - size_t size, // Amount of data to copy - const void *inptr, - size_t in_row_stride, - size_t in_col_stride, - void *outptr, - size_t out_row_stride, - size_t out_col_stride, - unsigned int pad_top, - unsigned int pad_left, - unsigned int pad_bottom, - unsigned int pad_right - ); -}; - -template -void crop_and_copy_tile( - unsigned int tile_rows, - unsigned int tile_cols, - unsigned int n_channels, - const T *inptr, - unsigned int in_row_stride, - unsigned int in_col_stride, - T *outptr, - unsigned int out_row_stride, - unsigned int out_col_stride, - unsigned int crop_top, - unsigned int crop_left, - unsigned int crop_bottom, - unsigned int crop_right -); - -} diff --git a/arm_compute/core/NEON/kernels/convolution/common/perf.h b/arm_compute/core/NEON/kernels/convolution/common/perf.h deleted file mode 100644 index fbae4dcdfa..0000000000 --- a/arm_compute/core/NEON/kernels/convolution/common/perf.h +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Copyright (c) 2018 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#pragma once - -/* Prototypes from perf.c */ - -void start_counter(int fd); -long long get_counter(int fd); -long long stop_counter(int fd); -int open_instruction_counter(void); -int open_cycle_counter(void); diff --git a/arm_compute/core/NEON/kernels/convolution/common/qasymm8.hpp b/arm_compute/core/NEON/kernels/convolution/common/qasymm8.hpp deleted file mode 100644 index 88ef7327c0..0000000000 --- a/arm_compute/core/NEON/kernels/convolution/common/qasymm8.hpp +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Copyright (c) 2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#pragma once -#include - -namespace qasymm8 -{ - -struct QAsymm8Params -{ - uint8_t quantize(float value) const; - float dequantize(uint8_t value) const; - - uint8_t offset; - float scale; -}; - -struct QAsymm8RescaleParams -{ - static QAsymm8RescaleParams make_rescale_params( - const QAsymm8Params& weight_quant, - const QAsymm8Params& input_quant, - const QAsymm8Params& output_quant - ); - - QAsymm8RescaleParams(int32_t shift, int32_t multiplier, float rescale); - - const int32_t shift, multiplier; - const float rescale; -}; - -} diff --git a/arm_compute/core/NEON/kernels/convolution/common/qsymm8.hpp b/arm_compute/core/NEON/kernels/convolution/common/qsymm8.hpp deleted file mode 100644 index 726a02ccfd..0000000000 --- a/arm_compute/core/NEON/kernels/convolution/common/qsymm8.hpp +++ /dev/null @@ -1,76 +0,0 @@ -/* - * Copyright (c) 2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#pragma once -#include -#include -#include "qasymm8.hpp" - - -namespace qsymm8 { - -struct QSymm8Params { - int8_t quantize(float value) const; - float dequantize(int8_t value) const; - - float scale; -}; - -struct QSymm8RescaleParams { - static QSymm8RescaleParams - make_rescale_params(const QSymm8Params &weight_quant, - const QSymm8Params &input_quant, - const QSymm8Params &output_quant); - - QSymm8RescaleParams(int32_t shift, int32_t multiplier, float rescale); - - const int32_t shift, multiplier; - const float rescale; -}; - -struct QSymm8PerChannelParams { - int8_t quantize(float value, float scale) const; - float dequantize(int8_t value, float scale) const; - - std::vector scales; -}; - -struct QSymm8PerChannelRescaleParams { - static QSymm8PerChannelRescaleParams - make_rescale_params(const QSymm8PerChannelParams &weight_quant, - const QSymm8PerChannelParams &input_quant, - const QSymm8PerChannelParams &output_quant); - - static QSymm8PerChannelRescaleParams - make_rescale_params(const QSymm8PerChannelParams &weight_quant, - const qasymm8::QAsymm8Params &input_quant, - const qasymm8::QAsymm8Params &output_quant); - - QSymm8PerChannelRescaleParams(std::vector& shift, std::vector& multiplier, std::vector& rescale); - - std::vector shifts, multipliers; - std::vector rescales; -}; - -} // namespace qsymm8 diff --git a/arm_compute/core/NEON/kernels/convolution/common/shims.hpp b/arm_compute/core/NEON/kernels/convolution/common/shims.hpp deleted file mode 100644 index 310bd47b82..0000000000 --- a/arm_compute/core/NEON/kernels/convolution/common/shims.hpp +++ /dev/null @@ -1,749 +0,0 @@ -/* - * Copyright (c) 2017 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#pragma once -#ifndef DOXYGEN_SKIP_THIS -#include -#endif /* DOXYGEN_SKIP_THIS */ -#include "arm.hpp" - -namespace reorder { -/** Re-order a tensor from NCHW format to NHWC. - * - * @note The stride parameters are optional and are provided to allow padding in either input or output tensors. - * - * @param[in] in Input tensor in NCHW format. - * @param[out] out Output tensor, to be written in NHWC format. - * @param n_batches Number of batches in the tensors. - * @param n_channels Number of channels in the tensors - * @param n_rows Height of the tensor - * @param n_cols Width of the tensor - * @param in_batch_stride Stride over batches in the input tensor. If `0` defaults to `n_channels * in_channel_stride`. - * @param in_channel_stride Stride over channels in the input tensor. If `0` defaults to `n_rows * in_row_stride`. - * @param in_row_stride Stride over rows in the input tensor. If `0` defaults to `n_cols`. - * @param out_batch_stride Stride over batches in the output tensor. If `0` defaults to `n_rows * out_row_stride`. - * @param out_row_stride Stride over rows in the output tensor. If `0` defaults to `n_cols * out_col_stride`. - * @param out_col_stride Stride over columns in the output tensor. If `0` defaults to `n_channels`. - */ -template -inline void nchw_to_nhwc( - const T* const in, - T* const out, - const int n_batches, - const int n_channels, - const int n_rows, - const int n_cols, - int in_batch_stride=0, - int in_channel_stride=0, - int in_row_stride=0, - int out_batch_stride=0, - int out_row_stride=0, - int out_col_stride=0 -); - -/** Re-order a tensor from NHWC format to NCHW. - * - * @note The stride parameters are optional and are provided to allow padding in either input or output tensors. - * - * @param[in] in Input tensor in NHWC format. - * @param[out] out Output tensor, to be written in NCHW format. - * @param n_batches Number of batches in the tensors. - * @param n_rows Height of the tensor - * @param n_cols Width of the tensor - * @param n_channels Number of channels in the tensors - * @param in_batch_stride Stride over batches in the input tensor. If `0` defaults to `n_rows * in_row_stride`. - * @param in_row_stride Stride over rows in the input tensor. If `0` defaults to `n_cols * in_col_stride`. - * @param in_col_stride Stride over columns in the input tensor. If `0` defaults to `n_channels`. - * @param out_batch_stride Stride over batches in the output tensor. If `0` defaults to `n_channels * out_channel_stride`. - * @param out_channel_stride Stride over channels in the output tensor. If `0` defaults to `n_rows * out_row_stride`. - * @param out_row_stride Stride over rows in the output tensor. If `0` defaults to `n_cols`. - */ -template -inline void nhwc_to_nchw( - const T* const in, // Input data in NHWC form - T* const out, // Output data in NCHW form - const int n_batches, - const int n_rows, - const int n_cols, - const int n_channels, - int in_batch_stride=0, - int in_row_stride=0, - int in_col_stride=0, - int out_batch_stride=0, - int out_channel_stride=0, - int out_row_stride=0 -); - -/** Re-order a weight tensor from [Output feature map x Input feature map x - * Height x Width] format to [Height x Width x Input feature map x Output - * feature map] format. - */ -template -inline void ofm_ifm_h_w_to_h_w_ifm_ofm( - const T* const in, // Input in [Output x Input x Height x Width] form - T* const out, // Output in [Height x Width x Input x Output] form - const int n_output_feature_maps, - const int n_input_feature_maps, - const int n_rows, - const int n_cols, - int in_output_feature_map_stride=0, - int in_input_feature_map_stride=0, - int in_row_stride=0, - int out_row_stride=0, - int out_col_stride=0, - int out_input_feature_map_stride=0 -); - -/** Re-order a weight tensor from [Height x Width x Input feature map x Output - * feature map] format to [Output feature map x Input feature map x Height x - * Width] format. - */ -template -inline void h_w_ifm_ofm_to_ofm_ifm_h_w( - const T* const in, // Input in [Height x Width x Input x Output] form - T* const out, // Output in [Output x Input x Height x Width] form - const int n_rows, - const int n_cols, - const int n_input_feature_maps, - const int n_output_feature_maps, - int in_row_stride=0, - int in_col_stride=0, - int in_input_feature_map_stride=0, - int out_output_feature_map_stride=0, - int out_input_feature_map_stride=0, - int out_row_stride=0 -); - -/*****************************************************************************/ -/* 32-bit implementation : NCHW -> NHWC - */ -template <> -inline void nchw_to_nhwc( - const int32_t* const in, - int32_t* const out, - const int n_batches, - const int n_channels, - const int n_rows, - const int n_cols, - int in_batch_stride, - int in_channel_stride, - int in_row_stride, - int out_batch_stride, - int out_row_stride, - int out_col_stride -) -{ - typedef int32_t T; - - // Fill in the stride values - in_row_stride = (in_row_stride) ? in_row_stride : n_cols; - in_channel_stride = (in_channel_stride) ? in_channel_stride - : n_rows * in_row_stride; - in_batch_stride = (in_batch_stride) ? in_batch_stride - : n_channels * in_channel_stride; - - out_col_stride = (out_col_stride) ? out_col_stride : n_channels; - out_row_stride = (out_row_stride) ? out_row_stride : n_cols * out_col_stride; - out_batch_stride = (out_batch_stride) ? out_batch_stride - : n_rows * out_row_stride; - - // Perform the re-ordering - for (int n = 0; n < n_batches; n++) - { - const T* const in_batch = in + n*in_batch_stride; - T* const out_batch = out + n*out_batch_stride; - - for (int i = 0; i < n_rows; i++) - { - const T* const in_row = in_batch + i*in_row_stride; - T* const out_row = out_batch + i*out_row_stride; - - int j = 0, j_remaining = n_cols; -#ifdef __arm_any__ - for (; j_remaining >= 4; j += 4, j_remaining -= 4) - { - int c = 0, c_remaining = n_channels; - for (; c_remaining >= 4; c += 4, c_remaining -= 4) - { - // Read 4 channels worth of 4 columns, then zip to produce 4 columns - // worth of 4 channels. - int32x4_t channel_pixels[4]; - channel_pixels[0] = vld1q_s32(in_row + (c + 0)*in_channel_stride + j); - channel_pixels[1] = vld1q_s32(in_row + (c + 1)*in_channel_stride + j); - channel_pixels[2] = vld1q_s32(in_row + (c + 2)*in_channel_stride + j); - channel_pixels[3] = vld1q_s32(in_row + (c + 3)*in_channel_stride + j); - - const auto zip1 = vzipq_s32(channel_pixels[0], channel_pixels[2]); - const auto zip2 = vzipq_s32(channel_pixels[1], channel_pixels[3]); - const auto out_0 = vzipq_s32(zip1.val[0], zip2.val[0]); - const auto out_1 = vzipq_s32(zip1.val[1], zip2.val[1]); - - vst1q_s32(out_row + (j + 0)*out_col_stride + c, out_0.val[0]); - vst1q_s32(out_row + (j + 1)*out_col_stride + c, out_0.val[1]); - vst1q_s32(out_row + (j + 2)*out_col_stride + c, out_1.val[0]); - vst1q_s32(out_row + (j + 3)*out_col_stride + c, out_1.val[1]); - } - for (; c_remaining; c++, c_remaining--) - { - for (int _j = 0; _j < 4; _j++) - { - const T* const in_col = in_row + j + _j; - T* const out_col = out_row + (j + _j)*out_col_stride; - const T* const in_channel = in_col + c*in_channel_stride; - out_col[c] = *(in_channel); - } - } - } - for (; j_remaining >= 2; j += 2, j_remaining -= 2) - { - int c = 0, c_remaining = n_channels; - for (; c_remaining >= 2; c += 2, c_remaining -= 2) - { - // Read 2 channels worth of 2 columns, then zip to produce 2 columns - // worth of 2 channels. - int32x2_t channel_pixels[2]; - channel_pixels[0] = vld1_s32(in_row + (c + 0)*in_channel_stride + j); - channel_pixels[1] = vld1_s32(in_row + (c + 1)*in_channel_stride + j); - - const auto output = vzip_s32(channel_pixels[0], channel_pixels[1]); - - vst1_s32(out_row + (j + 0)*out_col_stride + c, output.val[0]); - vst1_s32(out_row + (j + 1)*out_col_stride + c, output.val[1]); - } - for (; c_remaining; c++, c_remaining--) - { - for (int _j = 0; _j < 2; _j++) - { - const T* const in_col = in_row + j + _j; - T* const out_col = out_row + (j + _j)*out_col_stride; - const T* const in_channel = in_col + c*in_channel_stride; - out_col[c] = *(in_channel); - } - } - } -#endif // __arm_any__ - for (; j_remaining; j++, j_remaining--) - { - const T* const in_col = in_row + j; - T* const out_col = out_row + j*out_col_stride; - - for (int c = 0; c < n_channels; c++) - { - const T* const in_channel = in_col + c*in_channel_stride; - out_col[c] = *(in_channel); - } - } - } - } -} - -template <> -inline void nchw_to_nhwc( - const uint32_t* const in, - uint32_t* const out, - const int n_batches, - const int n_channels, - const int n_rows, - const int n_cols, - int in_batch_stride, - int in_channel_stride, - int in_row_stride, - int out_batch_stride, - int out_row_stride, - int out_col_stride -) -{ - nchw_to_nhwc( - reinterpret_cast(in), - reinterpret_cast(out), - n_batches, n_channels, n_rows, n_cols, - in_batch_stride, in_channel_stride, in_row_stride, - out_batch_stride, out_row_stride, out_col_stride - ); -} - -template <> -inline void nchw_to_nhwc( - const float* const in, - float* const out, - const int n_batches, - const int n_channels, - const int n_rows, - const int n_cols, - int in_batch_stride, - int in_channel_stride, - int in_row_stride, - int out_batch_stride, - int out_row_stride, - int out_col_stride -) -{ - nchw_to_nhwc( - reinterpret_cast(in), - reinterpret_cast(out), - n_batches, n_channels, n_rows, n_cols, - in_batch_stride, in_channel_stride, in_row_stride, - out_batch_stride, out_row_stride, out_col_stride - ); -} - -/*****************************************************************************/ -/* Generic implementation : NCHW -> NHWC - */ -template -inline void nchw_to_nhwc( - const T* const in, - T* const out, - const int n_batches, - const int n_channels, - const int n_rows, - const int n_cols, - int in_batch_stride, - int in_channel_stride, - int in_row_stride, - int out_batch_stride, - int out_row_stride, - int out_col_stride -) -{ - // Fill in the stride values - in_row_stride = (in_row_stride) ? in_row_stride : n_cols; - in_channel_stride = (in_channel_stride) ? in_channel_stride - : n_rows * in_row_stride; - in_batch_stride = (in_batch_stride) ? in_batch_stride - : n_channels * in_channel_stride; - - out_col_stride = (out_col_stride) ? out_col_stride : n_channels; - out_row_stride = (out_row_stride) ? out_row_stride : n_cols * out_col_stride; - out_batch_stride = (out_batch_stride) ? out_batch_stride - : n_rows * out_row_stride; - - // Perform the re-ordering - for (int n = 0; n < n_batches; n++) - { - const T* const in_batch = in + n*in_batch_stride; - T* const out_batch = out + n*out_batch_stride; - - for (int i = 0; i < n_rows; i++) - { - const T* const in_row = in_batch + i*in_row_stride; - T* const out_row = out_batch + i*out_row_stride; - - for (int j = 0; j < n_cols; j++) - { - const T* const in_col = in_row + j; - T* const out_col = out_row + j*out_col_stride; - - for (int c = 0; c < n_channels; c++) - { - const T* const in_channel = in_col + c*in_channel_stride; - out_col[c] = *(in_channel); - } - } - } - } -} - -/*****************************************************************************/ -/* 32-bit implementation : NHWC -> NCHW - */ -template <> -inline void nhwc_to_nchw( - const int32_t* const in, // Input data in NHWC form - int32_t* const out, // Output data in NCHW form - const int n_batches, - const int n_rows, - const int n_cols, - const int n_channels, - int in_batch_stride, - int in_row_stride, - int in_col_stride, - int out_batch_stride, - int out_channel_stride, - int out_row_stride -) -{ - typedef int32_t T; - - // Fill in stride values - in_col_stride = (in_col_stride) ? in_col_stride : n_channels; - in_row_stride = (in_row_stride) ? in_row_stride : n_cols * in_col_stride; - in_batch_stride = (in_batch_stride) ? in_batch_stride - : n_rows * in_row_stride; - - out_row_stride = (out_row_stride) ? out_row_stride : n_cols; - out_channel_stride = (out_channel_stride) ? out_channel_stride - : n_rows * out_row_stride; - out_batch_stride = (out_batch_stride) ? out_batch_stride - : n_channels * out_channel_stride; - - // Perform the re-ordering - // For every batch - for (int n = 0; n < n_batches; n++) - { - const T* const in_batch = in + n*in_batch_stride; - T* const out_batch = out + n*out_batch_stride; - - // For every row - for (int i = 0; i < n_rows; i++) - { - const T* const in_i = in_batch + i*in_row_stride; - T* const out_i = out_batch + i*out_row_stride; - - // For every column, beginning with chunks of 4 - int j = 0, j_remaining = n_cols; -#ifdef __arm_any__ - for (; j_remaining >= 4; j += 4, j_remaining -=4) - { - // For every channel, beginning with chunks of 4 - int c = 0, c_remaining = n_channels; - for (; c_remaining >= 4; c += 4, c_remaining -= 4) - { - // Read 4 columns worth of 4 channels then zip to produce 4 channels - // worth of 4 columns. - int32x4_t pixel_channels[4]; - pixel_channels[0] = vld1q_s32(in_i + (j + 0)*in_col_stride + c); - pixel_channels[1] = vld1q_s32(in_i + (j + 1)*in_col_stride + c); - pixel_channels[2] = vld1q_s32(in_i + (j + 2)*in_col_stride + c); - pixel_channels[3] = vld1q_s32(in_i + (j + 3)*in_col_stride + c); - - const auto zip1 = vzipq_s32(pixel_channels[0], pixel_channels[2]); - const auto zip2 = vzipq_s32(pixel_channels[1], pixel_channels[3]); - const auto out_0 = vzipq_s32(zip1.val[0], zip2.val[0]); - const auto out_1 = vzipq_s32(zip1.val[1], zip2.val[1]); - - vst1q_s32(out_i + j + (c + 0)*out_channel_stride, out_0.val[0]); - vst1q_s32(out_i + j + (c + 1)*out_channel_stride, out_0.val[1]); - vst1q_s32(out_i + j + (c + 2)*out_channel_stride, out_1.val[0]); - vst1q_s32(out_i + j + (c + 3)*out_channel_stride, out_1.val[1]); - } - for (; c_remaining; c++, c_remaining--) - { - for (int _j = 0; _j < 4; _j++) - { - const T* const in_j = in_i + (j + _j)*in_col_stride; - T* const out_j = out_i + (j + _j); - - const T* const in_channel = in_j + c; - T* const out_channel = out_j + c*out_channel_stride; - *(out_channel) = *(in_channel); - } - } - } - for (; j_remaining >= 2; j += 2, j_remaining -=2) - { - int c = 0, c_remaining = n_channels; - for (; c_remaining >= 2; c += 2, c_remaining -= 2) - { - // Read 2 columns worth of 2 channels then zip to produce 2 channels - // worth of 2 columns. - int32x2_t pixel_channels[2]; - pixel_channels[0] = vld1_s32(in_i + (j + 0)*in_col_stride + c); - pixel_channels[1] = vld1_s32(in_i + (j + 1)*in_col_stride + c); - - const auto output = vzip_s32(pixel_channels[0], pixel_channels[1]); - - vst1_s32(out_i + j + (c + 0)*out_channel_stride, output.val[0]); - vst1_s32(out_i + j + (c + 1)*out_channel_stride, output.val[1]); - } - for (; c_remaining; c++, c_remaining--) - { - for (int _j = 0; _j < 2; _j++) - { - const T* const in_j = in_i + (j + _j)*in_col_stride; - T* const out_j = out_i + (j + _j); - - const T* const in_channel = in_j + c; - T* const out_channel = out_j + c*out_channel_stride; - *(out_channel) = *(in_channel); - } - } - } -#endif // __arm_any__ - for (; j_remaining; j++, j_remaining--) - { - const T* const in_j = in_i + j*in_col_stride; - T* const out_j = out_i + j; - - // For every channel - for (int c = 0; c < n_channels; c++) - { - const T* const in_channel = in_j + c; - T* const out_channel = out_j + c*out_channel_stride; - *(out_channel) = *(in_channel); - } - } - } - } -} - -template <> -inline void nhwc_to_nchw( - const uint32_t* const in, // Input data in NHWC form - uint32_t* const out, // Output data in NCHW form - const int n_batches, - const int n_rows, - const int n_cols, - const int n_channels, - int in_batch_stride, - int in_row_stride, - int in_col_stride, - int out_batch_stride, - int out_channel_stride, - int out_row_stride -) -{ - // Redirect to generic 32-bit implementation - nhwc_to_nchw( - reinterpret_cast(in), - reinterpret_cast(out), - n_batches, n_rows, n_cols, n_channels, - in_batch_stride, in_row_stride, in_col_stride, - out_batch_stride, out_channel_stride, out_row_stride - ); -} - -template <> -inline void nhwc_to_nchw( - const float* const in, // Input data in NHWC form - float* const out, // Output data in NCHW form - const int n_batches, - const int n_rows, - const int n_cols, - const int n_channels, - int in_batch_stride, - int in_row_stride, - int in_col_stride, - int out_batch_stride, - int out_channel_stride, - int out_row_stride -) -{ - // Redirect to generic 32-bit implementation - nhwc_to_nchw( - reinterpret_cast(in), - reinterpret_cast(out), - n_batches, n_rows, n_cols, n_channels, - in_batch_stride, in_row_stride, in_col_stride, - out_batch_stride, out_channel_stride, out_row_stride - ); -} - -/*****************************************************************************/ -/* Generic implementation : NHWC -> NCHW - */ -template -inline void nhwc_to_nchw( - const T* const in, // Input data in NHWC form - T* const out, // Output data in NCHW form - const int n_batches, - const int n_rows, - const int n_cols, - const int n_channels, - int in_batch_stride, - int in_row_stride, - int in_col_stride, - int out_batch_stride, - int out_channel_stride, - int out_row_stride -) -{ - // Fill in stride values - in_col_stride = (in_col_stride) ? in_col_stride : n_channels; - in_row_stride = (in_row_stride) ? in_row_stride : n_cols * in_col_stride; - in_batch_stride = (in_batch_stride) ? in_batch_stride - : n_rows * in_row_stride; - - out_row_stride = (out_row_stride) ? out_row_stride : n_cols; - out_channel_stride = (out_channel_stride) ? out_channel_stride - : n_rows * out_row_stride; - out_batch_stride = (out_batch_stride) ? out_batch_stride - : n_channels * out_channel_stride; - - // Perform the re-ordering - // For every batch - for (int n = 0; n < n_batches; n++) - { - const T* const in_batch = in + n*in_batch_stride; - T* const out_batch = out + n*out_batch_stride; - - // For every row - for (int i = 0; i < n_rows; i++) - { - const T* const in_i = in_batch + i*in_row_stride; - T* const out_i = out_batch + i*out_row_stride; - - // For every column - for (int j = 0; j < n_cols; j++) - { - const T* const in_j = in_i + j*in_col_stride; - T* const out_j = out_i + j; - - // For every channel - for (int c = 0; c < n_channels; c++) - { - const T* const in_channel = in_j + c; - T* const out_channel = out_j + c*out_channel_stride; - *(out_channel) = *(in_channel); - } - } - } - } -} - -/*****************************************************************************/ -/* Generic weight re-order implementation. - */ -template -inline void ofm_ifm_h_w_to_h_w_ifm_ofm( - const T* const in, // Input in [Output x Input x Height x Width] form - T* const out, // Output in [Height x Width x Input x Output] form - const int n_output_feature_maps, - const int n_input_feature_maps, - const int n_rows, - const int n_cols, - int in_output_feature_map_stride, - int in_input_feature_map_stride, - int in_row_stride, - int out_row_stride, - int out_col_stride, - int out_input_feature_map_stride -) -{ - // Fill in stride values - in_row_stride = (in_row_stride) - ? in_row_stride - : n_cols; - in_input_feature_map_stride = (in_input_feature_map_stride) - ? in_input_feature_map_stride - : n_rows * in_row_stride; - in_output_feature_map_stride = (in_output_feature_map_stride) - ? in_output_feature_map_stride - : n_input_feature_maps * in_input_feature_map_stride; - - out_input_feature_map_stride = (out_input_feature_map_stride) - ? out_input_feature_map_stride - : n_output_feature_maps; - out_col_stride = (out_col_stride) - ? out_col_stride - : n_input_feature_maps * out_input_feature_map_stride; - out_row_stride = (out_row_stride) - ? out_row_stride - : n_cols * out_col_stride; - - // Perform the re-ordering - for (int i = 0; i < n_rows; i++) - { - const T* const in_row = in + i * in_row_stride; - T* out_row = out + i * out_row_stride; - - for (int j = 0; j < n_cols; j++) - { - const T* const in_col = in_row + j; - T* const out_col = out_row + j * out_col_stride; - - for (int ifm = 0; ifm < n_input_feature_maps; ifm++) - { - const T* const in_ifm = in_col + ifm * in_input_feature_map_stride; - T* const out_ifm = out_col + ifm * out_input_feature_map_stride; - - for (int ofm = 0; ofm < n_output_feature_maps; ofm++) - { - const T* const in_ofm = in_ifm + ofm * in_output_feature_map_stride; - T* const out_ofm = out_ifm + ofm; - *(out_ofm) = *(in_ofm); - } - } - } - } -} - -/*****************************************************************************/ -/* Generic weight re-order implementation. - */ -template -inline void h_w_ifm_ofm_to_ofm_ifm_h_w( - const T* const in, // Input in [Height x Width x Input x Output] form - T* const out, // Output in [Output x Input x Height x Width] form - const int n_rows, - const int n_cols, - const int n_input_feature_maps, - const int n_output_feature_maps, - int in_row_stride, - int in_col_stride, - int in_input_feature_map_stride, - int out_output_feature_map_stride, - int out_input_feature_map_stride, - int out_row_stride -) -{ - // Fill in the stride values - in_input_feature_map_stride = (in_input_feature_map_stride) - ? in_input_feature_map_stride - : n_output_feature_maps; - in_col_stride = (in_col_stride) - ? in_col_stride - : n_input_feature_maps * in_input_feature_map_stride; - in_row_stride = (in_row_stride) - ? in_row_stride - : n_cols * in_col_stride; - - out_row_stride = (out_row_stride) - ? out_row_stride - : n_cols; - out_input_feature_map_stride = (out_input_feature_map_stride) - ? out_input_feature_map_stride - : n_rows * out_row_stride; - out_output_feature_map_stride = (out_output_feature_map_stride) - ? out_output_feature_map_stride - : n_input_feature_maps * out_input_feature_map_stride; - - // Perform the re-ordering - for (int i = 0; i < n_rows; i++) - { - const T* const in_row = in + i * in_row_stride; - T* const out_row = out + i * out_row_stride; - - for (int j = 0; j < n_cols; j++) - { - const T* const in_col = in_row + j * in_col_stride; - T* const out_col = out_row + j; - - for (int ifm = 0; ifm < n_input_feature_maps; ifm++) - { - const T* const in_ifm = in_col + ifm * in_input_feature_map_stride; - T* const out_ifm = out_col + ifm * out_input_feature_map_stride; - - for (int ofm = 0; ofm < n_output_feature_maps; ofm++) - { - const T* const in_ofm = in_ifm + ofm; - T* const out_ofm = out_ifm + ofm * out_output_feature_map_stride; - *(out_ofm) = *(in_ofm); - } - } - } - } -} - -} // namespace reorder diff --git a/arm_compute/core/NEON/kernels/convolution/common/tensor.hpp b/arm_compute/core/NEON/kernels/convolution/common/tensor.hpp deleted file mode 100644 index 7738cdb349..0000000000 --- a/arm_compute/core/NEON/kernels/convolution/common/tensor.hpp +++ /dev/null @@ -1,178 +0,0 @@ -/* - * Copyright (c) 2017-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#pragma once -#include -#include - -#include "alloc.hpp" - -enum TensorOrder -{ - NHWC, ///< [Batch x Height x Width x Channels] - NCHW, ///< [Batch x Channels x Height x Width] -}; - -struct Tensor4DShape -{ - int n_batches, n_rows, n_cols, n_channels; - TensorOrder ordering; - - // Create a new tensor with the default (NHWC) ordering - inline Tensor4DShape( - const int n_batches, - const int n_rows, - const int n_cols, - const int n_channels, - const TensorOrder ordering=NHWC - ) : n_batches(n_batches), - n_rows(n_rows), - n_cols(n_cols), - n_channels(n_channels), - ordering(ordering) - { - } - - inline int index(const int n, const int i, const int j, const int c) const - { - if (this->ordering == NHWC) - { - return ((n*this->n_rows + i)*this->n_cols + j)*this->n_channels + c; - } - else // NCHW - { - return ((n*this->n_channels + c)*this->n_rows + i)*this->n_cols + j; - } - } - - inline int size() const - { - return n_batches * n_rows * n_cols * n_channels; - } - - inline bool TestEq(const Tensor4DShape& other) const - { - return (n_batches == other.n_batches && - n_rows == other.n_rows && - n_cols == other.n_cols && - n_channels == other.n_channels); - } -}; - - -enum WeightOrder -{ - HWIO, ///< [Height x Width x Input channels x Output channels] - OIHW, ///< [Output channels x Input channels x Height x Width] -}; - -struct KernelShape -{ - int n_output_channels, n_rows, n_cols, n_input_channels; - WeightOrder ordering; - - inline KernelShape( - const int n_output_channels, - const int n_rows, - const int n_cols, - const int n_input_channels, - const WeightOrder ordering=HWIO - ) : n_output_channels(n_output_channels), - n_rows(n_rows), - n_cols(n_cols), - n_input_channels(n_input_channels), - ordering(ordering) - { - } - - inline int index(int oc, int i, int j, int ic) const - { - if (this->ordering == HWIO) - { - return ((i*this->n_cols + j)*this->n_input_channels + ic)*this->n_output_channels + oc; - } - else // OIHW - { - return ((oc*this->n_input_channels + ic)*this->n_rows + i)*this->n_cols + j; - } - } - - inline int size(void) const - { - return n_output_channels * n_rows * n_cols * n_input_channels; - } -}; - - -template -class Tensor4D final -{ - public: - Tensor4D(ShapeT shape) : - shape(shape), - _data(reinterpret_cast(ALLOCATE(size_bytes()))) - { - Clear(); - } - - Tensor4D(const Tensor4D&) = delete; - Tensor4D operator=(const Tensor4D&) = delete; - - ~Tensor4D() { - free(_data); - } - - inline T* ptr() const { - return _data; - } - - inline size_t size_bytes() const { - return shape.size() * sizeof(T); - } - - /* Extract an element of the tensor. - * - * If the shape is a Tensor4DShape then the index is given as batch, row, - * column and channel. If the shape is a KernelShape then the index is - * given as output channel, row, column and input channel. - */ - inline T& element(const int a, const int b, const int c, const int d) const - { - return _data[shape.index(a, b, c, d)]; - } - - inline void Clear() { - Fill(static_cast(0)); - } - - inline void Fill(T val) { - for (int i = 0; i < shape.size(); i++) - _data[i] = val; - } - - const ShapeT shape; - - private: - T* const _data; -}; diff --git a/arm_compute/core/NEON/kernels/convolution/common/tensor_utils.hpp b/arm_compute/core/NEON/kernels/convolution/common/tensor_utils.hpp deleted file mode 100644 index 82619f4799..0000000000 --- a/arm_compute/core/NEON/kernels/convolution/common/tensor_utils.hpp +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Copyright (c) 2017 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#pragma once -#include "tensor.hpp" - -// Methods to print tensors and weights -void PrintTensor(const Tensor4D& tensor); -void PrintWeights(const Tensor4D& weights); - -// Test the equivalence of two tensors -// Counts the instances that |a - b|/|a| > max_err -bool CmpTensors( - const Tensor4D& a, - const Tensor4D& b, - const float max_err=0.0f -); - -// Fill the tensor with a test pattern -void TestPattern(Tensor4D& tensor); -void TestPattern(Tensor4D& weights); - -// Fill the tensor with random values -void Randomise(Tensor4D& tensor, const int seed=0); -void Randomise(Tensor4D& weights, const int seed=0); diff --git a/arm_compute/core/NEON/kernels/convolution/common/utils.hpp b/arm_compute/core/NEON/kernels/convolution/common/utils.hpp deleted file mode 100644 index b7a9517c65..0000000000 --- a/arm_compute/core/NEON/kernels/convolution/common/utils.hpp +++ /dev/null @@ -1,60 +0,0 @@ -/* - * Copyright (c) 2017-2018 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#pragma once - -#include - -void PrintMatrix(const float *const m, const int M, const int N, const int row_stride); - -constexpr inline int iceildiv(const int a, const int b) -{ - return (a + b - 1) / b; -} - -template -inline T roundup(const T a, const T b) -{ - return b * iceildiv(a, b); -} - -template -struct TypeBounds -{ - static constexpr T lower() noexcept { return std::numeric_limits::has_infinity - ? -std::numeric_limits::infinity() - : std::numeric_limits::lowest(); }; - static constexpr T upper() noexcept { return std::numeric_limits::has_infinity - ? std::numeric_limits::infinity() - : std::numeric_limits::max(); }; -}; - -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -template<> -struct TypeBounds<__fp16> -{ - static constexpr __fp16 lower() noexcept { return -std::numeric_limits::infinity(); }; - static constexpr __fp16 upper() noexcept { return std::numeric_limits::infinity(); } -}; -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ -- cgit v1.2.1