From 68dd25fbe6e4d3c3513fa5993863419769aa08fc Mon Sep 17 00:00:00 2001 From: Sang-Hoon Park Date: Mon, 19 Oct 2020 16:00:11 +0100 Subject: COMPMID-3637: Move utility headers from arm_compute to src Signed-off-by: Georgios Pinitas Change-Id: If9d6fa8c900b68c4b6fd373f2fc1f9abb83ea917 Signed-off-by: Michalis Spyrou Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4145 Tested-by: Arm Jenkins Reviewed-by: Sang-Hoon Park Comments-Addressed: Arm Jenkins --- arm_compute/core/AccessWindowAutoPadding.h | 85 --- arm_compute/core/AccessWindowStatic.h | 101 --- arm_compute/core/AccessWindowTranspose.h | 48 -- arm_compute/core/CL/CLValidate.h | 61 -- arm_compute/core/CL/ICLGEMMKernelConfiguration.h | 68 -- arm_compute/core/CL/ICLKernel.h | 1 + arm_compute/core/CL/gemm/CLGEMMHelpers.h | 73 -- .../gemm/native/CLGEMMNativeKernelConfiguration.h | 65 -- .../CLGEMMNativeKernelConfigurationBifrost.h | 56 -- .../CLGEMMNativeKernelConfigurationMidgard.h | 51 -- .../CLGEMMNativeKernelConfigurationValhall.h | 53 -- .../reshaped/CLGEMMReshapedKernelConfiguration.h | 63 -- .../CLGEMMReshapedKernelConfigurationBifrost.h | 56 -- .../CLGEMMReshapedKernelConfigurationValhall.h | 53 -- .../CLGEMMReshapedOnlyRHSKernelConfiguration.h | 63 -- ...GEMMReshapedOnlyRHSKernelConfigurationBifrost.h | 59 -- ...GEMMReshapedOnlyRHSKernelConfigurationValhall.h | 53 -- arm_compute/core/CPP/Validate.h | 117 ---- arm_compute/core/GPUTarget.h | 4 +- arm_compute/core/Helpers.h | 615 +---------------- arm_compute/core/Helpers.inl | 134 ---- arm_compute/core/ITensorInfo.h | 4 +- .../NEDepthwiseConvolutionLayerNativeKernel.h | 1 + .../NEON/kernels/assembly/INEGEMMWrapperKernel.h | 108 --- .../NEDepthwiseConvolutionAssemblyKernelWrapper.h | 88 --- .../NEON/kernels/convolution/common/activation.hpp | 37 - .../core/NEON/kernels/convolution/common/alloc.hpp | 31 - .../core/NEON/kernels/convolution/common/arm.hpp | 39 -- .../kernels/convolution/common/convolution.hpp | 29 - 
.../NEON/kernels/convolution/common/padding.hpp | 91 --- .../core/NEON/kernels/convolution/common/perf.h | 32 - .../NEON/kernels/convolution/common/qasymm8.hpp | 54 -- .../NEON/kernels/convolution/common/qsymm8.hpp | 76 --- .../core/NEON/kernels/convolution/common/shims.hpp | 749 --------------------- .../NEON/kernels/convolution/common/tensor.hpp | 178 ----- .../kernels/convolution/common/tensor_utils.hpp | 46 -- .../core/NEON/kernels/convolution/common/utils.hpp | 60 -- .../kernels/convolution/depthwise/depthwise.hpp | 551 --------------- .../convolution/depthwise/depthwise_dilated.hpp | 156 ----- .../convolution/depthwise/depthwise_quantized.hpp | 291 -------- .../depthwise/depthwise_quantized_dilated.hpp | 88 --- arm_compute/core/SubTensorInfo.h | 3 +- arm_compute/core/utils/helpers/bit_ops.h | 52 -- arm_compute/core/utils/helpers/fft.h | 55 -- arm_compute/core/utils/helpers/float_ops.h | 116 ---- arm_compute/core/utils/helpers/tensor_info.h | 57 -- arm_compute/core/utils/math/SafeOps.h | 6 +- arm_compute/core/utils/misc/CRTP.h | 55 -- arm_compute/core/utils/misc/Cast.h | 119 ---- arm_compute/core/utils/misc/ICloneable.h | 48 -- arm_compute/core/utils/misc/Iterable.h | 108 --- arm_compute/core/utils/misc/Random.h | 98 --- arm_compute/core/utils/misc/Requires.h | 51 -- arm_compute/core/utils/misc/Rounding.h | 205 ------ arm_compute/core/utils/misc/SaturateCast.h | 218 ------ 55 files changed, 13 insertions(+), 5766 deletions(-) delete mode 100644 arm_compute/core/AccessWindowAutoPadding.h delete mode 100644 arm_compute/core/AccessWindowStatic.h delete mode 100644 arm_compute/core/AccessWindowTranspose.h delete mode 100644 arm_compute/core/CL/CLValidate.h delete mode 100644 arm_compute/core/CL/ICLGEMMKernelConfiguration.h delete mode 100644 arm_compute/core/CL/gemm/CLGEMMHelpers.h delete mode 100644 arm_compute/core/CL/gemm/native/CLGEMMNativeKernelConfiguration.h delete mode 100644 arm_compute/core/CL/gemm/native/CLGEMMNativeKernelConfigurationBifrost.h delete 
mode 100644 arm_compute/core/CL/gemm/native/CLGEMMNativeKernelConfigurationMidgard.h delete mode 100644 arm_compute/core/CL/gemm/native/CLGEMMNativeKernelConfigurationValhall.h delete mode 100644 arm_compute/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfiguration.h delete mode 100644 arm_compute/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationBifrost.h delete mode 100644 arm_compute/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationValhall.h delete mode 100644 arm_compute/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfiguration.h delete mode 100644 arm_compute/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationBifrost.h delete mode 100644 arm_compute/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationValhall.h delete mode 100644 arm_compute/core/CPP/Validate.h delete mode 100644 arm_compute/core/NEON/kernels/assembly/INEGEMMWrapperKernel.h delete mode 100644 arm_compute/core/NEON/kernels/assembly/NEDepthwiseConvolutionAssemblyKernelWrapper.h delete mode 100644 arm_compute/core/NEON/kernels/convolution/common/activation.hpp delete mode 100644 arm_compute/core/NEON/kernels/convolution/common/alloc.hpp delete mode 100644 arm_compute/core/NEON/kernels/convolution/common/arm.hpp delete mode 100644 arm_compute/core/NEON/kernels/convolution/common/convolution.hpp delete mode 100644 arm_compute/core/NEON/kernels/convolution/common/padding.hpp delete mode 100644 arm_compute/core/NEON/kernels/convolution/common/perf.h delete mode 100644 arm_compute/core/NEON/kernels/convolution/common/qasymm8.hpp delete mode 100644 arm_compute/core/NEON/kernels/convolution/common/qsymm8.hpp delete mode 100644 arm_compute/core/NEON/kernels/convolution/common/shims.hpp delete mode 100644 arm_compute/core/NEON/kernels/convolution/common/tensor.hpp delete mode 100644 arm_compute/core/NEON/kernels/convolution/common/tensor_utils.hpp delete mode 100644 arm_compute/core/NEON/kernels/convolution/common/utils.hpp delete mode 100644 
arm_compute/core/NEON/kernels/convolution/depthwise/depthwise.hpp delete mode 100644 arm_compute/core/NEON/kernels/convolution/depthwise/depthwise_dilated.hpp delete mode 100644 arm_compute/core/NEON/kernels/convolution/depthwise/depthwise_quantized.hpp delete mode 100644 arm_compute/core/NEON/kernels/convolution/depthwise/depthwise_quantized_dilated.hpp delete mode 100644 arm_compute/core/utils/helpers/bit_ops.h delete mode 100644 arm_compute/core/utils/helpers/fft.h delete mode 100644 arm_compute/core/utils/helpers/float_ops.h delete mode 100644 arm_compute/core/utils/helpers/tensor_info.h delete mode 100644 arm_compute/core/utils/misc/CRTP.h delete mode 100644 arm_compute/core/utils/misc/Cast.h delete mode 100644 arm_compute/core/utils/misc/ICloneable.h delete mode 100644 arm_compute/core/utils/misc/Iterable.h delete mode 100644 arm_compute/core/utils/misc/Random.h delete mode 100644 arm_compute/core/utils/misc/Requires.h delete mode 100644 arm_compute/core/utils/misc/Rounding.h delete mode 100644 arm_compute/core/utils/misc/SaturateCast.h (limited to 'arm_compute/core') diff --git a/arm_compute/core/AccessWindowAutoPadding.h b/arm_compute/core/AccessWindowAutoPadding.h deleted file mode 100644 index 12d65532cb..0000000000 --- a/arm_compute/core/AccessWindowAutoPadding.h +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Copyright (c) 2017-2019 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_ACCESS_WINDOW_AUTO_PADDING_H -#define ARM_COMPUTE_ACCESS_WINDOW_AUTO_PADDING_H - -#include "arm_compute/core/Coordinates.h" -#include "arm_compute/core/IAccessWindow.h" -#include "arm_compute/core/TensorShape.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -class Window; -class ITensorInfo; - -/** Dummy access window. - * - * This implementation always uses the auto padding of the tensor info and - * never updates the window. The valid region is always set to cover the entire - * tensor. - * - * @note This access window is only used during the migration to the new - * padding system. It will be removed once all kernels have been ported. - * - * */ -class AccessWindowAutoPadding : public IAccessWindow -{ -public: - /** Default constructor. - * - * @param[in,out] info Tensor info of the accessed kernel. 
- */ - AccessWindowAutoPadding(ITensorInfo *info); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - AccessWindowAutoPadding(const AccessWindowAutoPadding &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - AccessWindowAutoPadding &operator=(const AccessWindowAutoPadding &) = delete; - /** Allow instances of this class to be move constructed */ - AccessWindowAutoPadding(AccessWindowAutoPadding &&) = default; - /** Allow instances of this class to be moved */ - AccessWindowAutoPadding &operator=(AccessWindowAutoPadding &&) = default; - /** Default destructor */ - ~AccessWindowAutoPadding() = default; - - /** Set the valid region to match the entire tensor. */ - void set_valid_region(); - - /** Return a valid region that spans across the entire tensor. - * - * @return a valid region. - * - */ - ValidRegion compute_valid_region() const; - - // Inherited methods overridden: - bool update_window_if_needed(Window &window) const override; - bool update_padding_if_needed(const Window &window) override; - ValidRegion compute_valid_region(const Window &window, ValidRegion input_valid_region, bool border_undefined, BorderSize border_size) const override; - -private: - ITensorInfo *_info; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_ACCESS_WINDOW_AUTO_PADDING_H*/ diff --git a/arm_compute/core/AccessWindowStatic.h b/arm_compute/core/AccessWindowStatic.h deleted file mode 100644 index 1f2ca1b470..0000000000 --- a/arm_compute/core/AccessWindowStatic.h +++ /dev/null @@ -1,101 +0,0 @@ -/* - * Copyright (c) 2017-2019 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_IACCESS_WINDOW_STATIC_H -#define ARM_COMPUTE_IACCESS_WINDOW_STATIC_H - -#include "arm_compute/core/Coordinates.h" -#include "arm_compute/core/IAccessWindow.h" -#include "arm_compute/core/TensorShape.h" -#include "arm_compute/core/Types.h" - -#include - -namespace arm_compute -{ -class Window; -class ITensorInfo; - -/** Implementation of a static rectangular access pattern. - * - * In this implementation the access offsets and sizes are not relative to the - * current element. Instead they are considered to be absolute coordinates - * within the accessed tensor's shape. - * - * */ -class AccessWindowStatic : public IAccessWindow -{ -public: - /** Constructor for a static access pattern. - * - * @param[in,out] info Tensor info of the accessed kernel. - * @param[in] start_x Start of the access in X direction. 
- * @param[in] start_y Start of the access in Y direction. - * @param[in] end_x End of the access in X direction. - * @param[in] end_y End of the access in Y direction. - */ - AccessWindowStatic(ITensorInfo *info, int start_x, int start_y, int end_x, int end_y); - - /** Prevent instances of this class from being copied (As this class contains pointers) */ - AccessWindowStatic(const AccessWindowStatic &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - AccessWindowStatic &operator=(const AccessWindowStatic &) = delete; - /** Allow instances of this class to be move constructed */ - AccessWindowStatic(AccessWindowStatic &&) = default; - /** Allow instances of this class to be moved */ - AccessWindowStatic &operator=(AccessWindowStatic &&) = default; - /** Default destructor */ - ~AccessWindowStatic() = default; - - /** Set the valid region based on the static access pattern and valid - * region of the inputs. - * - * @param[in] window Execution window of the kernel. - * @param[in] input_valid_region Combined valid region of all inputs. - */ - void set_valid_region(const Window &window, const ValidRegion &input_valid_region); - - /** Compute the valid region based on the static access pattern and valid region of the inputs. - * - * @param[in] window Execution window of the kernel. - * @param[in] input_valid_region Combined valid region of all inputs. - * - * @return a valid region. 
- * - */ - ValidRegion compute_valid_region(const Window &window, ValidRegion input_valid_region) const; - - // Inherited methods overriden: - bool update_window_if_needed(Window &window) const override; - bool update_padding_if_needed(const Window &window) override; - ValidRegion compute_valid_region(const Window &window, ValidRegion input_valid_region, bool border_undefined, BorderSize border_size) const override; - -private: - ITensorInfo *_info; - int _start_x; - int _start_y; - int _end_x; - int _end_y; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_IACCESS_WINDOW_STATIC_H*/ diff --git a/arm_compute/core/AccessWindowTranspose.h b/arm_compute/core/AccessWindowTranspose.h deleted file mode 100644 index 85709092c3..0000000000 --- a/arm_compute/core/AccessWindowTranspose.h +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Copyright (c) 2017-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_IACCESS_WINDOW_TRANSPOSE_H -#define ARM_COMPUTE_IACCESS_WINDOW_TRANSPOSE_H - -#include "arm_compute/core/Coordinates.h" -#include "arm_compute/core/IAccessWindow.h" -#include "arm_compute/core/TensorShape.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -class Window; -class ITensorInfo; - -/** Implementation of a XY-transpose access pattern. */ -class AccessWindowTranspose : public AccessWindowRectangle -{ -public: - using AccessWindowRectangle::AccessWindowRectangle; - bool update_window_if_needed(Window &window) const override; - bool update_padding_if_needed(const Window &window) override; - using AccessWindowRectangle::compute_valid_region; - ValidRegion compute_valid_region(const Window &window, ValidRegion input_valid_region, bool border_undefined, BorderSize border_size) const override; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_IACCESS_WINDOW_TRANSPOSE_H*/ diff --git a/arm_compute/core/CL/CLValidate.h b/arm_compute/core/CL/CLValidate.h deleted file mode 100644 index 3f8b76ba4c..0000000000 --- a/arm_compute/core/CL/CLValidate.h +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Copyright (c) 2018-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CL_VALIDATE_H -#define ARM_COMPUTE_CL_VALIDATE_H - -#include "arm_compute/core/Validate.h" - -namespace arm_compute -{ -#define ARM_COMPUTE_ERROR_ON_F16_UNSUPPORTED(tensor) \ - ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_unsupported_fp16(__func__, __FILE__, __LINE__, tensor, CLKernelLibrary::get().fp16_supported())) - -#define ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(tensor) \ - ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_unsupported_fp16(__func__, __FILE__, __LINE__, tensor, CLKernelLibrary::get().fp16_supported())) - -/** Return an error if int64_base_atomics extension is not supported by the device. - * - * @param[in] function Function in which the error occurred. - * @param[in] file Name of the file where the error occurred. - * @param[in] line Line on which the error occurred. 
- * - * @return Status - */ -inline arm_compute::Status error_on_unsupported_int64_base_atomics(const char *function, const char *file, const int line) -{ - if(!CLKernelLibrary::get().int64_base_atomics_supported()) - { - return ARM_COMPUTE_CREATE_ERROR_LOC(arm_compute::ErrorCode::UNSUPPORTED_EXTENSION_USE, function, file, line, "Atomic functions are not supported"); - } - return arm_compute::Status{}; -} - -#define ARM_COMPUTE_ERROR_ON_INT64_BASE_ATOMICS_UNSUPPORTED() \ - ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_unsupported_int64_base_atomics(__func__, __FILE__, __LINE__)); - -#define ARM_COMPUTE_RETURN_ERROR_ON_INT64_BASE_ATOMICS_UNSUPPORTED() \ - ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_unsupported_int64_base_atomics(__func__, __FILE__, __LINE__)); - -} // namespace arm_compute -#endif /* ARM_COMPUTE_CL_VALIDATE_H */ diff --git a/arm_compute/core/CL/ICLGEMMKernelConfiguration.h b/arm_compute/core/CL/ICLGEMMKernelConfiguration.h deleted file mode 100644 index 90600efba5..0000000000 --- a/arm_compute/core/CL/ICLGEMMKernelConfiguration.h +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Copyright (c) 2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_ICLGEMMKERNELCONFIGURATION_H -#define ARM_COMPUTE_ICLGEMMKERNELCONFIGURATION_H - -#include "arm_compute/core/GPUTarget.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -/** Basic interface for the GEMM kernel configuration */ -class ICLGEMMKernelConfiguration -{ -public: - /** Constructor - * - * @param[in] arch GPU target - */ - ICLGEMMKernelConfiguration(GPUTarget arch) - : _target(arch) - { - } - /** Prevent instances of this class from being copied (As this class contains pointers) */ - ICLGEMMKernelConfiguration(const ICLGEMMKernelConfiguration &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - ICLGEMMKernelConfiguration &operator=(const ICLGEMMKernelConfiguration &) = delete; - /** Default Move Constructor. 
*/ - ICLGEMMKernelConfiguration(ICLGEMMKernelConfiguration &&) = default; - /** Default move assignment operator */ - ICLGEMMKernelConfiguration &operator=(ICLGEMMKernelConfiguration &&) = default; - /** Virtual destructor */ - virtual ~ICLGEMMKernelConfiguration() = default; - /** Given M, N, K and B, this method returns the @ref GEMMLHSMatrixInfo and @ref GEMMRHSMatrixInfo to be used - * - * @param[in] m Number of rows LHS matrix - * @param[in] n Number of columns RHS matrix - * @param[in] k Number of columns LHS matrix or number of rows RHS matrix - * @param[in] b Batch size - * @param[in] data_type Data type - */ - virtual std::pair configure(unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type) = 0; - -protected: - GPUTarget _target; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_ICLGEMMKERNELCONFIGURATION_H */ diff --git a/arm_compute/core/CL/ICLKernel.h b/arm_compute/core/CL/ICLKernel.h index d4990a1dee..a24cd8c798 100644 --- a/arm_compute/core/CL/ICLKernel.h +++ b/arm_compute/core/CL/ICLKernel.h @@ -29,6 +29,7 @@ #include "arm_compute/core/CL/OpenCL.h" #include "arm_compute/core/GPUTarget.h" #include "arm_compute/core/IKernel.h" +#include "arm_compute/core/Validate.h" #include "arm_compute/core/experimental/Types.h" #include diff --git a/arm_compute/core/CL/gemm/CLGEMMHelpers.h b/arm_compute/core/CL/gemm/CLGEMMHelpers.h deleted file mode 100644 index 013c068cf7..0000000000 --- a/arm_compute/core/CL/gemm/CLGEMMHelpers.h +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Copyright (c) 2019-2020 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_CLGEMMHELPERS_H -#define ARM_COMPUTE_CLGEMMHELPERS_H - -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -class ITensorInfo; -struct GEMMRHSMatrixInfo; - -namespace cl_gemm -{ -/** Configure @ref GEMMLHSMatrixInfo and @ref GEMMRHSMatrixInfo - * - * @param[in] m Number of rows (M) in the LHS matrix not reshaped - * @param[in] n Number of columns (N) in the RHS matrix not reshaped - * @param[in] m0 Number of rows processed by each thread/work-item - * @param[in] n0 Number of columns processed by each thread/work-item - * @param[in] k0 Number of inner accumulation performed by each thread/work-item - * @param[in] v0 Number of vertical blocks of size (m0xk0) stored on the same output row - * @param[in] h0 Number of horizontal blocks of size (k0xn0) stored on the same output row - * @param[in] lhs_interleave True if the v0 (m0xk0) blocks have to be interleaved in the output row - * @param[in] rhs_interleave True if the h0 (k0xn0) blocks have to be interleaved in the output row - * @param[in] lhs_transpose True if the (m0xk0) block has to be transposed before been stored - * @param[in] rhs_transpose True if the (k0xn0) block has to be transposed before been stored - * @param[in] export_to_cl_image (Optional) True if the RHS reshaped matrix has to be exported to cl_image - * - * @return @ref GEMMLHSMatrixInfo and @ref GEMMRHSMatrixInfo - */ -std::pair configure_lhs_rhs_info(unsigned int m, unsigned int n, unsigned int m0, unsigned int n0, unsigned int k0, unsigned int v0, unsigned int h0, - bool lhs_interleave, bool rhs_interleave, bool lhs_transpose, bool rhs_transpose, bool export_to_cl_image = false); - -/** Update padding required to export the OpenCL buffer to OpenCL image2d - * - * @param[in,out] tensor ITensorInfo of the tensor required to be exported to OpenCL image2d - */ -void update_padding_for_cl_image(ITensorInfo *tensor); - -/** Utility function to validate the image2d OpenCL 
object support on the RHS reshaped matrix - * - * @param[in] tensor_reshaped_info TensorInfo for the RHS reshaped matrix - * @param[in] rhs_info @ref GEMMRHSMatrixInfo - * - * @return Status reporting if we can use the image2d OpenCL object on the RHS reshaped matrix - */ -Status validate_image2d_support_on_rhs(const ITensorInfo &tensor_reshaped_info, const GEMMRHSMatrixInfo &rhs_info); -} // namespace cl_gemm -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLGEMMHELPERS_H */ diff --git a/arm_compute/core/CL/gemm/native/CLGEMMNativeKernelConfiguration.h b/arm_compute/core/CL/gemm/native/CLGEMMNativeKernelConfiguration.h deleted file mode 100644 index 7270a8e6db..0000000000 --- a/arm_compute/core/CL/gemm/native/CLGEMMNativeKernelConfiguration.h +++ /dev/null @@ -1,65 +0,0 @@ -/* - * Copyright (c) 2019-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_CLGEMMNATIVEKERNELCONFIGURATION_H -#define ARM_COMPUTE_CLGEMMNATIVEKERNELCONFIGURATION_H - -#include "arm_compute/core/CL/ICLGEMMKernelConfiguration.h" -#include "arm_compute/core/CL/gemm/native/CLGEMMNativeKernelConfigurationBifrost.h" -#include "arm_compute/core/CL/gemm/native/CLGEMMNativeKernelConfigurationMidgard.h" -#include "arm_compute/core/CL/gemm/native/CLGEMMNativeKernelConfigurationValhall.h" - -#include - -namespace arm_compute -{ -namespace cl_gemm -{ -/** CLGEMMNative factory class */ -class CLGEMMNativeKernelConfigurationFactory final -{ -public: - /** Static method to construct CLGEMMNative kernel object accordingly with the GPU target - * - * @param[in] gpu GPU target - * - * @return CLGEMMNative kernel configuration class - */ - static std::unique_ptr create(GPUTarget gpu) - { - switch(get_arch_from_target(gpu)) - { - case GPUTarget::MIDGARD: - return support::cpp14::make_unique(gpu); - case GPUTarget::BIFROST: - return support::cpp14::make_unique(gpu); - case GPUTarget::VALHALL: - return support::cpp14::make_unique(gpu); - default: - ARM_COMPUTE_ERROR("Not supported GPU target"); - } - } -}; -} // namespace cl_gemm -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLGEMMNATIVEKERNELCONFIGURATION_H */ diff --git a/arm_compute/core/CL/gemm/native/CLGEMMNativeKernelConfigurationBifrost.h b/arm_compute/core/CL/gemm/native/CLGEMMNativeKernelConfigurationBifrost.h deleted file mode 100644 index 1e4989615e..0000000000 --- a/arm_compute/core/CL/gemm/native/CLGEMMNativeKernelConfigurationBifrost.h +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Copyright (c) 2019-2020 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_CLGEMMNATIVEKERNELCONFIGURATIONBIFROST_H -#define ARM_COMPUTE_CLGEMMNATIVEKERNELCONFIGURATIONBIFROST_H - -#include "arm_compute/core/CL/ICLGEMMKernelConfiguration.h" - -namespace arm_compute -{ -namespace cl_gemm -{ -/** Bifrost based OpenCL GEMMNative configuration */ -class CLGEMMNativeKernelConfigurationBifrost final : public ICLGEMMKernelConfiguration -{ -public: - /** Constructor - * - * @param[in] gpu GPU target - */ - CLGEMMNativeKernelConfigurationBifrost(GPUTarget gpu); - - // Inherited overridden method - std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure(unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type) override; - -private: - std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G71_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b); - std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G71_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b); - std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G76_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b); - std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G76_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b); - std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_default_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b); - std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_default_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b); -}; -} // namespace cl_gemm -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLGEMMNATIVEKERNELCONFIGURATIONBIFROST_H */ diff --git a/arm_compute/core/CL/gemm/native/CLGEMMNativeKernelConfigurationMidgard.h b/arm_compute/core/CL/gemm/native/CLGEMMNativeKernelConfigurationMidgard.h deleted file mode 100644 index 4cebfceb75..0000000000 --- a/arm_compute/core/CL/gemm/native/CLGEMMNativeKernelConfigurationMidgard.h +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Copyright (c) 2020 Arm Limited.
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_CLGEMMNATIVEKERNELCONFIGURATIONMIDGARD_H -#define ARM_COMPUTE_CLGEMMNATIVEKERNELCONFIGURATIONMIDGARD_H - -#include "arm_compute/core/CL/ICLGEMMKernelConfiguration.h" - -namespace arm_compute -{ -namespace cl_gemm -{ -/** Midgard based OpenCL GEMMNative configuration */ -class CLGEMMNativeKernelConfigurationMidgard final : public ICLGEMMKernelConfiguration -{ -public: - /** Constructor - * - * @param[in] gpu GPU target - */ - CLGEMMNativeKernelConfigurationMidgard(GPUTarget gpu); - - // Inherited overridden method - std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure(unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type) override; - -private: - std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> default_q8(unsigned int m, unsigned int n, unsigned int k, unsigned int b); -}; -} // namespace cl_gemm -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLGEMMNATIVEKERNELCONFIGURATIONMIDGARD_H */ diff --git a/arm_compute/core/CL/gemm/native/CLGEMMNativeKernelConfigurationValhall.h b/arm_compute/core/CL/gemm/native/CLGEMMNativeKernelConfigurationValhall.h deleted file mode 100644 index 07389ea76f..0000000000 --- a/arm_compute/core/CL/gemm/native/CLGEMMNativeKernelConfigurationValhall.h +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright (c) 2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software.
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLGEMMNATIVEKERNELCONFIGURATIONVALHALL_H -#define ARM_COMPUTE_CLGEMMNATIVEKERNELCONFIGURATIONVALHALL_H - -#include "arm_compute/core/CL/ICLGEMMKernelConfiguration.h" - -namespace arm_compute -{ -namespace cl_gemm -{ -/** Valhall based OpenCL GEMMNative configuration */ -class CLGEMMNativeKernelConfigurationValhall final : public ICLGEMMKernelConfiguration -{ -public: - /** Constructor - * - * @param[in] gpu GPU target - */ - CLGEMMNativeKernelConfigurationValhall(GPUTarget gpu); - - // Inherited overridden method - std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure(unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type) override; - -private: - std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G77_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b); - std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G77_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b); - std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G77_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b); -}; -} // namespace cl_gemm -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLGEMMNATIVEKERNELCONFIGURATIONVALHALL_H */ diff --git a/arm_compute/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfiguration.h b/arm_compute/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfiguration.h deleted file mode 100644 index b953fd264f..0000000000 --- a/arm_compute/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfiguration.h +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Copyright (c) 2019-2020 Arm Limited.
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_CLGEMMRESHAPEDKERNELCONFIGURATION_H -#define ARM_COMPUTE_CLGEMMRESHAPEDKERNELCONFIGURATION_H - -#include "arm_compute/core/CL/ICLGEMMKernelConfiguration.h" -#include "arm_compute/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationBifrost.h" -#include "arm_compute/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationValhall.h" - -#include <memory> - -namespace arm_compute -{ -namespace cl_gemm -{ -/** CLGEMMReshaped factory class */ -class CLGEMMReshapedKernelConfigurationFactory final -{ -public: - /** Static method to call the CLGEMMReshaped kernel configuration class accordingly with the GPU target - * - * @param[in] gpu GPU target - * - * @return CLGEMMReshaped kernel configuration class - */ - static std::unique_ptr<ICLGEMMKernelConfiguration> create(GPUTarget gpu) - { - switch(get_arch_from_target(gpu)) - { - case GPUTarget::MIDGARD: - case GPUTarget::BIFROST: - return support::cpp14::make_unique<CLGEMMReshapedKernelConfigurationBifrost>(gpu); - case GPUTarget::VALHALL: - return support::cpp14::make_unique<CLGEMMReshapedKernelConfigurationValhall>(gpu); - default: - ARM_COMPUTE_ERROR("Not supported GPU target"); - } - } -}; -} // namespace cl_gemm -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLGEMMRESHAPEDKERNELCONFIGURATION_H */ diff --git a/arm_compute/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationBifrost.h b/arm_compute/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationBifrost.h deleted file mode 100644 index 4df27843aa..0000000000 --- a/arm_compute/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationBifrost.h +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Copyright (c) 2019-2020 Arm Limited.
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_CLGEMMRESHAPEDKERNELCONFIGURATIONBIFROST_H -#define ARM_COMPUTE_CLGEMMRESHAPEDKERNELCONFIGURATIONBIFROST_H - -#include "arm_compute/core/CL/ICLGEMMKernelConfiguration.h" - -namespace arm_compute -{ -namespace cl_gemm -{ -/** Bifrost based OpenCL GEMMReshaped configuration */ -class CLGEMMReshapedKernelConfigurationBifrost final : public ICLGEMMKernelConfiguration -{ -public: - /** Constructor - * - * @param[in] gpu GPU target - */ - CLGEMMReshapedKernelConfigurationBifrost(GPUTarget gpu); - - // Inherited overridden method - std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure(unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type) override; - -private: - std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G7x_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b); - std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G76_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b); - std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G7x_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b); - std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G76_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b); - std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G7x_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b); - std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G76_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b); -}; -} // namespace cl_gemm -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLGEMMRESHAPEDKERNELCONFIGURATIONBIFROST_H */ diff --git a/arm_compute/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationValhall.h b/arm_compute/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationValhall.h deleted file mode 100644 index 7a617e05be..0000000000 --- a/arm_compute/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationValhall.h +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright (c) 2020 Arm Limited.
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_CLGEMMRESHAPEDKERNELCONFIGURATIONVALHALL_H -#define ARM_COMPUTE_CLGEMMRESHAPEDKERNELCONFIGURATIONVALHALL_H - -#include "arm_compute/core/CL/ICLGEMMKernelConfiguration.h" - -namespace arm_compute -{ -namespace cl_gemm -{ -/** Valhall based OpenCL GEMMReshaped configuration */ -class CLGEMMReshapedKernelConfigurationValhall final : public ICLGEMMKernelConfiguration -{ -public: - /** Constructor - * - * @param[in] gpu GPU target - */ - CLGEMMReshapedKernelConfigurationValhall(GPUTarget gpu); - - // Inherited overridden method - std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure(unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type) override; - -private: - std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G77_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b); - std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G77_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b); - std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G77_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b); -}; -} // namespace cl_gemm -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLGEMMRESHAPEDKERNELCONFIGURATIONVALHALL_H */ diff --git a/arm_compute/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfiguration.h b/arm_compute/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfiguration.h deleted file mode 100644 index 6d5ce8835b..0000000000 --- a/arm_compute/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfiguration.h +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Copyright (c) 2019-2020 Arm Limited.
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_CLGEMMRESHAPEDONLYRHSKERNELCONFIGURATION_H -#define ARM_COMPUTE_CLGEMMRESHAPEDONLYRHSKERNELCONFIGURATION_H - -#include "arm_compute/core/CL/ICLGEMMKernelConfiguration.h" -#include "arm_compute/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationBifrost.h" -#include "arm_compute/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationValhall.h" - -#include <memory> - -namespace arm_compute -{ -namespace cl_gemm -{ -/** CLGEMMReshapedOnlyRHS factory class */ -class CLGEMMReshapedOnlyRHSKernelConfigurationFactory final -{ -public: - /** Static method to call the CLGEMMReshapedOnlyRHS kernel configuration class accordingly with the GPU target - * - * @param[in] gpu GPU target - * - * @return CLGEMMReshapedOnlyRHS kernel configuration class - */ - static std::unique_ptr<ICLGEMMKernelConfiguration> create(GPUTarget gpu) - { - switch(get_arch_from_target(gpu)) - { - case GPUTarget::MIDGARD: - case GPUTarget::BIFROST: - return support::cpp14::make_unique<CLGEMMReshapedOnlyRHSKernelConfigurationBifrost>(gpu); - case GPUTarget::VALHALL: - return support::cpp14::make_unique<CLGEMMReshapedOnlyRHSKernelConfigurationValhall>(gpu); - default: - ARM_COMPUTE_ERROR("Not supported GPU target"); - } - } -}; -} // namespace cl_gemm -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLGEMMRESHAPEDONLYRHSKERNELCONFIGURATION_H */ diff --git a/arm_compute/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationBifrost.h b/arm_compute/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationBifrost.h deleted file mode 100644 index 346bfd7b91..0000000000 --- a/arm_compute/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationBifrost.h +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Copyright (c) 2019-2020 Arm Limited.
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_CLGEMMRESHAPEDONLYRHSKERNELCONFIGURATIONBIFROST_H -#define ARM_COMPUTE_CLGEMMRESHAPEDONLYRHSKERNELCONFIGURATIONBIFROST_H - -#include "arm_compute/core/CL/ICLGEMMKernelConfiguration.h" - -namespace arm_compute -{ -namespace cl_gemm -{ -/** Bifrost based OpenCL GEMMReshapedOnlyRHS configuration */ -class CLGEMMReshapedOnlyRHSKernelConfigurationBifrost final : public ICLGEMMKernelConfiguration -{ -public: - /** Constructor - * - * @param[in] gpu GPU target - */ - CLGEMMReshapedOnlyRHSKernelConfigurationBifrost(GPUTarget gpu); - - // Inherited overridden method - std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure(unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type) override; - -private: - std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G7x_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b); - std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G76_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b); - std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G51_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b); - std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G7x_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b); - std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G76_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b); - std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G51_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b); - std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G7x_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b); - std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G76_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b); - std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G51_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b); -}; -} // namespace cl_gemm -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLGEMMRESHAPEDONLYRHSKERNELCONFIGURATIONBIFROST_H */ diff --git a/arm_compute/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationValhall.h b/arm_compute/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationValhall.h deleted file mode 100644 index 2162baf338..0000000000 --- 
a/arm_compute/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationValhall.h +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright (c) 2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_CLGEMMRESHAPEDONLYRHSKERNELCONFIGURATIONVALHALL_H -#define ARM_COMPUTE_CLGEMMRESHAPEDONLYRHSKERNELCONFIGURATIONVALHALL_H - -#include "arm_compute/core/CL/ICLGEMMKernelConfiguration.h" - -namespace arm_compute -{ -namespace cl_gemm -{ -/** Valhall based OpenCL GEMMReshapedOnlyRHS configuration */ -class CLGEMMReshapedOnlyRHSKernelConfigurationValhall final : public ICLGEMMKernelConfiguration -{ -public: - /** Constructor - * - * @param[in] gpu GPU target - */ - CLGEMMReshapedOnlyRHSKernelConfigurationValhall(GPUTarget gpu); - - // Inherited overridden method - std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure(unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type) override; - -private: - std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G77_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b); - std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G77_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b); - std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G77_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b); -}; -} // namespace cl_gemm -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLGEMMRESHAPEDONLYRHSKERNELCONFIGURATIONVALHALL_H */ diff --git a/arm_compute/core/CPP/Validate.h b/arm_compute/core/CPP/Validate.h deleted file mode 100644 index 9e95f72c3f..0000000000 --- a/arm_compute/core/CPP/Validate.h +++ /dev/null @@ -1,117 +0,0 @@ -/* - * Copyright (c) 2018-2020 Arm Limited.
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CPP_VALIDATE_H -#define ARM_COMPUTE_CPP_VALIDATE_H - -#include "arm_compute/core/Validate.h" - -namespace arm_compute -{ -/** Return an error if the data type of the passed tensor info is FP16 and FP16 support is not compiled in. - * - * @param[in] function Function in which the error occurred. - * @param[in] file Name of the file where the error occurred. - * @param[in] line Line on which the error occurred. - * @param[in] tensor_info Tensor info to validate. 
- * - * @return Status - */ -inline Status error_on_unsupported_cpu_fp16(const char *function, const char *file, const int line, - const ITensorInfo *tensor_info) -{ - ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor_info == nullptr, function, file, line); -#ifndef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC - ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG(tensor_info->data_type() == DataType::F16, - function, file, line, "This CPU architecture does not support F16 data type, you need v8.2 or above"); -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ - return Status {}; -} - -/** Return an error if the data type of the passed tensor info is BFLOAT16 and BFLOAT16 support is not compiled in. - * - * @param[in] function Function in which the error occurred. - * @param[in] file Name of the file where the error occurred. - * @param[in] line Line on which the error occurred. - * @param[in] tensor_info Tensor info to validate. - * - * @return Status - */ -inline Status error_on_unsupported_cpu_bf16(const char *function, const char *file, const int line, - const ITensorInfo *tensor_info) -{ - ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor_info == nullptr, function, file, line); -#if !(defined(__ARM_FEATURE_BF16_VECTOR_ARITHMETIC) || defined(ARM_COMPUTE_FORCE_BF16)) - ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG(tensor_info->data_type() == DataType::BFLOAT16, - function, file, line, "This CPU architecture does not support BFloat16 data type, you need v8.6 or above"); -#endif /* !(defined(__ARM_FEATURE_BF16_VECTOR_ARITHMETIC) || defined(ARM_COMPUTE_FORCE_BF16)) */ - return Status {}; -} - -/** Return an error if the data type of the passed tensor is FP16 and FP16 support is not compiled in. - * - * @param[in] function Function in which the error occurred. - * @param[in] file Name of the file where the error occurred. - * @param[in] line Line on which the error occurred. - * @param[in] tensor Tensor to validate. 
- * - * @return Status - */ -inline Status error_on_unsupported_cpu_fp16(const char *function, const char *file, const int line, - const ITensor *tensor) -{ - ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor == nullptr, function, file, line); - ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_unsupported_cpu_fp16(function, file, line, tensor->info())); - return Status{}; -} - -/** Return an error if the data type of the passed tensor is BFLOAT16 and BFLOAT16 support is not compiled in. - * - * @param[in] function Function in which the error occurred. - * @param[in] file Name of the file where the error occurred. - * @param[in] line Line on which the error occurred. - * @param[in] tensor Tensor to validate. - * - * @return Status - */ -inline Status error_on_unsupported_cpu_bf16(const char *function, const char *file, const int line, - const ITensor *tensor) -{ - ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor == nullptr, function, file, line); - ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_unsupported_cpu_bf16(function, file, line, tensor->info())); - return Status{}; -} - -#define ARM_COMPUTE_ERROR_ON_CPU_F16_UNSUPPORTED(tensor) \ - ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_unsupported_cpu_fp16(__func__, __FILE__, __LINE__, tensor)) - -#define ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(tensor) \ - ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_unsupported_cpu_fp16(__func__, __FILE__, __LINE__, tensor)) - -#define ARM_COMPUTE_ERROR_ON_CPU_BF16_UNSUPPORTED(tensor) \ - ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_unsupported_cpu_bf16(__func__, __FILE__, __LINE__, tensor)) - -#define ARM_COMPUTE_RETURN_ERROR_ON_CPU_BF16_UNSUPPORTED(tensor) \ - ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_unsupported_cpu_bf16(__func__, __FILE__, __LINE__, tensor)) -} // namespace arm_compute -#endif /* ARM_COMPUTE_CPP_VALIDATE_H */ diff --git a/arm_compute/core/GPUTarget.h b/arm_compute/core/GPUTarget.h index 06025ca3ae..b8143f8d5c 100644 --- 
a/arm_compute/core/GPUTarget.h +++ b/arm_compute/core/GPUTarget.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 Arm Limited. + * Copyright (c) 2018-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,7 +24,7 @@ #ifndef ARM_COMPUTE_GPUTARGET_H #define ARM_COMPUTE_GPUTARGET_H -#include "arm_compute/core/Helpers.h" +#include "support/Traits.h" #include <string> diff --git a/arm_compute/core/Helpers.h b/arm_compute/core/Helpers.h index 90dd6082e1..5a8d6efe9d 100644 --- a/arm_compute/core/Helpers.h +++ b/arm_compute/core/Helpers.h @@ -24,23 +24,17 @@ #ifndef ARM_COMPUTE_HELPERS_H #define ARM_COMPUTE_HELPERS_H -#include "arm_compute/core/Coordinates.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/IAccessWindow.h" -#include "arm_compute/core/Steps.h" -#include "arm_compute/core/Strides.h" -#include "arm_compute/core/TensorShape.h" +#include "arm_compute/core/ITensor.h" #include "arm_compute/core/Types.h" +#include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" -#include "support/MemorySupport.h" #include <array> #include <cstddef> #include <cstdint> -#include <memory> #include <tuple> -#include <type_traits> -#include <utility> namespace arm_compute { @@ -48,307 +42,6 @@ class IKernel; class ITensor; class ITensorInfo; -/** Disable bitwise operations by default */ -template <typename T> -struct enable_bitwise_ops -{ - static constexpr bool value = false; /**< Disabled */ -}; - -#ifndef DOXYGEN_SKIP_THIS -template <typename T> -typename std::enable_if<enable_bitwise_ops<T>::value, T>::type operator&(T lhs, T rhs) -{ - using underlying_type = typename std::underlying_type<T>::type; - return static_cast<T>(static_cast<underlying_type>(lhs) & static_cast<underlying_type>(rhs)); -} -#endif /* DOXYGEN_SKIP_THIS */ - -/** Helper function to create and return a unique_ptr pointed to a CL/GLES kernel object - * It also calls the kernel's configuration. - * - * @param[in] args All the arguments that need pass to kernel's configuration. - * - * @return A unique pointer pointed to a CL/GLES kernel object - */ -template <typename Kernel, typename... T> -std::unique_ptr<Kernel> create_configure_kernel(T &&... 
args) -{ - std::unique_ptr<Kernel> k = arm_compute::support::cpp14::make_unique<Kernel>(); - k->configure(std::forward<T>(args)...); - return k; -} - -/** Helper function to create and return a unique_ptr pointed to a CL/GLES kernel object - * - * @return A unique pointer pointed to a Kernel kernel object - */ -template <typename Kernel> -std::unique_ptr<Kernel> create_kernel() -{ - std::unique_ptr<Kernel> k = arm_compute::support::cpp14::make_unique<Kernel>(); - return k; -} - -namespace traits -{ -/** Check if a type T is contained in a tuple Tuple of types */ -template <typename T, typename Tuple> -struct is_contained; - -template <typename T> -struct is_contained<T, std::tuple<>> : std::false_type -{ -}; - -template <typename T, typename... Ts> -struct is_contained<T, std::tuple<T, Ts...>> : std::true_type -{ -}; - -template <typename T, typename U, typename... Ts> -struct is_contained<T, std::tuple<U, Ts...>> : is_contained<T, std::tuple<Ts...>> -{ -}; -} - -/** Computes bilinear interpolation using the pointer to the top-left pixel and the pixel's distance between - * the real coordinates and the smallest following integer coordinates. Input must be in single channel format. - * - * @param[in] pixel_ptr Pointer to the top-left pixel value of a single channel input.
- * @param[in] stride Stride to access the bottom-left and bottom-right pixel values - * @param[in] dx Pixel's distance between the X real coordinate and the smallest X following integer - * @param[in] dy Pixel's distance between the Y real coordinate and the smallest Y following integer - * - * @note dx and dy must be in the range [0, 1.0] - * - * @return The bilinear interpolated pixel value - */ -template <typename T> -inline T delta_bilinear_c1(const T *pixel_ptr, size_t stride, float dx, float dy) -{ - ARM_COMPUTE_ERROR_ON(pixel_ptr == nullptr); - - const float dx1 = 1.0f - dx; - const float dy1 = 1.0f - dy; - - const T a00 = *pixel_ptr; - const T a01 = *(pixel_ptr + 1); - const T a10 = *(pixel_ptr + stride); - const T a11 = *(pixel_ptr + stride + 1); - - const float w1 = dx1 * dy1; - const float w2 = dx * dy1; - const float w3 = dx1 * dy; - const float w4 = dx * dy; - - return static_cast<T>(a00 * w1 + a01 * w2 + a10 * w3 + a11 * w4); -} - -/** Computes bilinear interpolation for quantized input and output, using the pointer to the top-left pixel and the pixel's distance between - * the real coordinates and the smallest following integer coordinates. Input must be QASYMM8 and in single channel format. - * - * @param[in] pixel_ptr Pointer to the top-left pixel value of a single channel input.
- * @param[in] stride Stride to access the bottom-left and bottom-right pixel values - * @param[in] dx Pixel's distance between the X real coordinate and the smallest X following integer - * @param[in] dy Pixel's distance between the Y real coordinate and the smallest Y following integer - * @param[in] iq_info Input QuantizationInfo - * @param[in] oq_info Output QuantizationInfo - * - * @note dx and dy must be in the range [0, 1.0] - * - * @return The bilinear interpolated pixel value - */ -inline uint8_t delta_bilinear_c1_quantized(const uint8_t *pixel_ptr, size_t stride, float dx, float dy, UniformQuantizationInfo iq_info, UniformQuantizationInfo oq_info) -{ - ARM_COMPUTE_ERROR_ON(pixel_ptr == nullptr); - - const float dx1 = 1.0f - dx; - const float dy1 = 1.0f - dy; - - const float a00 = dequantize_qasymm8(*pixel_ptr, iq_info); - const float a01 = dequantize_qasymm8(*(pixel_ptr + 1), iq_info); - const float a10 = dequantize_qasymm8(*(pixel_ptr + stride), iq_info); - const float a11 = dequantize_qasymm8(*(pixel_ptr + stride + 1), iq_info); - - const float w1 = dx1 * dy1; - const float w2 = dx * dy1; - const float w3 = dx1 * dy; - const float w4 = dx * dy; - float res = a00 * w1 + a01 * w2 + a10 * w3 + a11 * w4; - return static_cast(quantize_qasymm8(res, oq_info)); -} - -/** Computes bilinear interpolation for quantized input and output, using the pointer to the top-left pixel and the pixel's distance between - * the real coordinates and the smallest following integer coordinates. Input must be QASYMM8_SIGNED and in single channel format. - * - * @param[in] pixel_ptr Pointer to the top-left pixel value of a single channel input. 
- * @param[in] stride Stride to access the bottom-left and bottom-right pixel values - * @param[in] dx Pixel's distance between the X real coordinate and the smallest X following integer - * @param[in] dy Pixel's distance between the Y real coordinate and the smallest Y following integer - * @param[in] iq_info Input QuantizationInfo - * @param[in] oq_info Output QuantizationInfo - * - * @note dx and dy must be in the range [0, 1.0] - * - * @return The bilinear interpolated pixel value - */ -inline int8_t delta_bilinear_c1_quantized(const int8_t *pixel_ptr, size_t stride, float dx, float dy, UniformQuantizationInfo iq_info, UniformQuantizationInfo oq_info) -{ - ARM_COMPUTE_ERROR_ON(pixel_ptr == nullptr); - - const float dx1 = 1.0f - dx; - const float dy1 = 1.0f - dy; - - const float a00 = dequantize_qasymm8_signed(*pixel_ptr, iq_info); - const float a01 = dequantize_qasymm8_signed(*(pixel_ptr + 1), iq_info); - const float a10 = dequantize_qasymm8_signed(*(pixel_ptr + stride), iq_info); - const float a11 = dequantize_qasymm8_signed(*(pixel_ptr + stride + 1), iq_info); - - const float w1 = dx1 * dy1; - const float w2 = dx * dy1; - const float w3 = dx1 * dy; - const float w4 = dx * dy; - float res = a00 * w1 + a01 * w2 + a10 * w3 + a11 * w4; - return static_cast(quantize_qasymm8_signed(res, oq_info)); -} - -/** Computes linear interpolation using the pointer to the top pixel and the pixel's distance between - * the real coordinates and the smallest following integer coordinates. Input must be in single channel format. - * - * @param[in] pixel_ptr Pointer to the top pixel value of a single channel input. 
- * @param[in] stride Stride to access the bottom pixel value - * @param[in] dy Pixel's distance between the Y real coordinate and the smallest Y following integer - * - * @note dy must be in the range [0, 1.0] - * - * @return The linear interpolated pixel value - */ -template -inline T delta_linear_c1_y(const T *pixel_ptr, size_t stride, float dy) -{ - ARM_COMPUTE_ERROR_ON(pixel_ptr == nullptr); - - const float dy1 = 1.0f - dy; - - const T a00 = *pixel_ptr; - const T a10 = *(pixel_ptr + stride); - - const float w1 = dy1; - const float w3 = dy; - - return static_cast(a00 * w1 + a10 * w3); -} -/** Computes linear interpolation using the pointer to the left pixel and the pixel's distance between - * the real coordinates and the smallest following integer coordinates. Input must be in single channel format. - * - * @param[in] pixel_ptr Pointer to the left pixel value of a single channel input. - * @param[in] dx Pixel's distance between the X real coordinate and the smallest X following integer - * - * @note dx must be in the range [0, 1.0] - * - * @return The linear interpolated pixel value - */ -template -inline T delta_linear_c1_x(const T *pixel_ptr, float dx) -{ - ARM_COMPUTE_ERROR_ON(pixel_ptr == nullptr); - - const T a00 = *pixel_ptr; - const T a01 = *(pixel_ptr + 1); - - const float dx1 = 1.0f - dx; - - const float w1 = dx1; - const float w2 = dx; - - return static_cast(a00 * w1 + a01 * w2); -} -/** Return the pixel at (x,y) using bilinear interpolation. - * - * @warning Only works if the iterator was created with an IImage - * - * @param[in] first_pixel_ptr Pointer to the first pixel of a single channel input. - * @param[in] stride Stride in bytes of the image; - * @param[in] x X position of the wanted pixel - * @param[in] y Y position of the wanted pixel - * - * @return The pixel at (x, y) using bilinear interpolation. 
- */ -template -inline T pixel_bilinear_c1(const T *first_pixel_ptr, size_t stride, float x, float y) -{ - ARM_COMPUTE_ERROR_ON(first_pixel_ptr == nullptr); - - const int32_t xi = std::floor(x); - const int32_t yi = std::floor(y); - - const float dx = x - xi; - const float dy = y - yi; - - return delta_bilinear_c1(first_pixel_ptr + xi + yi * stride, stride, dx, dy); -} - -/** Return the pixel at (x,y) using bilinear interpolation by clamping when out of borders. The image must be single channel input - * - * @warning Only works if the iterator was created with an IImage - * - * @param[in] first_pixel_ptr Pointer to the first pixel of a single channel image. - * @param[in] stride Stride in bytes of the image - * @param[in] width Width of the image - * @param[in] height Height of the image - * @param[in] x X position of the wanted pixel - * @param[in] y Y position of the wanted pixel - * - * @return The pixel at (x, y) using bilinear interpolation. - */ -template -inline uint8_t pixel_bilinear_c1_clamp(const T *first_pixel_ptr, size_t stride, size_t width, size_t height, float x, float y) -{ - ARM_COMPUTE_ERROR_ON(first_pixel_ptr == nullptr); - - x = std::max(-1.f, std::min(x, static_cast(width))); - y = std::max(-1.f, std::min(y, static_cast(height))); - - const float xi = std::floor(x); - const float yi = std::floor(y); - - const float dx = x - xi; - const float dy = y - yi; - - if(dx == 0.0f) - { - if(dy == 0.0f) - { - return static_cast(first_pixel_ptr[static_cast(xi) + static_cast(yi) * stride]); - } - return delta_linear_c1_y(first_pixel_ptr + static_cast(xi) + static_cast(yi) * stride, stride, dy); - } - if(dy == 0.0f) - { - return delta_linear_c1_x(first_pixel_ptr + static_cast(xi) + static_cast(yi) * stride, dx); - } - return delta_bilinear_c1(first_pixel_ptr + static_cast(xi) + static_cast(yi) * stride, stride, dx, dy); -} - -/** Return the pixel at (x,y) using area interpolation by clamping when out of borders. 
The image must be single channel U8 - * - * @note The interpolation area depends on the width and height ration of the input and output images - * @note Currently average of the contributing pixels is calculated - * - * @param[in] first_pixel_ptr Pointer to the first pixel of a single channel U8 image. - * @param[in] stride Stride in bytes of the image - * @param[in] width Width of the image - * @param[in] height Height of the image - * @param[in] wr Width ratio among the input image width and output image width. - * @param[in] hr Height ratio among the input image height and output image height. - * @param[in] x X position of the wanted pixel - * @param[in] y Y position of the wanted pixel - * - * @return The pixel at (x, y) using area interpolation. - */ -inline uint8_t pixel_area_c1u8_clamp(const uint8_t *first_pixel_ptr, size_t stride, size_t width, size_t height, float wr, float hr, int x, int y); - /** Iterator updated by @ref execute_window_loop for each window element */ class Iterator { @@ -421,179 +114,6 @@ private: template inline void execute_window_loop(const Window &w, L &&lambda_function, Ts &&... iterators); -/** Update window and padding size for each of the access patterns. - * - * First the window size is reduced based on all access patterns that are not - * allowed to modify the padding of the underlying tensor. Then the padding of - * the remaining tensors is increased to match the window. - * - * @param[in] win Window that is used by the kernel. - * @param[in] patterns Access patterns used to calculate the final window and padding. - * - * @return True if the window has been changed. Changes to the padding do not - * influence the returned value. - */ -template -bool update_window_and_padding(Window &win, Ts &&... 
patterns) -{ - bool window_changed = false; - - utility::for_each([&](const IAccessWindow & w) - { - window_changed |= w.update_window_if_needed(win); - }, - patterns...); - - bool padding_changed = false; - - utility::for_each([&](IAccessWindow & w) - { - padding_changed |= w.update_padding_if_needed(win); - }, - patterns...); - - return window_changed; -} - -/** Calculate the maximum window for a given tensor shape and border setting - * - * @param[in] valid_region Valid region object defining the shape of the tensor space for which the window is created. - * @param[in] steps (Optional) Number of elements processed for each step. - * @param[in] skip_border (Optional) If true exclude the border region from the window. - * @param[in] border_size (Optional) Border size. - * - * @return The maximum window the kernel can be executed on. - */ -Window calculate_max_window(const ValidRegion &valid_region, const Steps &steps = Steps(), bool skip_border = false, BorderSize border_size = BorderSize()); - -/** Calculate the maximum window for a given tensor shape and border setting - * - * @param[in] info Tensor info object defining the shape of the object for which the window is created. - * @param[in] steps (Optional) Number of elements processed for each step. - * @param[in] skip_border (Optional) If true exclude the border region from the window. - * @param[in] border_size (Optional) Border size. - * - * @return The maximum window the kernel can be executed on. - */ -inline Window calculate_max_window(const ITensorInfo &info, const Steps &steps = Steps(), bool skip_border = false, BorderSize border_size = BorderSize()) -{ - return calculate_max_window(info.valid_region(), steps, skip_border, border_size); -} - -/** Calculate the maximum window used by a horizontal kernel for a given tensor shape and border setting - * - * @param[in] valid_region Valid region object defining the shape of the tensor space for which the window is created. 
- * @param[in] steps (Optional) Number of elements processed for each step. - * @param[in] skip_border (Optional) If true exclude the border region from the window. - * @param[in] border_size (Optional) Border size. The border region will be excluded from the window. - * - * @return The maximum window the kernel can be executed on. - */ -Window calculate_max_window_horizontal(const ValidRegion &valid_region, const Steps &steps = Steps(), bool skip_border = false, BorderSize border_size = BorderSize()); - -/** Calculate the maximum window used by a horizontal kernel for a given tensor shape and border setting - * - * @param[in] info Tensor info object defining the shape of the object for which the window is created. - * @param[in] steps (Optional) Number of elements processed for each step. - * @param[in] skip_border (Optional) If true exclude the border region from the window. - * @param[in] border_size (Optional) Border size. - * - * @return The maximum window the kernel can be executed on. - */ -inline Window calculate_max_window_horizontal(const ITensorInfo &info, const Steps &steps = Steps(), bool skip_border = false, BorderSize border_size = BorderSize()) -{ - return calculate_max_window_horizontal(info.valid_region(), steps, skip_border, border_size); -} - -/** Calculate the maximum window for a given tensor shape and border setting. The window will also includes the border. - * - * @param[in] valid_region Valid region object defining the shape of the tensor space for which the window is created. - * @param[in] steps (Optional) Number of elements processed for each step. - * @param[in] border_size (Optional) Border size. The border region will be included in the window. - * - * @return The maximum window the kernel can be executed on. - */ -Window calculate_max_enlarged_window(const ValidRegion &valid_region, const Steps &steps = Steps(), BorderSize border_size = BorderSize()); - -/** Calculate the maximum window for a given tensor shape and border setting. 
The window will also includes the border. - * - * @param[in] info Tensor info object defining the shape of the object for which the window is created. - * @param[in] steps (Optional) Number of elements processed for each step. - * @param[in] border_size (Optional) Border size. The border region will be included in the window. - * - * @return The maximum window the kernel can be executed on. - */ -inline Window calculate_max_enlarged_window(const ITensorInfo &info, const Steps &steps = Steps(), BorderSize border_size = BorderSize()) -{ - return calculate_max_enlarged_window(info.valid_region(), steps, border_size); -} - -/** Intersect multiple valid regions. - * - * @param[in] regions Valid regions. - * - * @return Intersection of all regions. - */ -template -ValidRegion intersect_valid_regions(const Ts &... regions) -{ - auto intersect = [](const ValidRegion & r1, const ValidRegion & r2) -> ValidRegion - { - ValidRegion region; - - for(size_t d = 0; d < std::min(r1.anchor.num_dimensions(), r2.anchor.num_dimensions()); ++d) - { - region.anchor.set(d, std::max(r1.anchor[d], r2.anchor[d])); - } - - for(size_t d = 0; d < std::min(r1.shape.num_dimensions(), r2.shape.num_dimensions()); ++d) - { - region.shape.set(d, std::min(r1.shape[d], r2.shape[d])); - } - - return region; - }; - - return utility::foldl(intersect, regions...); -} - -/** Create a strides object based on the provided strides and the tensor dimensions. - * - * @param[in] info Tensor info object providing the shape of the tensor for unspecified strides. - * @param[in] stride_x Stride to be used in X dimension (in bytes). - * @param[in] fixed_strides Strides to be used in higher dimensions starting at Y (in bytes). - * - * @return Strides object based on the specified strides. Missing strides are - * calculated based on the tensor shape and the strides of lower dimensions. - */ -template -inline Strides compute_strides(const ITensorInfo &info, T stride_x, Ts &&... 
fixed_strides) -{ - const TensorShape &shape = info.tensor_shape(); - - // Create strides object - Strides strides(stride_x, fixed_strides...); - - for(size_t i = 1 + sizeof...(Ts); i < info.num_dimensions(); ++i) - { - strides.set(i, shape[i - 1] * strides[i - 1]); - } - - return strides; -} - -/** Create a strides object based on the tensor dimensions. - * - * @param[in] info Tensor info object used to compute the strides. - * - * @return Strides object based on element size and tensor shape. - */ -template -inline Strides compute_strides(const ITensorInfo &info) -{ - return compute_strides(info, info.element_size()); -} - /** Permutes given Dimensions according to a permutation vector * * @warning Validity of permutation is not checked @@ -629,79 +149,6 @@ inline void permute(TensorShape &shape, const PermutationVector &perm) } } -/** Auto initialize the tensor info (shape, number of channels and data type) if the current assignment is empty. - * - * @param[in,out] info Tensor info used to check and assign. - * @param[in] shape New shape. - * @param[in] num_channels New number of channels. - * @param[in] data_type New data type - * @param[in] quantization_info (Optional) New quantization info - * - * @return True if the tensor info has been initialized - */ -bool auto_init_if_empty(ITensorInfo &info, - const TensorShape &shape, - int num_channels, DataType data_type, - QuantizationInfo quantization_info = QuantizationInfo()); - -/** Auto initialize the tensor info using another tensor info. - * - * @param info_sink Tensor info used to check and assign - * @param info_source Tensor info used to assign - * - * @return True if the tensor info has been initialized - */ -bool auto_init_if_empty(ITensorInfo &info_sink, const ITensorInfo &info_source); - -/** Set the shape to the specified value if the current assignment is empty. - * - * @param[in,out] info Tensor info used to check and assign. - * @param[in] shape New shape. 
- * - * @return True if the shape has been changed. - */ -bool set_shape_if_empty(ITensorInfo &info, const TensorShape &shape); - -/** Set the format, data type and number of channels to the specified value if - * the current data type is unknown. - * - * @param[in,out] info Tensor info used to check and assign. - * @param[in] format New format. - * - * @return True if the format has been changed. - */ -bool set_format_if_unknown(ITensorInfo &info, Format format); - -/** Set the data type and number of channels to the specified value if - * the current data type is unknown. - * - * @param[in,out] info Tensor info used to check and assign. - * @param[in] data_type New data type. - * - * @return True if the data type has been changed. - */ -bool set_data_type_if_unknown(ITensorInfo &info, DataType data_type); - -/** Set the data layout to the specified value if - * the current data layout is unknown. - * - * @param[in,out] info Tensor info used to check and assign. - * @param[in] data_layout New data layout. - * - * @return True if the data type has been changed. - */ -bool set_data_layout_if_unknown(ITensorInfo &info, DataLayout data_layout); - -/** Set the quantization info to the specified value if - * the current quantization info is empty and the data type of asymmetric quantized type - * - * @param[in,out] info Tensor info used to check and assign. - * @param[in] quantization_info Quantization info - * - * @return True if the quantization info has been changed. - */ -bool set_quantization_info_if_empty(ITensorInfo &info, QuantizationInfo quantization_info); - /** Helper function to calculate the Valid Region for Scale. * * @param[in] src_info Input tensor info used to check. 
@@ -751,21 +198,6 @@ inline size_t get_data_layout_dimension_index(const DataLayout data_layout, cons */ inline DataLayoutDimension get_index_data_layout_dimension(const DataLayout data_layout, const size_t index); -/** Calculate the normalization dimension index for a given normalization type - * - * @param[in] layout Data layout of the input and output tensor - * @param[in] info Normalization info - * - * @return Normalization dimension index - */ -inline unsigned int get_normalization_dimension_index(DataLayout layout, const NormalizationLayerInfo &info) -{ - const unsigned int width_idx = get_data_layout_dimension_index(layout, DataLayoutDimension::WIDTH); - const unsigned int channel_idx = get_data_layout_dimension_index(layout, DataLayoutDimension::CHANNEL); - - return info.is_in_map() ? width_idx : channel_idx; -} - /** Calculate the number of output tiles required by Winograd Convolution layer. This utility function can be used by the Winograd input transform * to know the number of tiles on the x and y direction * @@ -814,49 +246,6 @@ inline Coordinates &convert_negative_axis(Coordinates &coords, int max_value) } return coords; } - -/** Given an integer value, this function returns the next power of two - * - * @param[in] x Input value - * - * @return the next power of two - */ -inline unsigned int get_next_power_two(unsigned int x) -{ - // Decrement by 1 - x--; - - // Shift right by 1 - x |= x >> 1u; - // Shift right by 2 - x |= x >> 2u; - // Shift right by 4 - x |= x >> 4u; - // Shift right by 8 - x |= x >> 8u; - // Shift right by 16 - x |= x >> 16u; - - // Increment by 1 - x++; - - return x; -} - -/** Given a softmax axis, this function returns the permutation vector required to put the axis to the front - * - * @note This function assumes a tensor rank <= 4 - * - * Axis selects the dimension on which softmax is performed. - * E.g. For input of shape 4x5x6 and axis=1, softmax will be applied to 4x6=24 vectors of size 5. 
- * Interally softmax kernels is always performed on the first dimension (front dimension), therefore permutation is - * required to put the dimension specified by @p axis to the first dimension. - * - * @param[in] axis Axis on which to perform softmax. Supported: 1, 2, 3 (0 implies no permutation needed) - * - * @return the permutation vector - */ -PermutationVector get_permutation_vector_from_softmax_axis(size_t axis); } // namespace arm_compute #include "arm_compute/core/Helpers.inl" diff --git a/arm_compute/core/Helpers.inl b/arm_compute/core/Helpers.inl index 5613e8c74e..a960876074 100644 --- a/arm_compute/core/Helpers.inl +++ b/arm_compute/core/Helpers.inl @@ -22,58 +22,12 @@ * SOFTWARE. */ #include "arm_compute/core/Error.h" -#include "arm_compute/core/Validate.h" #include #include namespace arm_compute { -inline uint8_t pixel_area_c1u8_clamp(const uint8_t *first_pixel_ptr, size_t stride, size_t width, size_t height, float wr, float hr, int x, int y) -{ - ARM_COMPUTE_ERROR_ON(first_pixel_ptr == nullptr); - - // Calculate sampling position - float in_x = (x + 0.5f) * wr - 0.5f; - float in_y = (y + 0.5f) * hr - 0.5f; - - // Get bounding box offsets - int x_from = std::floor(x * wr - 0.5f - in_x); - int y_from = std::floor(y * hr - 0.5f - in_y); - int x_to = std::ceil((x + 1) * wr - 0.5f - in_x); - int y_to = std::ceil((y + 1) * hr - 0.5f - in_y); - - // Clamp position to borders - in_x = std::max(-1.f, std::min(in_x, static_cast(width))); - in_y = std::max(-1.f, std::min(in_y, static_cast(height))); - - // Clamp bounding box offsets to borders - x_from = ((in_x + x_from) < -1) ? -1 : x_from; - y_from = ((in_y + y_from) < -1) ? -1 : y_from; - x_to = ((in_x + x_to) > width) ? (width - in_x) : x_to; - y_to = ((in_y + y_to) > height) ? 
(height - in_y) : y_to; - - // Get pixel index - const int xi = std::floor(in_x); - const int yi = std::floor(in_y); - - // Bounding box elements in each dimension - const int x_elements = (x_to - x_from + 1); - const int y_elements = (y_to - y_from + 1); - ARM_COMPUTE_ERROR_ON(x_elements == 0 || y_elements == 0); - - // Sum pixels in area - int sum = 0; - for(int j = yi + y_from, je = yi + y_to; j <= je; ++j) - { - const uint8_t *ptr = first_pixel_ptr + j * stride + xi + x_from; - sum = std::accumulate(ptr, ptr + x_elements, sum); - } - - // Return average - return sum / (x_elements * y_elements); -} - template struct IncrementIterators { @@ -199,94 +153,6 @@ inline void Iterator::reset(const size_t dimension) } } -inline bool auto_init_if_empty(ITensorInfo &info, - const TensorShape &shape, - int num_channels, - DataType data_type, - QuantizationInfo quantization_info) -{ - if(info.tensor_shape().total_size() == 0) - { - info.set_data_type(data_type); - info.set_num_channels(num_channels); - info.set_tensor_shape(shape); - info.set_quantization_info(quantization_info); - return true; - } - - return false; -} - -inline bool auto_init_if_empty(ITensorInfo &info_sink, const ITensorInfo &info_source) -{ - if(info_sink.tensor_shape().total_size() == 0) - { - info_sink.set_data_type(info_source.data_type()); - info_sink.set_num_channels(info_source.num_channels()); - info_sink.set_tensor_shape(info_source.tensor_shape()); - info_sink.set_quantization_info(info_source.quantization_info()); - info_sink.set_data_layout(info_source.data_layout()); - return true; - } - - return false; -} - -inline bool set_shape_if_empty(ITensorInfo &info, const TensorShape &shape) -{ - if(info.tensor_shape().total_size() == 0) - { - info.set_tensor_shape(shape); - return true; - } - - return false; -} - -inline bool set_format_if_unknown(ITensorInfo &info, Format format) -{ - if(info.data_type() == DataType::UNKNOWN) - { - info.set_format(format); - return true; - } - - return false; -} - 
-inline bool set_data_type_if_unknown(ITensorInfo &info, DataType data_type) -{ - if(info.data_type() == DataType::UNKNOWN) - { - info.set_data_type(data_type); - return true; - } - - return false; -} - -inline bool set_data_layout_if_unknown(ITensorInfo &info, DataLayout data_layout) -{ - if(info.data_layout() == DataLayout::UNKNOWN) - { - info.set_data_layout(data_layout); - return true; - } - - return false; -} - -inline bool set_quantization_info_if_empty(ITensorInfo &info, QuantizationInfo quantization_info) -{ - if(info.quantization_info().empty() && (is_data_type_quantized_asymmetric(info.data_type()))) - { - info.set_quantization_info(quantization_info); - return true; - } - - return false; -} - inline Coordinates index2coords(const TensorShape &shape, int index) { int num_elements = shape.total_size(); diff --git a/arm_compute/core/ITensorInfo.h b/arm_compute/core/ITensorInfo.h index c5f0949196..3eb7239460 100644 --- a/arm_compute/core/ITensorInfo.h +++ b/arm_compute/core/ITensorInfo.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -29,8 +29,8 @@ #include "arm_compute/core/TensorShape.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/Utils.h" -#include "arm_compute/core/utils/misc/ICloneable.h" #include "arm_compute/core/utils/misc/Utility.h" +#include "support/ICloneable.h" #include diff --git a/arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.h b/arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.h index 335a70fc2b..eba1737a03 100644 --- a/arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.h +++ b/arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.h @@ -26,6 +26,7 @@ #include "arm_compute/core/NEON/INEKernel.h" #include "arm_compute/core/utils/misc/Traits.h" +#include "support/Requires.h" #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC #include diff --git a/arm_compute/core/NEON/kernels/assembly/INEGEMMWrapperKernel.h b/arm_compute/core/NEON/kernels/assembly/INEGEMMWrapperKernel.h deleted file mode 100644 index 74161e330e..0000000000 --- a/arm_compute/core/NEON/kernels/assembly/INEGEMMWrapperKernel.h +++ /dev/null @@ -1,108 +0,0 @@ -/* - * Copyright (c) 2018-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_INEGEMMWRAPPERKERNEL_H -#define ARM_COMPUTE_INEGEMMWRAPPERKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** Common interface for all the arm_gemm Gemms - */ -class INEGEMMWrapperKernel : public INEKernel -{ -public: - /** Parameters defining the dimensions of the matrices being multiplied */ - struct Params - { - unsigned int M{ 0 }; /**< Rows in output matrix C (and input matrix A). */ - unsigned int N{ 0 }; /**< Columns in output matrix C (and input matrix B). */ - unsigned int K{ 0 }; /**< Columns of input matrix A (= rows of input matrix B). */ - unsigned int batches{ 0 }; /**< Number of "batched" GEMMs (unique A and C, shared B). */ - unsigned int multis{ 0 }; /**< Number of "multi" GEMMs (unique A, B and C). 
*/ - }; - - static Params extract_parameters(const ITensor *a, const ITensor *b, const ITensor *c, const GEMMInfo &gemm_info); - - /** Constructor */ - INEGEMMWrapperKernel(); - /** Prevent instances of this class from being copied */ - INEGEMMWrapperKernel(const INEGEMMWrapperKernel &) = delete; - /** Prevent instances of this class from being copied */ - INEGEMMWrapperKernel &operator=(const INEGEMMWrapperKernel &) = delete; - /** Allow instances of this class to be moved */ - INEGEMMWrapperKernel(INEGEMMWrapperKernel &&) = default; - /** Allow instances of this class to be moved */ - INEGEMMWrapperKernel &operator=(INEGEMMWrapperKernel &&) = default; - /** Initialise the kernel's input and output. - * - * @note The input and output tensor must have the same dimensions - * - * @param[in] a Input tensor (Matrix A) - * @param[in] b Input tensor (Matrix B) - * @param[out] c Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0. - * @param[in] alpha Scalar multiplier to apply to AB matrix product. - * @param[in] beta Scalar multiplier to apply to input C matrix before adding product. - * @param[in] gemm_info GEMM meta-data - */ - void configure(const ITensor *a, const ITensor *b, ITensor *c, float alpha, float beta, const GEMMInfo &gemm_info); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -protected: - /** Called as part of configure() after _a, _b, _c and _params have been set. - * - * @param[in] alpha Scalar multiplier to apply to AB matrix product. - * @param[in] beta Scalar multiplier to apply to input C matrix before adding product. - * - * @return A 3D execution window. - */ - virtual Window configure_internal(float alpha, float beta) = 0; - - /** Run the kernel from the start to the end offset in window. 
- * - * @param[in] window Window to use for the iteration - * @param[in] start_offset Where to start iterating from (In Window coordinates) - * @param[in] end_offset Where to stop iterating (In Window coordinates). - * @param[in] info Info about executing thread and CPU. - */ - virtual void run_internal(const Window &window, const Coordinates &start_offset, const Coordinates &end_offset, const ThreadInfo &info) = 0; - - const ITensor *_a; - const ITensor *_b; - ITensor *_c; - Params _params; - GEMMInfo _gemm_info; - -private: - Window _window3d; - TensorShape _window_shape; -}; - -} // namespace arm_compute - -#endif /* ARM_COMPUTE_INEGEMMRAPPERKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/assembly/NEDepthwiseConvolutionAssemblyKernelWrapper.h b/arm_compute/core/NEON/kernels/assembly/NEDepthwiseConvolutionAssemblyKernelWrapper.h deleted file mode 100644 index 7c10f85824..0000000000 --- a/arm_compute/core/NEON/kernels/assembly/NEDepthwiseConvolutionAssemblyKernelWrapper.h +++ /dev/null @@ -1,88 +0,0 @@ -/* - * Copyright (c) 2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_ASSEMBLY_DEPTHWISE_CONVOLUTION_ASSEMBLY_WRAPPER_KERNEL_H -#define ARM_COMPUTE_ASSEMBLY_DEPTHWISE_CONVOLUTION_ASSEMBLY_WRAPPER_KERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" -#include "arm_compute/core/Utils.h" -#include "arm_compute/core/Validate.h" - -#include "arm_compute/core/NEON/kernels/convolution/depthwise/depthwise.hpp" - -namespace arm_compute -{ -// Forward declarations -class ITensor; - -/** This class is a wrapper for the depthwise convolution assembly kernels. */ -class NEDepthwiseConvolutionAssemblyKernelWrapper final : public INEKernel -{ -public: - const char *name() const override - { - return "NEDepthwiseConvolutionAssemblyKernelWrapper"; - } - - /** Default constructor */ - NEDepthwiseConvolutionAssemblyKernelWrapper() - : _kernel(nullptr) - { - } - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEDepthwiseConvolutionAssemblyKernelWrapper(const NEDepthwiseConvolutionAssemblyKernelWrapper &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEDepthwiseConvolutionAssemblyKernelWrapper &operator=(const NEDepthwiseConvolutionAssemblyKernelWrapper &) = delete; - /** Default Move Constructor. */ - NEDepthwiseConvolutionAssemblyKernelWrapper(NEDepthwiseConvolutionAssemblyKernelWrapper &&) = default; - /** Default move assignment operator */ - NEDepthwiseConvolutionAssemblyKernelWrapper &operator=(NEDepthwiseConvolutionAssemblyKernelWrapper &&) = default; - - /** Initialise the kernel's input and output. - * - * @param[in] kernel Pointer to an assembly kernel implementation. 
- */ - void configure(depthwise::IDepthwiseConvolution *kernel) - { - ARM_COMPUTE_ERROR_ON_NULLPTR((reinterpret_cast(kernel))); - _kernel = kernel; - Window win; - win.set(Window::DimX, Window::Dimension(0, _kernel->get_window(), 1)); - INEKernel::configure(win); - } - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override - { - ARM_COMPUTE_ERROR_ON_NULLPTR((reinterpret_cast(_kernel))); - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - auto first = window.x().start(); - auto last = window.x().end(); - _kernel->run(first, last, info.thread_id); - } - -private: - depthwise::IDepthwiseConvolution *_kernel; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_ASSEMBLY_DEPTHWISE_CONVOLUTION_ASSEMBLY_WRAPPER_KERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/convolution/common/activation.hpp b/arm_compute/core/NEON/kernels/convolution/common/activation.hpp deleted file mode 100644 index 0c9b7c1368..0000000000 --- a/arm_compute/core/NEON/kernels/convolution/common/activation.hpp +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Copyright (c) 2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#pragma once - -namespace neon_convolution_kernels -{ - -enum class ActivationFunction -{ - None, - ReLU, - ReLU6, -}; - -} diff --git a/arm_compute/core/NEON/kernels/convolution/common/alloc.hpp b/arm_compute/core/NEON/kernels/convolution/common/alloc.hpp deleted file mode 100644 index 7be3cdaaf5..0000000000 --- a/arm_compute/core/NEON/kernels/convolution/common/alloc.hpp +++ /dev/null @@ -1,31 +0,0 @@ -/* - * Copyright (c) 2017 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#pragma once - -#ifdef ALLOC_ALIGN -#define ALLOCATE(x) aligned_alloc(ALLOC_ALIGN, x) -#else -#define ALLOCATE(x) malloc(x) -#endif diff --git a/arm_compute/core/NEON/kernels/convolution/common/arm.hpp b/arm_compute/core/NEON/kernels/convolution/common/arm.hpp deleted file mode 100644 index b19bf98252..0000000000 --- a/arm_compute/core/NEON/kernels/convolution/common/arm.hpp +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright (c) 2017 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -/** Sets the macro __arm_any__ if compiling for Aarch32 or Aarch64. - * Includes `arm_neon.h` if compiling for either architecture. 
- */ - -#ifdef __arm__ -#define __arm_any__ -#endif // __arm__ - -#ifdef __aarch64__ -#define __arm_any__ -#endif // __aarch64__ - -#ifdef __arm_any__ -#include -#endif // __arm_any__ diff --git a/arm_compute/core/NEON/kernels/convolution/common/convolution.hpp b/arm_compute/core/NEON/kernels/convolution/common/convolution.hpp deleted file mode 100644 index b1413527c3..0000000000 --- a/arm_compute/core/NEON/kernels/convolution/common/convolution.hpp +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Copyright (c) 2017 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#pragma once - -enum PaddingType { - PADDING_SAME, PADDING_VALID -}; diff --git a/arm_compute/core/NEON/kernels/convolution/common/padding.hpp b/arm_compute/core/NEON/kernels/convolution/common/padding.hpp deleted file mode 100644 index b6f95872c0..0000000000 --- a/arm_compute/core/NEON/kernels/convolution/common/padding.hpp +++ /dev/null @@ -1,91 +0,0 @@ -/* - * Copyright (c) 2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#pragma once - -#include - -// Utilities for copying tensor tiles and adding/removing padding. -namespace padding -{ - -/* Copy a tile and apply padding to the output copy. 
- */ -template -void copy_and_pad_tile( - unsigned int tile_rows, - unsigned int tile_cols, - unsigned int n_channels, - const T *inptr, - unsigned int in_row_stride, - unsigned int in_col_stride, - T* outptr, - unsigned int out_row_stride, - unsigned int out_col_stride, - unsigned int pad_top, - unsigned int pad_left, - unsigned int pad_bottom, - unsigned int pad_right, - T pad_value=static_cast(0) -); - -/** Copy a tile and remove padding elements in the output. - */ -template -class CopyCropped -{ - public: - static void execute( - size_t size, // Amount of data to copy - const void *inptr, - size_t in_row_stride, - size_t in_col_stride, - void *outptr, - size_t out_row_stride, - size_t out_col_stride, - unsigned int pad_top, - unsigned int pad_left, - unsigned int pad_bottom, - unsigned int pad_right - ); -}; - -template -void crop_and_copy_tile( - unsigned int tile_rows, - unsigned int tile_cols, - unsigned int n_channels, - const T *inptr, - unsigned int in_row_stride, - unsigned int in_col_stride, - T *outptr, - unsigned int out_row_stride, - unsigned int out_col_stride, - unsigned int crop_top, - unsigned int crop_left, - unsigned int crop_bottom, - unsigned int crop_right -); - -} diff --git a/arm_compute/core/NEON/kernels/convolution/common/perf.h b/arm_compute/core/NEON/kernels/convolution/common/perf.h deleted file mode 100644 index fbae4dcdfa..0000000000 --- a/arm_compute/core/NEON/kernels/convolution/common/perf.h +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Copyright (c) 2018 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#pragma once - -/* Prototypes from perf.c */ - -void start_counter(int fd); -long long get_counter(int fd); -long long stop_counter(int fd); -int open_instruction_counter(void); -int open_cycle_counter(void); diff --git a/arm_compute/core/NEON/kernels/convolution/common/qasymm8.hpp b/arm_compute/core/NEON/kernels/convolution/common/qasymm8.hpp deleted file mode 100644 index 88ef7327c0..0000000000 --- a/arm_compute/core/NEON/kernels/convolution/common/qasymm8.hpp +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Copyright (c) 2019 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#pragma once -#include - -namespace qasymm8 -{ - -struct QAsymm8Params -{ - uint8_t quantize(float value) const; - float dequantize(uint8_t value) const; - - uint8_t offset; - float scale; -}; - -struct QAsymm8RescaleParams -{ - static QAsymm8RescaleParams make_rescale_params( - const QAsymm8Params& weight_quant, - const QAsymm8Params& input_quant, - const QAsymm8Params& output_quant - ); - - QAsymm8RescaleParams(int32_t shift, int32_t multiplier, float rescale); - - const int32_t shift, multiplier; - const float rescale; -}; - -} diff --git a/arm_compute/core/NEON/kernels/convolution/common/qsymm8.hpp b/arm_compute/core/NEON/kernels/convolution/common/qsymm8.hpp deleted file mode 100644 index 726a02ccfd..0000000000 --- a/arm_compute/core/NEON/kernels/convolution/common/qsymm8.hpp +++ /dev/null @@ -1,76 +0,0 @@ -/* - * Copyright (c) 2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#pragma once -#include -#include -#include "qasymm8.hpp" - - -namespace qsymm8 { - -struct QSymm8Params { - int8_t quantize(float value) const; - float dequantize(int8_t value) const; - - float scale; -}; - -struct QSymm8RescaleParams { - static QSymm8RescaleParams - make_rescale_params(const QSymm8Params &weight_quant, - const QSymm8Params &input_quant, - const QSymm8Params &output_quant); - - QSymm8RescaleParams(int32_t shift, int32_t multiplier, float rescale); - - const int32_t shift, multiplier; - const float rescale; -}; - -struct QSymm8PerChannelParams { - int8_t quantize(float value, float scale) const; - float dequantize(int8_t value, float scale) const; - - std::vector scales; -}; - -struct QSymm8PerChannelRescaleParams { - static QSymm8PerChannelRescaleParams - make_rescale_params(const QSymm8PerChannelParams &weight_quant, - const QSymm8PerChannelParams &input_quant, - const QSymm8PerChannelParams &output_quant); - - static QSymm8PerChannelRescaleParams - make_rescale_params(const QSymm8PerChannelParams &weight_quant, - const qasymm8::QAsymm8Params &input_quant, - const qasymm8::QAsymm8Params &output_quant); - - QSymm8PerChannelRescaleParams(std::vector& shift, std::vector& multiplier, std::vector& rescale); - - std::vector shifts, multipliers; - std::vector rescales; -}; - -} // namespace qsymm8 diff --git a/arm_compute/core/NEON/kernels/convolution/common/shims.hpp b/arm_compute/core/NEON/kernels/convolution/common/shims.hpp deleted file mode 100644 index 310bd47b82..0000000000 --- a/arm_compute/core/NEON/kernels/convolution/common/shims.hpp +++ /dev/null @@ -1,749 +0,0 @@ -/* - * Copyright (c) 2017 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#pragma once -#ifndef DOXYGEN_SKIP_THIS -#include -#endif /* DOXYGEN_SKIP_THIS */ -#include "arm.hpp" - -namespace reorder { -/** Re-order a tensor from NCHW format to NHWC. - * - * @note The stride parameters are optional and are provided to allow padding in either input or output tensors. - * - * @param[in] in Input tensor in NCHW format. - * @param[out] out Output tensor, to be written in NHWC format. - * @param n_batches Number of batches in the tensors. - * @param n_channels Number of channels in the tensors - * @param n_rows Height of the tensor - * @param n_cols Width of the tensor - * @param in_batch_stride Stride over batches in the input tensor. If `0` defaults to `n_channels * in_channel_stride`. - * @param in_channel_stride Stride over channels in the input tensor. If `0` defaults to `n_rows * in_row_stride`. 
- * @param in_row_stride Stride over rows in the input tensor. If `0` defaults to `n_cols`. - * @param out_batch_stride Stride over batches in the output tensor. If `0` defaults to `n_rows * out_row_stride`. - * @param out_row_stride Stride over rows in the output tensor. If `0` defaults to `n_cols * out_col_stride`. - * @param out_col_stride Stride over columns in the output tensor. If `0` defaults to `n_channels`. - */ -template -inline void nchw_to_nhwc( - const T* const in, - T* const out, - const int n_batches, - const int n_channels, - const int n_rows, - const int n_cols, - int in_batch_stride=0, - int in_channel_stride=0, - int in_row_stride=0, - int out_batch_stride=0, - int out_row_stride=0, - int out_col_stride=0 -); - -/** Re-order a tensor from NHWC format to NCHW. - * - * @note The stride parameters are optional and are provided to allow padding in either input or output tensors. - * - * @param[in] in Input tensor in NHWC format. - * @param[out] out Output tensor, to be written in NCHW format. - * @param n_batches Number of batches in the tensors. - * @param n_rows Height of the tensor - * @param n_cols Width of the tensor - * @param n_channels Number of channels in the tensors - * @param in_batch_stride Stride over batches in the input tensor. If `0` defaults to `n_rows * in_row_stride`. - * @param in_row_stride Stride over rows in the input tensor. If `0` defaults to `n_cols * in_col_stride`. - * @param in_col_stride Stride over columns in the input tensor. If `0` defaults to `n_channels`. - * @param out_batch_stride Stride over batches in the output tensor. If `0` defaults to `n_channels * out_channel_stride`. - * @param out_channel_stride Stride over channels in the output tensor. If `0` defaults to `n_rows * out_row_stride`. - * @param out_row_stride Stride over rows in the output tensor. If `0` defaults to `n_cols`. 
- */ -template -inline void nhwc_to_nchw( - const T* const in, // Input data in NHWC form - T* const out, // Output data in NCHW form - const int n_batches, - const int n_rows, - const int n_cols, - const int n_channels, - int in_batch_stride=0, - int in_row_stride=0, - int in_col_stride=0, - int out_batch_stride=0, - int out_channel_stride=0, - int out_row_stride=0 -); - -/** Re-order a weight tensor from [Output feature map x Input feature map x - * Height x Width] format to [Height x Width x Input feature map x Output - * feature map] format. - */ -template -inline void ofm_ifm_h_w_to_h_w_ifm_ofm( - const T* const in, // Input in [Output x Input x Height x Width] form - T* const out, // Output in [Height x Width x Input x Output] form - const int n_output_feature_maps, - const int n_input_feature_maps, - const int n_rows, - const int n_cols, - int in_output_feature_map_stride=0, - int in_input_feature_map_stride=0, - int in_row_stride=0, - int out_row_stride=0, - int out_col_stride=0, - int out_input_feature_map_stride=0 -); - -/** Re-order a weight tensor from [Height x Width x Input feature map x Output - * feature map] format to [Output feature map x Input feature map x Height x - * Width] format. 
- */ -template -inline void h_w_ifm_ofm_to_ofm_ifm_h_w( - const T* const in, // Input in [Height x Width x Input x Output] form - T* const out, // Output in [Output x Input x Height x Width] form - const int n_rows, - const int n_cols, - const int n_input_feature_maps, - const int n_output_feature_maps, - int in_row_stride=0, - int in_col_stride=0, - int in_input_feature_map_stride=0, - int out_output_feature_map_stride=0, - int out_input_feature_map_stride=0, - int out_row_stride=0 -); - -/*****************************************************************************/ -/* 32-bit implementation : NCHW -> NHWC - */ -template <> -inline void nchw_to_nhwc( - const int32_t* const in, - int32_t* const out, - const int n_batches, - const int n_channels, - const int n_rows, - const int n_cols, - int in_batch_stride, - int in_channel_stride, - int in_row_stride, - int out_batch_stride, - int out_row_stride, - int out_col_stride -) -{ - typedef int32_t T; - - // Fill in the stride values - in_row_stride = (in_row_stride) ? in_row_stride : n_cols; - in_channel_stride = (in_channel_stride) ? in_channel_stride - : n_rows * in_row_stride; - in_batch_stride = (in_batch_stride) ? in_batch_stride - : n_channels * in_channel_stride; - - out_col_stride = (out_col_stride) ? out_col_stride : n_channels; - out_row_stride = (out_row_stride) ? out_row_stride : n_cols * out_col_stride; - out_batch_stride = (out_batch_stride) ? 
out_batch_stride - : n_rows * out_row_stride; - - // Perform the re-ordering - for (int n = 0; n < n_batches; n++) - { - const T* const in_batch = in + n*in_batch_stride; - T* const out_batch = out + n*out_batch_stride; - - for (int i = 0; i < n_rows; i++) - { - const T* const in_row = in_batch + i*in_row_stride; - T* const out_row = out_batch + i*out_row_stride; - - int j = 0, j_remaining = n_cols; -#ifdef __arm_any__ - for (; j_remaining >= 4; j += 4, j_remaining -= 4) - { - int c = 0, c_remaining = n_channels; - for (; c_remaining >= 4; c += 4, c_remaining -= 4) - { - // Read 4 channels worth of 4 columns, then zip to produce 4 columns - // worth of 4 channels. - int32x4_t channel_pixels[4]; - channel_pixels[0] = vld1q_s32(in_row + (c + 0)*in_channel_stride + j); - channel_pixels[1] = vld1q_s32(in_row + (c + 1)*in_channel_stride + j); - channel_pixels[2] = vld1q_s32(in_row + (c + 2)*in_channel_stride + j); - channel_pixels[3] = vld1q_s32(in_row + (c + 3)*in_channel_stride + j); - - const auto zip1 = vzipq_s32(channel_pixels[0], channel_pixels[2]); - const auto zip2 = vzipq_s32(channel_pixels[1], channel_pixels[3]); - const auto out_0 = vzipq_s32(zip1.val[0], zip2.val[0]); - const auto out_1 = vzipq_s32(zip1.val[1], zip2.val[1]); - - vst1q_s32(out_row + (j + 0)*out_col_stride + c, out_0.val[0]); - vst1q_s32(out_row + (j + 1)*out_col_stride + c, out_0.val[1]); - vst1q_s32(out_row + (j + 2)*out_col_stride + c, out_1.val[0]); - vst1q_s32(out_row + (j + 3)*out_col_stride + c, out_1.val[1]); - } - for (; c_remaining; c++, c_remaining--) - { - for (int _j = 0; _j < 4; _j++) - { - const T* const in_col = in_row + j + _j; - T* const out_col = out_row + (j + _j)*out_col_stride; - const T* const in_channel = in_col + c*in_channel_stride; - out_col[c] = *(in_channel); - } - } - } - for (; j_remaining >= 2; j += 2, j_remaining -= 2) - { - int c = 0, c_remaining = n_channels; - for (; c_remaining >= 2; c += 2, c_remaining -= 2) - { - // Read 2 channels worth of 2 columns, 
then zip to produce 2 columns - // worth of 2 channels. - int32x2_t channel_pixels[2]; - channel_pixels[0] = vld1_s32(in_row + (c + 0)*in_channel_stride + j); - channel_pixels[1] = vld1_s32(in_row + (c + 1)*in_channel_stride + j); - - const auto output = vzip_s32(channel_pixels[0], channel_pixels[1]); - - vst1_s32(out_row + (j + 0)*out_col_stride + c, output.val[0]); - vst1_s32(out_row + (j + 1)*out_col_stride + c, output.val[1]); - } - for (; c_remaining; c++, c_remaining--) - { - for (int _j = 0; _j < 2; _j++) - { - const T* const in_col = in_row + j + _j; - T* const out_col = out_row + (j + _j)*out_col_stride; - const T* const in_channel = in_col + c*in_channel_stride; - out_col[c] = *(in_channel); - } - } - } -#endif // __arm_any__ - for (; j_remaining; j++, j_remaining--) - { - const T* const in_col = in_row + j; - T* const out_col = out_row + j*out_col_stride; - - for (int c = 0; c < n_channels; c++) - { - const T* const in_channel = in_col + c*in_channel_stride; - out_col[c] = *(in_channel); - } - } - } - } -} - -template <> -inline void nchw_to_nhwc( - const uint32_t* const in, - uint32_t* const out, - const int n_batches, - const int n_channels, - const int n_rows, - const int n_cols, - int in_batch_stride, - int in_channel_stride, - int in_row_stride, - int out_batch_stride, - int out_row_stride, - int out_col_stride -) -{ - nchw_to_nhwc( - reinterpret_cast(in), - reinterpret_cast(out), - n_batches, n_channels, n_rows, n_cols, - in_batch_stride, in_channel_stride, in_row_stride, - out_batch_stride, out_row_stride, out_col_stride - ); -} - -template <> -inline void nchw_to_nhwc( - const float* const in, - float* const out, - const int n_batches, - const int n_channels, - const int n_rows, - const int n_cols, - int in_batch_stride, - int in_channel_stride, - int in_row_stride, - int out_batch_stride, - int out_row_stride, - int out_col_stride -) -{ - nchw_to_nhwc( - reinterpret_cast(in), - reinterpret_cast(out), - n_batches, n_channels, n_rows, n_cols, - 
in_batch_stride, in_channel_stride, in_row_stride, - out_batch_stride, out_row_stride, out_col_stride - ); -} - -/*****************************************************************************/ -/* Generic implementation : NCHW -> NHWC - */ -template -inline void nchw_to_nhwc( - const T* const in, - T* const out, - const int n_batches, - const int n_channels, - const int n_rows, - const int n_cols, - int in_batch_stride, - int in_channel_stride, - int in_row_stride, - int out_batch_stride, - int out_row_stride, - int out_col_stride -) -{ - // Fill in the stride values - in_row_stride = (in_row_stride) ? in_row_stride : n_cols; - in_channel_stride = (in_channel_stride) ? in_channel_stride - : n_rows * in_row_stride; - in_batch_stride = (in_batch_stride) ? in_batch_stride - : n_channels * in_channel_stride; - - out_col_stride = (out_col_stride) ? out_col_stride : n_channels; - out_row_stride = (out_row_stride) ? out_row_stride : n_cols * out_col_stride; - out_batch_stride = (out_batch_stride) ? 
out_batch_stride - : n_rows * out_row_stride; - - // Perform the re-ordering - for (int n = 0; n < n_batches; n++) - { - const T* const in_batch = in + n*in_batch_stride; - T* const out_batch = out + n*out_batch_stride; - - for (int i = 0; i < n_rows; i++) - { - const T* const in_row = in_batch + i*in_row_stride; - T* const out_row = out_batch + i*out_row_stride; - - for (int j = 0; j < n_cols; j++) - { - const T* const in_col = in_row + j; - T* const out_col = out_row + j*out_col_stride; - - for (int c = 0; c < n_channels; c++) - { - const T* const in_channel = in_col + c*in_channel_stride; - out_col[c] = *(in_channel); - } - } - } - } -} - -/*****************************************************************************/ -/* 32-bit implementation : NHWC -> NCHW - */ -template <> -inline void nhwc_to_nchw( - const int32_t* const in, // Input data in NHWC form - int32_t* const out, // Output data in NCHW form - const int n_batches, - const int n_rows, - const int n_cols, - const int n_channels, - int in_batch_stride, - int in_row_stride, - int in_col_stride, - int out_batch_stride, - int out_channel_stride, - int out_row_stride -) -{ - typedef int32_t T; - - // Fill in stride values - in_col_stride = (in_col_stride) ? in_col_stride : n_channels; - in_row_stride = (in_row_stride) ? in_row_stride : n_cols * in_col_stride; - in_batch_stride = (in_batch_stride) ? in_batch_stride - : n_rows * in_row_stride; - - out_row_stride = (out_row_stride) ? out_row_stride : n_cols; - out_channel_stride = (out_channel_stride) ? out_channel_stride - : n_rows * out_row_stride; - out_batch_stride = (out_batch_stride) ? 
out_batch_stride - : n_channels * out_channel_stride; - - // Perform the re-ordering - // For every batch - for (int n = 0; n < n_batches; n++) - { - const T* const in_batch = in + n*in_batch_stride; - T* const out_batch = out + n*out_batch_stride; - - // For every row - for (int i = 0; i < n_rows; i++) - { - const T* const in_i = in_batch + i*in_row_stride; - T* const out_i = out_batch + i*out_row_stride; - - // For every column, beginning with chunks of 4 - int j = 0, j_remaining = n_cols; -#ifdef __arm_any__ - for (; j_remaining >= 4; j += 4, j_remaining -=4) - { - // For every channel, beginning with chunks of 4 - int c = 0, c_remaining = n_channels; - for (; c_remaining >= 4; c += 4, c_remaining -= 4) - { - // Read 4 columns worth of 4 channels then zip to produce 4 channels - // worth of 4 columns. - int32x4_t pixel_channels[4]; - pixel_channels[0] = vld1q_s32(in_i + (j + 0)*in_col_stride + c); - pixel_channels[1] = vld1q_s32(in_i + (j + 1)*in_col_stride + c); - pixel_channels[2] = vld1q_s32(in_i + (j + 2)*in_col_stride + c); - pixel_channels[3] = vld1q_s32(in_i + (j + 3)*in_col_stride + c); - - const auto zip1 = vzipq_s32(pixel_channels[0], pixel_channels[2]); - const auto zip2 = vzipq_s32(pixel_channels[1], pixel_channels[3]); - const auto out_0 = vzipq_s32(zip1.val[0], zip2.val[0]); - const auto out_1 = vzipq_s32(zip1.val[1], zip2.val[1]); - - vst1q_s32(out_i + j + (c + 0)*out_channel_stride, out_0.val[0]); - vst1q_s32(out_i + j + (c + 1)*out_channel_stride, out_0.val[1]); - vst1q_s32(out_i + j + (c + 2)*out_channel_stride, out_1.val[0]); - vst1q_s32(out_i + j + (c + 3)*out_channel_stride, out_1.val[1]); - } - for (; c_remaining; c++, c_remaining--) - { - for (int _j = 0; _j < 4; _j++) - { - const T* const in_j = in_i + (j + _j)*in_col_stride; - T* const out_j = out_i + (j + _j); - - const T* const in_channel = in_j + c; - T* const out_channel = out_j + c*out_channel_stride; - *(out_channel) = *(in_channel); - } - } - } - for (; j_remaining >= 2; j += 2, 
j_remaining -=2) - { - int c = 0, c_remaining = n_channels; - for (; c_remaining >= 2; c += 2, c_remaining -= 2) - { - // Read 2 columns worth of 2 channels then zip to produce 2 channels - // worth of 2 columns. - int32x2_t pixel_channels[2]; - pixel_channels[0] = vld1_s32(in_i + (j + 0)*in_col_stride + c); - pixel_channels[1] = vld1_s32(in_i + (j + 1)*in_col_stride + c); - - const auto output = vzip_s32(pixel_channels[0], pixel_channels[1]); - - vst1_s32(out_i + j + (c + 0)*out_channel_stride, output.val[0]); - vst1_s32(out_i + j + (c + 1)*out_channel_stride, output.val[1]); - } - for (; c_remaining; c++, c_remaining--) - { - for (int _j = 0; _j < 2; _j++) - { - const T* const in_j = in_i + (j + _j)*in_col_stride; - T* const out_j = out_i + (j + _j); - - const T* const in_channel = in_j + c; - T* const out_channel = out_j + c*out_channel_stride; - *(out_channel) = *(in_channel); - } - } - } -#endif // __arm_any__ - for (; j_remaining; j++, j_remaining--) - { - const T* const in_j = in_i + j*in_col_stride; - T* const out_j = out_i + j; - - // For every channel - for (int c = 0; c < n_channels; c++) - { - const T* const in_channel = in_j + c; - T* const out_channel = out_j + c*out_channel_stride; - *(out_channel) = *(in_channel); - } - } - } - } -} - -template <> -inline void nhwc_to_nchw( - const uint32_t* const in, // Input data in NHWC form - uint32_t* const out, // Output data in NCHW form - const int n_batches, - const int n_rows, - const int n_cols, - const int n_channels, - int in_batch_stride, - int in_row_stride, - int in_col_stride, - int out_batch_stride, - int out_channel_stride, - int out_row_stride -) -{ - // Redirect to generic 32-bit implementation - nhwc_to_nchw( - reinterpret_cast(in), - reinterpret_cast(out), - n_batches, n_rows, n_cols, n_channels, - in_batch_stride, in_row_stride, in_col_stride, - out_batch_stride, out_channel_stride, out_row_stride - ); -} - -template <> -inline void nhwc_to_nchw( - const float* const in, // Input data in NHWC 
form - float* const out, // Output data in NCHW form - const int n_batches, - const int n_rows, - const int n_cols, - const int n_channels, - int in_batch_stride, - int in_row_stride, - int in_col_stride, - int out_batch_stride, - int out_channel_stride, - int out_row_stride -) -{ - // Redirect to generic 32-bit implementation - nhwc_to_nchw( - reinterpret_cast(in), - reinterpret_cast(out), - n_batches, n_rows, n_cols, n_channels, - in_batch_stride, in_row_stride, in_col_stride, - out_batch_stride, out_channel_stride, out_row_stride - ); -} - -/*****************************************************************************/ -/* Generic implementation : NHWC -> NCHW - */ -template -inline void nhwc_to_nchw( - const T* const in, // Input data in NHWC form - T* const out, // Output data in NCHW form - const int n_batches, - const int n_rows, - const int n_cols, - const int n_channels, - int in_batch_stride, - int in_row_stride, - int in_col_stride, - int out_batch_stride, - int out_channel_stride, - int out_row_stride -) -{ - // Fill in stride values - in_col_stride = (in_col_stride) ? in_col_stride : n_channels; - in_row_stride = (in_row_stride) ? in_row_stride : n_cols * in_col_stride; - in_batch_stride = (in_batch_stride) ? in_batch_stride - : n_rows * in_row_stride; - - out_row_stride = (out_row_stride) ? out_row_stride : n_cols; - out_channel_stride = (out_channel_stride) ? out_channel_stride - : n_rows * out_row_stride; - out_batch_stride = (out_batch_stride) ? 
out_batch_stride - : n_channels * out_channel_stride; - - // Perform the re-ordering - // For every batch - for (int n = 0; n < n_batches; n++) - { - const T* const in_batch = in + n*in_batch_stride; - T* const out_batch = out + n*out_batch_stride; - - // For every row - for (int i = 0; i < n_rows; i++) - { - const T* const in_i = in_batch + i*in_row_stride; - T* const out_i = out_batch + i*out_row_stride; - - // For every column - for (int j = 0; j < n_cols; j++) - { - const T* const in_j = in_i + j*in_col_stride; - T* const out_j = out_i + j; - - // For every channel - for (int c = 0; c < n_channels; c++) - { - const T* const in_channel = in_j + c; - T* const out_channel = out_j + c*out_channel_stride; - *(out_channel) = *(in_channel); - } - } - } - } -} - -/*****************************************************************************/ -/* Generic weight re-order implementation. - */ -template -inline void ofm_ifm_h_w_to_h_w_ifm_ofm( - const T* const in, // Input in [Output x Input x Height x Width] form - T* const out, // Output in [Height x Width x Input x Output] form - const int n_output_feature_maps, - const int n_input_feature_maps, - const int n_rows, - const int n_cols, - int in_output_feature_map_stride, - int in_input_feature_map_stride, - int in_row_stride, - int out_row_stride, - int out_col_stride, - int out_input_feature_map_stride -) -{ - // Fill in stride values - in_row_stride = (in_row_stride) - ? in_row_stride - : n_cols; - in_input_feature_map_stride = (in_input_feature_map_stride) - ? in_input_feature_map_stride - : n_rows * in_row_stride; - in_output_feature_map_stride = (in_output_feature_map_stride) - ? in_output_feature_map_stride - : n_input_feature_maps * in_input_feature_map_stride; - - out_input_feature_map_stride = (out_input_feature_map_stride) - ? out_input_feature_map_stride - : n_output_feature_maps; - out_col_stride = (out_col_stride) - ? 
out_col_stride - : n_input_feature_maps * out_input_feature_map_stride; - out_row_stride = (out_row_stride) - ? out_row_stride - : n_cols * out_col_stride; - - // Perform the re-ordering - for (int i = 0; i < n_rows; i++) - { - const T* const in_row = in + i * in_row_stride; - T* out_row = out + i * out_row_stride; - - for (int j = 0; j < n_cols; j++) - { - const T* const in_col = in_row + j; - T* const out_col = out_row + j * out_col_stride; - - for (int ifm = 0; ifm < n_input_feature_maps; ifm++) - { - const T* const in_ifm = in_col + ifm * in_input_feature_map_stride; - T* const out_ifm = out_col + ifm * out_input_feature_map_stride; - - for (int ofm = 0; ofm < n_output_feature_maps; ofm++) - { - const T* const in_ofm = in_ifm + ofm * in_output_feature_map_stride; - T* const out_ofm = out_ifm + ofm; - *(out_ofm) = *(in_ofm); - } - } - } - } -} - -/*****************************************************************************/ -/* Generic weight re-order implementation. - */ -template -inline void h_w_ifm_ofm_to_ofm_ifm_h_w( - const T* const in, // Input in [Height x Width x Input x Output] form - T* const out, // Output in [Output x Input x Height x Width] form - const int n_rows, - const int n_cols, - const int n_input_feature_maps, - const int n_output_feature_maps, - int in_row_stride, - int in_col_stride, - int in_input_feature_map_stride, - int out_output_feature_map_stride, - int out_input_feature_map_stride, - int out_row_stride -) -{ - // Fill in the stride values - in_input_feature_map_stride = (in_input_feature_map_stride) - ? in_input_feature_map_stride - : n_output_feature_maps; - in_col_stride = (in_col_stride) - ? in_col_stride - : n_input_feature_maps * in_input_feature_map_stride; - in_row_stride = (in_row_stride) - ? in_row_stride - : n_cols * in_col_stride; - - out_row_stride = (out_row_stride) - ? out_row_stride - : n_cols; - out_input_feature_map_stride = (out_input_feature_map_stride) - ? 
out_input_feature_map_stride - : n_rows * out_row_stride; - out_output_feature_map_stride = (out_output_feature_map_stride) - ? out_output_feature_map_stride - : n_input_feature_maps * out_input_feature_map_stride; - - // Perform the re-ordering - for (int i = 0; i < n_rows; i++) - { - const T* const in_row = in + i * in_row_stride; - T* const out_row = out + i * out_row_stride; - - for (int j = 0; j < n_cols; j++) - { - const T* const in_col = in_row + j * in_col_stride; - T* const out_col = out_row + j; - - for (int ifm = 0; ifm < n_input_feature_maps; ifm++) - { - const T* const in_ifm = in_col + ifm * in_input_feature_map_stride; - T* const out_ifm = out_col + ifm * out_input_feature_map_stride; - - for (int ofm = 0; ofm < n_output_feature_maps; ofm++) - { - const T* const in_ofm = in_ifm + ofm; - T* const out_ofm = out_ifm + ofm * out_output_feature_map_stride; - *(out_ofm) = *(in_ofm); - } - } - } - } -} - -} // namespace reorder diff --git a/arm_compute/core/NEON/kernels/convolution/common/tensor.hpp b/arm_compute/core/NEON/kernels/convolution/common/tensor.hpp deleted file mode 100644 index 7738cdb349..0000000000 --- a/arm_compute/core/NEON/kernels/convolution/common/tensor.hpp +++ /dev/null @@ -1,178 +0,0 @@ -/* - * Copyright (c) 2017-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#pragma once -#include -#include - -#include "alloc.hpp" - -enum TensorOrder -{ - NHWC, ///< [Batch x Height x Width x Channels] - NCHW, ///< [Batch x Channels x Height x Width] -}; - -struct Tensor4DShape -{ - int n_batches, n_rows, n_cols, n_channels; - TensorOrder ordering; - - // Create a new tensor with the default (NHWC) ordering - inline Tensor4DShape( - const int n_batches, - const int n_rows, - const int n_cols, - const int n_channels, - const TensorOrder ordering=NHWC - ) : n_batches(n_batches), - n_rows(n_rows), - n_cols(n_cols), - n_channels(n_channels), - ordering(ordering) - { - } - - inline int index(const int n, const int i, const int j, const int c) const - { - if (this->ordering == NHWC) - { - return ((n*this->n_rows + i)*this->n_cols + j)*this->n_channels + c; - } - else // NCHW - { - return ((n*this->n_channels + c)*this->n_rows + i)*this->n_cols + j; - } - } - - inline int size() const - { - return n_batches * n_rows * n_cols * n_channels; - } - - inline bool TestEq(const Tensor4DShape& other) const - { - return (n_batches == other.n_batches && - n_rows == other.n_rows && - n_cols == other.n_cols && - n_channels == other.n_channels); - } -}; - - -enum WeightOrder -{ - HWIO, ///< [Height x Width x Input channels x Output channels] - OIHW, ///< [Output channels x Input channels x Height x Width] -}; - -struct KernelShape -{ - int n_output_channels, n_rows, n_cols, n_input_channels; - WeightOrder ordering; - - inline KernelShape( - const int 
n_output_channels, - const int n_rows, - const int n_cols, - const int n_input_channels, - const WeightOrder ordering=HWIO - ) : n_output_channels(n_output_channels), - n_rows(n_rows), - n_cols(n_cols), - n_input_channels(n_input_channels), - ordering(ordering) - { - } - - inline int index(int oc, int i, int j, int ic) const - { - if (this->ordering == HWIO) - { - return ((i*this->n_cols + j)*this->n_input_channels + ic)*this->n_output_channels + oc; - } - else // OIHW - { - return ((oc*this->n_input_channels + ic)*this->n_rows + i)*this->n_cols + j; - } - } - - inline int size(void) const - { - return n_output_channels * n_rows * n_cols * n_input_channels; - } -}; - - -template -class Tensor4D final -{ - public: - Tensor4D(ShapeT shape) : - shape(shape), - _data(reinterpret_cast(ALLOCATE(size_bytes()))) - { - Clear(); - } - - Tensor4D(const Tensor4D&) = delete; - Tensor4D operator=(const Tensor4D&) = delete; - - ~Tensor4D() { - free(_data); - } - - inline T* ptr() const { - return _data; - } - - inline size_t size_bytes() const { - return shape.size() * sizeof(T); - } - - /* Extract an element of the tensor. - * - * If the shape is a Tensor4DShape then the index is given as batch, row, - * column and channel. If the shape is a KernelShape then the index is - * given as output channel, row, column and input channel. 
- */ - inline T& element(const int a, const int b, const int c, const int d) const - { - return _data[shape.index(a, b, c, d)]; - } - - inline void Clear() { - Fill(static_cast(0)); - } - - inline void Fill(T val) { - for (int i = 0; i < shape.size(); i++) - _data[i] = val; - } - - const ShapeT shape; - - private: - T* const _data; -}; diff --git a/arm_compute/core/NEON/kernels/convolution/common/tensor_utils.hpp b/arm_compute/core/NEON/kernels/convolution/common/tensor_utils.hpp deleted file mode 100644 index 82619f4799..0000000000 --- a/arm_compute/core/NEON/kernels/convolution/common/tensor_utils.hpp +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Copyright (c) 2017 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#pragma once -#include "tensor.hpp" - -// Methods to print tensors and weights -void PrintTensor(const Tensor4D& tensor); -void PrintWeights(const Tensor4D& weights); - -// Test the equivalence of two tensors -// Counts the instances that |a - b|/|a| > max_err -bool CmpTensors( - const Tensor4D& a, - const Tensor4D& b, - const float max_err=0.0f -); - -// Fill the tensor with a test pattern -void TestPattern(Tensor4D& tensor); -void TestPattern(Tensor4D& weights); - -// Fill the tensor with random values -void Randomise(Tensor4D& tensor, const int seed=0); -void Randomise(Tensor4D& weights, const int seed=0); diff --git a/arm_compute/core/NEON/kernels/convolution/common/utils.hpp b/arm_compute/core/NEON/kernels/convolution/common/utils.hpp deleted file mode 100644 index b7a9517c65..0000000000 --- a/arm_compute/core/NEON/kernels/convolution/common/utils.hpp +++ /dev/null @@ -1,60 +0,0 @@ -/* - * Copyright (c) 2017-2018 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#pragma once - -#include - -void PrintMatrix(const float *const m, const int M, const int N, const int row_stride); - -constexpr inline int iceildiv(const int a, const int b) -{ - return (a + b - 1) / b; -} - -template -inline T roundup(const T a, const T b) -{ - return b * iceildiv(a, b); -} - -template -struct TypeBounds -{ - static constexpr T lower() noexcept { return std::numeric_limits::has_infinity - ? -std::numeric_limits::infinity() - : std::numeric_limits::lowest(); }; - static constexpr T upper() noexcept { return std::numeric_limits::has_infinity - ? std::numeric_limits::infinity() - : std::numeric_limits::max(); }; -}; - -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -template<> -struct TypeBounds<__fp16> -{ - static constexpr __fp16 lower() noexcept { return -std::numeric_limits::infinity(); }; - static constexpr __fp16 upper() noexcept { return std::numeric_limits::infinity(); } -}; -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ diff --git a/arm_compute/core/NEON/kernels/convolution/depthwise/depthwise.hpp b/arm_compute/core/NEON/kernels/convolution/depthwise/depthwise.hpp deleted file mode 100644 index 70d6689731..0000000000 --- a/arm_compute/core/NEON/kernels/convolution/depthwise/depthwise.hpp +++ /dev/null @@ -1,551 +0,0 @@ -/* - * Copyright (c) 2018-2019 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#pragma once - -#include -#include "activation.hpp" -#include "padding.hpp" - -namespace depthwise -{ - -namespace nck = neon_convolution_kernels; - -class IDepthwiseConvolution -{ - public: - virtual ~IDepthwiseConvolution() = default; - - virtual int output_size( - int dim_size, - unsigned int padding_before, - unsigned int padding_after - ) const = 0; - - /* Set input tensor and stride. */ - virtual void set_input(const void *inptr) = 0; - virtual void set_input(const void *inptr, int column_stride) = 0; - virtual void set_input(const void *inptr, int row_stride, int column_stride) = 0; - virtual void set_input(const void *inptr, int batch_stride, int row_stride, int column_stride) = 0; - - /* Set output tensor and stride. 
*/ - virtual void set_output(void *outptr) = 0; - virtual void set_output(void *outptr, int column_stride) = 0; - virtual void set_output(void *outptr, int row_stride, int column_stride) = 0; - virtual void set_output(void *outptr, int batch_stride, int row_stride, int column_stride) = 0; - - /* Weights and biases are re-ordered to improve memory access patterns. Use - * these methods to determine the size of the re-pack buffer and to set the - * address (and implicitly reorder the weights and biases into) the buffer. - */ - virtual size_t get_packed_params_size(void) const = 0; - virtual void set_packed_params_buffer(void *) = 0; - - virtual void pack_params(const void *weights, const void *biases=nullptr) const = 0; - virtual void pack_params(void *buffer, const void *weights, const void *biases=nullptr) const = 0; - virtual void pack_params( - void *buffer, - const void* weights, - unsigned int weight_row_stride, - unsigned int weight_col_stride, - const void *biases=nullptr - ) const = 0; - - /* Working space is used to pad tensors on the fly. Before running any - * inference check the amount of space required, allocate and provide a - * pointer to the convolution engine. 
- */ - virtual size_t get_working_space_size(unsigned int nthreads=1) const = 0; - virtual void set_working_space(void *) = 0; - - virtual unsigned int get_window(void) const = 0; - virtual void run( - unsigned int start, - unsigned int stop, - unsigned int threadid=0 - ) = 0; -}; - -template < - unsigned int OutputTileRows, unsigned int OutputTileCols, - unsigned int KernelRows, unsigned int KernelCols, - unsigned int StrideRows, unsigned int StrideCols, - typename TIn, typename TBias, typename TOut, - typename Derived -> -class DepthwiseConvolutionBase : public IDepthwiseConvolution -{ - public: - // Information about the specific convolution instance - using InputType = TIn; - using BiasType = TBias; - using OutputType = TOut; - static constexpr int output_tile_rows = OutputTileRows; - static constexpr int output_tile_cols = OutputTileCols; - static constexpr int kernel_rows = KernelRows; - static constexpr int kernel_cols = KernelCols; - static constexpr int stride_rows = StrideRows; - static constexpr int stride_cols = StrideCols; - static constexpr int inner_tile_rows = stride_rows * (output_tile_rows - 1) + kernel_rows; - static constexpr int inner_tile_cols = stride_cols * (output_tile_cols - 1) + kernel_cols; - - /** Create a new depthwise convolution engine. - * - * @param[in] n_batches Number of batches tensors. - * @param[in] n_input_rows Number of rows in input tensor. - * @param[in] n_input_cols Number of columns in input tensor. - * @param[in] n_channels Number of channels in input and output tensors. - */ - DepthwiseConvolutionBase( - int n_batches, int n_input_rows, int n_input_cols, int n_channels, - nck::ActivationFunction activation, - unsigned int padding_top, - unsigned int padding_left, - unsigned int padding_bottom, - unsigned int padding_right - ); - - /** Create a new depthwise convolution engine. - * - * @param[in] n_batches Number of batches tensors. - * @param[in] n_input_rows Number of rows in input tensor. 
- * @param[in] n_input_cols Number of columns in input tensor. - * @param[in] n_channels Number of channels in input and output tensors. - */ - DepthwiseConvolutionBase( - int n_batches, int n_input_rows, int n_input_cols, int n_channels, - int n_output_rows, int n_output_cols, - nck::ActivationFunction activation, - unsigned int padding_top, - unsigned int padding_left, - unsigned int padding_bottom, - unsigned int padding_right - ); - - // Cannot copy or move a DepthwiseConvolution. - DepthwiseConvolutionBase(DepthwiseConvolutionBase&) = delete; - DepthwiseConvolutionBase operator=(DepthwiseConvolutionBase&) = delete; - - /* Set input tensor and stride. */ - void set_input(const void *inptr) override; - void set_input(const void *inptr, int column_stride) override; - void set_input(const void *inptr, int row_stride, int column_stride) override; - void set_input(const void *inptr, int batch_stride, int row_stride, int column_stride) override; - - /* Set output tensor and stride. */ - void set_output(void *outptr) override; - void set_output(void *outptr, int column_stride) override; - void set_output(void *outptr, int row_stride, int column_stride) override; - void set_output(void *outptr, int batch_stride, int row_stride, int column_stride) override; - - /** Get the number of output rows/columns. - * - * @param[in] dim_size Number of elements in the dimension (rows/columns) - * @param[in] same_padding True if the padding is SAME, otherwise false. - */ - static int get_output_size( - int dim_size, unsigned int padding_before, unsigned int padding_after - ); - - int output_size( - int dim_size, unsigned int padding_before, unsigned int padding_after - ) const override; - - /* Determine how much memory is required to store the packed weights and - * biases. - */ - size_t get_packed_params_size(void) const override; - - /* Set the buffer for the packed weights and biases, and perform the - * packing. 
- */ - void set_packed_params_buffer(void *buffer) override; - - void pack_params(const void *weights, const void *biases=nullptr) const override; - - void pack_params( - void *buffer, - const void *weights, - const void *biases=nullptr - ) const override; - - void pack_params( - void *buffer, - const void *weights, - unsigned int weight_row_stride, - unsigned int weight_col_stride, - const void *biases=nullptr - ) const override; - - /** Query the amount of working space required. - * @param[in] The largest number of threads which will be used to execute - * the kernel. - */ - size_t get_working_space_size(unsigned int n_threads=1) const override; - - /** Set the working space buffer. - */ - void set_working_space(void *buffer) override; - - /** Get the window of work to be performed by an instance of the operator. - */ - unsigned int get_window(void) const override; - - /** Perform a portion of the work associated with the operator. - * - * Will perform the window of work described by $[start, stop)$. - * - * @param[in] start Start of the window of work to perform. - * @param[in] stop End of the work to perform. - * @param[in] ID of the thread performing the work. - */ - void run( - unsigned int start, - unsigned int stop, - unsigned int threadid=0 - ) override; - - protected: - /** Get the value to use to pad the tensor. - */ - TIn _input_padding_value(void) const; - - /** Implementation of the parameter packing. - */ - void _pack_params( - void *buffer, - const void *weights, - unsigned int weight_row_stride, - unsigned int weight_col_stride, - const void *biases=nullptr - ) const; - - /** Process a tile-row of the tensors. 
- */ - void process_tile_row( - unsigned int threadid, - int n_channels, - const void* packed_params, - const InputType* inptr, - OutputType* outptr, - int row_pad_in_top, - int row_pad_in_left, - int row_pad_in_bottom, - int row_pad_out_bottom, - int n_tiles, - int n_input_cols, - int n_output_cols - ); - - /** Process a single tile of the tensor. - * - * This method will apply input/output padding (if required) and call the - * depthwise tile implementation. - */ - void process_tile( - unsigned int threadid, - int n_channels, - const void* packed_params, - const InputType* inptr, - OutputType* outptr, - int pad_in_top, - int pad_in_left, - int pad_in_bottom, - int pad_in_right, - int pad_out_bottom, - int pad_out_right - ); - - /** Perform depthwise convolution on a single tile. - */ - template - void execute_tile( - int n_channels, - const void* packed_params, - const InputType* inptr, - unsigned int in_row_stride, - unsigned int in_col_stride, - OutputType* outptr, - unsigned int out_row_stride, - unsigned int out_col_stride - ); - - template - void execute_tile( - int n_channels, - const void* packed_params, - const InputType* inptrs[inner_tile_rows][inner_tile_cols], - OutputType* outptrs[output_tile_rows][output_tile_cols] - ); - - int n_channels(void) const; - - private: - // Member variables of instances of a convolution engine. 
- const InputType* _input; - OutputType* _output; - void* _packed_parameters; - void* _working_space; // Per-thread working space - const int _n_batches, _n_input_rows, _n_input_cols, _n_channels, - _n_output_rows, _n_output_cols, _n_tile_rows, _n_tile_cols; - const unsigned int _padding_top, _padding_left, _padding_bottom, _padding_right; - const nck::ActivationFunction _activation; - - // Stride information for a convolution instance - int _input_col_stride, _input_row_stride, _input_batch_stride; - int _output_col_stride, _output_row_stride, _output_batch_stride; - - // Methods for getting access to working space - size_t _get_input_working_space_size(void) const; - size_t _get_output_working_space_size(void) const; - - void *_get_input_working_space(unsigned int threadid) const; - void *_get_output_working_space(unsigned int threadid) const; -}; - - -template < - unsigned int OutputTileRows, unsigned int OutputTileCols, - unsigned int KernelRows, unsigned int KernelCols, - unsigned int StrideRows, unsigned int StrideCols, - typename TIn, typename TBias, typename TOut -> -class DepthwiseConvolution : public DepthwiseConvolutionBase< - OutputTileRows, OutputTileCols, - KernelRows, KernelCols, - StrideRows, StrideCols, - TIn, TBias, TOut, - DepthwiseConvolution< - OutputTileRows, OutputTileCols, - KernelRows, KernelCols, - StrideRows, StrideCols, - TIn, TBias, TOut - > -> -{ - using Base = DepthwiseConvolutionBase< - OutputTileRows, OutputTileCols, - KernelRows, KernelCols, - StrideRows, StrideCols, - TIn, TBias, TOut, - DepthwiseConvolution< - OutputTileRows, OutputTileCols, - KernelRows, KernelCols, - StrideRows, StrideCols, - TIn, TBias, TOut - > >; - friend Base; - using InputType = typename Base::InputType; - using OutputType = typename Base::OutputType; - - public: - using Base::DepthwiseConvolutionBase; - - protected: - template - void execute_tile( - int n_channels, - const void* packed_params, - const TIn* inptr, - unsigned int in_row_stride, - unsigned 
int in_col_stride, - TOut* outptr, - unsigned int out_row_stride, - unsigned int out_col_stride - ); - - template - void execute_tile( - int n_channels, - const void* packed_params, - const InputType* inptrs[Base::inner_tile_rows][Base::inner_tile_cols], - OutputType* outptrs[Base::output_tile_rows][Base::output_tile_cols] - ); -}; - - -template < - unsigned int OutputTileRows, unsigned int OutputTileCols, - unsigned int KernelRows, unsigned int KernelCols, - unsigned int StrideRows, unsigned int StrideCols -> -class DepthwiseConvolution< - OutputTileRows, OutputTileCols, - KernelRows, KernelCols, - StrideRows, StrideCols, - float, float, float -> : public DepthwiseConvolutionBase< - OutputTileRows, OutputTileCols, - KernelRows, KernelCols, - StrideRows, StrideCols, - float, float, float, - DepthwiseConvolution< - OutputTileRows, OutputTileCols, - KernelRows, KernelCols, - StrideRows, StrideCols, - float, float, float - > -> -{ - using Base = DepthwiseConvolutionBase< - OutputTileRows, OutputTileCols, - KernelRows, KernelCols, - StrideRows, StrideCols, - float, float, float, - DepthwiseConvolution< - OutputTileRows, OutputTileCols, - KernelRows, KernelCols, - StrideRows, StrideCols, - float, float, float - > >; - friend Base; - using InputType = typename Base::InputType; - using OutputType = typename Base::OutputType; - - public: - DepthwiseConvolution( - int n_batches, int n_input_rows, int n_input_cols, int n_channels, - nck::ActivationFunction activation, - unsigned int padding_top, - unsigned int padding_left, - unsigned int padding_bottom, - unsigned int padding_right - ); - - DepthwiseConvolution( - int n_batches, int n_input_rows, int n_input_cols, int n_channels, - int n_output_rows, int n_output_cols, - nck::ActivationFunction activation, - unsigned int padding_top, - unsigned int padding_left, - unsigned int padding_bottom, - unsigned int padding_right - ); - - protected: - template - void execute_tile( - int n_channels, - const void* packed_params, - 
const float* inptr, - unsigned int in_row_stride, - unsigned int in_col_stride, - float* outptr, - unsigned int out_row_stride, - unsigned int out_col_stride - ); - - template - void execute_tile( - int n_channels, - const void* packed_params, - const float* inptrs[Base::inner_tile_rows][Base::inner_tile_cols], - float* outptrs[Base::output_tile_rows][Base::output_tile_cols] - ); -}; - -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -template < - unsigned int OutputTileRows, unsigned int OutputTileCols, - unsigned int KernelRows, unsigned int KernelCols, - unsigned int StrideRows, unsigned int StrideCols -> -class DepthwiseConvolution< - OutputTileRows, OutputTileCols, - KernelRows, KernelCols, - StrideRows, StrideCols, - float16_t, float16_t, float16_t -> : public DepthwiseConvolutionBase< - OutputTileRows, OutputTileCols, - KernelRows, KernelCols, - StrideRows, StrideCols, - float16_t, float16_t, float16_t, - DepthwiseConvolution< - OutputTileRows, OutputTileCols, - KernelRows, KernelCols, - StrideRows, StrideCols, - float16_t, float16_t, float16_t - > -> -{ - using Base = DepthwiseConvolutionBase< - OutputTileRows, OutputTileCols, - KernelRows, KernelCols, - StrideRows, StrideCols, - float16_t, float16_t, float16_t, - DepthwiseConvolution< - OutputTileRows, OutputTileCols, - KernelRows, KernelCols, - StrideRows, StrideCols, - float16_t, float16_t, float16_t - > >; - friend Base; - using InputType = typename Base::InputType; - using OutputType = typename Base::OutputType; - - public: - DepthwiseConvolution( - int n_batches, int n_input_rows, int n_input_cols, int n_channels, - nck::ActivationFunction activation, - unsigned int padding_top, - unsigned int padding_left, - unsigned int padding_bottom, - unsigned int padding_right - ); - - DepthwiseConvolution( - int n_batches, int n_input_rows, int n_input_cols, int n_channels, - int n_output_rows, int n_output_cols, - nck::ActivationFunction activation, - unsigned int padding_top, - unsigned int padding_left, - 
unsigned int padding_bottom, - unsigned int padding_right - ); - - protected: - template - void execute_tile( - int n_channels, - const void* packed_params, - const float16_t* inptr, - unsigned int in_row_stride, - unsigned int in_col_stride, - float16_t* outptr, - unsigned int out_row_stride, - unsigned int out_col_stride - ); - - template - void execute_tile( - int n_channels, - const void* packed_params, - const float16_t* inptrs[Base::inner_tile_rows][Base::inner_tile_cols], - float16_t* outptrs[Base::output_tile_rows][Base::output_tile_cols] - ); -}; -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC - -} // namespace depthwise diff --git a/arm_compute/core/NEON/kernels/convolution/depthwise/depthwise_dilated.hpp b/arm_compute/core/NEON/kernels/convolution/depthwise/depthwise_dilated.hpp deleted file mode 100644 index 1bae815613..0000000000 --- a/arm_compute/core/NEON/kernels/convolution/depthwise/depthwise_dilated.hpp +++ /dev/null @@ -1,156 +0,0 @@ -/* - * Copyright (c) 2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#pragma once - -#include -#include -#include - -#include "depthwise.hpp" - -namespace depthwise -{ - -template < - unsigned int OutputTileRows, unsigned int OutputTileCols, - unsigned int KernelRows, unsigned int KernelCols, - unsigned int StrideRows, unsigned int StrideCols, - typename TIn, typename TBias, typename TOut -> -class DilatedDepthwiseConvolution : public IDepthwiseConvolution -{ - public: - /** Create a new dilated depthwise convolution engine. - */ - DilatedDepthwiseConvolution( - int n_batches, int n_input_rows, int n_input_cols, int n_channels, - int dilation_factor, - nck::ActivationFunction activation, - unsigned int padding_top, - unsigned int padding_left, - unsigned int padding_bottom, - unsigned int padding_right - ); - - /** Create a new dilated depthwise convolution engine. - */ - DilatedDepthwiseConvolution( - int n_batches, int n_input_rows, int n_input_cols, int n_channels, - int dilation_factor, int n_output_rows, int n_output_cols, - nck::ActivationFunction activation, - unsigned int padding_top, - unsigned int padding_left, - unsigned int padding_bottom, - unsigned int padding_right - ); - - // Cannot copy or move a DilatedDepthwiseConvolution. - DilatedDepthwiseConvolution(DilatedDepthwiseConvolution&) = delete; - DilatedDepthwiseConvolution operator=(DilatedDepthwiseConvolution&) = delete; - - /* Set input tensor and stride. */ - void set_input(const void *inptr) override; - void set_input(const void *inptr, int column_stride) override; - void set_input(const void *inptr, int row_stride, int column_stride) override; - void set_input(const void *inptr, int batch_stride, int row_stride, int column_stride) override; - - /* Set output tensor and stride. 
*/ - void set_output(void *outptr) override; - void set_output(void *outptr, int column_stride) override; - void set_output(void *outptr, int row_stride, int column_stride) override; - void set_output(void *outptr, int batch_stride, int row_stride, int column_stride) override; - - static int get_output_size( - int dim_size, - unsigned int padding_before, - unsigned int padding_after, - int dilation_factor - ); - - int output_size( - int dim_size, unsigned int padding_before, unsigned int padding_after - ) const override; - - /* Weights and biases are re-ordered to improve memory access patterns. Use - * these methods to determine the size of the re-pack buffer and to set the - * address (and implicitly reorder the weights and biases into) the buffer. - */ - size_t get_packed_params_size(void) const override; - void set_packed_params_buffer(void *) override; - - void pack_params(const void *weights, const void *biases=nullptr) const override; - void pack_params(void *buffer, const void *weights, const void *biases=nullptr) const override; - void pack_params( - void *buffer, - const void* weights, - unsigned int weight_row_stride, - unsigned int weight_col_stride, - const void *biases=nullptr - ) const override; - - /* Working space is used to pad tensors on the fly. Before running any - * inference check the amount of space required, allocate and provide a - * pointer to the convolution engine. 
- */ - size_t get_working_space_size(unsigned int nthreads=1) const override; - void set_working_space(void *) override; - - unsigned int get_window(void) const override; - void run(unsigned int start, unsigned int stop, unsigned int threadid=0) override; - - protected: - /** Protected constructor which also accepts a function to construct a new - * subconvolution - */ - DilatedDepthwiseConvolution( - int n_batches, int n_input_rows, int n_input_cols, int n_channels, - int dilation_factor, int n_output_rows, int n_output_cols, - nck::ActivationFunction activation, - unsigned int padding_top, - unsigned int padding_left, - unsigned int padding_bottom, - unsigned int padding_right, - std::function subconvfn - ); - - const int _dilation_factor; - const int _n_input_rows, _n_input_cols, _n_channels; - const int _padding_top, _padding_left; - const int _n_output_rows, _n_output_cols; - - /* Dilated depthwise convolution is performed through repeated calls to - * non-dilated convolutions. If the dilation factor is $n$, then we perform - * $(n + 1)^2$ depthwise convolutions. - */ - using BaseDepthwise = DepthwiseConvolution< - OutputTileRows, OutputTileCols, - KernelRows, KernelCols, - StrideRows, StrideCols, - TIn, TBias, TOut - >; - std::deque>> _convs; -}; - -} // namespace depthwise diff --git a/arm_compute/core/NEON/kernels/convolution/depthwise/depthwise_quantized.hpp b/arm_compute/core/NEON/kernels/convolution/depthwise/depthwise_quantized.hpp deleted file mode 100644 index 4343f6ad45..0000000000 --- a/arm_compute/core/NEON/kernels/convolution/depthwise/depthwise_quantized.hpp +++ /dev/null @@ -1,291 +0,0 @@ -/* - * Copyright (c) 2018-2019 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#pragma once -#include "depthwise.hpp" -#include "qasymm8.hpp" -#include "qsymm8.hpp" -#pragma once - -using namespace neon_convolution_kernels; -using namespace qasymm8; - -inline int32x4_t saturating_doubling_high_mul(const int32x4_t& a, const int32x4_t& b) -{ - return vqrdmulhq_s32(a, b); -} - -inline int32x4_t saturating_doubling_high_mul(const int32x4_t& a, const int32_t& b) -{ - return vqrdmulhq_n_s32(a, b); -} - -inline int32_t saturating_doubling_high_mul(const int32_t& a, const int32_t& b) -{ - return vget_lane_s32(vqrdmulh_n_s32(vdup_n_s32(a), b), 0); -} - -inline int32x4_t rounding_divide_by_exp2(const int32x4_t& x, const int32x4_t shift) -{ - const int32x4_t fixup = vshrq_n_s32(vandq_s32(x, shift), 31); - const int32x4_t fixed = vqaddq_s32(x, fixup); - return vrshlq_s32(fixed, shift); -} - -inline int32x4_t rounding_divide_by_exp2(const int32x4_t& x, const int exponent) -{ - const int32x4_t shift = vdupq_n_s32(-exponent); - const int32x4_t fixup = vshrq_n_s32(vandq_s32(x, shift), 31); - const int32x4_t fixed = vqaddq_s32(x, fixup); - return vrshlq_s32(fixed, shift); -} - -inline int32x2_t rounding_divide_by_exp2(const int32x2_t& x, const int exponent) -{ - const int32x2_t shift = vdup_n_s32(-exponent); - const int32x2_t fixup = vshr_n_s32(vand_s32(x, shift), 31); - const int32x2_t fixed = vqadd_s32(x, fixup); - return vrshl_s32(fixed, shift); -} - -inline int32_t rounding_divide_by_exp2(const int32_t& x, const int exponent) -{ - const int32x2_t xs = vdup_n_s32(x); - return vget_lane_s32(rounding_divide_by_exp2(xs, exponent), 0); -} - -namespace depthwise -{ - -namespace nck = neon_convolution_kernels; - -template < - unsigned int OutputTileRows, unsigned int OutputTileCols, - unsigned int KernelRows, unsigned int KernelCols, - unsigned int StrideRows, unsigned int StrideCols -> -class QAsymm8DepthwiseConvolution : public DepthwiseConvolutionBase< - OutputTileRows, OutputTileCols, - KernelRows, KernelCols, - StrideRows, StrideCols, - uint8_t, 
int32_t, uint8_t, - QAsymm8DepthwiseConvolution -> -{ - using Base = DepthwiseConvolutionBase< - OutputTileRows, OutputTileCols, - KernelRows, KernelCols, - StrideRows, StrideCols, - uint8_t, int32_t, uint8_t, - QAsymm8DepthwiseConvolution - >; - friend Base; - using InputType = typename Base::InputType; - using OutputType = typename Base::OutputType; - - public: - QAsymm8DepthwiseConvolution( - int n_batches, int n_input_rows, int n_input_cols, int n_channels, - nck::ActivationFunction activation, - const qasymm8::QAsymm8Params& weight_quantisation, - const qasymm8::QAsymm8Params& input_quantisation, - const qasymm8::QAsymm8Params& output_quantisation, - unsigned int padding_top, - unsigned int padding_left, - unsigned int padding_bottom, - unsigned int padding_right - ); - - QAsymm8DepthwiseConvolution( - int n_batches, int n_input_rows, int n_input_cols, int n_channels, - int n_output_rows, int n_output_cols, - nck::ActivationFunction activation, - const qasymm8::QAsymm8Params& weight_quantisation, - const qasymm8::QAsymm8Params& input_quantisation, - const qasymm8::QAsymm8Params& output_quantisation, - unsigned int padding_top, - unsigned int padding_left, - unsigned int padding_bottom, - unsigned int padding_right - ); - - QAsymm8DepthwiseConvolution( - int n_batches, int n_input_rows, int n_input_cols, int n_channels, - nck::ActivationFunction activation, - const qasymm8::QAsymm8Params& weight_quantisation, - const qasymm8::QAsymm8Params& input_quantisation, - const qasymm8::QAsymm8Params& output_quantisation, - const qasymm8::QAsymm8RescaleParams& rescale_parameters, - unsigned int padding_top, - unsigned int padding_left, - unsigned int padding_bottom, - unsigned int padding_right - ); - - QAsymm8DepthwiseConvolution( - int n_batches, int n_input_rows, int n_input_cols, int n_channels, - int n_output_rows, int n_output_cols, - nck::ActivationFunction activation, - const qasymm8::QAsymm8Params& weight_quantisation, - const qasymm8::QAsymm8Params& 
input_quantisation, - const qasymm8::QAsymm8Params& output_quantisation, - const qasymm8::QAsymm8RescaleParams& rescale_parameters, - unsigned int padding_top, - unsigned int padding_left, - unsigned int padding_bottom, - unsigned int padding_right - ); - - protected: - uint8_t _input_padding_value(void) const; - - void _pack_params( - void *buffer, - const void *weights, - unsigned int weight_row_stride, - unsigned int weight_col_stride, - const void *biases=nullptr - ) const; - - template - void execute_tile( - int n_channels, - const void* packed_params, - const uint8_t* inptr, - unsigned int in_row_stride, - unsigned int in_col_stride, - uint8_t* outptr, - unsigned int out_row_stride, - unsigned int out_col_stride - ); - - template - void execute_tile( - int n_channels, - const void* packed_params, - const uint8_t* inptrs[Base::inner_tile_rows][Base::inner_tile_cols], - uint8_t* outptrs[Base::output_tile_rows][Base::output_tile_cols] - ); - - private: - // Quantization parameters - const qasymm8::QAsymm8Params _weights_quant, _inputs_quant, _output_quant; - const qasymm8::QAsymm8RescaleParams rescale_parameters; -}; - -template < - unsigned int OutputTileRows, unsigned int OutputTileCols, - unsigned int KernelRows, unsigned int KernelCols, - unsigned int StrideRows, unsigned int StrideCols -> -class QSymm8HybridPerChannelDepthwiseConvolution : public DepthwiseConvolutionBase< - OutputTileRows, OutputTileCols, - KernelRows, KernelCols, - StrideRows, StrideCols, - uint8_t, int32_t, uint8_t, - QSymm8HybridPerChannelDepthwiseConvolution -> -{ - using Base = DepthwiseConvolutionBase< - OutputTileRows, OutputTileCols, - KernelRows, KernelCols, - StrideRows, StrideCols, - uint8_t, int32_t, uint8_t, - QSymm8HybridPerChannelDepthwiseConvolution - >; - friend Base; - using InputType = typename Base::InputType; - using OutputType = typename Base::OutputType; - - public: - QSymm8HybridPerChannelDepthwiseConvolution( - int n_batches, int n_input_rows, int n_input_cols, int 
n_channels, - nck::ActivationFunction activation, - const qsymm8::QSymm8PerChannelParams& weight_quantisation, - const qasymm8::QAsymm8Params& input_quantisation, - const qasymm8::QAsymm8Params& output_quantisation, - unsigned int padding_top, - unsigned int padding_left, - unsigned int padding_bottom, - unsigned int padding_right - ); - - QSymm8HybridPerChannelDepthwiseConvolution( - int n_batches, int n_input_rows, int n_input_cols, int n_channels, - nck::ActivationFunction activation, - const qsymm8::QSymm8PerChannelParams& weight_quantisation, - const qasymm8::QAsymm8Params& input_quantisation, - const qasymm8::QAsymm8Params& output_quantisation, - const qsymm8::QSymm8PerChannelRescaleParams& rescale_parameters, - unsigned int padding_top, - unsigned int padding_left, - unsigned int padding_bottom, - unsigned int padding_right - ); - - size_t get_packed_params_size(void) const override - { - return this->n_channels() * (sizeof(int8_t)*KernelRows*KernelCols + 3*sizeof(int32_t)); - - } - - protected: - uint8_t _input_padding_value(void) const; - - void _pack_params( - void *buffer, - const void *weights, - unsigned int weight_row_stride, - unsigned int weight_col_stride, - const void *biases=nullptr - ) const; - - template - void execute_tile( - int n_channels, - const void* packed_params, - const uint8_t* inptr, - unsigned int in_row_stride, - unsigned int in_col_stride, - uint8_t* outptr, - unsigned int out_row_stride, - unsigned int out_col_stride - ); - - template - void execute_tile( - int n_channels, - const void* packed_params, - const uint8_t* inptrs[Base::inner_tile_rows][Base::inner_tile_cols], - uint8_t* outptrs[Base::output_tile_rows][Base::output_tile_cols] - ); - - private: - // Quantization parameters - const qsymm8::QSymm8PerChannelParams _weights_quant; - const qasymm8::QAsymm8Params _input_quant, _output_quant; - const qsymm8::QSymm8PerChannelRescaleParams _rescale_parameters; -}; - -} // namespace depthwise diff --git 
a/arm_compute/core/NEON/kernels/convolution/depthwise/depthwise_quantized_dilated.hpp b/arm_compute/core/NEON/kernels/convolution/depthwise/depthwise_quantized_dilated.hpp deleted file mode 100644 index a11b0981c9..0000000000 --- a/arm_compute/core/NEON/kernels/convolution/depthwise/depthwise_quantized_dilated.hpp +++ /dev/null @@ -1,88 +0,0 @@ -/* - * Copyright (c) 2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#pragma once -#include "depthwise_dilated.hpp" -#include "depthwise_quantized.hpp" - -namespace depthwise { - -template -class QAsymm8DilatedDepthwiseConvolution - : public DilatedDepthwiseConvolution< - OutputTileRows, OutputTileCols, KernelRows, KernelCols, StrideRows, - StrideCols, uint8_t, int32_t, uint8_t> { -public: - /** Create a new dilated depthwise convolution engine. 
- */ - QAsymm8DilatedDepthwiseConvolution( - int n_batches, int n_input_rows, int n_input_cols, int n_channels, - int dilation_factor, nck::ActivationFunction activation, - const qasymm8::QAsymm8Params &weight_quantisation, - const qasymm8::QAsymm8Params &input_quantisation, - const qasymm8::QAsymm8Params &output_quantisation, - unsigned int padding_top, unsigned int padding_left, - unsigned int padding_bottom, unsigned int padding_right); - - /** Create a new dilated depthwise convolution engine. - */ - QAsymm8DilatedDepthwiseConvolution( - int n_batches, int n_input_rows, int n_input_cols, int n_channels, - int dilation_factor, int n_output_rows, int n_output_cols, - nck::ActivationFunction activation, - const qasymm8::QAsymm8Params &weight_quantisation, - const qasymm8::QAsymm8Params &input_quantisation, - const qasymm8::QAsymm8Params &output_quantisation, - unsigned int padding_top, unsigned int padding_left, - unsigned int padding_bottom, unsigned int padding_right); - - /** Create a new dilated depthwise convolution engine. - */ - QAsymm8DilatedDepthwiseConvolution( - int n_batches, int n_input_rows, int n_input_cols, int n_channels, - int dilation_factor, nck::ActivationFunction activation, - const qasymm8::QAsymm8Params &weight_quantisation, - const qasymm8::QAsymm8Params &input_quantisation, - const qasymm8::QAsymm8Params &output_quantisation, - const qasymm8::QAsymm8RescaleParams &rescale_parameters, - unsigned int padding_top, unsigned int padding_left, - unsigned int padding_bottom, unsigned int padding_right); - - /** Create a new dilated depthwise convolution engine. 
- */ - QAsymm8DilatedDepthwiseConvolution( - int n_batches, int n_input_rows, int n_input_cols, int n_channels, - int dilation_factor, int n_output_rows, int n_output_cols, - nck::ActivationFunction activation, - const qasymm8::QAsymm8Params &weight_quantisation, - const qasymm8::QAsymm8Params &input_quantisation, - const qasymm8::QAsymm8Params &output_quantisation, - const qasymm8::QAsymm8RescaleParams& rescale_parameters, - unsigned int padding_top, unsigned int padding_left, - unsigned int padding_bottom, unsigned int padding_right); -}; - -} // namespace depthwise diff --git a/arm_compute/core/SubTensorInfo.h b/arm_compute/core/SubTensorInfo.h index f604f55924..6654ccf00a 100644 --- a/arm_compute/core/SubTensorInfo.h +++ b/arm_compute/core/SubTensorInfo.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 Arm Limited. + * Copyright (c) 2017-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -31,7 +31,6 @@ #include "arm_compute/core/Strides.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/TensorShape.h" -#include "arm_compute/core/Validate.h" #include #include diff --git a/arm_compute/core/utils/helpers/bit_ops.h b/arm_compute/core/utils/helpers/bit_ops.h deleted file mode 100644 index eee360c9e3..0000000000 --- a/arm_compute/core/utils/helpers/bit_ops.h +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Copyright (c) 2018-2019 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_UTILS_HELPERS_BIT_OPS_H -#define ARM_COMPUTE_UTILS_HELPERS_BIT_OPS_H - -#include "arm_compute/core/utils/misc/Requires.h" - -#include - -namespace arm_compute -{ -namespace helpers -{ -namespace bit_ops -{ -/** Checks if the idx-th bit is set in an integral type - * - * @param[in] v Integral input - * @param[in] idx Index of the bit to check - * - * @return True if the idx-th bit is set else false - */ -template ::value)> -bool is_bit_set(T v, unsigned int idx) -{ - return (v & 1 << idx) != 0; -} -} // namespace bit_ops -} // namespace helpers -} // namespace arm_compute -#endif /* ARM_COMPUTE_UTILS_HELPERS_BIT_OPS_H */ diff --git a/arm_compute/core/utils/helpers/fft.h b/arm_compute/core/utils/helpers/fft.h deleted file mode 100644 index 7d111b764b..0000000000 --- a/arm_compute/core/utils/helpers/fft.h +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Copyright (c) 2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_UTILS_HELPERS_FFT_H -#define ARM_COMPUTE_UTILS_HELPERS_FFT_H - -#include -#include - -namespace arm_compute -{ -namespace helpers -{ -namespace fft -{ -/** Decompose a given 1D input size using the provided supported factors. - * - * @param[in] N Input size to be decomposed. - * @param[in] supported_factors Supported factors that can be used for decomposition. - * - * @return A vector with the stages of the decomposition. Will be empty if decomposition failed. - */ -std::vector decompose_stages(unsigned int N, const std::set &supported_factors); -/** Calculate digit reverse index vector given fft size and the decomposed stages - * - * @param N Input size to calculate digit reverse for - * @param fft_stages A vector with the FFT decomposed stages - * - * @return A vector with the digit reverse indices. Will be empty if it failed. - */ -std::vector digit_reverse_indices(unsigned int N, const std::vector &fft_stages); -} // namespace fft -} // namespace helpers -} // namespace arm_compute -#endif /* ARM_COMPUTE_UTILS_HELPERS_FFT_H */ diff --git a/arm_compute/core/utils/helpers/float_ops.h b/arm_compute/core/utils/helpers/float_ops.h deleted file mode 100644 index 1a08fc76b4..0000000000 --- a/arm_compute/core/utils/helpers/float_ops.h +++ /dev/null @@ -1,116 +0,0 @@ -/* - * Copyright (c) 2019 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_UTILS_HELPERS_FLOAT_OPS_H -#define ARM_COMPUTE_UTILS_HELPERS_FLOAT_OPS_H - -namespace arm_compute -{ -namespace helpers -{ -namespace float_ops -{ -union RawFloat -{ - /** Constructor - * - * @param[in] val Floating-point value - */ - explicit RawFloat(float val) - : f32(val) - { - } - /** Extract sign of floating point number - * - * @return Sign of floating point number - */ - int32_t sign() const - { - return i32 >> 31; - } - /** Extract exponent of floating point number - * - * @return Exponent of floating point number - */ - int32_t exponent() const - { - return (i32 >> 23) & 0xFF; - } - /** Extract mantissa of floating point number - * - * @return Mantissa of floating point number - */ - int32_t mantissa() const - { - return i32 & 0x007FFFFF; - } - - int32_t i32; - float f32; -}; - -/** Checks if two floating point numbers are equal given an allowed number of ULPs - * - * @param[in] a First number to compare - * @param[in] b Second number to compare - * @param[in] max_allowed_ulps (Optional) Number of allowed ULPs - * - * @return True if number is close else false - */ -inline bool is_equal_ulps(float a, float b, int max_allowed_ulps = 0) -{ - RawFloat ra(a); - RawFloat rb(b); - - // Check ULP distance - const int ulps = std::abs(ra.i32 - rb.i32); - return ulps <= max_allowed_ulps; -} - -/** Checks if the input floating point number is 1.0f checking if the difference is within a range defined with epsilon - * - * @param[in] a Input floating point number - * @param[in] epsilon (Optional) Epsilon used to define the error bounds - * - * @return True if number is close to 1.0f - */ -inline bool is_one(float a, float epsilon = 0.00001f) -{ - return std::abs(1.0f - a) <= epsilon; -} - -/** Checks if the input floating point number is 0.0f checking if the difference is within a range defined with epsilon - * - * @param[in] a Input floating point number - * @param[in] epsilon (Optional) Epsilon used to define the error bounds - * - * @return 
True if number is close to 0.0f - */ -inline bool is_zero(float a, float epsilon = 0.00001f) -{ - return std::abs(0.0f - a) <= epsilon; -} -} // namespace float_ops -} // namespace helpers -} // namespace arm_compute -#endif /* ARM_COMPUTE_UTILS_HELPERS_FLOAT_OPS_H */ diff --git a/arm_compute/core/utils/helpers/tensor_info.h b/arm_compute/core/utils/helpers/tensor_info.h deleted file mode 100644 index 443234064a..0000000000 --- a/arm_compute/core/utils/helpers/tensor_info.h +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Copyright (c) 2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_UTILS_HELPERS_TENSOR_INFO_H -#define ARM_COMPUTE_UTILS_HELPERS_TENSOR_INFO_H - -#include "arm_compute/core/ITensorInfo.h" - -namespace arm_compute -{ -namespace helpers -{ -namespace tensor_info -{ -/** Checks if the quantization info of given tensors are different - * - * @param tensor_info_1 Tensor info of the first tensor - * @param tensor_info_2 Tensor info of the second tensor - * @param tensor_infos Tensor infos of the rest tensors - * - * @return True if tensors have mismatching quantization info else false. - */ -template -inline bool tensors_have_different_quantization_info(const ITensorInfo *tensor_info_1, const ITensorInfo *tensor_info_2, Ts... tensor_infos) -{ - const QuantizationInfo first_quantization_info = tensor_info_1->quantization_info(); - - const std::array < const ITensorInfo *, 1 + sizeof...(Ts) > tensor_infos_array{ { tensor_info_2, std::forward(tensor_infos)... } }; - return std::any_of(tensor_infos_array.begin(), tensor_infos_array.end(), [&](const ITensorInfo * tensor_info) - { - return tensor_info->quantization_info() != first_quantization_info; - }); -} -} // namespace tensor_info -} // namespace helpers -} // namespace arm_compute -#endif /* ARM_COMPUTE_UTILS_HELPERS_TENSOR_INFO_H */ diff --git a/arm_compute/core/utils/math/SafeOps.h b/arm_compute/core/utils/math/SafeOps.h index 4f81cf4b44..c222c65e84 100644 --- a/arm_compute/core/utils/math/SafeOps.h +++ b/arm_compute/core/utils/math/SafeOps.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 Arm Limited. + * Copyright (c) 2019-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -25,7 +25,9 @@ #define ARM_COMPUTE_UTILS_MATH_SAFE_OPS #include "arm_compute/core/Error.h" -#include "arm_compute/core/utils/misc/Requires.h" +#include "support/Requires.h" + +#include namespace arm_compute { diff --git a/arm_compute/core/utils/misc/CRTP.h b/arm_compute/core/utils/misc/CRTP.h deleted file mode 100644 index d295500bef..0000000000 --- a/arm_compute/core/utils/misc/CRTP.h +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Copyright (c) 2018-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_MISC_CRTP_H -#define ARM_COMPUTE_MISC_CRTP_H - -namespace arm_compute -{ -namespace misc -{ -/** Curiously recurring template pattern Interface */ -template class Type> -struct CRTP -{ -public: - /** Exact type */ - using ExactType = T; - -protected: - const T &impl() const - { - return static_cast(*this); - } - T &impl() - { - return static_cast(*this); - } - -private: - CRTP() = default; - friend Type; -}; -} // namespace misc -} // namespace arm_compute -#endif /* ARM_COMPUTE_MISC_CRTP_H */ diff --git a/arm_compute/core/utils/misc/Cast.h b/arm_compute/core/utils/misc/Cast.h deleted file mode 100644 index 57c7e49942..0000000000 --- a/arm_compute/core/utils/misc/Cast.h +++ /dev/null @@ -1,119 +0,0 @@ -/* - * Copyright (c) 2018-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_MISC_CAST_H -#define ARM_COMPUTE_MISC_CAST_H - -#include "arm_compute/core/Error.h" - -namespace arm_compute -{ -namespace utils -{ -namespace cast -{ -/** Polymorphic cast between two types - * - * @warning Will throw an exception if cast cannot take place - * - * @tparam Target Target to cast type - * @tparam Source Source from cast type - * - * @param[in] v Value to cast - * - * @return The casted value - */ -template -inline Target polymorphic_cast(Source *v) -{ - if(dynamic_cast(v) == nullptr) - { - ARM_COMPUTE_THROW(std::bad_cast()); - } - return static_cast(v); -} - -/** Polymorphic down cast between two types - * - * @warning Will assert if cannot take place - * - * @tparam Target Target to cast type - * @tparam Source Source from cast type - * - * @param[in] v Value to cast - * - * @return The casted value - */ -template -inline Target polymorphic_downcast(Source *v) -{ - ARM_COMPUTE_ERROR_ON(dynamic_cast(v) != static_cast(v)); - return static_cast(v); -} - -/** Polymorphic cast between two unique pointer types - * - * @warning Will throw an exception if cast cannot take place - * - * @tparam Target Target to cast type - * @tparam Source Source from cast type - * @tparam Deleter Deleter function type - * - * @param[in] v Value to cast - * - * @return The casted value - */ -template -std::unique_ptr polymorphic_cast_unique_ptr(std::unique_ptr &&v) -{ - if(dynamic_cast(v.get()) == nullptr) - { - ARM_COMPUTE_THROW(std::bad_cast()); - } - auto r = static_cast(v.release()); - return std::unique_ptr(r, std::move(v.get_deleter())); -} - -/** Polymorphic down cast between two unique pointer types - * - * @warning Will assert if cannot take place - * - * @tparam Target Target to cast type - * @tparam Source Source from cast type - * @tparam Deleter Deleter function type - * - * @param[in] v Value to cast - * - * @return The casted value - */ -template -std::unique_ptr polymorphic_downcast_unique_ptr(std::unique_ptr &&v) -{ - 
ARM_COMPUTE_ERROR_ON(dynamic_cast(v.get()) != static_cast(v.get())); - auto r = static_cast(v.release()); - return std::unique_ptr(r, std::move(v.get_deleter())); -} -} // namespace cast -} // namespace utils -} // namespace arm_compute -#endif /* ARM_COMPUTE_MISC_CAST_H */ diff --git a/arm_compute/core/utils/misc/ICloneable.h b/arm_compute/core/utils/misc/ICloneable.h deleted file mode 100644 index cbb0b3c149..0000000000 --- a/arm_compute/core/utils/misc/ICloneable.h +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Copyright (c) 2017-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_MISC_ICLONEABLE_H -#define ARM_COMPUTE_MISC_ICLONEABLE_H - -#include - -namespace arm_compute -{ -namespace misc -{ -/** Clonable Interface */ -template -class ICloneable -{ -public: - /** Default virtual desctructor */ - virtual ~ICloneable() = default; - /** Provide a clone of the current object of class T - * - * @return Clone object of class T - */ - virtual std::unique_ptr clone() const = 0; -}; -} // namespace misc -} // namespace arm_compute -#endif /* ARM_COMPUTE_MISC_ICLONEABLE_H */ diff --git a/arm_compute/core/utils/misc/Iterable.h b/arm_compute/core/utils/misc/Iterable.h deleted file mode 100644 index 34232088e8..0000000000 --- a/arm_compute/core/utils/misc/Iterable.h +++ /dev/null @@ -1,108 +0,0 @@ -/* - * Copyright (c) 2018-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_MISC_ITERABLE_H -#define ARM_COMPUTE_MISC_ITERABLE_H - -#include - -namespace arm_compute -{ -namespace utils -{ -namespace iterable -{ -/** Reverse range iterable class - * - * @tparam T Type to create a reverse range on - */ -template -class reverse_iterable -{ -public: - /** Default constructor - * - * @param[in] it Value to reverse iterate on - */ - explicit reverse_iterable(T &it) - : _it(it) - { - } - - /** Get beginning of iterator. - * - * @return beginning of iterator. - */ - typename T::reverse_iterator begin() - { - return _it.rbegin(); - } - - /** Get end of iterator. - * - * @return end of iterator. - */ - typename T::reverse_iterator end() - { - return _it.rend(); - } - - /** Get beginning of const iterator. - * - * @return beginning of const iterator. - */ - typename T::const_reverse_iterator cbegin() - { - return _it.rbegin(); - } - - /** Get end of const iterator. - * - * @return end of const iterator. - */ - typename T::const_reverse_iterator cend() - { - return _it.rend(); - } - -private: - T &_it; -}; - -/** Creates a reverse iterable for a given type - * - * @tparam T Type to create a reverse iterable on - * - * @param[in] val Iterable input - * - * @return Reverse iterable container - */ -template -reverse_iterable reverse_iterate(T &val) -{ - return reverse_iterable(val); -} -} // namespace iterable -} // namespace utils -} // namespace arm_compute -#endif /* ARM_COMPUTE_MISC_ITERABLE_H */ diff --git a/arm_compute/core/utils/misc/Random.h b/arm_compute/core/utils/misc/Random.h deleted file mode 100644 index 6832c495e3..0000000000 --- a/arm_compute/core/utils/misc/Random.h +++ /dev/null @@ -1,98 +0,0 @@ -/* - * Copyright (c) 2019 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_MISC_RANDOM_H -#define ARM_COMPUTE_MISC_RANDOM_H - -#include "arm_compute/core/Error.h" - -#include -#include - -namespace arm_compute -{ -namespace utils -{ -namespace random -{ -/** Uniform distribution within a given number of sub-ranges - * - * @tparam T Distribution primitive type - */ -template -class RangedUniformDistribution -{ -public: - using DT = typename std::conditional::value, - std::uniform_int_distribution, - std::uniform_real_distribution>::type; - using result_type = T; - using range_pair = std::pair; - -public: - /** Constructor - * - * @param[in] low lowest value in the range (inclusive) - * @param[in] high highest value in the range (inclusive for uniform_int_distribution, exclusive for uniform_real_distribution) - * @param[in] exclude_ranges Ranges to exclude from the generator - */ - RangedUniformDistribution(result_type low, result_type high, const std::vector &exclude_ranges) - : _distributions(), _selector() - { - result_type clow = low; - for(const auto &erange : exclude_ranges) - { - result_type epsilon = std::is_integral::value ? 1 : static_cast(std::numeric_limits::epsilon()); - - ARM_COMPUTE_ERROR_ON(clow > erange.first || clow >= erange.second); - - _distributions.emplace_back(DT(clow, erange.first - epsilon)); - clow = erange.second + epsilon; - } - ARM_COMPUTE_ERROR_ON(clow > high); - _distributions.emplace_back(DT(clow, high)); - _selector = std::uniform_int_distribution(0, _distributions.size() - 1); - } - /** Generate random number - * - * @tparam URNG Random number generator object type - * - * @param[in] g A uniform random number generator object, used as the source of randomness. - * - * @return A new random number. - */ - template - result_type operator()(URNG &g) - { - unsigned int rand_select = _selector(g); - return _distributions[rand_select](g); - } - -private: - std::vector
_distributions; - std::uniform_int_distribution _selector; -}; -} // namespace random -} // namespace utils -} // namespace arm_compute -#endif /* ARM_COMPUTE_MISC_RANDOM_H */ diff --git a/arm_compute/core/utils/misc/Requires.h b/arm_compute/core/utils/misc/Requires.h deleted file mode 100644 index ba91039596..0000000000 --- a/arm_compute/core/utils/misc/Requires.h +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Copyright (c) 2018-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_UTILS_REQUIRES_H -#define ARM_COMPUTE_UTILS_REQUIRES_H - -namespace arm_compute -{ -namespace utils -{ -namespace requires -{ -// *INDENT-OFF* -// clang-format off -namespace detail -{ -enum class enabler -{ -}; -} // namespace arm_compute - -/** Requirements as template */ -#define REQUIRES_T(...) template ::type = 0> -/** Requirements as template argument */ -#define REQUIRES_TA(...) 
typename = typename std::enable_if<(__VA_ARGS__), arm_compute::utils::requires::detail::enabler>::type -// clang-format on -// *INDENT-ON* -} // namespace requires -} // namespace utils -} // namespace arm_compute -#endif /*ARM_COMPUTE_UTILS_REQUIRES_H */ diff --git a/arm_compute/core/utils/misc/Rounding.h b/arm_compute/core/utils/misc/Rounding.h deleted file mode 100644 index 1ed4e64886..0000000000 --- a/arm_compute/core/utils/misc/Rounding.h +++ /dev/null @@ -1,205 +0,0 @@ -/* - * Copyright (c) 2018-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_UTILS_ROUNDING_H -#define ARM_COMPUTE_UTILS_ROUNDING_H - -#include "arm_compute/core/Error.h" -#include "arm_compute/core/utils/misc/Requires.h" -#include "arm_compute/core/utils/misc/Traits.h" -#include "support/ToolchainSupport.h" - -#include - -namespace arm_compute -{ -namespace utils -{ -namespace rounding -{ -/** Rounding mode */ -enum class RoundingMode -{ - TO_ZERO, /**< Round towards zero */ - AWAY_FROM_ZERO, /**< Round away from zero */ - HALF_TO_ZERO, /**< Round half towards from zero */ - HALF_AWAY_FROM_ZERO, /**< Round half away from zero */ - HALF_UP, /**< Round half towards positive infinity */ - HALF_DOWN, /**< Round half towards negative infinity */ - HALF_EVEN /**< Round half towards nearest even */ -}; - -/** Round floating-point value with round to zero - * - * @tparam T Parameter type. Should be of floating point type. - * - * @param[in] value floating-point value to be rounded. - * - * @return Floating-point value of rounded @p value. - */ -template ::value)> -inline T round_to_zero(T value) -{ - T res = std::floor(std::fabs(value)); - return (value < 0.f) ? -res : res; -} - -/** Round floating-point value with round away from zero - * - * @tparam T Parameter type. Should be of floating point type. - * - * @param[in] value floating-point value to be rounded. - * - * @return Floating-point value of rounded @p value. - */ -template ::value)> -inline T round_away_from_zero(T value) -{ - T res = std::ceil(std::fabs(value)); - return (value < 0.f) ? -res : res; -} - -/** Round floating-point value with half value rounding towards zero. - * - * @tparam T Parameter type. Should be of floating point type. - * - * @param[in] value floating-point value to be rounded. - * - * @return Floating-point value of rounded @p value. - */ -template ::value)> -inline T round_half_to_zero(T value) -{ - T res = T(std::ceil(std::fabs(value) - 0.5f)); - return (value < 0.f) ? 
-res : res; -} - -/** Round floating-point value with half value rounding away from zero. - * - * @tparam T Parameter type. Should be of floating point type. - * - * @param[in] value floating-point value to be rounded. - * - * @return Floating-point value of rounded @p value. - */ -template ::value)> -inline T round_half_away_from_zero(T value) -{ - T res = T(std::floor(std::fabs(value) + 0.5f)); - return (value < 0.f) ? -res : res; -} - -/** Round floating-point value with half value rounding to positive infinity. - * - * @tparam T Parameter type. Should be of floating point type. - * - * @param[in] value floating-point value to be rounded. - * - * @return Floating-point value of rounded @p value. - */ -template ::value)> -inline T round_half_up(T value) -{ - return std::floor(value + 0.5f); -} - -/** Round floating-point value with half value rounding to negative infinity. - * - * @tparam T Parameter type. Should be of floating point type. - * - * @param[in] value floating-point value to be rounded. - * - * @return Floating-point value of rounded @p value. - */ -template ::value)> -inline T round_half_down(T value) -{ - return std::ceil(value - 0.5f); -} - -/** Round floating-point value with half value rounding to nearest even. - * - * @tparam T Parameter type. Should be of floating point type. - * - * @param[in] value floating-point value to be rounded. - * @param[in] epsilon precision. - * - * @return Floating-point value of rounded @p value. 
- */ -template ::value)> -inline T round_half_even(T value, T epsilon = std::numeric_limits::epsilon()) -{ - T positive_value = std::abs(value); - T ipart = 0; - std::modf(positive_value, &ipart); - // If 'value' is exactly halfway between two integers - if(std::abs(positive_value - (ipart + 0.5f)) < epsilon) - { - // If 'ipart' is even then return 'ipart' - if(std::fmod(ipart, 2.f) < epsilon) - { - return support::cpp11::copysign(ipart, value); - } - // Else return the nearest even integer - return support::cpp11::copysign(std::ceil(ipart + 0.5f), value); - } - // Otherwise use the usual round to closest - return support::cpp11::copysign(support::cpp11::round(positive_value), value); -} - -/** Round floating-point value given a rounding mode - * - * @tparam T Parameter type. Should be of floating point type. - * - * @param[in] value floating-point value to be rounded. - * @param[in] rounding_mode Rounding mode to use. - * - * @return Floating-point value of rounded @p value. - */ -template ::value)> -inline T round(T value, RoundingMode rounding_mode) -{ - switch(rounding_mode) - { - case RoundingMode::TO_ZERO: - return round_to_zero(value); - case RoundingMode::AWAY_FROM_ZERO: - return round_away_from_zero(value); - case RoundingMode::HALF_TO_ZERO: - return round_half_to_zero(value); - case RoundingMode::HALF_AWAY_FROM_ZERO: - return round_half_away_from_zero(value); - case RoundingMode::HALF_UP: - return round_half_up(value); - case RoundingMode::HALF_DOWN: - return round_half_down(value); - case RoundingMode::HALF_EVEN: - return round_half_even(value); - default: - ARM_COMPUTE_ERROR("Unsupported rounding mode!"); - } -} -} // namespace rounding -} // namespace utils -} // namespace arm_compute -#endif /*ARM_COMPUTE_UTILS_ROUNDING_H */ diff --git a/arm_compute/core/utils/misc/SaturateCast.h b/arm_compute/core/utils/misc/SaturateCast.h deleted file mode 100644 index cbced83f89..0000000000 --- a/arm_compute/core/utils/misc/SaturateCast.h +++ /dev/null @@ -1,218 
+0,0 @@ -/* - * Copyright (c) 2018-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_UTILS_CAST_SATURATE_CAST_H -#define ARM_COMPUTE_UTILS_CAST_SATURATE_CAST_H - -#include "arm_compute/core/utils/misc/Rounding.h" -#include "arm_compute/core/utils/misc/Traits.h" -#include "arm_compute/core/utils/misc/Utility.h" - -namespace arm_compute -{ -namespace utils -{ -namespace cast -{ -// *INDENT-OFF* -// clang-format off -// same type -template::value, int >::type = 0 > -T saturate_cast(U v) -{ - return v; -} - -// signed -> signed widening/same_width -template::value && - std::is_integral::value && - std::is_signed() && - std::is_signed() && - !std::is_same::value && - sizeof(T) >= sizeof(U), - int >::type = 0 > -inline T saturate_cast(U v) -{ - return static_cast(v); -} -// signed -> signed narrowing -template::value && - std::is_integral::value && - std::is_signed() && - std::is_signed() && - !std::is_same::value && - sizeof(T) < sizeof(U), - int >::type = 0 > -inline T saturate_cast(U v) -{ - return static_cast(utility::clamp(v, std::numeric_limits::lowest(), std::numeric_limits::max())); -} - -// unsigned -> signed widening -template::value && - std::is_integral::value && - std::is_unsigned() && - std::is_signed() && - !std::is_same::value && - (sizeof(T) > sizeof(U)), - int >::type = 0 > -inline T saturate_cast(U v) -{ - return static_cast(v); -} -// unsigned -> signed narrowing -template::value && - std::is_integral::value && - std::is_unsigned() && - std::is_signed() && - !std::is_same::value && - sizeof(T) < sizeof(U), - int >::type = 0 > -inline T saturate_cast(U v) -{ - return static_cast(std::min(v, std::numeric_limits::max())); -} -// unsigned -> signed same_width -template::value && - std::is_integral::value && - std::is_unsigned() && - std::is_signed() && - !std::is_same::value && - sizeof(T) == sizeof(U), - int >::type = 0 > -inline T saturate_cast(U v) -{ - return static_cast(std::min(v, std::numeric_limits::max())); -} - -// signed -> unsigned widening/same width -template::value && - std::is_integral::value && - 
std::is_signed() && - std::is_unsigned() && - !std::is_same::value && - sizeof(T) >= sizeof(U), - int >::type = 0 > -inline T saturate_cast(U v) -{ - return static_cast(std::max(0, v)); -} - -// signed -> unsigned narrowing -template::value && - std::is_integral::value && - std::is_signed() && - std::is_unsigned() && - !std::is_same::value && - sizeof(T) < sizeof(U), - int >::type = 0 > -inline T saturate_cast(U v) -{ - return static_cast(utility::clamp(v, 0, std::numeric_limits::max())); -} - -// unsigned -> unsigned widening/same width -template::value && - std::is_integral::value && - std::is_unsigned() && - std::is_unsigned() && - !std::is_same::value && - sizeof(T) >= sizeof(U), - int >::type = 0 > -inline T saturate_cast(U v) -{ - return static_cast(v); -} - -// unsigned -> unsigned narrowing -template::value && - std::is_integral::value && - std::is_unsigned() && - std::is_unsigned() && - !std::is_same::value && - sizeof(T) < sizeof(U), - int >::type = 0 > -inline T saturate_cast(U v) -{ - return static_cast(utility::clamp(v, std::numeric_limits::lowest(), std::numeric_limits::max())); -} - -// float -> int -template::value && - traits::is_floating_point::value, - int >::type = 0 > -inline T saturate_cast(U v) -{ - int32_t vi = utils::rounding::round_half_away_from_zero(v); - return saturate_cast(vi); -} - -// int -> float -template::value && - std::is_integral::value, - int >::type = 0 > -inline T saturate_cast(U v) -{ - return static_cast(v); -} - -// float -> float -template::value && - traits::is_floating_point::value, - int >::type = 0 > -inline T saturate_cast(U v) -{ - return static_cast(v); -} -// clang-format on -// *INDENT-ON* -} // namespace cast -} // namespace utils -} // namespace arm_compute -#endif /* ARM_COMPUTE_UTILS_CAST_SATURATE_CAST_H */ -- cgit v1.2.1