diff options
Diffstat (limited to 'arm_compute/core')
71 files changed, 4375 insertions, 2829 deletions
diff --git a/arm_compute/core/CL/CLCompileContext.h b/arm_compute/core/CL/CLCompileContext.h index 46a8c9b341..dcd3b45670 100644 --- a/arm_compute/core/CL/CLCompileContext.h +++ b/arm_compute/core/CL/CLCompileContext.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021 Arm Limited. + * Copyright (c) 2020-2022 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -77,6 +77,8 @@ public: */ const StringSet &options() const; + bool operator==(const CLBuildOptions &other) const; + private: StringSet _build_opts; /**< Build options set */ }; @@ -248,8 +250,12 @@ public: * * @return The created kernel. */ - Kernel create_kernel(const std::string &kernel_name, const std::string &program_name, const std::string &program_source, - const std::string &kernel_path, const StringSet &build_options_set, bool is_binary) const; + Kernel create_kernel(const std::string &kernel_name, + const std::string &program_name, + const std::string &program_source, + const std::string &kernel_path, + const StringSet &build_options_set, + bool is_binary) const; /** Clear the library's cache of binary programs */ @@ -302,6 +308,18 @@ public: */ bool is_wbsm_supported() const; + /** Return the DDK version. If the DDK version cannot be detected, return -1. + * + * @return The DDK version. + */ + int32_t get_ddk_version() const; + + /** Return the Gpu target of the associated device + * + * @return GPUTarget + */ + GPUTarget get_gpu_target() const; + private: /** Load program and its dependencies. * @@ -309,7 +327,8 @@ private: * @param[in] program_source Source of the program. * @param[in] is_binary Flag to indicate if the program source is binary. 
*/ - const Program &load_program(const std::string &program_name, const std::string &program_source, bool is_binary) const; + const Program & + load_program(const std::string &program_name, const std::string &program_source, bool is_binary) const; /** Generates the build options given a string of user defined ones * @@ -329,11 +348,11 @@ private: */ std::string stringify_set(const StringSet &s, const std::string &kernel_path) const; - cl::Context _context; /**< Underlying CL context. */ - CLDevice _device; /**< Underlying CL device. */ + cl::Context _context; /**< Underlying CL context. */ + CLDevice _device; /**< Underlying CL device. */ mutable std::map<std::string, const Program> _programs_map; /**< Map with all already loaded program data. */ mutable std::map<std::string, cl::Program> _built_programs_map; /**< Map with all already built program data. */ - bool _is_wbsm_supported; /**< Support of worksize batch size modifier support boolean*/ + bool _is_wbsm_supported; /**< Support of worksize batch size modifier support boolean*/ }; } // namespace arm_compute #endif /* ARM_COMPUTE_CLCOMPILECONTEXT_H */ diff --git a/arm_compute/core/CL/CLDevice.h b/arm_compute/core/CL/CLDevice.h index 06aaac88f4..ded6bb8493 100644 --- a/arm_compute/core/CL/CLDevice.h +++ b/arm_compute/core/CL/CLDevice.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021 Arm Limited. + * Copyright (c) 2020-2022 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -44,8 +44,7 @@ class CLDevice : public IDevice { public: /** Default Constructor */ - CLDevice() - : _device(cl::Device()), _options() + CLDevice() : _device(cl::Device()), _options() { } @@ -53,8 +52,7 @@ public: * * @param[in] cl_device OpenCL device */ - CLDevice(const cl::Device &cl_device) - : _device(), _options() + CLDevice(const cl::Device &cl_device) : _device(), _options() { _device = cl_device; @@ -66,13 +64,13 @@ public: std::string extensions = _device.getInfo<CL_DEVICE_EXTENSIONS>(); std::istringstream iss(extensions); - for(std::string s; iss >> s;) + for (std::string s; iss >> s;) { _options.extensions.insert(s); } // SW workaround for G76 - if(_options.gpu_target == GPUTarget::G76) + if (_options.gpu_target == GPUTarget::G76) { _options.extensions.insert("cl_arm_integer_dot_product_int8"); } @@ -143,6 +141,32 @@ public: return _options.extensions.count(extension) != 0; } + /** Returns whether non-uniform workgroup is supported and the build options. + * + * If the feature is supported, the appropriate build options will be + * appended to the specified string. + * + * @return A tuple (supported, build_options) indicating whether the feature + * is supported and the corresponding build options to enable it. + */ + std::tuple<bool, std::string> is_non_uniform_workgroup_supported() const + { + if (version() == CLVersion::CL30 && get_cl_non_uniform_work_group_supported(_device)) + { + return {true, " -cl-std=CL3.0 "}; + } + else if (version() == CLVersion::CL20) + { + return {true, " -cl-std=CL2.0 "}; + } + else if (supported("cl_arm_non_uniform_work_group_size")) + { + return {true, " -cl-arm-non-uniform-work-group-size "}; + } + + return {false, ""}; + } + private: cl::Device _device; /**< OpenCL device. 
*/ struct CLDeviceOptions _options; /**< OpenCL device options */ diff --git a/arm_compute/core/CL/CLHelpers.h b/arm_compute/core/CL/CLHelpers.h index a9ac6a5933..1a639e47f9 100644 --- a/arm_compute/core/CL/CLHelpers.h +++ b/arm_compute/core/CL/CLHelpers.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2021 Arm Limited. + * Copyright (c) 2016-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -31,11 +31,6 @@ #include <set> #include <string> -/* CL Device capabilities */ -#define ARM_COMPUTE_LIBRARY_OPENCL_DEVICE_CAPABILITIES_ARM 0x41E4 -/* Workgroup Batch Size Modifier */ -#define ARM_COMPUTE_LIBRARY_OPENCL_EXEC_WBSM_ARM 0x41E6 - namespace arm_compute { class CLCompileContext; @@ -46,6 +41,9 @@ enum class DataType; /** Max vector width of an OpenCL vector */ static constexpr unsigned int max_cl_vector_width = 16; +/** Max number of manual loop unrolling */ +static constexpr int max_manual_loop_unrolling = 128; + /** Translates a tensor data type to the appropriate OpenCL type. * * @param[in] dt @ref DataType to be translated to OpenCL type. 
@@ -126,6 +124,14 @@ CLVersion get_cl_version(const cl::Device &device); */ size_t get_cl_image_pitch_alignment(const cl::Device &device); +/** Helper function to check whether non-uniform work group is supported + * + * @param[in] device A CL device + * + * @return True if the feature is supported + */ +bool get_cl_non_uniform_work_group_supported(const cl::Device &device); + /** Helper function to check whether a given extension is supported * * @param[in] device A CL device @@ -173,7 +179,9 @@ bool dot8_acc_supported(const cl::Device &device); * * @return True if the configuration is supported */ -bool cl_winograd_convolution_layer_supported(const Size2D &output_tile, const Size2D &kernel_size, DataLayout data_layout); +bool cl_winograd_convolution_layer_supported(const Size2D &output_tile, + const Size2D &kernel_size, + DataLayout data_layout); /** Helper function to get the preferred native vector width size for built-in scalar types that can be put into vectors * @@ -209,7 +217,9 @@ bool image2d_from_buffer_supported(const cl::Device &device); * * @return An opencl kernel */ -cl::Kernel create_kernel(const CLCompileContext &ctx, const std::string &kernel_name, const std::set<std::string> &build_opts = std::set<std::string>()); +cl::Kernel create_kernel(const CLCompileContext &ctx, + const std::string &kernel_name, + const std::set<std::string> &build_opts = std::set<std::string>()); /** Creates a suitable LWS hint object for parallel implementations. Sets the number of WG based on the input size. * If input width is smaller than 128 we can use fewer threads than 8. 
@@ -236,5 +246,47 @@ bool get_wbsm_support_info(const cl::Device &device); */ void set_wbsm(cl::Kernel &kernel, cl_int wbsm_hint); +/* Helper function to check if we can export the tensor to cl_image + * + * @param[in] input tensor + * + * @return true if we can export the tensor to cl_image + */ +bool export_to_cl_image(const ITensorInfo *tensor); + +/* Helper function to force unroll with pragma when any of the input values (iterations) are greater than @ref max_manual_loop_unrolling + * + * This function passes UNROLL_WITH_PRAGMA at compile time when any of the input values are greater than @ref max_manual_loop_unrolling + * + * @param[in] built_opts OpenCL kernel build options + * @param[in] values Input values (iterations) + * + */ +void set_unroll_with_pragma(CLBuildOptions &built_opts, std::initializer_list<int> values); + +/** Helper function to check whether the cl_arm_matrix_multiply extension is supported + * + * @param[in] device A CL device + * + * @return True if the extension is supported + */ +bool arm_matrix_multiply_supported(const cl::Device &device); + +/** Check whether cl_khr_command_buffer extension is supported by the specified CL device. + * + * @param[in] device The CL device + * + * @return True if the extension is supported by the CL device. + */ +bool command_buffer_supported(const cl::Device &device); + +/** Check whether cl_khr_command_buffer_mutable_dispatch extension is supported by the specified CL device. + * + * @param[in] device The CL device + * + * @return True if the extension is supported by the CL device. + */ +bool command_buffer_mutable_dispatch_supported(const cl::Device &device); + } // namespace arm_compute #endif /* ARM_COMPUTE_CLHELPERS_H */ diff --git a/arm_compute/core/CL/CLTypes.h b/arm_compute/core/CL/CLTypes.h index 2ba2e8dd62..0f088e2b10 100644 --- a/arm_compute/core/CL/CLTypes.h +++ b/arm_compute/core/CL/CLTypes.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021 Arm Limited. 
+ * Copyright (c) 2017-2022 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -41,7 +41,8 @@ enum class CLVersion CL10, /* the OpenCL 1.0 */ CL11, /* the OpenCL 1.1 */ CL12, /* the OpenCL 1.2 */ - CL20, /* the OpenCL 2.0 and above */ + CL20, /* the OpenCL 2.x */ + CL30, /* the OpenCL 3.x */ UNKNOWN /* unkown version */ }; @@ -62,18 +63,27 @@ struct CLDeviceOptions struct CLQuantization { /** Default Constructor */ - CLQuantization() - : scale(nullptr), offset(nullptr) {}; + CLQuantization() : scale(nullptr), offset(nullptr){}; /** Constructor * * @param[in] scale OpenCL scale array * @param[in] offset OpenCL offset array */ - CLQuantization(const ICLFloatArray *scale, const ICLInt32Array *offset) - : scale(scale), offset(offset) {}; + CLQuantization(const ICLFloatArray *scale, const ICLInt32Array *offset) : scale(scale), offset(offset){}; const ICLFloatArray *scale; /**< Quantization scale array */ const ICLInt32Array *offset; /**< Quantization offset array */ }; + +enum CLKernelType +{ + UNKNOWN, /**< Unknown CL kernel type */ + DEPTHWISE, /**< Depthwise CL kernel type */ + DIRECT, /**< Direct Convolution CL kernel type */ + ELEMENTWISE, /**< Elementwise CL kernel type */ + GEMM, /**< GEMM CL kernel type */ + POOL, /**< Pool CL kernel type */ + WINOGRAD /**< Winograd CL kernel type */ +}; } // namespace arm_compute #endif /* ARM_COMPUTE_CL_TYPES_H */ diff --git a/arm_compute/core/CL/ICLArray.h b/arm_compute/core/CL/ICLArray.h index b900117724..a2b2baa5b3 100644 --- a/arm_compute/core/CL/ICLArray.h +++ b/arm_compute/core/CL/ICLArray.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2021 Arm Limited. + * Copyright (c) 2016-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -40,8 +40,7 @@ public: * @param[in] max_num_values Maximum size of the array. 
* */ - explicit ICLArray(size_t max_num_values) - : IArray<T>(max_num_values), _mapping(nullptr) + explicit ICLArray(size_t max_num_values) : IArray<T>(max_num_values), _mapping(nullptr) { } @@ -66,8 +65,6 @@ public: * @param[in] blocking If true, then the mapping will be ready to use by the time * this method returns, else it is the caller's responsibility * to flush the queue and wait for the mapping operation to have completed before using the returned mapping pointer. - * - * @return The mapping address. */ void map(cl::CommandQueue &q, bool blocking = true) { @@ -127,5 +124,5 @@ using ICLInt16Array = ICLArray<cl_short>; using ICLInt32Array = ICLArray<cl_int>; /** Interface for OpenCL Array of floats. */ using ICLFloatArray = ICLArray<cl_float>; -} +} // namespace arm_compute #endif /*ARM_COMPUTE_ICLARRAY_H*/ diff --git a/arm_compute/core/CL/ICLTensor.h b/arm_compute/core/CL/ICLTensor.h index fd05e64732..8de5423762 100644 --- a/arm_compute/core/CL/ICLTensor.h +++ b/arm_compute/core/CL/ICLTensor.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2019, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,9 +24,8 @@ #ifndef ARM_COMPUTE_ICLTENSOR_H #define ARM_COMPUTE_ICLTENSOR_H -#include "arm_compute/core/ITensor.h" - #include "arm_compute/core/CL/CLTypes.h" +#include "arm_compute/core/ITensor.h" #include <cstdint> @@ -34,7 +33,7 @@ namespace cl { class Buffer; class CommandQueue; -} +} // namespace cl namespace arm_compute { @@ -71,8 +70,6 @@ public: * @param[in] blocking If true, then the mapping will be ready to use by the time * this method returns, else it is the caller's responsibility * to flush the queue and wait for the mapping operation to have completed before using the returned mapping pointer. - * - * @return The mapping address. */ void map(cl::CommandQueue &q, bool blocking = true); /** Enqueue an unmap operation of the allocated and mapped buffer on the given queue. 
@@ -115,5 +112,5 @@ private: }; using ICLImage = ICLTensor; -} +} // namespace arm_compute #endif /*ARM_COMPUTE_ICLTENSOR_H */ diff --git a/arm_compute/core/CL/OpenCL.h b/arm_compute/core/CL/OpenCL.h index bbe469f1a8..8b5bf97099 100644 --- a/arm_compute/core/CL/OpenCL.h +++ b/arm_compute/core/CL/OpenCL.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2021 Arm Limited. + * Copyright (c) 2016-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_OPENCL_H -#define ARM_COMPUTE_OPENCL_H +#ifndef ACL_ARM_COMPUTE_CORE_CL_OPENCL_H +#define ACL_ARM_COMPUTE_CORE_CL_OPENCL_H #include <string> #include <utility> @@ -31,8 +31,8 @@ #ifndef ARM_COMPUTE_NO_EXCEPTIONS #define CL_HPP_ENABLE_EXCEPTIONS #endif // ARM_COMPUTE_NO_EXCEPTIONS -#define CL_TARGET_OPENCL_VERSION 200 -#define CL_HPP_TARGET_OPENCL_VERSION 110 +#define CL_TARGET_OPENCL_VERSION 300 +#define CL_HPP_TARGET_OPENCL_VERSION 110 #define CL_HPP_MINIMUM_OPENCL_VERSION 110 #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Weffc++" @@ -40,8 +40,8 @@ #pragma GCC diagnostic ignored "-Wunused-parameter" #if defined(__GNUG__) && __GNUG__ >= 8 #pragma GCC diagnostic ignored "-Wcatch-value" -#endif // defined(__GNUG__) && __GNUG__ >= 8 -#include <CL/cl2.hpp> +#endif // defined(__GNUG__) && __GNUG__ >= 8 +#include <CL/opencl.hpp> // include new hpp header instead of cl2.hpp #pragma GCC diagnostic pop namespace cl @@ -73,21 +73,22 @@ public: * @return The static instance of CLSymbols. */ static CLSymbols &get(); - /** Load symbols from the given OpenCL library path. + /** This method attempts to load the OpenCL symbols from the first available library from the provided OpenCL libraries. * - * @param[in] library Path to the OpenCL library. + * @param[in] libraries_filenames Vector containing the filenames of the libraries to be loaded. 
+ * @param[in] use_loader Use symbol loader function loadOpenCLPointer. * - * @return True if loading the library is successful. + * @return True if loading the library is successful. False if all the provided libraries could not be loaded. */ - bool load(const std::string &library); + bool load(const std::vector<std::string> &libraries_filenames, bool use_loader = false); /** Load symbols from any of the default OpenCL library names. + * If all the default libraries could not be loaded, this method will print a warning message and return false. * * @return True if loading any library is successful. */ bool load_default(); -#define DECLARE_FUNCTION_PTR(func_name) \ - std::function<decltype(func_name)> func_name##_ptr = nullptr +#define DECLARE_FUNCTION_PTR(func_name) std::function<decltype(func_name)> func_name##_ptr = nullptr DECLARE_FUNCTION_PTR(clCreateContext); DECLARE_FUNCTION_PTR(clCreateContextFromType); @@ -138,6 +139,17 @@ public: DECLARE_FUNCTION_PTR(clWaitForEvents); DECLARE_FUNCTION_PTR(clCreateImage); DECLARE_FUNCTION_PTR(clSetKernelExecInfo); + DECLARE_FUNCTION_PTR(clGetExtensionFunctionAddressForPlatform); + + // Command buffer and mutable dispatch command buffer extensions + DECLARE_FUNCTION_PTR(clCreateCommandBufferKHR); + DECLARE_FUNCTION_PTR(clRetainCommandBufferKHR); + DECLARE_FUNCTION_PTR(clReleaseCommandBufferKHR); + DECLARE_FUNCTION_PTR(clFinalizeCommandBufferKHR); + DECLARE_FUNCTION_PTR(clEnqueueCommandBufferKHR); + DECLARE_FUNCTION_PTR(clCommandNDRangeKernelKHR); + + DECLARE_FUNCTION_PTR(clUpdateMutableCommandsKHR); // Third-party extensions DECLARE_FUNCTION_PTR(clImportMemoryARM); @@ -148,4 +160,4 @@ private: std::pair<bool, bool> _loaded; }; } // namespace arm_compute -#endif /* ARM_COMPUTE_OPENCL_H */ +#endif // ACL_ARM_COMPUTE_CORE_CL_OPENCL_H diff --git a/arm_compute/core/CPP/CPPTypes.h b/arm_compute/core/CPP/CPPTypes.h index 11891937d1..e5322bdcb1 100644 --- a/arm_compute/core/CPP/CPPTypes.h +++ b/arm_compute/core/CPP/CPPTypes.h @@ 
-1,5 +1,5 @@ /* - * Copyright (c) 2017-2021 Arm Limited. + * Copyright (c) 2017-2022, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_CPP_TYPES_H -#define ARM_COMPUTE_CPP_TYPES_H +#ifndef ACL_ARM_COMPUTE_CORE_CPP_CPPTYPES_H +#define ACL_ARM_COMPUTE_CORE_CPP_CPPTYPES_H #include "arm_compute/core/Error.h" @@ -30,17 +30,26 @@ namespace arm_compute { +namespace cpuinfo +{ +struct CpuIsaInfo; +} // namespace cpuinfo + #define ARM_COMPUTE_CPU_MODEL_LIST \ X(GENERIC) \ X(GENERIC_FP16) \ X(GENERIC_FP16_DOT) \ - X(A35) \ X(A53) \ X(A55r0) \ X(A55r1) \ + X(A35) \ X(A73) \ - X(KLEIN) \ - X(X1) + X(A76) \ + X(A510) \ + X(X1) \ + X(V1) \ + X(A64FX) \ + X(N1) /** CPU models types * @@ -56,39 +65,79 @@ enum class CPUModel class CPUInfo final { -public: - /** Constructor */ +protected: CPUInfo(); ~CPUInfo(); - /** Disable copy constructor and assignment operator to avoid copying the vector of CPUs each time - * CPUInfo is initialized once in the IScheduler and ThreadInfo will get a pointer to it. +public: + /** Access the KernelLibrary singleton. + * This method has been deprecated and will be removed in future releases + * @return The KernelLibrary instance. */ - CPUInfo &operator=(const CPUInfo &cpuinfo) = delete; - CPUInfo(const CPUInfo &cpuinfo) = delete; - CPUInfo &operator=(CPUInfo &&cpuinfo) = default; - CPUInfo(CPUInfo &&cpuinfo) = default; + static CPUInfo &get(); + + /* Delete move and copy constructors and assignment operator + s */ + CPUInfo(CPUInfo const &) = delete; // Copy construct + CPUInfo(CPUInfo &&) = delete; // Move construct + CPUInfo &operator=(CPUInfo const &) = delete; // Copy assign + CPUInfo &operator=(CPUInfo &&) = delete; // Move assign /** Checks if the cpu model supports fp16. 
* - * @return true of the cpu supports fp16, false otherwise + * @return true if the cpu supports fp16, false otherwise */ bool has_fp16() const; /** Checks if the cpu model supports bf16. * - * @return true of the cpu supports bf16, false otherwise + * @return true if the cpu supports bf16, false otherwise */ bool has_bf16() const; + /** Checks if the cpu model supports bf16. + * + * @return true if the cpu supports bf16, false otherwise + */ + bool has_svebf16() const; /** Checks if the cpu model supports dot product. * - * @return true of the cpu supports dot product, false otherwise + * @return true if the cpu supports dot product, false otherwise */ bool has_dotprod() const; + /** Checks if the cpu model supports floating-point matrix multiplication. + * + * @return true if the cpu supports floating-point matrix multiplication, false otherwise + */ + bool has_svef32mm() const; + /** Checks if the cpu model supports integer matrix multiplication. + * + * @return true if the cpu supports integer matrix multiplication, false otherwise + */ + bool has_i8mm() const; + /** Checks if the cpu model supports integer matrix multiplication. + * + * @return true if the cpu supports integer matrix multiplication, false otherwise + */ + bool has_svei8mm() const; /** Checks if the cpu model supports sve. * - * @return true of the cpu supports sve, false otherwise + * @return true if the cpu supports sve, false otherwise */ bool has_sve() const; + /** Checks if the cpu model supports sve2. + * + * @return true if the cpu supports sve2, false otherwise + */ + bool has_sve2() const; + /** Checks if the cpu model supports sme. + * + * @return true if the cpu supports sme, false otherwise + */ + bool has_sme() const; + /** Checks if the cpu model supports sme2. + * + * @return true if the cpu supports sme2, false otherwise + */ + bool has_sme2() const; /** Gets the cpu model for a given cpuid. 
* * @param[in] cpuid the id of the cpu core to be retrieved, @@ -101,6 +150,11 @@ public: * @return Current thread's @ref CPUModel */ CPUModel get_cpu_model() const; + /** Gets the current cpu's ISA information + * + * @return Current cpu's ISA information + */ + cpuinfo::CpuIsaInfo get_isa() const; /** Gets the L1 cache size * * @return the size of the L1 cache @@ -111,22 +165,29 @@ public: * @return the size of the L1 cache */ unsigned int get_L2_cache_size() const; - /** Set fp16 support + /** Return the maximum number of CPUs present + * + * @return Number of CPUs + */ + unsigned int get_cpu_num() const; + /** Return the maximum number of CPUs present excluding the little cores + * in case of an Android device * - * @param[in] fp16 whether the cpu supports fp16. + * @return Number of CPUs excluding little */ - void set_fp16(const bool fp16); - /** Set dot product support + unsigned int get_cpu_num_excluding_little() const; + /** Return whether the device has little, medium and big CPUs in case + * of an Android device, returns false otherwise * - * @param[in] dotprod whether the cpu supports dot product. + * @return Whether the device has little, medium and big CPUs */ - void set_dotprod(const bool dotprod); + bool cpu_has_little_mid_big() const; - /** Return the maximum number of CPUs present + /** Return the vector length in bytes for sme2 * - * @return Number of CPUs + * @return Vector length if sme2 is enabled, otherwise returns 0. */ - unsigned int get_cpu_num() const; + unsigned long get_sme2_vector_length() const; private: struct Impl; @@ -136,9 +197,9 @@ private: /** Information about executing thread and CPU. 
*/ struct ThreadInfo { - int thread_id{ 0 }; - int num_threads{ 1 }; - const CPUInfo *cpu_info{ nullptr }; + int thread_id{0}; + int num_threads{1}; + const CPUInfo *cpu_info{nullptr}; }; } // namespace arm_compute -#endif /* ARM_COMPUTE_CPP_TYPES_H */ +#endif // ACL_ARM_COMPUTE_CORE_CPP_CPPTYPES_H diff --git a/arm_compute/core/CPP/ICPPKernel.h b/arm_compute/core/CPP/ICPPKernel.h index ab369ffe1d..03967a536d 100644 --- a/arm_compute/core/CPP/ICPPKernel.h +++ b/arm_compute/core/CPP/ICPPKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020 Arm Limited. + * Copyright (c) 2016-2022 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -25,9 +25,9 @@ #define ARM_COMPUTE_ICPPKERNEL_H #include "arm_compute/core/CPP/CPPTypes.h" +#include "arm_compute/core/experimental/Types.h" #include "arm_compute/core/IKernel.h" #include "arm_compute/core/Types.h" -#include "arm_compute/core/experimental/Types.h" namespace arm_compute { @@ -38,6 +38,8 @@ class ITensor; class ICPPKernel : public IKernel { public: + static constexpr size_t default_mws = 1; /* Default minimum workload size value - no impact */ + /** Default destructor */ virtual ~ICPPKernel() = default; @@ -88,6 +90,20 @@ public: ARM_COMPUTE_UNUSED(tensors, window, info); } + /** Return minimum workload size of the relevant kernel + * + * @param[in] platform The CPU platform used to create the context. + * @param[in] thread_count Number of threads in the execution. + * + * @return Minimum workload size for requested configuration. + */ + virtual size_t get_mws(const CPUInfo &platform, size_t thread_count) const + { + ARM_COMPUTE_UNUSED(platform, thread_count); + + return default_mws; + } + /** Name of the kernel * * @return Kernel name diff --git a/arm_compute/core/CPP/ICPPSimpleKernel.h b/arm_compute/core/CPP/ICPPSimpleKernel.h deleted file mode 100644 index c31d487a45..0000000000 --- a/arm_compute/core/CPP/ICPPSimpleKernel.h +++ /dev/null @@ -1,76 +0,0 @@ -/* - * Copyright (c) 2017-2019 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_ICPPSIMPLEKERNEL_H -#define ARM_COMPUTE_ICPPSIMPLEKERNEL_H - -#include "arm_compute/core/CPP/ICPPKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** Interface for simple C++ kernels having 1 tensor input and 1 tensor output */ -class ICPPSimpleKernel : public ICPPKernel -{ -public: - /** Constructor */ - ICPPSimpleKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - ICPPSimpleKernel(const ICPPSimpleKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - ICPPSimpleKernel &operator=(const ICPPSimpleKernel &) = delete; - /** Allow instances of this class to be moved */ - ICPPSimpleKernel(ICPPSimpleKernel &&) = default; - /** Allow instances of this class to be moved */ - ICPPSimpleKernel &operator=(ICPPSimpleKernel &&) = default; - /** Default destructor */ - ~ICPPSimpleKernel() = default; - -protected: - /** Configure the kernel - * - * @param[in] input Source tensor. - * @param[out] output Destination tensor. - * @param[in] num_elems_processed_per_iteration Number of processed elements per iteration. - * @param[in] border_undefined (Optional) True if the border mode is undefined. False if it's replicate or constant. - * @param[in] border_size (Optional) Size of the border. - */ - void configure(const ITensor *input, ITensor *output, unsigned int num_elems_processed_per_iteration, bool border_undefined = false, const BorderSize &border_size = BorderSize()); - /** Static function to check if given info will lead to a valid configuration of @ref ICPPSimpleKernel. - * - * @param[in] input Source tensor info. - * @param[in] output Destination tensor info. - * @param[in] num_elems_processed_per_iteration Number of processed elements per iteration. - * @param[in] border_undefined (Optional) True if the border mode is undefined. False if it's replicate or constant. - * @param[in] border_size (Optional) Size of the border. 
- */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, unsigned int num_elems_processed_per_iteration, - bool border_undefined = false, const BorderSize &border_size = BorderSize()); - -protected: - const ITensor *_input; - ITensor *_output; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_ICPPSIMPLEKERNEL_H */ diff --git a/arm_compute/core/CPP/kernels/CPPBoxWithNonMaximaSuppressionLimitKernel.h b/arm_compute/core/CPP/kernels/CPPBoxWithNonMaximaSuppressionLimitKernel.h index 068b37d80c..dd91595ea6 100644 --- a/arm_compute/core/CPP/kernels/CPPBoxWithNonMaximaSuppressionLimitKernel.h +++ b/arm_compute/core/CPP/kernels/CPPBoxWithNonMaximaSuppressionLimitKernel.h @@ -63,8 +63,16 @@ public: * @param[out] keeps_size (Optional) Number of filtered indices per class tensor of size [num_classes]. Data types supported: U32 * @param[in] info (Optional) BoxNMSLimitInfo information. */ - void configure(const ITensor *scores_in, const ITensor *boxes_in, const ITensor *batch_splits_in, ITensor *scores_out, ITensor *boxes_out, ITensor *classes, - ITensor *batch_splits_out = nullptr, ITensor *keeps = nullptr, ITensor *keeps_size = nullptr, const BoxNMSLimitInfo info = BoxNMSLimitInfo()); + void configure(const ITensor *scores_in, + const ITensor *boxes_in, + const ITensor *batch_splits_in, + ITensor *scores_out, + ITensor *boxes_out, + ITensor *classes, + ITensor *batch_splits_out = nullptr, + ITensor *keeps = nullptr, + ITensor *keeps_size = nullptr, + const BoxNMSLimitInfo info = BoxNMSLimitInfo()); // Inherited methods overridden: void run(const Window &window, const ThreadInfo &info) override; @@ -74,9 +82,9 @@ public: void run_nmslimit(); private: - const ITensor *_scores_in; - const ITensor *_boxes_in; - const ITensor *_batch_splits_in; + const ITensor *_scores_in; + const ITensor *_boxes_in; + const ITensor *_batch_splits_in; ITensor *_scores_out; ITensor *_boxes_out; ITensor *_classes; diff --git 
a/arm_compute/core/CPP/kernels/CPPNonMaximumSuppressionKernel.h b/arm_compute/core/CPP/kernels/CPPNonMaximumSuppressionKernel.h index e32b5d8f7b..d1f7f8670f 100644 --- a/arm_compute/core/CPP/kernels/CPPNonMaximumSuppressionKernel.h +++ b/arm_compute/core/CPP/kernels/CPPNonMaximumSuppressionKernel.h @@ -24,9 +24,8 @@ #ifndef ARM_COMPUTE_CPP_NONMAXIMUMSUPPRESSIONKERNEL_LAYER_H #define ARM_COMPUTE_CPP_NONMAXIMUMSUPPRESSIONKERNEL_LAYER_H -#include "arm_compute/runtime/CPP/ICPPSimpleFunction.h" - #include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CPP/ICPPSimpleFunction.h" namespace arm_compute { @@ -65,7 +64,12 @@ public: * @param[in] iou_threshold The threshold used in non maximum suppression. * */ - void configure(const ITensor *input_bboxes, const ITensor *input_scores, ITensor *output_indices, unsigned int max_output_size, const float score_threshold, const float iou_threshold); + void configure(const ITensor *input_bboxes, + const ITensor *input_scores, + ITensor *output_indices, + unsigned int max_output_size, + const float score_threshold, + const float iou_threshold); /** Static function to check if given arguments will lead to a valid configuration of @ref CPPNonMaximumSuppressionKernel * @@ -77,8 +81,12 @@ public: * @param[in] iou_threshold The threshold used in non maximum suppression. 
* */ - static Status validate(const ITensorInfo *input_bboxes, const ITensorInfo *input_scores, const ITensorInfo *output_indices, unsigned int max_output_size, - const float score_threshold, const float iou_threshold); + static Status validate(const ITensorInfo *input_bboxes, + const ITensorInfo *input_scores, + const ITensorInfo *output_indices, + unsigned int max_output_size, + const float score_threshold, + const float iou_threshold); // Inherited methods overridden: void run(const Window &window, const ThreadInfo &info) override; diff --git a/arm_compute/core/CPP/kernels/CPPTopKVKernel.h b/arm_compute/core/CPP/kernels/CPPTopKVKernel.h index 1245dbc14c..7326a10e2f 100644 --- a/arm_compute/core/CPP/kernels/CPPTopKVKernel.h +++ b/arm_compute/core/CPP/kernels/CPPTopKVKernel.h @@ -69,7 +69,8 @@ public: * * @return a status */ - static Status validate(const ITensorInfo *predictions, const ITensorInfo *targets, ITensorInfo *output, const unsigned int k); + static Status + validate(const ITensorInfo *predictions, const ITensorInfo *targets, ITensorInfo *output, const unsigned int k); // Inherited methods overridden: void run(const Window &window, const ThreadInfo &info) override; diff --git a/arm_compute/core/Coordinates.h b/arm_compute/core/Coordinates.h index f6e1f4d282..d1240bb10a 100644 --- a/arm_compute/core/Coordinates.h +++ b/arm_compute/core/Coordinates.h @@ -42,8 +42,7 @@ public: * @param[in] coords Values to initialize the dimensions. */ template <typename... Ts> - constexpr Coordinates(Ts... coords) - : Dimensions{ coords... } + constexpr Coordinates(Ts... 
coords) : Dimensions{coords...} { } /** Allow instances of this class to be copy constructed */ @@ -57,5 +56,5 @@ public: /** Default destructor */ ~Coordinates() = default; }; -} +} // namespace arm_compute #endif /*ARM_COMPUTE_COORDINATES_H*/ diff --git a/arm_compute/core/CoreTypes.h b/arm_compute/core/CoreTypes.h new file mode 100644 index 0000000000..1a9db1937c --- /dev/null +++ b/arm_compute/core/CoreTypes.h @@ -0,0 +1,352 @@ +/* + * Copyright (c) 2016-2023 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ACL_ARM_COMPUTE_CORE_CORETYPES +#define ACL_ARM_COMPUTE_CORE_CORETYPES + +#include "arm_compute/core/Strides.h" + +#include "support/Half.h" + +/** CoreTypes.h groups together essential small types that are used across functions */ + +namespace arm_compute +{ +/** 16-bit floating point type */ +using half = half_float::half; +/** Permutation vector */ +using PermutationVector = Strides; + +/** Available channels */ +enum class Channel +{ + UNKNOWN, /** Unknown channel format */ + C0, /**< First channel (used by formats with unknown channel types). */ + C1, /**< Second channel (used by formats with unknown channel types). */ + C2, /**< Third channel (used by formats with unknown channel types). */ + C3, /**< Fourth channel (used by formats with unknown channel types). */ + R, /**< Red channel. */ + G, /**< Green channel. */ + B, /**< Blue channel. */ + A, /**< Alpha channel. */ + Y, /**< Luma channel. */ + U, /**< Cb/U channel. */ + V /**< Cr/V/Value channel. */ +}; + +/** Image colour formats */ +enum class Format +{ + UNKNOWN, /**< Unknown image format */ + U8, /**< 1 channel, 1 U8 per channel */ + S16, /**< 1 channel, 1 S16 per channel */ + U16, /**< 1 channel, 1 U16 per channel */ + S32, /**< 1 channel, 1 S32 per channel */ + U32, /**< 1 channel, 1 U32 per channel */ + S64, /**< 1 channel, 1 S64 per channel */ + U64, /**< 1 channel, 1 U64 per channel */ + BFLOAT16, /**< 16-bit brain floating-point number */ + F16, /**< 1 channel, 1 F16 per channel */ + F32, /**< 1 channel, 1 F32 per channel */ + UV88, /**< 2 channel, 1 U8 per channel */ + RGB888, /**< 3 channels, 1 U8 per channel */ + RGBA8888, /**< 4 channels, 1 U8 per channel */ + YUV444, /**< A 3 plane of 8 bit 4:4:4 sampled Y, U, V planes */ + YUYV422, /**< A single plane of 32-bit macro pixel of Y0, U0, Y1, V0 bytes */ + NV12, /**< A 2 plane YUV format of Luma (Y) and interleaved UV data at 4:2:0 sampling */ + NV21, /**< A 2 plane YUV format of Luma (Y) and interleaved VU data at 4:2:0 
sampling */ + IYUV, /**< A 3 plane of 8-bit 4:2:0 sampled Y, U, V planes */ + UYVY422 /**< A single plane of 32-bit macro pixel of U0, Y0, V0, Y1 byte */ +}; + +/** Available data types */ +enum class DataType +{ + UNKNOWN, /**< Unknown data type */ + U8, /**< unsigned 8-bit number */ + S8, /**< signed 8-bit number */ + QSYMM8, /**< quantized, symmetric fixed-point 8-bit number */ + QASYMM8, /**< quantized, asymmetric fixed-point 8-bit number unsigned */ + QASYMM8_SIGNED, /**< quantized, asymmetric fixed-point 8-bit number signed */ + QSYMM8_PER_CHANNEL, /**< quantized, symmetric per channel fixed-point 8-bit number */ + U16, /**< unsigned 16-bit number */ + S16, /**< signed 16-bit number */ + QSYMM16, /**< quantized, symmetric fixed-point 16-bit number */ + QASYMM16, /**< quantized, asymmetric fixed-point 16-bit number */ + U32, /**< unsigned 32-bit number */ + S32, /**< signed 32-bit number */ + U64, /**< unsigned 64-bit number */ + S64, /**< signed 64-bit number */ + BFLOAT16, /**< 16-bit brain floating-point number */ + F16, /**< 16-bit floating-point number */ + F32, /**< 32-bit floating-point number */ + F64, /**< 64-bit floating-point number */ + SIZET /**< size_t */ +}; + +/** [DataLayout enum definition] **/ + +/** Supported tensor data layouts */ +enum class DataLayout +{ + UNKNOWN, /**< Unknown data layout */ + NCHW, /**< Num samples, channels, height, width */ + NHWC, /**< Num samples, height, width, channels */ + NCDHW, /**< Num samples, channels, depth, height, width */ + NDHWC /**< Num samples, depth, height, width, channels */ +}; +/** [DataLayout enum definition] **/ + +/** Supported tensor data layout dimensions */ +enum class DataLayoutDimension +{ + CHANNEL, /**< channel */ + HEIGHT, /**< height */ + WIDTH, /**< width */ + DEPTH, /**< depth */ + BATCHES /**< batches */ +}; + +/** Dimension rounding type when down-scaling on CNNs + * @note Used in pooling and convolution layer + */ +enum class DimensionRoundingType +{ + FLOOR, /**< Floor rounding 
*/ + CEIL /**< Ceil rounding */ +}; + +class PadStrideInfo +{ +public: + /** Constructor + * + * @param[in] stride_x (Optional) Stride, in elements, across x. Defaults to 1. + * @param[in] stride_y (Optional) Stride, in elements, across y. Defaults to 1. + * @param[in] pad_x (Optional) Padding, in elements, across x. Defaults to 0. + * @param[in] pad_y (Optional) Padding, in elements, across y. Defaults to 0. + * @param[in] round (Optional) Dimensions rounding. Defaults to @ref DimensionRoundingType::FLOOR. + */ + PadStrideInfo(unsigned int stride_x = 1, + unsigned int stride_y = 1, + unsigned int pad_x = 0, + unsigned int pad_y = 0, + DimensionRoundingType round = DimensionRoundingType::FLOOR) + : _stride(std::make_pair(stride_x, stride_y)), + _pad_left(pad_x), + _pad_top(pad_y), + _pad_right(pad_x), + _pad_bottom(pad_y), + _round_type(round) + { + } + /** Constructor + * + * @param[in] stride_x Stride, in elements, across x. + * @param[in] stride_y Stride, in elements, across y. + * @param[in] pad_left Padding across x on the left, in elements. + * @param[in] pad_right Padding across x on the right, in elements. + * @param[in] pad_top Padding across y on the top, in elements. + * @param[in] pad_bottom Padding across y on the bottom, in elements. + * @param[in] round Dimensions rounding. + */ + PadStrideInfo(unsigned int stride_x, + unsigned int stride_y, + unsigned int pad_left, + unsigned int pad_right, + unsigned int pad_top, + unsigned int pad_bottom, + DimensionRoundingType round) + : _stride(std::make_pair(stride_x, stride_y)), + _pad_left(pad_left), + _pad_top(pad_top), + _pad_right(pad_right), + _pad_bottom(pad_bottom), + _round_type(round) + { + } + /** Get the stride. + * + * @return a pair: stride x, stride y. + */ + std::pair<unsigned int, unsigned int> stride() const + { + return _stride; + } + /** Check whether the padding is symmetric. + * + * @return True if the padding is symmetric. 
+ */ + bool padding_is_symmetric() const + { + return (_pad_left == _pad_right) && (_pad_top == _pad_bottom); + } + /** Get the padding. + * + * @note This should only be used when the padding is symmetric. + * + * @return a pair: padding left/right, padding top/bottom + */ + std::pair<unsigned int, unsigned int> pad() const + { + //this accessor should be used only when padding is symmetric + ARM_COMPUTE_ERROR_ON(!padding_is_symmetric()); + return std::make_pair(_pad_left, _pad_top); + } + + /** Get the left padding */ + unsigned int pad_left() const + { + return _pad_left; + } + /** Get the right padding */ + unsigned int pad_right() const + { + return _pad_right; + } + /** Get the top padding */ + unsigned int pad_top() const + { + return _pad_top; + } + /** Get the bottom padding */ + unsigned int pad_bottom() const + { + return _pad_bottom; + } + + /** Get the rounding type */ + DimensionRoundingType round() const + { + return _round_type; + } + + /** Check whether this has any padding */ + bool has_padding() const + { + return (_pad_left != 0 || _pad_top != 0 || _pad_right != 0 || _pad_bottom != 0); + } + +private: + std::pair<unsigned int, unsigned int> _stride; + unsigned int _pad_left; + unsigned int _pad_top; + unsigned int _pad_right; + unsigned int _pad_bottom; + + DimensionRoundingType _round_type; +}; + +/** Memory layouts for the weights tensor. + * + * * UNSPECIFIED is used to select kernels that do not run in + * variable weights mode. + * + * * ANY is used to query the kernel database to retrieve any of the + * kernels that runs in variable weights mode. Once a kernel is + * found, the specific format expected by the kernel can be + * retrieved by the user for reordering the weights tensor + * accordingly. 
+ * + * The other values OHWIo{interleave_by}i{block_by} describe the + * memory layout of a 4D tensor with layout OHWI that has been + * transformed into a 4D tensor with dimensions O'HWI' where: + * + * O' = first multiple of {interleave_by} s.t. O<=O' + * I' = first multiple of {block_by} s.t. I<=I' + * + * The total size of the dst tensor is O' x H x W x I' + * + * The access function of the tensor with layout + * OHWIo{interleave_by}i{block_by} and size O'HWI' is a 6-parameter + * access function, where the 6 parameters are computed as follows: + * + * x5 = floor(o/{interleave_by}) RANGE [0, O'/{interleave_by} -1] SIZE: O'/{interleave_by} + * + * x4 = h RANGE [0, H-1] SIZE: H + * x3 = w RANGE [0, W-1] SIZE: W + * x2 = floor(i/{block_by}) RANGE [0, I'/{block_by} -1] SIZE: I'/{block_by} + * x1 = o%{interleave_by} RANGE [0, {interleave_by} -1] SIZE: {interleave_by} + * x0 = i%{block_by} RANGE [0, {block_by} -1] SIZE: {block_by} + * TOTAL SIZE: O' * H * W * I' + * + * 4D 6D + * ----------------- ----------------------------------- + * value(o, h, w, i) = x5 * H * W * I' * {interleave_by} + * + x4 * W * I' * {interleave_by} + * + x3 * I' * {interleave_by} + * + x2 * {interleave_by} * {block_by} + * + x1 * {block_by} + * + x0 + * + * Notice that in arm_gemm the 4D tensor of dimension O'HWI' created + * for the OHWIo{interleave_by}i{block_by} format is in reality seen + * as a 2D tensor, where the number of rows is O'/{interleave_by} + * and the number of columns is {interleave_by} * H * W * I'. + * + * The postfix *_bf16 is for the memory layout needed for the + * fast-mode kernels, in which the weights are passed in bfloat16 + * format. 
+ */ +enum class WeightFormat +{ + UNSPECIFIED = 0x1, + ANY = 0x2, + OHWI = 0x100100, + OHWIo2 = 0x100200, + OHWIo4 = 0x100400, + OHWIo8 = 0x100800, + OHWIo16 = 0x101000, + OHWIo32 = 0x102000, + OHWIo64 = 0x104000, + OHWIo128 = 0x108000, + OHWIo4i2 = 0x200400, + OHWIo4i2_bf16 = 0x200410, + OHWIo8i2 = 0x200800, + OHWIo8i2_bf16 = 0x200810, + OHWIo16i2 = 0x201000, + OHWIo16i2_bf16 = 0x201010, + OHWIo32i2 = 0x202000, + OHWIo32i2_bf16 = 0x202010, + OHWIo64i2 = 0x204000, + OHWIo64i2_bf16 = 0x204010, + OHWIo4i4 = 0x400400, + OHWIo4i4_bf16 = 0x400410, + OHWIo8i4 = 0x400800, + OHWIo8i4_bf16 = 0x400810, + OHWIo16i4 = 0x401000, + OHWIo16i4_bf16 = 0x401010, + OHWIo32i4 = 0x402000, + OHWIo32i4_bf16 = 0x402010, + OHWIo64i4 = 0x404000, + OHWIo64i4_bf16 = 0x404010, + OHWIo2i8 = 0x800200, + OHWIo4i8 = 0x800400, + OHWIo8i8 = 0x800800, + OHWIo16i8 = 0x801000, + OHWIo32i8 = 0x802000, + OHWIo64i8 = 0x804000 +}; + +} // namespace arm_compute +#endif /* ACL_ARM_COMPUTE_CORE_CORETYPES */ diff --git a/arm_compute/core/Dimensions.h b/arm_compute/core/Dimensions.h index 2ebfcd7f83..bb8692d70a 100644 --- a/arm_compute/core/Dimensions.h +++ b/arm_compute/core/Dimensions.h @@ -50,8 +50,7 @@ public: * @param[in] dims Values to initialize the dimensions. */ template <typename... Ts> - explicit Dimensions(Ts... dims) - : _id{ { static_cast<T>(dims)... } }, _num_dimensions{ sizeof...(dims) } + explicit Dimensions(Ts... 
dims) : _id{{static_cast<T>(dims)...}}, _num_dimensions{sizeof...(dims)} { } @@ -78,7 +77,7 @@ public: ARM_COMPUTE_ERROR_ON(dimension >= num_max_dimensions); _id[dimension] = value; // Don't increase the number of dimensions if the new dimension is 1 - if(increase_dim_unit || value != 1) + if (increase_dim_unit || value != 1) { _num_dimensions = std::max(_num_dimensions, dimension + 1); } @@ -108,7 +107,7 @@ public: void increment(size_t dim, T step = 1) { ARM_COMPUTE_ERROR_ON(dim >= _num_dimensions); - if((std::numeric_limits<T>::max() - _id[dim]) >= step) + if ((std::numeric_limits<T>::max() - _id[dim]) >= step) { _id[dim] += step; } @@ -162,7 +161,7 @@ public: const size_t last = std::min(_num_dimensions, first + n); - if(last > (first + 1)) + if (last > (first + 1)) { // Collapse dimensions into the first _id[first] = std::accumulate(&_id[first], &_id[last], 1, std::multiplies<T>()); @@ -196,7 +195,7 @@ public: void remove(size_t idx) { ARM_COMPUTE_ERROR_ON(_num_dimensions < 1); - if(idx >= _num_dimensions) + if (idx >= _num_dimensions) { return; } @@ -262,7 +261,7 @@ protected: ~Dimensions() = default; std::array<T, num_max_dimensions> _id; - size_t _num_dimensions{ 0 }; + size_t _num_dimensions{0}; }; /** Check that given dimensions are equal. @@ -289,5 +288,5 @@ inline bool operator!=(const Dimensions<T> &lhs, const Dimensions<T> &rhs) { return !(lhs == rhs); } -} +} // namespace arm_compute #endif /*ARM_COMPUTE_DIMENSIONS_H*/ diff --git a/arm_compute/core/Error.h b/arm_compute/core/Error.h index 992d6bc71f..7a7033805a 100644 --- a/arm_compute/core/Error.h +++ b/arm_compute/core/Error.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2019, 2021, 2023 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -53,8 +53,7 @@ class Status { public: /** Default Constructor **/ - Status() - : _code(ErrorCode::OK), _error_description(" ") + Status() : _code(ErrorCode::OK), _error_description(" ") { } /** Default Constructor @@ -101,7 +100,7 @@ public: /** Throws a runtime exception in case it contains a valid error status */ void throw_if_error() const { - if(!bool(*this)) + if (!bool(*this)) { internal_throw_on_error(); } @@ -119,7 +118,7 @@ private: /** Creates an error containing the error message * * @param[in] error_code Error code - * @param[in] msg Message to display before aborting. + * @param[in] msg Message to display before abandoning. * * @return status containing the error */ @@ -131,7 +130,7 @@ Status create_error(ErrorCode error_code, std::string msg); * @param[in] func Function in which the error occurred. * @param[in] file File in which the error occurred. * @param[in] line Line in which the error occurred. - * @param[in] msg Message to display before aborting. + * @param[in] msg Message to display before abandoning. * * @return status containing the error */ @@ -141,7 +140,7 @@ Status create_error_msg(ErrorCode error_code, const char *func, const char *file * @param[in] err Error status */ [[noreturn]] void throw_error(Status err); -} +} // namespace arm_compute /** To avoid unused variables warnings * * This is useful if for example a variable is only used @@ -156,7 +155,8 @@ Status create_error_msg(ErrorCode error_code, const char *func, const char *file * @param[in] error_code Error code. * @param[in] msg Message to encapsulate. 
*/ -#define ARM_COMPUTE_CREATE_ERROR(error_code, msg) arm_compute::create_error_msg(error_code, __func__, __FILE__, __LINE__, msg) +#define ARM_COMPUTE_CREATE_ERROR(error_code, msg) \ + arm_compute::create_error_msg(error_code, __func__, __FILE__, __LINE__, msg) /** Creates an error on location with a given message * @@ -164,9 +164,10 @@ Status create_error_msg(ErrorCode error_code, const char *func, const char *file * @param[in] func Function in which the error occurred. * @param[in] file File in which the error occurred. * @param[in] line Line in which the error occurred. - * @param[in] msg Message to display before aborting. + * @param[in] msg Message to display before abandoning. */ -#define ARM_COMPUTE_CREATE_ERROR_LOC(error_code, func, file, line, msg) arm_compute::create_error_msg(error_code, func, file, line, msg) +#define ARM_COMPUTE_CREATE_ERROR_LOC(error_code, func, file, line, msg) \ + arm_compute::create_error_msg(error_code, func, file, line, msg) /** Creates an error on location with a given message. Accepts a message format * and a variable list of arguments matching the format description. @@ -178,14 +179,14 @@ Status create_error_msg(ErrorCode error_code, const char *func, const char *file * @param[in] msg Error description message format. * @param[in] ... List of arguments matching the format description. */ -#define ARM_COMPUTE_CREATE_ERROR_LOC_VAR(error_code, func, file, line, msg, ...) \ - do \ - { \ - std::array<char, 512> out{ 0 }; \ - int offset = snprintf(out.data(), out.size(), "in %s %s:%d: ", func, file, line); \ - snprintf(out.data() + offset, out.size() - offset, msg, __VA_ARGS__); \ - arm_compute::create_error(error_code, std::string(out.data())); \ - } while(false) +#define ARM_COMPUTE_CREATE_ERROR_LOC_VAR(error_code, func, file, line, msg, ...) 
\ + do \ + { \ + std::array<char, 512> out{0}; \ + int offset = snprintf(out.data(), out.size(), "in %s %s:%d: ", func, file, line); \ + snprintf(out.data() + offset, out.size() - offset, msg, __VA_ARGS__); \ + arm_compute::create_error(error_code, std::string(out.data())); \ + } while (false) /** An error is returned with the given description. * @@ -195,7 +196,7 @@ Status create_error_msg(ErrorCode error_code, const char *func, const char *file do \ { \ return ARM_COMPUTE_CREATE_ERROR(arm_compute::ErrorCode::RUNTIME_ERROR, __VA_ARGS__); \ - } while(false) + } while (false) /** Checks if a status contains an error and returns it * @@ -204,18 +205,18 @@ Status create_error_msg(ErrorCode error_code, const char *func, const char *file #define ARM_COMPUTE_RETURN_ON_ERROR(status) \ do \ { \ - if(!bool(status)) \ + const auto s = status; \ + if (!bool(s)) \ { \ - return status; \ + return s; \ } \ - } while(false) + } while (false) /** Checks if an error value is valid if not throws an exception with the error * * @param[in] error Error value to check. */ -#define ARM_COMPUTE_THROW_ON_ERROR(error) \ - error.throw_if_error(); +#define ARM_COMPUTE_THROW_ON_ERROR(error) error.throw_if_error(); /** If the condition is true, an error is returned. Accepts a message format * and a variable list of arguments matching the format description. @@ -227,28 +228,29 @@ Status create_error_msg(ErrorCode error_code, const char *func, const char *file #define ARM_COMPUTE_RETURN_ERROR_ON_MSG_VAR(cond, msg, ...) 
\ do \ { \ - if(cond) \ + if (cond) \ { \ - std::array<char, 512> out{ 0 }; \ + std::array<char, 512> out{0}; \ int offset = snprintf(out.data(), out.size(), "in %s %s:%d: ", __func__, __FILE__, __LINE__); \ snprintf(out.data() + offset, out.size() - offset, msg, __VA_ARGS__); \ return arm_compute::create_error(arm_compute::ErrorCode::RUNTIME_ERROR, std::string(out.data())); \ } \ - } while(false) + } while (false) /** If the condition is true, an error is returned * * @param[in] cond Condition to evaluate. * @param[in] msg Error description message */ -#define ARM_COMPUTE_RETURN_ERROR_ON_MSG(cond, msg) \ - do \ - { \ - if(cond) \ - { \ - return arm_compute::create_error_msg(arm_compute::ErrorCode::RUNTIME_ERROR, __func__, __FILE__, __LINE__, msg); \ - } \ - } while(false) +#define ARM_COMPUTE_RETURN_ERROR_ON_MSG(cond, msg) \ + do \ + { \ + if (cond) \ + { \ + return arm_compute::create_error_msg(arm_compute::ErrorCode::RUNTIME_ERROR, __func__, __FILE__, __LINE__, \ + msg); \ + } \ + } while (false) /** If the condition is true, an error is thrown. Accepts a message format * and a variable list of arguments matching the format description. @@ -260,17 +262,17 @@ Status create_error_msg(ErrorCode error_code, const char *func, const char *file * @param[in] msg Error description message format. * @param[in] ... List of arguments matching the format description. */ -#define ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG_VAR(cond, func, file, line, msg, ...) \ - do \ - { \ - if(cond) \ - { \ - std::array<char, 512> out{ 0 }; \ - int offset = snprintf(out.data(), out.size(), "in %s %s:%d: ", func, file, line); \ - snprintf(out.data() + offset, out.size() - offset, msg, __VA_ARGS__); \ - return arm_compute::create_error(ErrorCode::RUNTIME_ERROR, std::string(out.data())); \ - } \ - } while(false) +#define ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG_VAR(cond, func, file, line, msg, ...) 
\ + do \ + { \ + if (cond) \ + { \ + std::array<char, 512> out{0}; \ + int offset = snprintf(out.data(), out.size(), "in %s %s:%d: ", func, file, line); \ + snprintf(out.data() + offset, out.size() - offset, msg, __VA_ARGS__); \ + return arm_compute::create_error(ErrorCode::RUNTIME_ERROR, std::string(out.data())); \ + } \ + } while (false) /** If the condition is true, an error is thrown. * @@ -283,18 +285,17 @@ Status create_error_msg(ErrorCode error_code, const char *func, const char *file #define ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG(cond, func, file, line, msg) \ do \ { \ - if(cond) \ + if (cond) \ { \ return arm_compute::create_error_msg(ErrorCode::RUNTIME_ERROR, func, file, line, msg); \ } \ - } while(false) + } while (false) /** If the condition is true, an error is returned * * @param[in] cond Condition to evaluate */ -#define ARM_COMPUTE_RETURN_ERROR_ON(cond) \ - ARM_COMPUTE_RETURN_ERROR_ON_MSG(cond, #cond) +#define ARM_COMPUTE_RETURN_ERROR_ON(cond) ARM_COMPUTE_RETURN_ERROR_ON_MSG(cond, #cond) /** If the condition is true, an error is returned * @@ -313,11 +314,12 @@ Status create_error_msg(ErrorCode error_code, const char *func, const char *file * @param[in] line Line in which the error occurred. * @param[in] msg Message to display. */ -#define ARM_COMPUTE_THROW_ERROR(func, file, line, msg) \ - do \ - { \ - arm_compute::throw_error(arm_compute::create_error_msg(arm_compute::ErrorCode::RUNTIME_ERROR, func, file, line, msg)); \ - } while(false) +#define ARM_COMPUTE_THROW_ERROR(func, file, line, msg) \ + do \ + { \ + arm_compute::throw_error( \ + arm_compute::create_error_msg(arm_compute::ErrorCode::RUNTIME_ERROR, func, file, line, msg)); \ + } while (false) /** Print the given message then throw an std::runtime_error. Accepts a message format * and a variable list of arguments matching the format description. 
@@ -331,11 +333,11 @@ Status create_error_msg(ErrorCode error_code, const char *func, const char *file #define ARM_COMPUTE_THROW_ERROR_VAR(func, file, line, msg, ...) \ do \ { \ - std::array<char, 512> out{ 0 }; \ - int offset = snprintf(out.data(), out.size(), "in %s %s:%d: ", func, file, line); \ + std::array<char, 512> out{0}; \ + int offset = snprintf(out.data(), out.size(), "in %s %s:%d: ", func, file, line); \ snprintf(out.data() + offset, out.size() - offset, msg, __VA_ARGS__); \ arm_compute::throw_error(arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR, std::string(out.data()))); \ - } while(false) + } while (false) /** Print the given message then throw an std::runtime_error. Accepts a message format * and a variable list of arguments matching the format description. @@ -360,7 +362,8 @@ Status create_error_msg(ErrorCode error_code, const char *func, const char *file * @param[in] msg Error description message format. * @param[in] ... List of arguments matching the format description. */ -#define ARM_COMPUTE_ERROR_LOC_VAR(func, file, line, msg, ...) ARM_COMPUTE_THROW_ERROR_VAR(func, file, line, msg, __VA_ARGS__) // NOLINT +#define ARM_COMPUTE_ERROR_LOC_VAR(func, file, line, msg, ...) \ + ARM_COMPUTE_THROW_ERROR_VAR(func, file, line, msg, __VA_ARGS__) // NOLINT /** Print the given message then throw an std::runtime_error. * @@ -379,11 +382,11 @@ Status create_error_msg(ErrorCode error_code, const char *func, const char *file #define ARM_COMPUTE_EXIT_ON_MSG(cond, msg) \ do \ { \ - if(cond) \ + if (cond) \ { \ ARM_COMPUTE_ERROR(msg); \ } \ - } while(false) + } while (false) /** If the condition is true, the given message is printed and program exits. Accepts a message format * and a variable list of arguments matching the format description. @@ -395,27 +398,25 @@ Status create_error_msg(ErrorCode error_code, const char *func, const char *file #define ARM_COMPUTE_EXIT_ON_MSG_VAR(cond, msg, ...) 
\ do \ { \ - if(cond) \ + if (cond) \ { \ ARM_COMPUTE_ERROR_VAR(msg, __VA_ARGS__); \ } \ - } while(false) + } while (false) #ifdef ARM_COMPUTE_ASSERTS_ENABLED /** Checks if a status value is valid if not throws an exception with the error * * @param[in] status Status value to check. */ -#define ARM_COMPUTE_ERROR_THROW_ON(status) \ - status.throw_if_error() +#define ARM_COMPUTE_ERROR_THROW_ON(status) status.throw_if_error() /** If the condition is true, the given message is printed and an exception is thrown * * @param[in] cond Condition to evaluate. * @param[in] msg Message to display. */ -#define ARM_COMPUTE_ERROR_ON_MSG(cond, msg) \ - ARM_COMPUTE_EXIT_ON_MSG(cond, msg) +#define ARM_COMPUTE_ERROR_ON_MSG(cond, msg) ARM_COMPUTE_EXIT_ON_MSG(cond, msg) /** If the condition is true, the given message is printed and an exception is thrown. Accepts a message format * and a variable list of arguments matching the format description. @@ -424,8 +425,7 @@ Status create_error_msg(ErrorCode error_code, const char *func, const char *file * @param[in] msg Error description message format. * @param[in] ... List of arguments matching the format description. */ -#define ARM_COMPUTE_ERROR_ON_MSG_VAR(cond, msg, ...) \ - ARM_COMPUTE_EXIT_ON_MSG_VAR(cond, msg, __VA_ARGS__) +#define ARM_COMPUTE_ERROR_ON_MSG_VAR(cond, msg, ...) ARM_COMPUTE_EXIT_ON_MSG_VAR(cond, msg, __VA_ARGS__) /** If the condition is true, the given message is printed and an exception is thrown. * @@ -438,11 +438,11 @@ Status create_error_msg(ErrorCode error_code, const char *func, const char *file #define ARM_COMPUTE_ERROR_ON_LOC_MSG(cond, func, file, line, ...) 
\ do \ { \ - if(cond) \ + if (cond) \ { \ ARM_COMPUTE_ERROR_LOC_VAR(func, file, line, __VA_ARGS__); \ } \ - } while(false) + } while (false) /** If the condition is true, the given message is printed and an exception is thrown, otherwise value is returned * @@ -463,8 +463,7 @@ Status create_error_msg(ErrorCode error_code, const char *func, const char *file * * @param[in] cond Condition to evaluate. */ -#define ARM_COMPUTE_ERROR_ON(cond) \ - ARM_COMPUTE_ERROR_ON_MSG(cond, #cond) +#define ARM_COMPUTE_ERROR_ON(cond) ARM_COMPUTE_ERROR_ON_MSG(cond, #cond) /** If the condition is true then an error message is printed and an exception thrown * diff --git a/arm_compute/core/GPUTarget.h b/arm_compute/core/GPUTarget.h index d9994b6cf0..b107a52d9f 100644 --- a/arm_compute/core/GPUTarget.h +++ b/arm_compute/core/GPUTarget.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 Arm Limited. + * Copyright (c) 2018-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#ifndef ARM_COMPUTE_GPUTARGET_H -#define ARM_COMPUTE_GPUTARGET_H +#ifndef ACL_ARM_COMPUTE_CORE_GPUTARGET_H +#define ACL_ARM_COMPUTE_CORE_GPUTARGET_H #include "support/Traits.h" @@ -33,25 +33,38 @@ namespace arm_compute /** Available GPU Targets */ enum class GPUTarget { - UNKNOWN = 0x101, - GPU_ARCH_MASK = 0xF00, - MIDGARD = 0x100, - BIFROST = 0x200, - VALHALL = 0x300, - T600 = 0x110, - T700 = 0x120, - T800 = 0x130, - G71 = 0x210, - G72 = 0x220, - G51 = 0x230, - G51BIG = 0x231, - G51LIT = 0x232, - G52 = 0x240, - G52LIT = 0x241, - G76 = 0x250, - G77 = 0x310, - G78 = 0x320, - TODX = 0x330, + UNKNOWN = 0x101, + GPU_ARCH_MASK = 0xF00, + GPU_GENERATION_MASK = 0x0F0, + MIDGARD = 0x100, + BIFROST = 0x200, + VALHALL = 0x300, + FIFTHGEN = 0X400, + T600 = 0x110, + T700 = 0x120, + T800 = 0x130, + G71 = 0x210, + G72 = 0x220, + G51 = 0x221, + G51BIG = 0x222, + G51LIT = 0x223, + G31 = 0x224, + G76 = 0x230, + G52 = 0x231, + G52LIT = 0x232, + G77 = 0x310, + G57 = 0x311, + G78 = 0x320, + G68 = 0x321, + G78AE = 0x330, + G710 = 0x340, + G610 = 0x341, + G510 = 0x342, + G310 = 0x343, + G715 = 0x350, + G615 = 0x351, + G720 = 0x410, + G620 = 0X411 }; /** Enable bitwise operations on GPUTarget enumerations */ @@ -104,4 +117,4 @@ inline bool gpu_target_is_in(GPUTarget target_to_check, GPUTarget target) return target_to_check == target; } } // namespace arm_compute -#endif /* ARM_COMPUTE_GPUTARGET_H */ +#endif // ACL_ARM_COMPUTE_CORE_GPUTARGET_H diff --git a/arm_compute/core/Helpers.h b/arm_compute/core/Helpers.h index b6635aba6d..960201510a 100644 --- a/arm_compute/core/Helpers.h +++ b/arm_compute/core/Helpers.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020 Arm Limited. + * Copyright (c) 2016-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -55,6 +55,16 @@ public: */ Iterator(const ITensor *tensor, const Window &window); + /** Create a container iterator for the tensor with the specified number of dimensions, stride, buffer pointer and window. 
+ * + * @param[in] num_dims The number of dimensions. + * @param[in] strides The strides in bytes. + * @param[in] buffer The data buffer. + * @param[in] offset The offset in bytes from the beginning of the buffer to the first element of the tensor. + * @param[in] window The window which will be used to iterate over the tensor. + */ + Iterator(size_t num_dims, const Strides &strides, uint8_t *buffer, size_t offset, const Window &window); + /** Increment the iterator along the specified dimension of the step value associated to the dimension. * * @warning It is the caller's responsibility to call increment(dimension+1) when reaching the end of a dimension, the iterator will not check for overflow. @@ -86,13 +96,22 @@ public: void reset(size_t dimension); private: + /** Initialize a container iterator for the tensor with the specified number of dimensions, stride, buffer pointer and window. + * + * @param[in] num_dims The number of dimensions. + * @param[in] strides The strides in bytes. + * @param[in] buffer The data buffer. + * @param[in] offset The offset in bytes from the beginning of the buffer to the first element of the tensor. + * @param[in] window The window which will be used to iterate over the tensor. + */ + void initialize(size_t num_dims, const Strides &strides, uint8_t *buffer, size_t offset, const Window &window); + uint8_t *_ptr; class Dimension { public: - constexpr Dimension() - : _dim_start(0), _stride(0) + constexpr Dimension() : _dim_start(0), _stride(0) { } @@ -112,7 +131,7 @@ private: * @param[in,out] iterators Tensor iterators which will be updated by this function before calling lambda_function. */ template <typename L, typename... Ts> -inline void execute_window_loop(const Window &w, L &&lambda_function, Ts &&... 
iterators); +inline void execute_window_loop(const Window &w, L &&lambda_function, Ts &&...iterators); /** Permutes given Dimensions according to a permutation vector * @@ -125,7 +144,7 @@ template <typename T> inline void permute(Dimensions<T> &dimensions, const PermutationVector &perm) { auto dimensions_copy = utility::make_array<Dimensions<T>::num_max_dimensions>(dimensions.begin(), dimensions.end()); - for(unsigned int i = 0; i < perm.num_dimensions(); ++i) + for (unsigned int i = 0; i < perm.num_dimensions(); ++i) { T dimension_val = (perm[i] < dimensions.num_dimensions()) ? dimensions_copy[perm[i]] : 0; dimensions.set(i, dimension_val); @@ -142,7 +161,7 @@ inline void permute(Dimensions<T> &dimensions, const PermutationVector &perm) inline void permute(TensorShape &shape, const PermutationVector &perm) { TensorShape shape_copy = shape; - for(unsigned int i = 0; i < perm.num_dimensions(); ++i) + for (unsigned int i = 0; i < perm.num_dimensions(); ++i) { size_t dimension_val = (perm[i] < shape.num_dimensions()) ? shape_copy[perm[i]] : 1; shape.set(i, dimension_val, false, false); // Avoid changes in _num_dimension @@ -159,8 +178,11 @@ inline void permute(TensorShape &shape, const PermutationVector &perm) * * @return The corresponding valid region */ -ValidRegion calculate_valid_region_scale(const ITensorInfo &src_info, const TensorShape &dst_shape, - InterpolationPolicy interpolate_policy, SamplingPolicy sampling_policy, bool border_undefined); +ValidRegion calculate_valid_region_scale(const ITensorInfo &src_info, + const TensorShape &dst_shape, + InterpolationPolicy interpolate_policy, + SamplingPolicy sampling_policy, + bool border_undefined); /** Convert a linear index into n-dimensional coordinates. 
* @@ -180,6 +202,22 @@ inline Coordinates index2coords(const TensorShape &shape, int index); */ inline int coords2index(const TensorShape &shape, const Coordinates &coord); +/** Returns a static map used to find an index or dimension based on a data layout + * + * *** Layouts *** + * + * *** 4D *** + * [N C H W] + * [3 2 1 0] + * [N H W C] + * + * *** 5D *** + * [N C D H W] + * [4 3 2 1 0] + * [N D H W C] + */ +const std::map<DataLayout, std::vector<DataLayoutDimension>> &get_layout_map(); + /** Get the index of the given dimension. * * @param[in] data_layout The data layout. @@ -187,7 +225,8 @@ inline int coords2index(const TensorShape &shape, const Coordinates &coord); * * @return The int conversion of the requested data layout index. */ -inline size_t get_data_layout_dimension_index(const DataLayout data_layout, const DataLayoutDimension data_layout_dimension); +inline size_t get_data_layout_dimension_index(const DataLayout &data_layout, + const DataLayoutDimension &data_layout_dimension); /** Get the DataLayoutDimension of a given index and layout. * @@ -196,7 +235,7 @@ inline size_t get_data_layout_dimension_index(const DataLayout data_layout, cons * * @return The dimension which this index is requested for. */ -inline DataLayoutDimension get_index_data_layout_dimension(const DataLayout data_layout, const size_t index); +inline DataLayoutDimension get_index_data_layout_dimension(const DataLayout &data_layout, const size_t index); /** Calculate the number of output tiles required by Winograd Convolution layer. 
This utility function can be used by the Winograd input transform * to know the number of tiles on the x and y direction @@ -208,10 +247,17 @@ inline DataLayoutDimension get_index_data_layout_dimension(const DataLayout data * * @return the number of output tiles along the x and y directions of size "output_tile_size" */ -inline Size2D compute_winograd_convolution_tiles(const Size2D &in_dims, const Size2D &kernel_size, const Size2D &output_tile_size, const PadStrideInfo &conv_info) +inline Size2D compute_winograd_convolution_tiles(const Size2D &in_dims, + const Size2D &kernel_size, + const Size2D &output_tile_size, + const PadStrideInfo &conv_info) { - int num_tiles_x = std::ceil((in_dims.width - (kernel_size.width - 1) + conv_info.pad_left() + conv_info.pad_right()) / static_cast<float>(output_tile_size.width)); - int num_tiles_y = std::ceil((in_dims.height - (kernel_size.height - 1) + conv_info.pad_top() + conv_info.pad_bottom()) / static_cast<float>(output_tile_size.height)); + int num_tiles_x = + std::ceil((in_dims.width - (kernel_size.width - 1) + conv_info.pad_left() + conv_info.pad_right()) / + static_cast<float>(output_tile_size.width)); + int num_tiles_y = + std::ceil((in_dims.height - (kernel_size.height - 1) + conv_info.pad_top() + conv_info.pad_bottom()) / + static_cast<float>(output_tile_size.height)); // Clamp in case we provide paddings but we have 1D convolution num_tiles_x = std::min(num_tiles_x, static_cast<int>(in_dims.width)); @@ -240,7 +286,7 @@ inline T wrap_around(T x, T m) */ inline Coordinates &convert_negative_axis(Coordinates &coords, int max_value) { - for(unsigned int i = 0; i < coords.num_dimensions(); ++i) + for (unsigned int i = 0; i < coords.num_dimensions(); ++i) { coords[i] = wrap_around(coords[i], max_value); } diff --git a/arm_compute/core/Helpers.inl b/arm_compute/core/Helpers.inl index a960876074..60a21e9418 100644 --- a/arm_compute/core/Helpers.inl +++ b/arm_compute/core/Helpers.inl @@ -1,5 +1,5 @@ /* - * Copyright (c) 
2016-2020 Arm Limited. + * Copyright (c) 2016-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -32,12 +32,9 @@ template <size_t dimension> struct IncrementIterators { template <typename T, typename... Ts> - static void unroll(T &&it, Ts &&... iterators) + static void unroll(T &&it, Ts &&...iterators) { - auto increment = [](T && it) - { - it.increment(dimension); - }; + auto increment = [](T &&it) { it.increment(dimension); }; utility::for_each(increment, std::forward<T>(it), std::forward<Ts>(iterators)...); } static void unroll() @@ -50,14 +47,14 @@ template <size_t dim> struct ForEachDimension { template <typename L, typename... Ts> - static void unroll(const Window &w, Coordinates &id, L &&lambda_function, Ts &&... iterators) + static void unroll(const Window &w, Coordinates &id, L &&lambda_function, Ts &&...iterators) { const auto &d = w[dim - 1]; - for(auto v = d.start(); v < d.end(); v += d.step(), IncrementIterators < dim - 1 >::unroll(iterators...)) + for (auto v = d.start(); v < d.end(); v += d.step(), IncrementIterators<dim - 1>::unroll(iterators...)) { id.set(dim - 1, v); - ForEachDimension < dim - 1 >::unroll(w, id, lambda_function, iterators...); + ForEachDimension<dim - 1>::unroll(w, id, lambda_function, iterators...); } } }; @@ -66,7 +63,7 @@ template <> struct ForEachDimension<0> { template <typename L, typename... Ts> - static void unroll(const Window &w, Coordinates &id, L &&lambda_function, Ts &&... iterators) + static void unroll(const Window &w, Coordinates &id, L &&lambda_function, Ts &&...iterators) { ARM_COMPUTE_UNUSED(w, iterators...); lambda_function(id); @@ -74,49 +71,60 @@ struct ForEachDimension<0> }; template <typename L, typename... Ts> -inline void execute_window_loop(const Window &w, L &&lambda_function, Ts &&... 
iterators) +inline void execute_window_loop(const Window &w, L &&lambda_function, Ts &&...iterators) { w.validate(); - for(unsigned int i = 0; i < Coordinates::num_max_dimensions; ++i) + for (unsigned int i = 0; i < Coordinates::num_max_dimensions; ++i) { ARM_COMPUTE_ERROR_ON(w[i].step() == 0); } Coordinates id; - ForEachDimension<Coordinates::num_max_dimensions>::unroll(w, id, std::forward<L>(lambda_function), std::forward<Ts>(iterators)...); + ForEachDimension<Coordinates::num_max_dimensions>::unroll(w, id, std::forward<L>(lambda_function), + std::forward<Ts>(iterators)...); } -inline constexpr Iterator::Iterator() - : _ptr(nullptr), _dims() +inline constexpr Iterator::Iterator() : _ptr(nullptr), _dims() { } -inline Iterator::Iterator(const ITensor *tensor, const Window &win) - : Iterator() +inline Iterator::Iterator(const ITensor *tensor, const Window &win) : Iterator() { ARM_COMPUTE_ERROR_ON(tensor == nullptr); ARM_COMPUTE_ERROR_ON(tensor->info() == nullptr); - const ITensorInfo *info = tensor->info(); - const Strides &strides = info->strides_in_bytes(); + initialize(tensor->info()->num_dimensions(), tensor->info()->strides_in_bytes(), tensor->buffer(), + tensor->info()->offset_first_element_in_bytes(), win); +} + +inline Iterator::Iterator(size_t num_dims, const Strides &strides, uint8_t *buffer, size_t offset, const Window &win) + : Iterator() +{ + initialize(num_dims, strides, buffer, offset, win); +} - _ptr = tensor->buffer() + info->offset_first_element_in_bytes(); +inline void +Iterator::initialize(size_t num_dims, const Strides &strides, uint8_t *buffer, size_t offset, const Window &win) +{ + ARM_COMPUTE_ERROR_ON(buffer == nullptr); + + _ptr = buffer + offset; //Initialize the stride for each dimension and calculate the position of the first element of the iteration: - for(unsigned int n = 0; n < info->num_dimensions(); ++n) + for (unsigned int n = 0; n < num_dims; ++n) { _dims[n]._stride = win[n].step() * strides[n]; std::get<0>(_dims)._dim_start += 
static_cast<size_t>(strides[n]) * win[n].start(); } //Copy the starting point to all the dimensions: - for(unsigned int n = 1; n < Coordinates::num_max_dimensions; ++n) + for (unsigned int n = 1; n < Coordinates::num_max_dimensions; ++n) { _dims[n]._dim_start = std::get<0>(_dims)._dim_start; } - ARM_COMPUTE_ERROR_ON_WINDOW_DIMENSIONS_GTE(win, info->num_dimensions()); + ARM_COMPUTE_ERROR_ON_WINDOW_DIMENSIONS_GTE(win, num_dims); } inline void Iterator::increment(const size_t dimension) @@ -125,7 +133,7 @@ inline void Iterator::increment(const size_t dimension) _dims[dimension]._dim_start += _dims[dimension]._stride; - for(unsigned int n = 0; n < dimension; ++n) + for (unsigned int n = 0; n < dimension; ++n) { _dims[n]._dim_start = _dims[dimension]._dim_start; } @@ -147,7 +155,7 @@ inline void Iterator::reset(const size_t dimension) _dims[dimension]._dim_start = _dims[dimension + 1]._dim_start; - for(unsigned int n = 0; n < dimension; ++n) + for (unsigned int n = 0; n < dimension; ++n) { _dims[n]._dim_start = _dims[dimension]._dim_start; } @@ -160,9 +168,9 @@ inline Coordinates index2coords(const TensorShape &shape, int index) ARM_COMPUTE_ERROR_ON_MSG(index < 0 || index >= num_elements, "Index has to be in [0, num_elements]!"); ARM_COMPUTE_ERROR_ON_MSG(num_elements == 0, "Cannot create coordinate from empty shape!"); - Coordinates coord{ 0 }; + Coordinates coord{0}; - for(int d = shape.num_dimensions() - 1; d >= 0; --d) + for (int d = shape.num_dimensions() - 1; d >= 0; --d) { num_elements /= shape[d]; coord.set(d, index / num_elements); @@ -181,7 +189,7 @@ inline int coords2index(const TensorShape &shape, const Coordinates &coord) int index = 0; int stride = 1; - for(unsigned int d = 0; d < coord.num_dimensions(); ++d) + for (unsigned int d = 0; d < coord.num_dimensions(); ++d) { index += coord[d] * stride; stride *= shape[d]; @@ -190,61 +198,23 @@ inline int coords2index(const TensorShape &shape, const Coordinates &coord) return index; } -inline size_t 
get_data_layout_dimension_index(const DataLayout data_layout, const DataLayoutDimension data_layout_dimension) +inline size_t get_data_layout_dimension_index(const DataLayout &data_layout, + const DataLayoutDimension &data_layout_dimension) { - ARM_COMPUTE_ERROR_ON_MSG(data_layout == DataLayout::UNKNOWN, "Cannot retrieve the dimension index for an unknown layout!"); - - /* Return the index based on the data layout - * [N C H W] - * [3 2 1 0] - * [N H W C] - */ - switch(data_layout_dimension) - { - case DataLayoutDimension::CHANNEL: - return (data_layout == DataLayout::NCHW) ? 2 : 0; - break; - case DataLayoutDimension::HEIGHT: - return (data_layout == DataLayout::NCHW) ? 1 : 2; - break; - case DataLayoutDimension::WIDTH: - return (data_layout == DataLayout::NCHW) ? 0 : 1; - break; - case DataLayoutDimension::BATCHES: - return 3; - break; - default: - break; - } - ARM_COMPUTE_ERROR("Data layout index not supported!"); + ARM_COMPUTE_ERROR_ON_MSG(data_layout == DataLayout::UNKNOWN, + "Cannot retrieve the dimension index for an unknown layout!"); + const auto &dims = get_layout_map().at(data_layout); + const auto &it = std::find(dims.cbegin(), dims.cend(), data_layout_dimension); + ARM_COMPUTE_ERROR_ON_MSG(it == dims.cend(), "Invalid dimension for the given layout."); + return it - dims.cbegin(); } -inline DataLayoutDimension get_index_data_layout_dimension(const DataLayout data_layout, const size_t index) +inline DataLayoutDimension get_index_data_layout_dimension(const DataLayout &data_layout, const size_t index) { - ARM_COMPUTE_ERROR_ON_MSG(data_layout == DataLayout::UNKNOWN, "Cannot retrieve the dimension index for an unknown layout!"); - - /* Return the index based on the data layout - * [N C H W] - * [3 2 1 0] - * [N H W C] - */ - switch(index) - { - case 0: - return (data_layout == DataLayout::NCHW) ? DataLayoutDimension::WIDTH : DataLayoutDimension::CHANNEL; - break; - case 1: - return (data_layout == DataLayout::NCHW) ? 
DataLayoutDimension::HEIGHT : DataLayoutDimension::WIDTH; - break; - case 2: - return (data_layout == DataLayout::NCHW) ? DataLayoutDimension::CHANNEL : DataLayoutDimension::HEIGHT; - break; - case 3: - return DataLayoutDimension::BATCHES; - break; - default: - ARM_COMPUTE_ERROR("Index value not supported!"); - break; - } + ARM_COMPUTE_ERROR_ON_MSG(data_layout == DataLayout::UNKNOWN, + "Cannot retrieve the layout dimension for an unknown layout!"); + const auto &dims = get_layout_map().at(data_layout); + ARM_COMPUTE_ERROR_ON_MSG(index >= dims.size(), "Invalid index for the given layout."); + return dims[index]; } } // namespace arm_compute diff --git a/arm_compute/core/IAccessWindow.h b/arm_compute/core/IAccessWindow.h index 880f6d6b27..9c9fb90915 100644 --- a/arm_compute/core/IAccessWindow.h +++ b/arm_compute/core/IAccessWindow.h @@ -100,7 +100,10 @@ public: * @return a valid region. * */ - virtual ValidRegion compute_valid_region(const Window &window, ValidRegion input_valid_region, bool border_undefined, BorderSize border_size) const = 0; + virtual ValidRegion compute_valid_region(const Window &window, + ValidRegion input_valid_region, + bool border_undefined, + BorderSize border_size) const = 0; }; /** Implementation of a rectangular access pattern. */ @@ -161,7 +164,10 @@ public: * @param[in] border_undefined (Optional) Undefined borders are excluded from the valid region. * @param[in] border_size (Optional) Size of the border around the XY-plane of the tensor. */ - void set_valid_region(const Window &window, const ValidRegion &input_valid_region, bool border_undefined = false, const BorderSize &border_size = BorderSize(0)); + void set_valid_region(const Window &window, + const ValidRegion &input_valid_region, + bool border_undefined = false, + const BorderSize &border_size = BorderSize(0)); /** Compute the valid region based on access pattern, valid region of the inputs and border mode. * @@ -189,7 +195,10 @@ public: * @return a valid region. 
* */ - ValidRegion compute_valid_region(const Window &window, ValidRegion input_valid_region, bool border_undefined, BorderSize border_size) const override; + ValidRegion compute_valid_region(const Window &window, + ValidRegion input_valid_region, + bool border_undefined, + BorderSize border_size) const override; bool update_window_if_needed(Window &window) const override; bool update_padding_if_needed(const Window &window) override; diff --git a/arm_compute/core/IArray.h b/arm_compute/core/IArray.h index 6edbc1d5d5..3471fc9a86 100644 --- a/arm_compute/core/IArray.h +++ b/arm_compute/core/IArray.h @@ -25,6 +25,7 @@ #define ARM_COMPUTE_IARRAY_H #include "arm_compute/core/Error.h" + #include <cstddef> #include <cstdint> @@ -36,14 +37,12 @@ class IArray { public: /** Default constructor */ - IArray() - : _num_values(0), _max_size(0) {}; + IArray() : _num_values(0), _max_size(0){}; /** Constructor: initializes an array which can contain up to max_num_points values * * @param[in] max_num_values Maximum number of values the array will be able to stored */ - IArray(size_t max_num_values) - : _num_values(0), _max_size(max_num_values) + IArray(size_t max_num_values) : _num_values(0), _max_size(max_num_values) { } /** Maximum number of values which can be stored in this array @@ -73,7 +72,7 @@ public: bool push_back(const T &val) { ARM_COMPUTE_ERROR_ON(0 == _max_size); - if(_num_values >= max_num_values()) + if (_num_values >= max_num_values()) { _num_values = max_num_values() + 1; return false; @@ -142,5 +141,5 @@ using IInt16Array = IArray<int16_t>; using IInt32Array = IArray<int32_t>; /** Interface for Array of floats. 
*/ using IFloatArray = IArray<float>; -} +} // namespace arm_compute #endif /* ARM_COMPUTE_IARRAY_H */ diff --git a/arm_compute/core/IKernel.h b/arm_compute/core/IKernel.h index 98fd18cc91..403a2c724e 100644 --- a/arm_compute/core/IKernel.h +++ b/arm_compute/core/IKernel.h @@ -73,5 +73,5 @@ protected: private: Window _window; }; -} +} // namespace arm_compute #endif /*ARM_COMPUTE_IKERNEL_H */ diff --git a/arm_compute/core/ITensor.h b/arm_compute/core/ITensor.h index 131ee205ea..aad8313261 100644 --- a/arm_compute/core/ITensor.h +++ b/arm_compute/core/ITensor.h @@ -90,11 +90,13 @@ public: bool is_used() const; /** Marks a tensor as unused */ void mark_as_unused() const; + /** Marks a tensor as used */ + void mark_as_used() const; private: - mutable bool _is_used = { true }; /**< Flag that marks if the tensor is used or not */ + mutable bool _is_used = {true}; /**< Flag that marks if the tensor is used or not */ }; using IImage = ITensor; -} +} // namespace arm_compute #endif /*ARM_COMPUTE_ITENSOR_H */ diff --git a/arm_compute/core/ITensorInfo.h b/arm_compute/core/ITensorInfo.h index 0171e31086..c42f4b57a1 100644 --- a/arm_compute/core/ITensorInfo.h +++ b/arm_compute/core/ITensorInfo.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2021 Arm Limited. + * Copyright (c) 2016-2023 Arm Limited. 
 * * SPDX-License-Identifier: MIT * @@ -28,19 +28,28 @@ #include "arm_compute/core/Strides.h" #include "arm_compute/core/TensorShape.h" #include "arm_compute/core/Types.h" -#include "arm_compute/core/Utils.h" #include "arm_compute/core/utils/misc/Utility.h" + #include "support/ICloneable.h" #include <cstddef> namespace arm_compute { +class QuantizationInfo; +// Note: Any changes to the fields of the class below that have setters should be mirrored +// (if possible) in the auto_init_if_empty function in AutoConfiguration.h + /** Store the tensor's metadata */ class ITensorInfo : public misc::ICloneable<ITensorInfo> { public: - using TensorDimsState = Coordinates; + using TensorDimsState = std::vector<int>; + /** An id that uniquely identifies an ITensorInfo within some domain (e.g. a workload) + */ + using Id = int32_t; + /** An invalid tensor id within a domain */ + static constexpr Id invalid_tensor_id = 0; /** Get the value representing dynamic dimension state * * @return Value representing dynamic dimension state @@ -137,6 +146,17 @@ public: * @return True if the strides or the offset to the first element have changed. */ virtual bool auto_padding() = 0; + /** Set the lock paddings flag of the tensor. + * It should be set to True, when the tensor could be mapped to camera or frame buffer. + * + * @return Reference to this ITensorInfo object + */ + virtual ITensorInfo &set_lock_paddings(bool flag) = 0; + /** Get the lock paddings flag value + * + * @return lock paddings flag value + */ + virtual bool lock_paddings() const = 0; /** Update the offset to the first element, the strides and the total size. * * @note This function can only increase the offset, strides and total size. @@ -240,6 +260,11 @@ public: * @return True if its dynamic else false */ virtual bool is_dynamic() const = 0; + /** Flag indicating whether the values of the tensor are constant, meaning that they cannot change on kernel/function execution.
+ * + * @return True if values are constant else false + */ + virtual bool are_values_constant() const = 0; /** Set the flag whether the tensor size can be changed. * * @param[in] is_resizable Flag that marks the tensor if it can be changed or not. @@ -247,6 +272,13 @@ public: * @return Reference to this ITensorInfo object */ virtual ITensorInfo &set_is_resizable(bool is_resizable) = 0; + /** Set the flag whether the tensor values can change during kernel/function execution. + * + * @param[in] are_values_constant Flag that marks the tensor values if they can be changed or not. + * + * @return Reference to this ITensorInfo object + */ + virtual ITensorInfo &set_are_values_constant(bool are_values_constant) = 0; /** Valid region of the tensor. All elements in the valid region have defined values, i.e. are not undefined. * * @return The valid region. @@ -268,7 +300,20 @@ public: * @return A DataLayout containing the layout data information. */ virtual DataLayout data_layout() const = 0; - + /** Get the workload tensor id of the tensor. + * + * @return Workload tensor id of the tensor + */ + virtual Id id() const = 0; + /** Set the tensor id + */ + virtual ITensorInfo &set_id(ITensorInfo::Id id) = 0; + /** Check if the tensor id is valid + */ + bool has_valid_id() const + { + return id() != invalid_tensor_id; + } /** If infos are broadcast compatible tensor info's, return the broadcasted shape and the intersection of * the broadcasted valid regions of the tensors. * @@ -284,23 +329,23 @@ public: * not broadcast compatible. */ template <typename... Infos> - static std::pair<TensorShape, ValidRegion> broadcast_shape_and_valid_region(const Infos &... 
infos) + static std::pair<TensorShape, ValidRegion> broadcast_shape_and_valid_region(const Infos &...infos) { TensorShape bc_shape = TensorShape::broadcast_shape(infos.tensor_shape()...); - ValidRegion bc_valid_region{ Coordinates(), bc_shape }; + ValidRegion bc_valid_region{Coordinates(), bc_shape}; - auto broadcast_valid_region = [&bc_valid_region](const ITensorInfo & info) + auto broadcast_valid_region = [&bc_valid_region](const ITensorInfo &info) { - if(info.num_dimensions() != 0) + if (info.num_dimensions() != 0) { - for(size_t d = 0; d < bc_valid_region.shape.num_dimensions(); ++d) + for (size_t d = 0; d < bc_valid_region.shape.num_dimensions(); ++d) { const bool is_broadcast = (info.tensor_shape()[d] == 1); const int anchor_max = std::max(bc_valid_region.anchor[d], info.valid_region().anchor[d]); const size_t valid_min = std::min(bc_valid_region.shape[d], info.valid_region().shape[d]); - if(!is_broadcast || (valid_min == 0)) + if (!is_broadcast || (valid_min == 0)) { bc_valid_region.anchor.set(d, anchor_max); bc_valid_region.shape.set(d, valid_min); diff --git a/arm_compute/core/ITensorPack.h b/arm_compute/core/ITensorPack.h index 17b7241862..f456c50769 100644 --- a/arm_compute/core/ITensorPack.h +++ b/arm_compute/core/ITensorPack.h @@ -42,18 +42,16 @@ public: struct PackElement { PackElement() = default; - PackElement(int id, ITensor *tensor) - : id(id), tensor(tensor), ctensor(nullptr) + PackElement(int id, ITensor *tensor) : id(id), tensor(tensor), ctensor(nullptr) { } - PackElement(int id, const ITensor *ctensor) - : id(id), tensor(nullptr), ctensor(ctensor) + PackElement(int id, const ITensor *ctensor) : id(id), tensor(nullptr), ctensor(ctensor) { } - int id{ -1 }; - ITensor *tensor{ nullptr }; - const ITensor *ctensor{ nullptr }; + int id{-1}; + ITensor *tensor{nullptr}; + const ITensor *ctensor{nullptr}; }; public: diff --git a/arm_compute/core/KernelDescriptors.h b/arm_compute/core/KernelDescriptors.h index 6c1fc74b1e..168a06a55c 100644 --- 
a/arm_compute/core/KernelDescriptors.h +++ b/arm_compute/core/KernelDescriptors.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021 Arm Limited. + * Copyright (c) 2019-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,116 +21,139 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_CORE_KERNEL_DESCRIPTORS_H -#define ARM_COMPUTE_CORE_KERNEL_DESCRIPTORS_H +#ifndef ACL_ARM_COMPUTE_CORE_KERNELDESCRIPTORS_H +#define ACL_ARM_COMPUTE_CORE_KERNELDESCRIPTORS_H #include "arm_compute/core/PixelValue.h" #include "arm_compute/core/Types.h" +#include "arm_compute/function_info/ActivationLayerInfo.h" namespace arm_compute { /** Descriptor for FFT scale kernels */ struct FFTScaleKernelInfo { - float scale{ 0.f }; /**< Axis to perform the kernel on. */ - bool conjugate{ true }; /**< Flag to conjugate the output/ */ + float scale{0.f}; /**< Scaling factor applied to the output. */ + bool conjugate{true}; /**< Flag to conjugate the output. */ }; /** Descriptor for FFT digit reverse kernels */ struct FFTDigitReverseKernelInfo { - unsigned int axis{ 0 }; /**< Axis to perform the kernel on. */ - bool conjugate{ false }; /**< Flag to conjugate the output/ */ + unsigned int axis{0}; /**< Axis to perform the kernel on. */ + bool conjugate{false}; /**< Flag to conjugate the output. */ }; /** Descriptor used by the FFT core kernels */ struct FFTRadixStageKernelInfo { - unsigned int axis{ 0 }; /**< Axis to run the kernel on. */ - unsigned int radix{ 0 }; /**< Radix to use. */ - unsigned int Nx{ 0 }; /**< Nx coefficient. */ - bool is_first_stage{ false }; /**< Flags if the FFT kernels is the first stage of a decomposed FFT. */ + unsigned int axis{0}; /**< Axis to run the kernel on. */ + unsigned int radix{0}; /**< Radix to use. */ + unsigned int Nx{0}; /**< Nx coefficient. */ + bool is_first_stage{false}; /**< Flags if the FFT kernel is the first stage of a decomposed FFT. 
*/ }; +class ITensorInfo; /** Descriptor used by the GEMM kernels */ struct GEMMKernelInfo { GEMMKernelInfo() = default; - GEMMKernelInfo( - unsigned int im, - unsigned int in, - unsigned int ik, - unsigned int idepth_output_gemm3d, - bool ireinterpret_input_as_3d, - bool ibroadcast_bias, - bool ifp_mixed_precision, - bool ihas_pad_y, - ActivationLayerInfo iactivation_info, - int inmult_transpose1xW_width, - int imult_interleave4x4_height, - GEMMLHSMatrixInfo ilhs_info, - GEMMRHSMatrixInfo irhs_info, - int32_t ina_offset, - int32_t inb_offset) - : m(im), n(in), k(ik), depth_output_gemm3d(idepth_output_gemm3d), reinterpret_input_as_3d(ireinterpret_input_as_3d), broadcast_bias(ibroadcast_bias), fp_mixed_precision(ifp_mixed_precision), - has_pad_y(ihas_pad_y), activation_info(iactivation_info), mult_transpose1xW_width(inmult_transpose1xW_width), mult_interleave4x4_height(imult_interleave4x4_height), lhs_info(ilhs_info), - rhs_info(irhs_info), a_offset(ina_offset), b_offset(inb_offset) + GEMMKernelInfo(unsigned int im, + unsigned int in, + unsigned int ik, + unsigned int idepth_output_gemm3d, + bool ireinterpret_input_as_3d, + bool ibroadcast_bias, + bool ifp_mixed_precision, + bool ihas_pad_y, + ActivationLayerInfo iactivation_info, + int inmult_transpose1xW_width, + int imult_interleave4x4_height, + GEMMLHSMatrixInfo ilhs_info, + GEMMRHSMatrixInfo irhs_info, + int32_t ina_offset, + int32_t inb_offset) + : m(im), + n(in), + k(ik), + depth_output_gemm3d(idepth_output_gemm3d), + reinterpret_input_as_3d(ireinterpret_input_as_3d), + broadcast_bias(ibroadcast_bias), + fp_mixed_precision(ifp_mixed_precision), + has_pad_y(ihas_pad_y), + activation_info(iactivation_info), + mult_transpose1xW_width(inmult_transpose1xW_width), + mult_interleave4x4_height(imult_interleave4x4_height), + lhs_info(ilhs_info), + rhs_info(irhs_info), + a_offset(ina_offset), + b_offset(inb_offset) { } - unsigned int m{ 0 }; /**< Number of LHS rows*/ - unsigned int n{ 0 }; /**< Number of RHS columns*/ 
- unsigned int k{ 0 }; /**< Number of LHS columns or RHS rows */ - unsigned int depth_output_gemm3d{ 0 }; /**< Depth of the output tensor in case is reinterpreted as 3D */ - bool reinterpret_input_as_3d{ false }; /**< Flag used to reinterpret the input as 3D */ - bool broadcast_bias{ false }; /**< Flag used to broadcast the bias addition */ - bool fp_mixed_precision{ false }; /**< Flag used to indicate wider accumulators (32 bit instead of 16 for FP16). */ - bool has_pad_y{ false }; /**< Flag used to indicate if the input/output tensors have internal pad on the y direction */ - ActivationLayerInfo activation_info{}; /**< Activation function to perform after the matrix multiplication */ - int mult_transpose1xW_width{ 1 }; /**< Multiplication factor for the width of the 1xW transposed block */ - int mult_interleave4x4_height{ 1 }; /**< Multiplication factor for the height of the 4x4 interleaved block */ - GEMMLHSMatrixInfo lhs_info{}; /**< LHS matrix information used to retrieve the number of rows processed by each thread */ - GEMMRHSMatrixInfo rhs_info{}; /**< RHS matrix information used for reshaping the RHS matrix */ - int32_t a_offset{ 0 }; /**< Offset to be added to each element of the matrix A */ - int32_t b_offset{ 0 }; /**< Offset to be added to each element of the matrix B */ - GEMMLowpOutputStageInfo output_stage{}; /**< GEMMLowp output stage information */ + unsigned int m{0}; /**< Number of LHS rows*/ + unsigned int n{0}; /**< Number of RHS columns*/ + unsigned int k{0}; /**< Number of LHS columns or RHS rows */ + unsigned int depth_output_gemm3d{0}; /**< Depth of the output tensor in case is reinterpreted as 3D */ + bool reinterpret_input_as_3d{false}; /**< Flag used to reinterpret the input as 3D */ + bool broadcast_bias{false}; /**< Flag used to broadcast the bias addition */ + bool fp_mixed_precision{false}; /**< Flag used to indicate wider accumulators (32 bit instead of 16 for FP16). 
*/ + bool has_pad_y{ + false}; /**< Flag used to indicate if the input/output tensors have internal pad on the y direction */ + ActivationLayerInfo activation_info{}; /**< Activation function to perform after the matrix multiplication */ + int mult_transpose1xW_width{1}; /**< Multiplication factor for the width of the 1xW transposed block */ + int mult_interleave4x4_height{1}; /**< Multiplication factor for the height of the 4x4 interleaved block */ + GEMMLHSMatrixInfo + lhs_info{}; /**< LHS matrix information used to retrieve the number of rows processed by each thread */ + GEMMRHSMatrixInfo rhs_info{}; /**< RHS matrix information used for reshaping the RHS matrix */ + int32_t a_offset{0}; /**< Offset to be added to each element of the matrix A */ + int32_t b_offset{0}; /**< Offset to be added to each element of the matrix B */ + GEMMLowpOutputStageInfo output_stage{}; /**< GEMMLowp output stage information */ }; -/** Descriptor used by the depthwise convolution kernels */ -struct DWCKernelInfo +/** Compute descriptor used by the depthwise convolution native kernel */ +struct DWCComputeKernelInfo { - ActivationLayerInfo activation_info{}; /**< Activation function to perform after the depthwise convolution */ + unsigned int n0{1}; /**< Number of columns processed by each thread */ + unsigned int m0{1}; /**< Number of rows processed by each thread */ + bool export_input_to_cl_image{false}; /**< Export input to cl_image */ + bool export_weights_to_cl_image{false}; /**< Export the weights to cl_image */ }; -/** Descriptor used by the depthwise convolution kernels to retrieve the number of output elements processed by each thread */ -struct DWCWeightsKernelInfo +/** Compute descriptor used by the direct convolution kernel */ +struct DirectConvComputeKernelInfo { - unsigned int n0{ 0 }; /**< Number of columns processed by each thread */ + int32_t m0{1}; /**< Number of rows to be processed by the kernel */ + int32_t n0{1}; /**< Number of columns to be processed by the 
kernel */ + int32_t k0{1}; /**< Number of partial accumulations to be processed in a single iteration by the kernel */ + bool export_weights_to_cl_image{false}; /**< Flag to export the weights to cl_image */ + bool export_output_to_cl_image{false}; /**< Flag to export the output to cl_image */ + bool export_input_to_cl_image{false}; /**< Flag to export the input to cl_image */ }; /** Descriptor used by the softmax kernels */ struct SoftmaxKernelInfo { - float beta{ 1.f }; /**< A scaling factor for the exponent with default value 1.0 */ - bool is_log{ false }; /**< Flag used to perform Log Softmax operation */ - DataType input_data_type{ DataType::UNKNOWN }; /**< Input tensor data type */ - int32_t axis{ 0 }; /**< The dimension in which to apply softmax. */ + float beta{1.f}; /**< A scaling factor for the exponent with default value 1.0 */ + bool is_log{false}; /**< Flag used to perform Log Softmax operation */ + DataType input_data_type{DataType::UNKNOWN}; /**< Input tensor data type */ + int32_t axis{0}; /**< The dimension in which to apply softmax. 
*/ }; /** Descriptor used by the direct convolution layer output stage kernels */ struct DirectConvolutionLayerOutputStageKernelInfo { - int32_t result_fixedpoint_multiplier{ 0 }; /**< Result output stage multiplier used for quantizing */ - int32_t result_shift{ 0 }; /**< Result output stage shift used for quantizing */ - int32_t result_offset_after_shift{ 0 }; /**< Result offset used for quantizing */ - DataType output_data_type{ DataType::UNKNOWN }; /**< Output tensor data type to use if the output is not initialized */ + int32_t result_fixedpoint_multiplier{0}; /**< Result output stage multiplier used for quantizing */ + int32_t result_shift{0}; /**< Result output stage shift used for quantizing */ + int32_t result_offset_after_shift{0}; /**< Result offset used for quantizing */ + DataType output_data_type{ + DataType::UNKNOWN}; /**< Output tensor data type to use if the output is not initialized */ }; struct InstanceNormalizationLayerKernelInfo { /** Default constructor */ - InstanceNormalizationLayerKernelInfo() - : InstanceNormalizationLayerKernelInfo(1.f, 0.f, 1e-12, true) + InstanceNormalizationLayerKernelInfo() : InstanceNormalizationLayerKernelInfo(1.f, 0.f, 1e-12, true) { } /** Constructor @@ -167,10 +190,10 @@ struct GEMMLowpReductionKernelInfo { } - int32_t k{ 0 }; /**< Number of matrix columns/rows */ - bool is_reshaped{ false }; /**< True if the input tensor has been reshaped */ - int32_t scalar{ 0 }; /**< Scalar value to multiply each reduced column/row by */ - bool mul_by_scalar{ false }; /**< True if each column/row reduction has to be multiplied by a scalar value */ + int32_t k{0}; /**< Number of matrix columns/rows */ + bool is_reshaped{false}; /**< True if the input tensor has been reshaped */ + int32_t scalar{0}; /**< Scalar value to multiply each reduced column/row by */ + bool mul_by_scalar{false}; /**< True if each column/row reduction has to be multiplied by a scalar value */ }; struct ScaleKernelInfo @@ -192,13 +215,13 @@ struct 
ScaleKernelInfo bool use_padding = true, bool align_corners = false, DataLayout data_layout = DataLayout::UNKNOWN) noexcept - : interpolation_policy{ interpolation_policy }, - border_mode{ border_mode }, - constant_border_value{ constant_border_value }, - sampling_policy{ sampling_policy }, - use_padding{ use_padding }, - align_corners{ align_corners }, - data_layout{ data_layout } + : interpolation_policy{interpolation_policy}, + border_mode{border_mode}, + constant_border_value{constant_border_value}, + sampling_policy{sampling_policy}, + use_padding{use_padding}, + align_corners{align_corners}, + data_layout{data_layout} { } @@ -211,16 +234,20 @@ struct ScaleKernelInfo DataLayout data_layout; /**< Data layout to use */ }; -struct RemapInfo +struct MatMulKernelInfo { - RemapInfo() = default; - RemapInfo(InterpolationPolicy policy, BorderMode border_mode, PixelValue constant_border_value) - : policy(policy), border_mode(border_mode), constant_border_value(constant_border_value) + MatMulKernelInfo() = default; + MatMulKernelInfo( + bool adj_lhs, bool adj_rhs, int m0 = 1, int n0 = 1, int k0 = 1, bool export_rhs_to_cl_image = false) + : adj_lhs{adj_lhs}, adj_rhs{adj_rhs}, m0{m0}, n0{n0}, k0{k0}, export_rhs_to_cl_image{export_rhs_to_cl_image} { } - InterpolationPolicy policy; - BorderMode border_mode; - PixelValue constant_border_value; + bool adj_lhs{false}; /**< Get Adjoint LHS flag value */ + bool adj_rhs{false}; /**< Get Adjoint RHS flag value */ + int m0{1}; /**< Number of output rows processed by each work-item*/ + int n0{1}; /**< Number of output columns processed by each work-item*/ + int k0{1}; /**< Number of inner accumulations */ + bool export_rhs_to_cl_image{false}; /**< Flag to know whether the RHS tensor should be exported to cl_image*/ }; } // namespace arm_compute -#endif /* ARM_COMPUTE_CORE_KERNEL_DESCRIPTORS_H */ +#endif // ACL_ARM_COMPUTE_CORE_KERNELDESCRIPTORS_H diff --git a/arm_compute/core/Log.h b/arm_compute/core/Log.h index 
bc0ecb802e..03b861f765 100644 --- a/arm_compute/core/Log.h +++ b/arm_compute/core/Log.h @@ -34,11 +34,11 @@ #define ARM_COMPUTE_CREATE_DEFAULT_CORE_LOGGER() \ do \ { \ - if(arm_compute::logging::LoggerRegistry::get().logger("CORE") == nullptr) \ + if (arm_compute::logging::LoggerRegistry::get().logger("CORE") == nullptr) \ { \ arm_compute::logging::LoggerRegistry::get().create_reserved_loggers(); \ } \ - } while(false) + } while (false) #else /* ARM_COMPUTE_LOGGING_ENABLED */ #define ARM_COMPUTE_CREATE_DEFAULT_CORE_LOGGER() #endif /* ARM_COMPUTE_LOGGING_ENABLED */ @@ -53,7 +53,7 @@ { \ ARM_COMPUTE_CREATE_DEFAULT_CORE_LOGGER(); \ ARM_COMPUTE_LOG_MSG("CORE", log_level, msg); \ - } while(false) + } while (false) /** Log a message with format to the core system logger * @@ -66,7 +66,7 @@ { \ ARM_COMPUTE_CREATE_DEFAULT_CORE_LOGGER(); \ ARM_COMPUTE_LOG_MSG_WITH_FORMAT("CORE", log_level, fmt, __VA_ARGS__); \ - } while(false) + } while (false) /** Log a stream to the core system logger * @@ -78,7 +78,7 @@ { \ ARM_COMPUTE_CREATE_DEFAULT_CORE_LOGGER(); \ ARM_COMPUTE_LOG_STREAM("CORE", log_level, ss); \ - } while(false) + } while (false) /** Log information level message to the core system logger * @@ -89,7 +89,7 @@ { \ ARM_COMPUTE_CREATE_DEFAULT_CORE_LOGGER(); \ ARM_COMPUTE_LOG_MSG_CORE(arm_compute::logging::LogLevel::INFO, msg); \ - } while(false) + } while (false) /** Log information level formatted message to the core system logger * @@ -101,7 +101,7 @@ { \ ARM_COMPUTE_CREATE_DEFAULT_CORE_LOGGER(); \ ARM_COMPUTE_LOG_MSG_WITH_FORMAT_CORE(arm_compute::logging::LogLevel::INFO, #fmt, __VA_ARGS__); \ - } while(false) + } while (false) /** Log information level stream to the core system logger * @@ -112,6 +112,6 @@ { \ ARM_COMPUTE_CREATE_DEFAULT_CORE_LOGGER(); \ ARM_COMPUTE_LOG_STREAM_CORE(arm_compute::logging::LogLevel::INFO, ss); \ - } while(false) + } while (false) #endif /* ARM_COMPUTE_LOGGING_MACROS_H */ diff --git a/arm_compute/core/PixelValue.h 
b/arm_compute/core/PixelValue.h index 0e3d26c515..0b4df4f2e2 100644 --- a/arm_compute/core/PixelValue.h +++ b/arm_compute/core/PixelValue.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2021 Arm Limited. + * Copyright (c) 2016-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,6 +24,7 @@ #ifndef ARM_COMPUTE_PIXELVALUE_H #define ARM_COMPUTE_PIXELVALUE_H +#include "arm_compute/core/QuantizationInfo.h" #include "arm_compute/core/Types.h" #include <cstdint> @@ -35,11 +36,7 @@ class PixelValue { public: /** Default constructor: value initialized to 0 */ - PixelValue() noexcept - : value - { - int64_t(0) - } + PixelValue() noexcept : value{int64_t(0)} { } /** Initialize the union with a pixel value of chosen datatype @@ -48,10 +45,9 @@ public: * @param[in] datatype DataType that @p v have to be stored * @param[in] qinfo (Optional) QuantizationInfo to apply in case of quantized data types to @p v */ - PixelValue(double v, DataType datatype, QuantizationInfo qinfo = QuantizationInfo()) - : PixelValue() + PixelValue(double v, DataType datatype, QuantizationInfo qinfo = QuantizationInfo()) : PixelValue() { - switch(datatype) + switch (datatype) { case DataType::U8: value.u8 = static_cast<uint8_t>(v); @@ -111,8 +107,7 @@ public: * * @param[in] v S8 value. */ - PixelValue(int8_t v) - : PixelValue() + PixelValue(int8_t v) : PixelValue() { value.s8 = v; } @@ -120,8 +115,7 @@ public: * * @param[in] v U8 value. */ - PixelValue(uint8_t v) - : PixelValue() + PixelValue(uint8_t v) : PixelValue() { value.u8 = v; } @@ -129,8 +123,7 @@ public: * * @param[in] v U16 value. */ - PixelValue(uint16_t v) - : PixelValue() + PixelValue(uint16_t v) : PixelValue() { value.u16 = v; } @@ -138,8 +131,7 @@ public: * * @param[in] v S16 value. */ - PixelValue(int16_t v) - : PixelValue() + PixelValue(int16_t v) : PixelValue() { value.s16 = v; } @@ -147,8 +139,7 @@ public: * * @param[in] v U32 value. 
*/ - PixelValue(uint32_t v) - : PixelValue() + PixelValue(uint32_t v) : PixelValue() { value.u32 = v; } @@ -156,8 +147,7 @@ public: * * @param[in] v S32 value. */ - PixelValue(int32_t v) - : PixelValue() + PixelValue(int32_t v) : PixelValue() { value.s32 = v; } @@ -166,8 +156,7 @@ public: * * @param[in] v U64 value. */ - PixelValue(uint64_t v) - : PixelValue() + PixelValue(uint64_t v) : PixelValue() { value.u64 = v; } @@ -175,8 +164,7 @@ public: * * @param[in] v S64 value. */ - PixelValue(int64_t v) - : PixelValue() + PixelValue(int64_t v) : PixelValue() { value.s64 = v; } @@ -184,8 +172,7 @@ public: * * @param[in] v F16 value. */ - PixelValue(bfloat16 v) - : PixelValue() + PixelValue(bfloat16 v) : PixelValue() { value.bf16 = v; } @@ -193,8 +180,7 @@ public: * * @param[in] v F16 value. */ - PixelValue(half v) - : PixelValue() + PixelValue(half v) : PixelValue() { value.f16 = v; } @@ -202,8 +188,7 @@ public: * * @param[in] v F32 value. */ - PixelValue(float v) - : PixelValue() + PixelValue(float v) : PixelValue() { value.f32 = v; } @@ -211,8 +196,7 @@ public: * * @param[in] v F64 value. 
*/ - PixelValue(double v) - : PixelValue() + PixelValue(double v) : PixelValue() { value.f64 = v; } @@ -220,23 +204,23 @@ public: * Use the field corresponding to the image format */ union - { - uint64_t u64; /**< Single channel U64 */ - int64_t s64; /**< Single channel S64 */ - uint8_t rgb[3]; /**< 3 channels: RGB888 */ - uint8_t yuv[3]; /**< 3 channels: Any YUV format */ - uint8_t rgbx[4]; /**< 4 channels: RGBX8888 */ - double f64; /**< Single channel double */ - float f32; /**< Single channel float 32 */ - half f16; /**< Single channel F16 */ - bfloat16 bf16; /**< Single channel brain floating-point number */ - uint8_t u8; /**< Single channel U8 */ - int8_t s8; /**< Single channel S8 */ - uint16_t u16; /**< Single channel U16 */ - int16_t s16; /**< Single channel S16 */ - uint32_t u32; /**< Single channel U32 */ - int32_t s32; /**< Single channel S32 */ - } value; + { + uint64_t u64; /**< Single channel U64 */ + int64_t s64; /**< Single channel S64 */ + uint8_t rgb[3]; /**< 3 channels: RGB888 */ + uint8_t yuv[3]; /**< 3 channels: Any YUV format */ + uint8_t rgbx[4]; /**< 4 channels: RGBX8888 */ + double f64; /**< Single channel double */ + float f32; /**< Single channel float 32 */ + half f16; /**< Single channel F16 */ + bfloat16 bf16; /**< Single channel brain floating-point number */ + uint8_t u8; /**< Single channel U8 */ + int8_t s8; /**< Single channel S8 */ + uint16_t u16; /**< Single channel U16 */ + int16_t s16; /**< Single channel S16 */ + uint32_t u32; /**< Single channel U32 */ + int32_t s32; /**< Single channel S32 */ + } value; /** Interpret the pixel value as a U8 * * @param[out] v Returned value diff --git a/arm_compute/core/QuantizationInfo.h b/arm_compute/core/QuantizationInfo.h index b331f7d923..aecba3712e 100644 --- a/arm_compute/core/QuantizationInfo.h +++ b/arm_compute/core/QuantizationInfo.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021 Arm Limited. + * Copyright (c) 2019-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -21,16 +21,14 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_QUANTIZATION_INFO_H -#define ARM_COMPUTE_QUANTIZATION_INFO_H +#ifndef ACL_ARM_COMPUTE_CORE_QUANTIZATIONINFO_H +#define ACL_ARM_COMPUTE_CORE_QUANTIZATIONINFO_H -#include "arm_compute/core/Error.h" #include "arm_compute/core/Rounding.h" +#include "arm_compute/core/utils/misc/Utility.h" + #include "support/ToolchainSupport.h" -#include "utils/misc/Utility.h" -#include <cstddef> -#include <type_traits> #include <vector> namespace arm_compute @@ -44,8 +42,7 @@ using qasymm16_t = uint16_t; /**< 16 bit quantized asymmetric scalar value struct UniformQuantizationInfo { /** Default constructor */ - UniformQuantizationInfo() - : scale(0.f), offset(0) + UniformQuantizationInfo() : scale(0.f), offset(0) { } /** Constructor @@ -53,8 +50,7 @@ struct UniformQuantizationInfo * @param[in] scale Quantization scale * @param[in] offset Quantization offset */ - UniformQuantizationInfo(float scale, int32_t offset) - : scale(scale), offset(offset) + UniformQuantizationInfo(float scale, int32_t offset) : scale(scale), offset(offset) { } /** Checks if the scale and offset are both zero */ @@ -72,9 +68,7 @@ class QuantizationInfo { public: /** Default constructor */ - QuantizationInfo() noexcept - : _scale(), - _offset() + QuantizationInfo() noexcept : _scale(), _offset() { } /** Construct quantization info. @@ -83,19 +77,19 @@ public: * * @param[in] scale Scale. */ - QuantizationInfo(float scale) - : _scale(1, scale), _offset() + QuantizationInfo(float scale) : _scale(1, scale), _offset() { } /** Construct quantization info. * * @note Used for asymmetric quantization * - * @param[in] scale Scale. - * @param[in] offset Offset. + * @param[in] scale Scale. + * @param[in] offset Offset. + * @param[in] is_dynamic Whether this QuantizationInfo is dynamic, i.e. the scale and offset may change. 
*/ - QuantizationInfo(float scale, int offset) - : _scale(1, scale), _offset(1, offset) + QuantizationInfo(float scale, int offset, bool is_dynamic = false) + : _scale(1, scale), _offset(1, offset), _is_dynamic(is_dynamic) { } /** Construct quantization info. @@ -104,19 +98,19 @@ public: * * @param[in] scale Scale. */ - QuantizationInfo(std::vector<float> scale) - : _scale(scale), _offset() + QuantizationInfo(std::vector<float> scale) : _scale(scale), _offset() { } /** Construct quantization info. * * @note Used for asymmetric per channel quantization * - * @param[in] scale Scale. - * @param[in] offset Offset. + * @param[in] scale Scale. + * @param[in] offset Offset. + * @param[in] is_dynamic Whether this QuantizationInfo is dynamic, i.e. the scale and offset may change. */ - QuantizationInfo(std::vector<float> scale, std::vector<int32_t> offset) - : _scale(scale), _offset(offset) + QuantizationInfo(std::vector<float> scale, std::vector<int32_t> offset, bool is_dynamic = false) + : _scale(scale), _offset(offset), _is_dynamic(is_dynamic) { } /** Scale vector accessor @@ -135,6 +129,14 @@ public: { return _offset; } + /** is_dynamic accessor + * + * @return If true, the scale and offset may change, so operators will need to read on every run + */ + bool is_dynamic() const + { + return _is_dynamic; + } /** Indicates whether this QuantizationInfo has valid settings or not * * @return True if the this has invalid settings. @@ -159,6 +161,8 @@ public: private: std::vector<float> _scale; /**< Vector containing scaling factors */ std::vector<int32_t> _offset; /**< Vector containing zero offsets */ + bool _is_dynamic = + false; /**< If true, the scale and offset may change, so operators will need to read on every run */ }; /** Check whether two quantization info are equal. 
@@ -211,8 +215,7 @@ inline bool operator!=(const UniformQuantizationInfo &lhs, const UniformQuantiza template <typename QUANTIZED_TYPE = uint8_t> struct Qasymm8QuantizationHelper { - static_assert(std::is_same<QUANTIZED_TYPE, uint8_t>::value - || std::is_same<QUANTIZED_TYPE, int8_t>::value, + static_assert(std::is_same<QUANTIZED_TYPE, uint8_t>::value || std::is_same<QUANTIZED_TYPE, int8_t>::value, "quantized type should be either uint8_t or int8_t."); /** Quantize a value given a 8-bit asymmetric quantization scheme @@ -237,9 +240,10 @@ struct Qasymm8QuantizationHelper * * @return Quantized value */ - static inline QUANTIZED_TYPE quantize(float value, const UniformQuantizationInfo &qinfo, RoundingPolicy rounding_policy) + static inline QUANTIZED_TYPE + quantize(float value, const UniformQuantizationInfo &qinfo, RoundingPolicy rounding_policy) { - if(rounding_policy == RoundingPolicy::TO_NEAREST_UP) + if (rounding_policy == RoundingPolicy::TO_NEAREST_UP) { return quantize(value, qinfo); } @@ -257,7 +261,8 @@ struct Qasymm8QuantizationHelper * * @return Quantized value */ - static inline QUANTIZED_TYPE quantize(float value, const QuantizationInfo &qinfo, RoundingPolicy rounding_policy = RoundingPolicy::TO_NEAREST_UP) + static inline QUANTIZED_TYPE + quantize(float value, const QuantizationInfo &qinfo, RoundingPolicy rounding_policy = RoundingPolicy::TO_NEAREST_UP) { const UniformQuantizationInfo uqinfo = qinfo.uniform(); ARM_COMPUTE_ERROR_ON(uqinfo.scale == 0); @@ -300,7 +305,8 @@ struct Qasymm8QuantizationHelper * @return Quantized value */ template <typename INFO_TYPE> -inline uint8_t quantize_qasymm8(float value, const INFO_TYPE &qinfo, RoundingPolicy rounding_policy = RoundingPolicy::TO_NEAREST_UP) +inline uint8_t +quantize_qasymm8(float value, const INFO_TYPE &qinfo, RoundingPolicy rounding_policy = RoundingPolicy::TO_NEAREST_UP) { return Qasymm8QuantizationHelper<uint8_t>::quantize(value, qinfo, rounding_policy); } @@ -314,7 +320,9 @@ inline uint8_t 
quantize_qasymm8(float value, const INFO_TYPE &qinfo, RoundingPol * @return Quantized value */ template <typename INFO_TYPE> -inline int8_t quantize_qasymm8_signed(float value, const INFO_TYPE &qinfo, RoundingPolicy rounding_policy = RoundingPolicy::TO_NEAREST_UP) +inline int8_t quantize_qasymm8_signed(float value, + const INFO_TYPE &qinfo, + RoundingPolicy rounding_policy = RoundingPolicy::TO_NEAREST_UP) { return Qasymm8QuantizationHelper<int8_t>::quantize(value, qinfo, rounding_policy); } @@ -436,6 +444,19 @@ inline float dequantize(uint16_t value, float scale, int32_t offset) return (static_cast<int>(value) - offset) * scale; } +/** Dequantize a value given a 32-bit asymmetric quantization scheme + * + * @param[in] value Value to dequantize + * @param[in] scale Scale to use for dequantization + * @param[in] offset Zero-offset to use for dequantization + * + * @return Dequantized value + */ +inline float dequantize(int32_t value, float scale, int32_t offset) +{ + return (static_cast<int>(value) - offset) * scale; +} + /** Quantize a value given a 16-bit symmetric quantization scheme * * @param[in] value Value to quantize @@ -444,7 +465,9 @@ inline float dequantize(uint16_t value, float scale, int32_t offset) * * @return Quantized value */ -inline int16_t quantize_qsymm16(float value, const UniformQuantizationInfo &qinfo, RoundingPolicy rounding_policy = RoundingPolicy::TO_NEAREST_UP) +inline int16_t quantize_qsymm16(float value, + const UniformQuantizationInfo &qinfo, + RoundingPolicy rounding_policy = RoundingPolicy::TO_NEAREST_UP) { int quantized = arm_compute::round(value / qinfo.scale, rounding_policy); quantized = arm_compute::utility::clamp<int, int16_t>(quantized); @@ -495,7 +518,9 @@ inline float dequantize_qsymm16(int16_t value, const QuantizationInfo &qinfo) * * @return Quantized value */ -inline uint16_t quantize_qasymm16(float value, const UniformQuantizationInfo &qinfo, RoundingPolicy rounding_policy = RoundingPolicy::TO_NEAREST_UP) +inline uint16_t 
quantize_qasymm16(float value, + const UniformQuantizationInfo &qinfo, + RoundingPolicy rounding_policy = RoundingPolicy::TO_NEAREST_UP) { int quantized = arm_compute::round(value / qinfo.scale, rounding_policy) + qinfo.offset; quantized = arm_compute::utility::clamp<int, uint16_t>(quantized); @@ -538,6 +563,31 @@ inline float dequantize_qasymm16(uint16_t value, const QuantizationInfo &qinfo) return dequantize_qasymm16(value, qinfo.uniform()); } +/** Dequantize a value given a 32-bit asymmetric quantization scheme + * + * @param[in] value Value to dequantize + * @param[in] qinfo Quantization information to use for dequantizing + * + * @return Dequantized value + */ +inline float dequantize_s32(int32_t value, const UniformQuantizationInfo &qinfo) +{ + return (static_cast<int>(value) - qinfo.offset) * qinfo.scale; +} + +/** Dequantize a value given a 32-bit asymmetric quantization scheme + * + * @param[in] value Value to dequantize + * @param[in] qinfo Quantization information to use for dequantizing + * + * @return Dequantized value + */ + +inline float dequantize_s32(int32_t value, const QuantizationInfo &qinfo) +{ + return dequantize_s32(value, qinfo.uniform()); +} + /* * In case of requantization of a quantized input tensor to an output tensor with another quantization * instead of applying dequantization and then a quantization functions, we just compute new scale and @@ -568,7 +618,8 @@ inline float dequantize_qasymm16(uint16_t value, const QuantizationInfo &qinfo) * z_n = - z_i * s_i / s_o + z_o * */ -inline UniformQuantizationInfo compute_requantization_scale_offset(const UniformQuantizationInfo &uqinfo_in, const UniformQuantizationInfo &uqinfo_out) +inline UniformQuantizationInfo compute_requantization_scale_offset(const UniformQuantizationInfo &uqinfo_in, + const UniformQuantizationInfo &uqinfo_out) { float scale_to_apply = uqinfo_out.scale; int32_t offset_to_apply = uqinfo_out.offset; @@ -582,4 +633,4 @@ inline UniformQuantizationInfo 
compute_requantization_scale_offset(const Uniform } } // namespace arm_compute -#endif /* ARM_COMPUTE_QUANTIZATION_INFO_H */ +#endif // ACL_ARM_COMPUTE_CORE_QUANTIZATIONINFO_H diff --git a/arm_compute/core/Rounding.h b/arm_compute/core/Rounding.h index b6817b5107..30a5a0fe9d 100644 --- a/arm_compute/core/Rounding.h +++ b/arm_compute/core/Rounding.h @@ -42,5 +42,5 @@ enum class RoundingPolicy * @return Rounded value of the argument x. */ int round(float x, RoundingPolicy rounding_policy); -} +} // namespace arm_compute #endif /*ARM_COMPUTE_ROUNDING_H */ diff --git a/arm_compute/core/Size2D.h b/arm_compute/core/Size2D.h index bcd89cb310..672b392050 100644 --- a/arm_compute/core/Size2D.h +++ b/arm_compute/core/Size2D.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020 Arm Limited. + * Copyright (c) 2016-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -41,8 +41,7 @@ public: * @param[in] w Width of the image or rectangle * @param[in] h Height of the image or rectangle */ - Size2D(size_t w, size_t h) - : width(w), height(h) + Size2D(size_t w, size_t h) noexcept : width(w), height(h) { } /** The area of the image or rectangle calculated as (width * height) @@ -89,5 +88,5 @@ public: size_t width = {}; /**< Width of the image region or rectangle */ size_t height = {}; /**< Height of the image region or rectangle */ }; -} +} // namespace arm_compute #endif /*ARM_COMPUTE_SIZE2D_H */ diff --git a/arm_compute/core/Size3D.h b/arm_compute/core/Size3D.h new file mode 100644 index 0000000000..e2dc6fe012 --- /dev/null +++ b/arm_compute/core/Size3D.h @@ -0,0 +1,97 @@ +/* + * Copyright (c) 2021 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_SIZE3D_H +#define ARM_COMPUTE_SIZE3D_H + +#include <string> + +namespace arm_compute +{ +/** Class for specifying the size of a 3D shape or object */ +class Size3D +{ +public: + /** Default constructor */ + Size3D() = default; + /** Constructor. Initializes "width", "height" and "depth" respectively with "w", "h" and "d" + * + * @param[in] w Width of the 3D shape or object + * @param[in] h Height of the 3D shape or object + * @param[in] d Depth of the 3D shape or object + */ + Size3D(size_t w, size_t h, size_t d) noexcept : width(w), height(h), depth(d) + { + } + + /** Convert the values stored to string + * + * @return string of (width x height x depth). + */ + std::string to_string() const; + + /** Semantic accessor for width as x. + * + * @return x. 
+ */ + size_t x() const + { + return width; + } + + /** Semantic accessor for height as y. + * + * @return y. + */ + size_t y() const + { + return height; + } + + /** Semantic accessor for depth as z. + * + * @return z. + */ + size_t z() const + { + return depth; + } + + bool operator!=(const Size3D &other) const + { + return !(*this == other); + } + + bool operator==(const Size3D &other) const + { + return (width == other.width) && (height == other.height) && (depth == other.depth); + } + +public: + size_t width = {}; /**< Width of the 3D shape or object */ + size_t height = {}; /**< Height of the 3D shape or object */ + size_t depth = {}; /**< Depth of the 3D shape or object */ +}; + +} // namespace arm_compute +#endif /* ARM_COMPUTE_SIZE3D_H */ diff --git a/arm_compute/core/Steps.h b/arm_compute/core/Steps.h index 208fc4b294..6b261becc0 100644 --- a/arm_compute/core/Steps.h +++ b/arm_compute/core/Steps.h @@ -45,8 +45,7 @@ public: * @param[in] steps Values to initialize the steps. */ template <typename... Ts> - Steps(Ts... steps) - : Dimensions{ steps... } + Steps(Ts... steps) : Dimensions{steps...} { // Initialize empty dimensions to 1 std::fill(_id.begin() + _num_dimensions, _id.end(), 1); @@ -62,5 +61,5 @@ public: /** Default destructor */ ~Steps() = default; }; -} +} // namespace arm_compute #endif /*ARM_COMPUTE_STEPS_H*/ diff --git a/arm_compute/core/Strides.h b/arm_compute/core/Strides.h index 265799e41e..627b219987 100644 --- a/arm_compute/core/Strides.h +++ b/arm_compute/core/Strides.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 Arm Limited. + * Copyright (c) 2017-2019, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -30,6 +30,7 @@ #include <algorithm> #include <array> #include <cstddef> +#include <cstdint> namespace arm_compute { @@ -42,8 +43,7 @@ public: * @param[in] strides Values to initialize the strides. */ template <typename... Ts> - constexpr Strides(Ts... strides) - : Dimensions{ strides... } + constexpr Strides(Ts... 
strides) : Dimensions{strides...} { } /** Allow instances of this class to be copy constructed */ diff --git a/arm_compute/core/SubTensorInfo.h b/arm_compute/core/SubTensorInfo.h index 1b2278d99b..7a3ee2cfd0 100644 --- a/arm_compute/core/SubTensorInfo.h +++ b/arm_compute/core/SubTensorInfo.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021 Arm Limited. + * Copyright (c) 2017-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,10 +24,9 @@ #ifndef ARM_COMPUTE_SUBTENSORINFO_H #define ARM_COMPUTE_SUBTENSORINFO_H -#include "arm_compute/core/ITensorInfo.h" - #include "arm_compute/core/Coordinates.h" #include "arm_compute/core/Helpers.h" +#include "arm_compute/core/ITensorInfo.h" #include "arm_compute/core/Strides.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/TensorShape.h" @@ -73,7 +72,7 @@ public: // Inherited methods overridden: std::unique_ptr<ITensorInfo> clone() const override; - ITensorInfo &set_data_type(DataType data_type) override + ITensorInfo &set_data_type(DataType data_type) override { ARM_COMPUTE_ERROR_ON(_parent == nullptr); _parent->set_data_type(data_type); @@ -116,7 +115,13 @@ public: ARM_COMPUTE_ERROR_ON(_parent == nullptr); return _parent->auto_padding(); }; + + ITensorInfo &set_lock_paddings(bool flag) override; + + bool lock_paddings() const override; + bool extend_padding(const PaddingSize &padding) override; + size_t dimension(size_t index) const override { return _tensor_shape[index]; @@ -137,7 +142,7 @@ public: return _parent->offset_element_in_bytes(_coords); } int32_t offset_element_in_bytes(const Coordinates &pos) const override; - size_t element_size() const override + size_t element_size() const override { ARM_COMPUTE_ERROR_ON(_parent == nullptr); return _parent->element_size(); @@ -196,12 +201,23 @@ public: ARM_COMPUTE_ERROR_ON(_parent == nullptr); return _parent->is_dynamic(); } + bool are_values_constant() const override + { + ARM_COMPUTE_ERROR_ON(_parent == nullptr); + return 
_parent->are_values_constant(); + } ITensorInfo &set_is_resizable(bool is_resizable) override { ARM_COMPUTE_ERROR_ON(_parent == nullptr); _parent->set_is_resizable(is_resizable); return *this; } + ITensorInfo &set_are_values_constant(bool are_values_constant) override + { + ARM_COMPUTE_ERROR_ON(_parent == nullptr); + _parent->set_are_values_constant(are_values_constant); + return *this; + } ValidRegion valid_region() const override { return _valid_region; @@ -210,7 +226,7 @@ public: { ARM_COMPUTE_ERROR_ON(_parent == nullptr); // Check if subtensor is valid if parent is configured - if(_parent->tensor_shape().total_size() != 0) + if (_parent->tensor_shape().total_size() != 0) { ARM_COMPUTE_ERROR_ON_INVALID_SUBTENSOR_VALID_REGION(_parent->valid_region(), valid_region); } @@ -226,6 +242,17 @@ public: ARM_COMPUTE_ERROR_ON(_parent == nullptr); return _parent->data_layout(); } + ITensorInfo::Id id() const override + { + ARM_COMPUTE_ERROR_ON(_parent == nullptr); + return _parent->id(); + } + ITensorInfo &set_id(ITensorInfo::Id id) override + { + ARM_COMPUTE_ERROR_ON(_parent == nullptr); + _parent->set_id(id); + return *this; + } private: ITensorInfo *_parent; @@ -234,6 +261,7 @@ private: Coordinates _coords; ValidRegion _valid_region; bool _extend_parent; + bool _lock_paddings; }; } // namespace arm_compute #endif /*ARM_COMPUTE_SUBTENSORINFO_H */ diff --git a/arm_compute/core/TensorInfo.h b/arm_compute/core/TensorInfo.h index a4330849bf..b18f750427 100644 --- a/arm_compute/core/TensorInfo.h +++ b/arm_compute/core/TensorInfo.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2021 Arm Limited. + * Copyright (c) 2016-2023 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -24,16 +24,14 @@ #ifndef ARM_COMPUTE_TENSORINFO_H #define ARM_COMPUTE_TENSORINFO_H -#include "arm_compute/core/ITensorInfo.h" - -#include "ITensorInfo.h" #include "arm_compute/core/Coordinates.h" #include "arm_compute/core/Helpers.h" +#include "arm_compute/core/ITensorInfo.h" #include "arm_compute/core/Strides.h" #include "arm_compute/core/TensorShape.h" #include "arm_compute/core/Types.h" -#include "arm_compute/core/Utils.h" +#include "ITensorInfo.h" #include <cstddef> #include <memory> @@ -50,7 +48,7 @@ public: /** Allow instances of this class to be copy constructed */ TensorInfo(const ITensorInfo &info); /** Allow instances of this class to be copy constructed */ - TensorInfo(const TensorInfo &) = default; + TensorInfo(const TensorInfo &); /** Allow instances of this class to be copied */ TensorInfo &operator=(const TensorInfo &) = default; /** Allow instances of this class to be move constructed */ @@ -113,7 +111,10 @@ public: * @param[in] data_type Data type to use for each tensor element * @param[in] quantization_info The quantization settings for the tensor data. */ - TensorInfo(const TensorShape &tensor_shape, size_t num_channels, DataType data_type, QuantizationInfo quantization_info); + TensorInfo(const TensorShape &tensor_shape, + size_t num_channels, + DataType data_type, + QuantizationInfo quantization_info); /** Initialize the tensor info with just a format. * @@ -137,7 +138,11 @@ public: * @param[in] offset_first_element_in_bytes Offset in bytes from the beginning of memory allocation to access the first element. * @param[in] total_size_in_bytes Size in bytes of the memory allocation (including the offset to the first element). 
*/ - void init(const TensorShape &tensor_shape, Format format, const Strides &strides_in_bytes, size_t offset_first_element_in_bytes, size_t total_size_in_bytes); + void init(const TensorShape &tensor_shape, + Format format, + const Strides &strides_in_bytes, + size_t offset_first_element_in_bytes, + size_t total_size_in_bytes); /** Initialize the tensor info with just a format. * @@ -165,8 +170,12 @@ public: * @param[in] offset_first_element_in_bytes Offset in bytes from the beginning of memory allocation to access the first element. * @param[in] total_size_in_bytes Size in bytes of the memory allocation (including the offset to the first element). */ - void init(const TensorShape &tensor_shape, size_t num_channels, DataType data_type, const Strides &strides_in_bytes, size_t offset_first_element_in_bytes, - size_t total_size_in_bytes); + void init(const TensorShape &tensor_shape, + size_t num_channels, + DataType data_type, + const Strides &strides_in_bytes, + size_t offset_first_element_in_bytes, + size_t total_size_in_bytes); /** Initialize the metadata structure for the given tensor shape and single-plane format, (Padding is automatically calculated) * * @note The padding used by this method is really conservative so that the tensor can be used for most functions. 
@@ -192,17 +201,19 @@ public: // Inherited methods overridden: std::unique_ptr<ITensorInfo> clone() const override; - ITensorInfo &set_data_type(DataType data_type) override; - ITensorInfo &set_num_channels(int num_channels) override; - ITensorInfo &set_format(Format format) override; - ITensorInfo &set_tensor_shape(const TensorShape &shape) override; - ITensorInfo &set_tensor_dims_state(const TensorDimsState &state) override; - ITensorInfo &set_quantization_info(const QuantizationInfo &quantization_info) override; - ITensorInfo &set_data_layout(const DataLayout &data_layout) override; - ITensorInfo &reset_padding() override; - bool auto_padding() override; - bool extend_padding(const PaddingSize &padding) override; - size_t dimension(size_t index) const override + ITensorInfo &set_data_type(DataType data_type) override; + ITensorInfo &set_num_channels(int num_channels) override; + ITensorInfo &set_format(Format format) override; + ITensorInfo &set_tensor_shape(const TensorShape &shape) override; + ITensorInfo &set_tensor_dims_state(const TensorDimsState &state) override; + ITensorInfo &set_quantization_info(const QuantizationInfo &quantization_info) override; + ITensorInfo &set_data_layout(const DataLayout &data_layout) override; + ITensorInfo &reset_padding() override; + bool auto_padding() override; + ITensorInfo &set_lock_paddings(bool flag) override; + bool lock_paddings() const override; + bool extend_padding(const PaddingSize &padding) override; + size_t dimension(size_t index) const override { return _tensor_shape[index]; } @@ -219,7 +230,7 @@ public: return _offset_first_element_in_bytes; } int32_t offset_element_in_bytes(const Coordinates &pos) const override; - size_t element_size() const override + size_t element_size() const override { return data_size_from_type(_data_type) * _num_channels; } @@ -265,7 +276,12 @@ public: } bool is_dynamic() const override { - return std::find(std::cbegin(_dims_state), std::cend(_dims_state), get_dynamic_state_value()) 
!= std::cend(_dims_state); + return std::find(std::cbegin(_dims_state), std::cend(_dims_state), get_dynamic_state_value()) != + std::cend(_dims_state); + } + bool are_values_constant() const override + { + return _are_values_constant; } ITensorInfo &set_is_resizable(bool is_resizable) override { @@ -288,6 +304,21 @@ public: { return _data_layout; } + ITensorInfo &set_are_values_constant(bool are_values_constant) override + { + _are_values_constant = are_values_constant; + return *this; + } + ITensorInfo::Id id() const override + { + return _id; + } + ITensorInfo &set_id(ITensorInfo::Id id) override + { + _id = id; + return *this; + } + inline friend bool operator==(const TensorInfo &lhs, const TensorInfo &rhs); private: /** Calculates strides, offset and total size resulting from the specified padding around the XY plane. @@ -309,6 +340,29 @@ private: PaddingSize _padding; QuantizationInfo _quantization_info; DataLayout _data_layout; + bool _are_values_constant; + ITensorInfo::Id _id; + bool _lock_paddings; }; + +/** Check whether two tensor info are equal. + * + * @param[in] lhs LHS tensor info. + * @param[in] rhs RHS tensor info. + * + * @return True if the given tensor infos are the same. 
+ */ +inline bool operator==(const TensorInfo &lhs, const TensorInfo &rhs) +{ + return (lhs._total_size == rhs._total_size) && + (lhs._offset_first_element_in_bytes == rhs._offset_first_element_in_bytes) && + (lhs._strides_in_bytes == rhs._strides_in_bytes) && (lhs._num_channels == rhs._num_channels) && + (lhs._tensor_shape == rhs._tensor_shape) && (lhs._dims_state == rhs._dims_state) && + (lhs._data_type == rhs._data_type) && (lhs._format == rhs._format) && + (lhs._is_resizable == rhs._is_resizable) && (lhs._valid_region == rhs._valid_region) && + (lhs._padding == rhs._padding) && (lhs._quantization_info == rhs._quantization_info) && + (lhs._data_layout == rhs._data_layout) && (lhs._are_values_constant == rhs._are_values_constant) && + (lhs._id == rhs._id); +} } // namespace arm_compute #endif /*ARM_COMPUTE_TENSORINFO_H */ diff --git a/arm_compute/core/TensorShape.h b/arm_compute/core/TensorShape.h index b6ab9dc75a..c1707e262f 100644 --- a/arm_compute/core/TensorShape.h +++ b/arm_compute/core/TensorShape.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2021 Arm Limited. + * Copyright (c) 2016-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -44,11 +44,10 @@ public: * @param[in] dims Values to initialize the dimensions. */ template <typename... Ts> - TensorShape(Ts... dims) - : Dimensions{ dims... } + TensorShape(Ts... 
dims) : Dimensions{dims...} { // Initialize unspecified dimensions to 1 - if(_num_dimensions > 0) + if (_num_dimensions > 0) { std::fill(_id.begin() + _num_dimensions, _id.end(), 1); } @@ -79,7 +78,7 @@ public: TensorShape &set(size_t dimension, size_t value, bool apply_dim_correction = true, bool increase_dim_unit = true) { // Clear entire shape if one dimension is zero - if(value == 0) + if (value == 0) { _num_dimensions = 0; std::fill(_id.begin(), _id.end(), 0); @@ -94,7 +93,7 @@ public: Dimensions::set(dimension, value, increase_dim_unit); // Correct number dimensions to ignore trailing dimensions of size 1 - if(apply_dim_correction) + if (apply_dim_correction) { apply_dimension_correction(); } @@ -106,9 +105,10 @@ public: * * @note The upper dimensions of the tensor shape will be shifted down by 1 * - * @param[in] n Dimension to remove + * @param[in] n Dimension to remove + * @param[in] apply_dim_correction (Optional) Flag to state whether apply dimension correction (removing trailing dimensions with size of 1) after removing a dimension. */ - void remove_dimension(size_t n) + void remove_dimension(size_t n, bool apply_dim_correction = true) { ARM_COMPUTE_ERROR_ON(_num_dimensions < 1); ARM_COMPUTE_ERROR_ON(n >= _num_dimensions); @@ -122,7 +122,10 @@ public: std::fill(_id.begin() + _num_dimensions, _id.end(), 1); // Correct number dimensions to ignore trailing dimensions of size 1 - apply_dimension_correction(); + if (apply_dim_correction) + { + apply_dimension_correction(); + } } /** Collapse the first n dimensions. @@ -208,26 +211,26 @@ public: * @return The broadcasted shape or an empty shape if the shapes are not broadcast compatible. */ template <typename... Shapes> - static TensorShape broadcast_shape(const Shapes &... 
shapes) + static TensorShape broadcast_shape(const Shapes &...shapes) { TensorShape bc_shape; - auto broadcast = [&bc_shape](const TensorShape & other) + auto broadcast = [&bc_shape](const TensorShape &other) { - if(bc_shape.num_dimensions() == 0) + if (bc_shape.num_dimensions() == 0) { bc_shape = other; } - else if(other.num_dimensions() != 0) + else if (other.num_dimensions() != 0) { - for(size_t d = 0; d < TensorShape::num_max_dimensions; ++d) + for (size_t d = 0; d < TensorShape::num_max_dimensions; ++d) { const size_t dim_min = std::min(bc_shape[d], other[d]); const size_t dim_max = std::max(bc_shape[d], other[d]); - if((dim_min != 1) && (dim_min != dim_max)) + if ((dim_min != 1) && (dim_min != dim_max)) { - bc_shape = TensorShape{ 0U }; + bc_shape = TensorShape{0U}; break; } @@ -245,9 +248,9 @@ private: /** Remove trailing dimensions of size 1 from the reported number of dimensions. */ void apply_dimension_correction() { - for(int i = static_cast<int>(_num_dimensions) - 1; i > 0; --i) + for (int i = static_cast<int>(_num_dimensions) - 1; i > 0; --i) { - if(_id[i] == 1) + if (_id[i] == 1) { --_num_dimensions; } @@ -258,5 +261,5 @@ private: } } }; -} +} // namespace arm_compute #endif /*ARM_COMPUTE_TENSORSHAPE_H*/ diff --git a/arm_compute/core/Types.h b/arm_compute/core/Types.h index 48c87cd8ac..f2f60c150e 100644 --- a/arm_compute/core/Types.h +++ b/arm_compute/core/Types.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2021 Arm Limited. + * Copyright (c) 2016-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,17 +21,52 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#ifndef ARM_COMPUTE_TYPES_H -#define ARM_COMPUTE_TYPES_H - +#ifndef ACL_ARM_COMPUTE_CORE_TYPES_H +#define ACL_ARM_COMPUTE_CORE_TYPES_H + +/** The following symbols have been moved to: + * half + * PermutationVector + * Format + * DataType + * DataLayout + * DataLayoutDimension + * PadStrideInfo + * WeightFormat + * Channel + * DimensionRoundingType + */ +#include "arm_compute/core/CoreTypes.h" +/** The following symbols have been moved to: + * ActivationFunction + * ActivationLayerInfo + */ +#include "arm_compute/function_info/ActivationLayerInfo.h" +/** The following symbols have been moved to: + * ConvolutionInfo + */ +#include "arm_compute/function_info/ConvolutionInfo.h" +/** The following symbols have been moved to: + * FullyConnectedLayerInfo + */ +#include "arm_compute/function_info/FullyConnectedLayerInfo.h" +/** The following symbols have been moved to: + * GEMMLowpOutputStageType + * GEMMLowpOutputStageInfo + * GEMMInfo + */ +#include "arm_compute/function_info/GEMMInfo.h" +/** The following symbols have been moved to: + * MatMulInfo + */ #include "arm_compute/core/Coordinates.h" -#include "arm_compute/core/QuantizationInfo.h" #include "arm_compute/core/Size2D.h" -#include "arm_compute/core/Strides.h" +#include "arm_compute/core/Size3D.h" #include "arm_compute/core/TensorShape.h" #include "arm_compute/core/utils/misc/Macros.h" +#include "arm_compute/function_info/MatMulInfo.h" + #include "support/Bfloat16.h" -#include "support/Half.h" #include <cmath> #include <cstddef> @@ -42,62 +77,9 @@ namespace arm_compute { -/** 16-bit floating point type */ -using half = half_float::half; - -/** Permutation vector */ -using PermutationVector = Strides; /** Bidirectional strides */ using BiStrides = Coordinates; -/** Image colour formats */ -enum class Format -{ - UNKNOWN, /**< Unknown image format */ - U8, /**< 1 channel, 1 U8 per channel */ - S16, /**< 1 channel, 1 S16 per channel */ - U16, /**< 1 channel, 1 U16 per channel */ - S32, /**< 1 channel, 1 S32 per 
channel */ - U32, /**< 1 channel, 1 U32 per channel */ - BFLOAT16, /**< 16-bit brain floating-point number */ - F16, /**< 1 channel, 1 F16 per channel */ - F32, /**< 1 channel, 1 F32 per channel */ - UV88, /**< 2 channel, 1 U8 per channel */ - RGB888, /**< 3 channels, 1 U8 per channel */ - RGBA8888, /**< 4 channels, 1 U8 per channel */ - YUV444, /**< A 3 plane of 8 bit 4:4:4 sampled Y, U, V planes */ - YUYV422, /**< A single plane of 32-bit macro pixel of Y0, U0, Y1, V0 bytes */ - NV12, /**< A 2 plane YUV format of Luma (Y) and interleaved UV data at 4:2:0 sampling */ - NV21, /**< A 2 plane YUV format of Luma (Y) and interleaved VU data at 4:2:0 sampling */ - IYUV, /**< A 3 plane of 8-bit 4:2:0 sampled Y, U, V planes */ - UYVY422 /**< A single plane of 32-bit macro pixel of U0, Y0, V0, Y1 byte */ -}; - -/** Available data types */ -enum class DataType -{ - UNKNOWN, /**< Unknown data type */ - U8, /**< unsigned 8-bit number */ - S8, /**< signed 8-bit number */ - QSYMM8, /**< quantized, symmetric fixed-point 8-bit number */ - QASYMM8, /**< quantized, asymmetric fixed-point 8-bit number unsigned */ - QASYMM8_SIGNED, /**< quantized, asymmetric fixed-point 8-bit number signed */ - QSYMM8_PER_CHANNEL, /**< quantized, symmetric per channel fixed-point 8-bit number */ - U16, /**< unsigned 16-bit number */ - S16, /**< signed 16-bit number */ - QSYMM16, /**< quantized, symmetric fixed-point 16-bit number */ - QASYMM16, /**< quantized, asymmetric fixed-point 16-bit number */ - U32, /**< unsigned 32-bit number */ - S32, /**< signed 32-bit number */ - U64, /**< unsigned 64-bit number */ - S64, /**< signed 64-bit number */ - BFLOAT16, /**< 16-bit brain floating-point number */ - F16, /**< 16-bit floating-point number */ - F32, /**< 32-bit floating-point number */ - F64, /**< 64-bit floating-point number */ - SIZET /**< size_t */ -}; - /** Available Sampling Policies */ enum class SamplingPolicy { @@ -105,32 +87,13 @@ enum class SamplingPolicy TOP_LEFT /**< Samples are taken at 
pixel top left corner */ }; -/** [DataLayout enum definition] **/ - -/** Supported tensor data layouts */ -enum class DataLayout -{ - UNKNOWN, /**< Unknown data layout */ - NCHW, /**< Num samples, channels, height, width */ - NHWC /**< Num samples, height, width, channels */ -}; -/** [DataLayout enum definition] **/ - -/** Supported tensor data layout dimensions */ -enum class DataLayoutDimension -{ - CHANNEL, /**< channel */ - HEIGHT, /**< height */ - WIDTH, /**< width */ - BATCHES /**< batches */ -}; - /** Available ConvolutionMethod*/ enum class ConvolutionMethod { GEMM, /**< Convolution using GEMM */ GEMM_CONV2D, /**< Direct 2D GEMM convolution */ DIRECT, /**< Direct convolution */ + INDIRECT, /**< Indirect convolution */ WINOGRAD, /**< Convolution using Winograd */ FFT /**< Convolution using FFT */ }; @@ -145,8 +108,9 @@ enum class DepthwiseConvolutionFunction /** Available DeconvolutionMethod*/ enum class DeconvolutionMethod { - GEMM, /**< Deconvolution using GEMM */ - DIRECT, /**< Direct deconvolution */ + GEMM, /**< Deconvolution using GEMM */ + DIRECT, /**< Direct deconvolution */ + UPSCALE_CONV2D /**< Deconvolution with Upscaling */ }; /** Available FuseBatchNormalizationType*/ @@ -179,8 +143,7 @@ enum class ComparisonOperation struct ValidRegion { /** Default constructor */ - ValidRegion() - : anchor{}, shape{} + ValidRegion() : anchor{}, shape{} { } @@ -201,8 +164,7 @@ struct ValidRegion * @param[in] a_shape Shape of the valid region. 
* */ - ValidRegion(const Coordinates &an_anchor, const TensorShape &a_shape) - : anchor{ an_anchor }, shape{ a_shape } + ValidRegion(const Coordinates &an_anchor, const TensorShape &a_shape) : anchor{an_anchor}, shape{a_shape} { anchor.set_num_dimensions(std::max(anchor.num_dimensions(), shape.num_dimensions())); } @@ -215,7 +177,7 @@ struct ValidRegion * */ ValidRegion(const Coordinates &an_anchor, const TensorShape &a_shape, size_t num_dimensions) - : anchor{ an_anchor }, shape{ a_shape } + : anchor{an_anchor}, shape{a_shape} { ARM_COMPUTE_ERROR_ON(num_dimensions < std::max(anchor.num_dimensions(), shape.num_dimensions())); anchor.set_num_dimensions(num_dimensions); @@ -248,9 +210,22 @@ struct ValidRegion return *this; } + /** Check whether two valid regions are equal. + * + * @param[in] lhs LHS valid region + * @param[in] rhs RHS valid region + * + * @return True if the valid regions are the same. + */ + inline friend bool operator==(const ValidRegion &lhs, const ValidRegion &rhs); + Coordinates anchor; /**< Anchor for the start of the valid region. */ TensorShape shape; /**< Shape of the valid region. */ }; +inline bool operator==(const ValidRegion &lhs, const ValidRegion &rhs) +{ + return (lhs.anchor == rhs.anchor) && (lhs.shape == rhs.shape); +} /** Methods available to handle borders */ enum class BorderMode @@ -264,32 +239,24 @@ enum class BorderMode struct BorderSize { /** Empty border, i.e. 
no border */ - constexpr BorderSize() noexcept - : top{ 0 }, - right{ 0 }, - bottom{ 0 }, - left{ 0 } + constexpr BorderSize() noexcept : top{0}, right{0}, bottom{0}, left{0} { } /** Border with equal size around the 2D plane */ - explicit constexpr BorderSize(unsigned int size) noexcept - : top{ size }, - right{ size }, - bottom{ size }, - left{ size } + explicit constexpr BorderSize(unsigned int size) noexcept : top{size}, right{size}, bottom{size}, left{size} { } /** Border with same size for top/bottom and left/right */ constexpr BorderSize(unsigned int top_bottom, unsigned int left_right) - : top{ top_bottom }, right{ left_right }, bottom{ top_bottom }, left{ left_right } + : top{top_bottom}, right{left_right}, bottom{top_bottom}, left{left_right} { } /** Border with different sizes */ constexpr BorderSize(unsigned int top, unsigned int right, unsigned int bottom, unsigned int left) - : top{ top }, right{ right }, bottom{ bottom }, left{ left } + : top{top}, right{right}, bottom{bottom}, left{left} { } @@ -341,7 +308,7 @@ struct BorderSize * * @return true if they are equal */ - bool operator==(const BorderSize &rhs) + bool operator==(const BorderSize &rhs) const { return (top == rhs.top) && (right == rhs.right) && (bottom == rhs.bottom) && (left == rhs.left); } @@ -352,7 +319,7 @@ struct BorderSize * * @return true if they are different */ - bool operator!=(const BorderSize &rhs) + bool operator!=(const BorderSize &rhs) const { return !(*this == rhs); } @@ -378,7 +345,11 @@ struct BorderSize /** Container for 2D padding size */ using PaddingSize = BorderSize; -/** Policy to handle overflow */ +/** Policy to handle integer overflow + * @note: This is ignored by floating point operations where the overflow behavior adheres to the IEEE-754 standard + * which states that in case of overflow ±infinity is returned for the round-to-nearest modes (and follows the + * rounding rules for the directed rounding modes) by default. 
+ */ enum class ConvertPolicy { WRAP, /**< Wrap around */ @@ -390,7 +361,7 @@ enum class InterpolationPolicy { NEAREST_NEIGHBOR, /**< Output values are defined to match the source pixel whose center is nearest to the sample position */ BILINEAR, /**< Output values are defined by bilinear interpolation between the pixels */ - AREA, /**< Output values are determined by averaging the source pixels whose areas fall under the area of the destination pixel, projected onto the source image */ + AREA, /**< Output values are determined by averaging the source pixels whose areas fall under the area of the destination pixel, projected onto the source image */ }; /** Bilinear Interpolation method used by LKTracker */ @@ -433,23 +404,6 @@ using PaddingList = std::vector<PaddingInfo>; /** Information to produce a tiled version of a Tensor */ using Multiples = std::vector<uint32_t>; -/** Available channels */ -enum class Channel -{ - UNKNOWN, /** Unknown channel format */ - C0, /**< First channel (used by formats with unknown channel types). */ - C1, /**< Second channel (used by formats with unknown channel types). */ - C2, /**< Third channel (used by formats with unknown channel types). */ - C3, /**< Fourth channel (used by formats with unknown channel types). */ - R, /**< Red channel. */ - G, /**< Green channel. */ - B, /**< Blue channel. */ - A, /**< Alpha channel. */ - Y, /**< Luma channel. */ - U, /**< Cb/U channel. */ - V /**< Cr/V/Value channel. 
*/ -}; - /** Available reduction operations */ enum class ReductionOperation { @@ -514,21 +468,12 @@ enum class NormType */ struct DetectionWindow { - uint16_t x{ 0 }; /**< Top-left x coordinate */ - uint16_t y{ 0 }; /**< Top-left y coordinate */ - uint16_t width{ 0 }; /**< Width of the detection window */ - uint16_t height{ 0 }; /**< Height of the detection window */ - uint16_t idx_class{ 0 }; /**< Index of the class */ - float score{ 0.f }; /**< Confidence value for the detection window */ -}; - -/** Dimension rounding type when down-scaling on CNNs - * @note Used in pooling and convolution layer - */ -enum class DimensionRoundingType -{ - FLOOR, /**< Floor rounding */ - CEIL /**< Ceil rounding */ + uint16_t x{0}; /**< Top-left x coordinate */ + uint16_t y{0}; /**< Top-left y coordinate */ + uint16_t width{0}; /**< Width of the detection window */ + uint16_t height{0}; /**< Height of the detection window */ + uint16_t idx_class{0}; /**< Index of the class */ + float score{0.f}; /**< Confidence value for the detection window */ }; /** Available pooling types */ @@ -565,12 +510,28 @@ public: * @param[in] im_width (Optional) Boxes whose centers (on the x axis) is beyond im_width will be filtered. Defaults to 1 * @param[in] im_height (Optional) Boxes whose centers (on the y axis) is beyond im_height will be filtered. 
Defaults to 1 */ - BoxNMSLimitInfo(float score_thresh = 0.05f, float nms = 0.3f, - int detections = 100, bool soft_nms_enabled = false, - NMSType soft_nms_method = NMSType::LINEAR, - float soft_nms_sigma = 0.5f, float soft_nms_min_score_thres = 0.001f, bool suppress_size = false, float min_size = 1.0f, float im_width = 1.0f, float im_height = 1.0f) - : _score_thresh(score_thresh), _nms(nms), _detections_per_im(detections), _soft_nms_enabled(soft_nms_enabled), _soft_nms_method(soft_nms_method), _soft_nms_sigma(soft_nms_sigma), - _soft_nms_min_score_thres(soft_nms_min_score_thres), _suppress_size(suppress_size), _min_size(min_size), _im_width(im_width), _im_height(im_height) + BoxNMSLimitInfo(float score_thresh = 0.05f, + float nms = 0.3f, + int detections = 100, + bool soft_nms_enabled = false, + NMSType soft_nms_method = NMSType::LINEAR, + float soft_nms_sigma = 0.5f, + float soft_nms_min_score_thres = 0.001f, + bool suppress_size = false, + float min_size = 1.0f, + float im_width = 1.0f, + float im_height = 1.0f) + : _score_thresh(score_thresh), + _nms(nms), + _detections_per_im(detections), + _soft_nms_enabled(soft_nms_enabled), + _soft_nms_method(soft_nms_method), + _soft_nms_sigma(soft_nms_sigma), + _soft_nms_min_score_thres(soft_nms_min_score_thres), + _suppress_size(suppress_size), + _min_size(min_size), + _im_width(im_width), + _im_height(im_height) { } /** Get the score threshold */ @@ -644,120 +605,42 @@ private: }; /** Padding and stride information class */ -class PadStrideInfo +/** Padding information for 2D operations like Conv2d */ +struct Padding2D { -public: - /** Constructor - * - * @param[in] stride_x (Optional) Stride, in elements, across x. Defaults to 1. - * @param[in] stride_y (Optional) Stride, in elements, across y. Defaults to 1. - * @param[in] pad_x (Optional) Padding, in elements, across x. Defaults to 0. - * @param[in] pad_y (Optional) Padding, in elements, across y. Defaults to 0. - * @param[in] round (Optional) Dimensions rounding. 
Defaults to @ref FLOOR. - */ - PadStrideInfo(unsigned int stride_x = 1, unsigned int stride_y = 1, - unsigned int pad_x = 0, unsigned int pad_y = 0, - DimensionRoundingType round = DimensionRoundingType::FLOOR) - : _stride(std::make_pair(stride_x, stride_y)), - _pad_left(pad_x), - _pad_top(pad_y), - _pad_right(pad_x), - _pad_bottom(pad_y), - _round_type(round) - { - } - /** Constructor - * - * @param[in] stride_x Stride, in elements, across x. - * @param[in] stride_y Stride, in elements, across y. - * @param[in] pad_left Padding across x on the left, in elements. - * @param[in] pad_top Padding across y on the top, in elements. - * @param[in] pad_right Padding across x on the right, in elements. - * @param[in] pad_bottom Padding across y on the bottom, in elements. - * @param[in] round Dimensions rounding. - */ - PadStrideInfo(unsigned int stride_x, unsigned int stride_y, - unsigned int pad_left, unsigned int pad_right, - unsigned int pad_top, unsigned int pad_bottom, - DimensionRoundingType round) - : _stride(std::make_pair(stride_x, stride_y)), - _pad_left(pad_left), - _pad_top(pad_top), - _pad_right(pad_right), - _pad_bottom(pad_bottom), - _round_type(round) - { - } - /** Get the stride. - * - * @return a pair: stride x, stride y. - */ - std::pair<unsigned int, unsigned int> stride() const - { - return _stride; - } - /** Check whether the padding is symmetric. - * - * @return True if the padding is symmetric. - */ - bool padding_is_symmetric() const - { - return (_pad_left == _pad_right) && (_pad_top == _pad_bottom); - } - /** Get the padding. - * - * @note This should only be used when the padding is symmetric. 
- * - * @return a pair: padding left/right, padding top/bottom - */ - std::pair<unsigned int, unsigned int> pad() const + Padding2D() = default; + Padding2D(size_t left, size_t right, size_t top, size_t bottom) : left(left), right(right), top(top), bottom(bottom) { - //this accessor should be used only when padding is symmetric - ARM_COMPUTE_ERROR_ON(!padding_is_symmetric()); - return std::make_pair(_pad_left, _pad_top); } + size_t left = {0}; /**< Padding across the width dimension on the left, in elements. */ + size_t right = {0}; /**< Padding across the width dimension on the right, in elements. */ + size_t top = {0}; /**< Padding across the height dimension on the top, in elements. */ + size_t bottom = {0}; /**< Padding across the height dimension on the bottom, in elements. */ +}; - /** Get the left padding */ - unsigned int pad_left() const - { - return _pad_left; - } - /** Get the right padding */ - unsigned int pad_right() const - { - return _pad_right; - } - /** Get the top padding */ - unsigned int pad_top() const - { - return _pad_top; - } - /** Get the bottom padding */ - unsigned int pad_bottom() const +/** Padding information for 3D operations like Conv3d */ +struct Padding3D +{ + Padding3D() noexcept { - return _pad_bottom; } - /** Get the rounding type */ - DimensionRoundingType round() const + Padding3D(size_t pad_x, size_t pad_y, size_t pad_z) + : left(pad_x), right(pad_x), top(pad_y), bottom(pad_y), front(pad_z), back(pad_z) { - return _round_type; } - /** Check whether this has any padding */ - bool has_padding() const + Padding3D(size_t left, size_t right, size_t top, size_t bottom, size_t front, size_t back) + : left(left), right(right), top(top), bottom(bottom), front(front), back(back) { - return (_pad_left != 0 || _pad_top != 0 || _pad_right != 0 || _pad_bottom != 0); } -private: - std::pair<unsigned int, unsigned int> _stride; - unsigned int _pad_left; - unsigned int _pad_top; - unsigned int _pad_right; - unsigned int _pad_bottom; - - 
DimensionRoundingType _round_type; + size_t left = {0}; /**< Padding across the width dimenstion on the left, in elements. */ + size_t right = {0}; /**< Padding across the width dimenstion on the right, in elements. */ + size_t top = {0}; /**< Padding across the height dimenstion on the top, in elements. */ + size_t bottom = {0}; /**< Padding across the height dimenstion on the bottom, in elements. */ + size_t front = {0}; /**< Padding across the depth dimenstion on the front, in elements. */ + size_t back = {0}; /**< Padding across the depth dimenstion on the back, in elements. */ }; /** PriorBox layer info */ @@ -789,9 +672,15 @@ public: * @param[in] img_size (Optional) Image size. * @param[in] steps (Optional) Step values. */ - PriorBoxLayerInfo(const std::vector<float> &min_sizes, const std::vector<float> &variances, float offset, bool flip = true, bool clip = false, - const std::vector<float> &max_sizes = {}, const std::vector<float> &aspect_ratios = {}, - const Coordinates2D &img_size = Coordinates2D{ 0, 0 }, const std::array<float, 2> &steps = { { 0.f, 0.f } }) + PriorBoxLayerInfo(const std::vector<float> &min_sizes, + const std::vector<float> &variances, + float offset, + bool flip = true, + bool clip = false, + const std::vector<float> &max_sizes = {}, + const std::vector<float> &aspect_ratios = {}, + const Coordinates2D &img_size = Coordinates2D{0, 0}, + const std::array<float, 2> &steps = {{0.f, 0.f}}) : _min_sizes(min_sizes), _variances(variances), _offset(offset), @@ -803,22 +692,22 @@ public: _steps(steps) { _aspect_ratios.push_back(1.); - for(unsigned int i = 0; i < aspect_ratios.size(); ++i) + for (unsigned int i = 0; i < aspect_ratios.size(); ++i) { float ar = aspect_ratios[i]; bool already_exist = false; - for(auto ar_new : _aspect_ratios) + for (auto ar_new : _aspect_ratios) { - if(fabs(ar - ar_new) < 1e-6) + if (fabs(ar - ar_new) < 1e-6) { already_exist = true; break; } } - if(!already_exist) + if (!already_exist) { _aspect_ratios.push_back(ar); 
- if(flip) + if (flip) { _aspect_ratios.push_back(1.f / ar); } @@ -872,14 +761,14 @@ public: } private: - std::vector<float> _min_sizes; - std::vector<float> _variances; - float _offset; - bool _flip; - bool _clip; - std::vector<float> _max_sizes; - std::vector<float> _aspect_ratios; - Coordinates2D _img_size; + std::vector<float> _min_sizes; + std::vector<float> _variances; + float _offset; + bool _flip; + bool _clip; + std::vector<float> _max_sizes; + std::vector<float> _aspect_ratios; + Coordinates2D _img_size; std::array<float, 2> _steps; }; @@ -930,8 +819,16 @@ public: * @param[in] variance_encoded_in_target (Optional) If true, variance is encoded in target. Otherwise we need to adjust the predicted offset accordingly.Default set to false. * @param[in] eta (Optional) Eta. */ - DetectionOutputLayerInfo(int num_classes, bool share_location, DetectionOutputLayerCodeType code_type, int keep_top_k, float nms_threshold, int top_k = -1, int background_label_id = -1, - float confidence_threshold = std::numeric_limits<float>::lowest(), bool variance_encoded_in_target = false, float eta = 1) + DetectionOutputLayerInfo(int num_classes, + bool share_location, + DetectionOutputLayerCodeType code_type, + int keep_top_k, + float nms_threshold, + int top_k = -1, + int background_label_id = -1, + float confidence_threshold = std::numeric_limits<float>::lowest(), + bool variance_encoded_in_target = false, + float eta = 1) : _num_classes(num_classes), _share_location(share_location), _code_type(code_type), @@ -1045,8 +942,15 @@ public: * @param[in] detection_per_class (Optional) Number of detection per class. Used in the Regular Non-Max-Suppression. Defaults to 100. * @param[in] dequantize_scores (Optional) If the scores need to be dequantized. Defaults to true. 
*/ - DetectionPostProcessLayerInfo(unsigned int max_detections, unsigned int max_classes_per_detection, float nms_score_threshold, float iou_threshold, unsigned int num_classes, - std::array<float, 4> scales_values, bool use_regular_nms = false, unsigned int detection_per_class = 100, bool dequantize_scores = true) + DetectionPostProcessLayerInfo(unsigned int max_detections, + unsigned int max_classes_per_detection, + float nms_score_threshold, + float iou_threshold, + unsigned int num_classes, + std::array<float, 4> scales_values, + bool use_regular_nms = false, + unsigned int detection_per_class = 100, + bool dequantize_scores = true) : _max_detections(max_detections), _max_classes_per_detection(max_classes_per_detection), _nms_score_threshold(nms_score_threshold), @@ -1124,15 +1028,15 @@ public: } private: - unsigned int _max_detections; - unsigned int _max_classes_per_detection; - float _nms_score_threshold; - float _iou_threshold; - unsigned int _num_classes; + unsigned int _max_detections; + unsigned int _max_classes_per_detection; + float _nms_score_threshold; + float _iou_threshold; + unsigned int _num_classes; std::array<float, 4> _scales_values; - bool _use_regular_nms; - unsigned int _detection_per_class; - bool _dequantize_scores; + bool _use_regular_nms; + unsigned int _detection_per_class; + bool _dequantize_scores; }; /** Pooling Layer Information struct*/ @@ -1146,7 +1050,9 @@ struct PoolingLayerInfo pad_stride_info(PadStrideInfo()), exclude_padding(false), is_global_pooling(false), - fp_mixed_precision(false) + fp_mixed_precision(false), + use_inf_as_limit(true), + use_kernel_indices(false) { } /** Constructor @@ -1159,20 +1065,26 @@ struct PoolingLayerInfo * True will exclude padding while false will not (Used in AVG/L2 pooling to determine the pooling area). * Defaults to false; * @param[in] fp_mixed_precision (Optional) Use wider accumulators (32 bit instead of 16 for FP16) to improve accuracy. 
+ * @param[in] use_inf_as_limit (Optional) Use inf to represent the limits of datatypes range, instead of using "lowest" property of the data type. + * @param[in] use_kernel_indices (Optional) Use kernel indices instead of using source indices while computing indices tensor. */ explicit PoolingLayerInfo(PoolingType pool_type, unsigned int pool_size, DataLayout data_layout, PadStrideInfo pad_stride_info = PadStrideInfo(), bool exclude_padding = false, - bool fp_mixed_precision = false) + bool fp_mixed_precision = false, + bool use_inf_as_limit = true, + bool use_kernel_indices = false) : pool_type(pool_type), pool_size(Size2D(pool_size, pool_size)), data_layout(data_layout), pad_stride_info(pad_stride_info), exclude_padding(exclude_padding), is_global_pooling(false), - fp_mixed_precision(fp_mixed_precision) + fp_mixed_precision(fp_mixed_precision), + use_inf_as_limit(use_inf_as_limit), + use_kernel_indices(use_kernel_indices) { } @@ -1186,20 +1098,26 @@ struct PoolingLayerInfo * True will exclude padding while false will not (Used in AVG/L2 pooling to determine the pooling area). * Defaults to false; * @param[in] fp_mixed_precision (Optional) Use wider accumulators (32 bit instead of 16 for FP16) to improve accuracy. + * @param[in] use_inf_as_limit (Optional) Use inf to represent the limits of datatypes range, instead of using "lowest" property of the data type. + * @param[in] use_kernel_indices (Optional) Use kernel indices instead of using source indices while computing indices tensor. 
*/ explicit PoolingLayerInfo(PoolingType pool_type, Size2D pool_size, DataLayout data_layout, PadStrideInfo pad_stride_info = PadStrideInfo(), bool exclude_padding = false, - bool fp_mixed_precision = false) + bool fp_mixed_precision = false, + bool use_inf_as_limit = true, + bool use_kernel_indices = false) : pool_type(pool_type), pool_size(pool_size), data_layout(data_layout), pad_stride_info(pad_stride_info), exclude_padding(exclude_padding), is_global_pooling(false), - fp_mixed_precision(fp_mixed_precision) + fp_mixed_precision(fp_mixed_precision), + use_inf_as_limit(use_inf_as_limit), + use_kernel_indices(use_kernel_indices) { } @@ -1217,7 +1135,9 @@ struct PoolingLayerInfo pad_stride_info(PadStrideInfo(1, 1, 0, 0)), exclude_padding(false), is_global_pooling(true), - fp_mixed_precision(false) + fp_mixed_precision(false), + use_inf_as_limit(true), + use_kernel_indices(false) { } @@ -1228,6 +1148,111 @@ struct PoolingLayerInfo bool exclude_padding; bool is_global_pooling; bool fp_mixed_precision; + bool use_inf_as_limit; + bool use_kernel_indices; +}; + +/** Pooling Layer Information struct*/ +struct Pooling3dLayerInfo +{ + /** Default Constructor */ + Pooling3dLayerInfo() noexcept + : pool_type(PoolingType::MAX), + pool_size(Size3D()), + stride(Size3D()), + padding(Padding3D()), + exclude_padding(false), + is_global_pooling(false), + fp_mixed_precision(false), + round_type(DimensionRoundingType::FLOOR) + { + } + /** Constructor + * + * @param[in] pool_type Pooling type @ref PoolingType. + * @param[in] pool_size Pooling size, in elements, across x, y and z. + * @param[in] stride (Optional) stride information @ref Size3D + * @param[in] padding (Optional) padding information @ref Padding3D + * @param[in] exclude_padding (Optional) Strategy when accounting padding in calculations. + * True will exclude padding while false will not (Used in AVG/L2 pooling to determine the pooling area). 
+ * Defaults to false; + * @param[in] fp_mixed_precision (Optional) Use wider accumulators (32 bit instead of 16 for FP16) to improve accuracy. + * @param[in] round_type (Optional) Dimensions rounding. Defaults to @ref DimensionRoundingType::FLOOR + */ + explicit Pooling3dLayerInfo(PoolingType pool_type, + unsigned int pool_size, + Size3D stride = Size3D(1U, 1U, 1U), + Padding3D padding = Padding3D(), + bool exclude_padding = false, + bool fp_mixed_precision = false, + DimensionRoundingType round_type = DimensionRoundingType::FLOOR) + : pool_type(pool_type), + pool_size(Size3D(pool_size, pool_size, pool_size)), + stride(stride), + padding(padding), + exclude_padding(exclude_padding), + is_global_pooling(false), + fp_mixed_precision(fp_mixed_precision), + round_type(round_type) + { + } + + /** Constructor + * + * @param[in] pool_type Pooling type @ref PoolingType. + * @param[in] pool_size Pooling size, in elements, across x, y and z. + * @param[in] stride (Optional) stride information @ref Size3D + * @param[in] padding (Optional) padding information @ref Padding3D + * @param[in] exclude_padding (Optional) Strategy when accounting padding in calculations. + * True will exclude padding while false will not (Used in AVG/L2 pooling to determine the pooling area). + * Defaults to false; + * @param[in] fp_mixed_precision (Optional) Use wider accumulators (32 bit instead of 16 for FP16) to improve accuracy. + * @param[in] round_type (Optional) Dimensions rounding. 
Defaults to @ref DimensionRoundingType::FLOOR + */ + explicit Pooling3dLayerInfo(PoolingType pool_type, + Size3D pool_size, + Size3D stride = Size3D(1U, 1U, 1U), + Padding3D padding = Padding3D(), + bool exclude_padding = false, + bool fp_mixed_precision = false, + DimensionRoundingType round_type = DimensionRoundingType::FLOOR) + : pool_type(pool_type), + pool_size(pool_size), + stride(stride), + padding(padding), + exclude_padding(exclude_padding), + is_global_pooling(false), + fp_mixed_precision(fp_mixed_precision), + round_type(round_type) + { + } + + /** Constructor + * + * @note This constructor is used for global pooling + * + * @param[in] pool_type Pooling type @ref PoolingType. + */ + explicit Pooling3dLayerInfo(PoolingType pool_type) + : pool_type(pool_type), + pool_size(Size3D()), + stride(Size3D(1U, 1U, 1U)), + padding(Padding3D(0, 0, 0)), + exclude_padding(false), + is_global_pooling(true), + fp_mixed_precision(false), + round_type(DimensionRoundingType::FLOOR) + { + } + + PoolingType pool_type; + Size3D pool_size; + Size3D stride; + Padding3D padding; + bool exclude_padding; + bool is_global_pooling; + bool fp_mixed_precision; + DimensionRoundingType round_type; }; /** ROI Pooling Layer Information class */ @@ -1241,8 +1266,14 @@ public: * @param[in] spatial_scale Spatial scale to be applied to the ROI coordinates and dimensions. 
* @param[in] sampling_ratio Number of samples to include in each pooling region (if set to zero, a ceil(roi_dims/pooling_dims)) */ - ROIPoolingLayerInfo(unsigned int pooled_width, unsigned int pooled_height, float spatial_scale, unsigned int sampling_ratio = 0) - : _pooled_width(pooled_width), _pooled_height(pooled_height), _spatial_scale(spatial_scale), _sampling_ratio(sampling_ratio) + ROIPoolingLayerInfo(unsigned int pooled_width, + unsigned int pooled_height, + float spatial_scale, + unsigned int sampling_ratio = 0) + : _pooled_width(pooled_width), + _pooled_height(pooled_height), + _spatial_scale(spatial_scale), + _sampling_ratio(sampling_ratio) { } /** Get the pooled width of the layer */ @@ -1289,10 +1320,24 @@ public: * @param[in] min_size (Optional)Size used to validate the anchors produced. Defaults to 16. * @param[in] values_per_roi (Optional)Values used to represent a ROI(Region of interest). Defaults to 4. */ - GenerateProposalsInfo(float im_width, float im_height, float im_scale, float spatial_scale = 1.0, int pre_nms_topN = 6000, int post_nms_topN = 300, float nms_thres = 0.7, float min_size = 16.0, + GenerateProposalsInfo(float im_width, + float im_height, + float im_scale, + float spatial_scale = 1.0, + int pre_nms_topN = 6000, + int post_nms_topN = 300, + float nms_thres = 0.7, + float min_size = 16.0, size_t values_per_roi = 4) - : _im_height(im_height), _im_width(im_width), _im_scale(im_scale), _spatial_scale(spatial_scale), _pre_nms_topN(pre_nms_topN), _post_nms_topN(post_nms_topN), _nms_thres(nms_thres), - _min_size(min_size), _values_per_roi(values_per_roi) + : _im_height(im_height), + _im_width(im_width), + _im_scale(im_scale), + _spatial_scale(spatial_scale), + _pre_nms_topN(pre_nms_topN), + _post_nms_topN(post_nms_topN), + _nms_thres(nms_thres), + _min_size(min_size), + _values_per_roi(values_per_roi) { } @@ -1418,11 +1463,20 @@ public: * @param[in] correct_transform_coords (Optional)Correct bounding box transform coordinates. 
Defaults to false * @param[in] bbox_xform_clip (Optional)Minimum bounding box width and height after bounding box transformation in log-space. Defaults to log(1000/16) */ - BoundingBoxTransformInfo(float img_width, float img_height, float scale, bool apply_scale = false, const std::array<float, 4> weights = { { 1.f, 1.f, 1.f, 1.f } }, bool correct_transform_coords = - false, - float bbox_xform_clip = - 4.135166556742356f) - : _img_width(img_width), _img_height(img_height), _scale(scale), _apply_scale(apply_scale), _correct_transform_coords(correct_transform_coords), _weights(weights), _bbox_xform_clip(bbox_xform_clip) + BoundingBoxTransformInfo(float img_width, + float img_height, + float scale, + bool apply_scale = false, + const std::array<float, 4> weights = {{1.f, 1.f, 1.f, 1.f}}, + bool correct_transform_coords = false, + float bbox_xform_clip = 4.135166556742356f) + : _img_width(img_width), + _img_height(img_height), + _scale(scale), + _apply_scale(apply_scale), + _correct_transform_coords(correct_transform_coords), + _weights(weights), + _bbox_xform_clip(bbox_xform_clip) { } @@ -1462,114 +1516,13 @@ public: } private: - float _img_width; - float _img_height; - float _scale; - bool _apply_scale; - bool _correct_transform_coords; + float _img_width; + float _img_height; + float _scale; + bool _apply_scale; + bool _correct_transform_coords; std::array<float, 4> _weights; - float _bbox_xform_clip; -}; - -/** Activation Layer Information class */ -class ActivationLayerInfo -{ -public: - /** Available activation functions */ - enum class ActivationFunction - { - LOGISTIC, /**< Logistic ( \f$ f(x) = \frac{1}{1 + e^{-x}} \f$ ) */ - TANH, /**< Hyperbolic tangent ( \f$ f(x) = a \cdot tanh(b \cdot x) \f$ ) */ - RELU, /**< Rectifier ( \f$ f(x) = max(0,x) \f$ ) */ - BOUNDED_RELU, /**< Upper Bounded Rectifier ( \f$ f(x) = min(a, max(0,x)) \f$ ) */ - LU_BOUNDED_RELU, /**< Lower and Upper Bounded Rectifier ( \f$ f(x) = min(a, max(b,x)) \f$ ) */ - LEAKY_RELU, /**< Leaky 
Rectifier ( \f$ f(x) = \begin{cases} \alpha x & \quad \text{if } x \text{ < 0}\\ x & \quad \text{if } x \geq \text{ 0 } \end{cases} \f$ ) */ - SOFT_RELU, /**< Soft Rectifier ( \f$ f(x)= log(1+e^x) \f$ ) */ - ELU, /**< Exponential Linear Unit ( \f$ f(x) = \begin{cases} \alpha (exp(x) - 1) & \quad \text{if } x \text{ < 0}\\ x & \quad \text{if } x \geq \text{ 0 } \end{cases} \f$ ) */ - ABS, /**< Absolute ( \f$ f(x)= |x| \f$ ) */ - SQUARE, /**< Square ( \f$ f(x)= x^2 \f$ )*/ - SQRT, /**< Square root ( \f$ f(x) = \sqrt{x} \f$ )*/ - LINEAR, /**< Linear ( \f$ f(x)= ax + b \f$ ) */ - IDENTITY, /**< Identity ( \f$ f(x)= x \f$ ) */ - HARD_SWISH /**< Hard-swish ( \f$ f(x) = (x * relu6(x+3))/6 \f$ ) */ - }; - - ActivationLayerInfo() = default; - /** Default Constructor - * - * @param[in] f The activation function to use. - * @param[in] a (Optional) The alpha parameter used by some activation functions - * (@ref ActivationFunction::BOUNDED_RELU, @ref ActivationFunction::LU_BOUNDED_RELU, @ref ActivationFunction::LINEAR, @ref ActivationFunction::TANH). - * @param[in] b (Optional) The beta parameter used by some activation functions (@ref ActivationFunction::LINEAR, @ref ActivationFunction::LU_BOUNDED_RELU, @ref ActivationFunction::TANH). 
- */ - ActivationLayerInfo(ActivationFunction f, float a = 0.0f, float b = 0.0f) - : _act(f), _a(a), _b(b), _enabled(true) - { - } - /** Get the type of activation function */ - ActivationFunction activation() const - { - return _act; - } - /** Get the alpha value */ - float a() const - { - return _a; - } - /** Get the beta value */ - float b() const - { - return _b; - } - /** Check if initialised */ - bool enabled() const - { - return _enabled; - } - -private: - ActivationFunction _act = { ActivationLayerInfo::ActivationFunction::IDENTITY }; - float _a = {}; - float _b = {}; - bool _enabled = { false }; -}; - -/** Fully connected layer info */ -struct FullyConnectedLayerInfo -{ - /* Fused-activation parameters */ - ActivationLayerInfo activation_info{}; /**< Fused activation to apply after the matrix multiplication. */ - /* Information about weights */ - DataLayout weights_trained_layout{ DataLayout::NCHW }; /**< Layout that the weights have been trained with. */ - bool transpose_weights{ true }; /**< Transpose weights if true. */ - bool are_weights_reshaped{ false }; /**< Reshape the weights tensor if false. */ - bool retain_internal_weights{ false }; /**< Retain internal reshaped weights. */ - bool constant_weights{ true }; /**< If false, weights can vary between runs. */ - /* Other parameters */ - bool fp_mixed_precision{ false }; /**< Use wider accumulators (32 bit instead of 16 for FP16) to improve accuracy. 
*/ - - /** Sets the weights trained data layout - * - * @param[in] layout Data layout that the weights were trained with - * - * @return Updated object - */ - FullyConnectedLayerInfo &set_weights_trained_layout(DataLayout layout) - { - weights_trained_layout = layout; - return *this; - } - /** Sets the transpose weights flag - * - * @param[in] should_transpose_weights Boolean flag indicating if weights should be transposed - * - * @return Updated object - */ - FullyConnectedLayerInfo &set_transpose_weights(bool should_transpose_weights) - { - transpose_weights = should_transpose_weights; - return *this; - } + float _bbox_xform_clip; }; /** Normalization Layer Information class */ @@ -1586,7 +1539,12 @@ public: * @param[in] is_scaled (Optional) Boolean that specifies if alpha will be scaled by the normalization size or not. * Should be false to follow [Krichevksy 2012]. */ - NormalizationLayerInfo(NormType type, uint32_t norm_size = 5, float alpha = 0.0001f, float beta = 0.5f, float kappa = 1.f, bool is_scaled = true) + NormalizationLayerInfo(NormType type, + uint32_t norm_size = 5, + float alpha = 0.0001f, + float beta = 0.5f, + float kappa = 1.f, + bool is_scaled = true) : _type(type), _norm_size(norm_size), _alpha(alpha), _beta(beta), _kappa(kappa), _is_scaled(is_scaled) { } @@ -1690,13 +1648,36 @@ private: int32_t _shrink_axis_mask; }; +// OHWIo<interleave_by>i<block_by> +inline int interleave_by(const WeightFormat wf) +{ + return (static_cast<int>(wf) >> 8) & 0xFFF; +} +inline int block_by(const WeightFormat wf) +{ + return (static_cast<int>(wf) >> 20) & 0xF; +} +inline bool is_fixed_format(const WeightFormat &wf) +{ + return wf != WeightFormat::UNSPECIFIED && wf != WeightFormat::ANY; +} +inline bool is_fixed_format_fast_math(const WeightFormat &wf) +{ + return (static_cast<int>(wf) >> 4) & 0x1; +} + /** Convolution Layer Weights Information class. 
This class stores the necessary information to compute convolution layer when the weights are already reshaped */ class WeightsInfo { public: /** Default constructor */ WeightsInfo() - : _are_reshaped(false), _kernel_width(0), _kernel_height(0), _num_kernels(0), _retain_internal_weights(false) + : _are_reshaped(false), + _kernel_width(0), + _kernel_height(0), + _num_kernels(0), + _retain_internal_weights(false), + _weight_format(arm_compute::WeightFormat::UNSPECIFIED) { } /** Constructor @@ -1706,9 +1687,20 @@ public: * @param[in] kernel_height Kernel height. * @param[in] num_kernels Number of convolution kernels. * @param[in] retain_internal_weights (Optional) True if internal reshaped weights must be retained. Used for reconfiguration purposes. Default is false. + * @param[in] weight_format (Optional) arm_gemm:WeightFormat enumeration requested by the user. Default is arm_compute::WeightFormat::UNSPECIFIED. */ - WeightsInfo(bool are_reshaped, unsigned int kernel_width, unsigned int kernel_height, unsigned int num_kernels, bool retain_internal_weights = false) - : _are_reshaped(are_reshaped), _kernel_width(kernel_width), _kernel_height(kernel_height), _num_kernels(num_kernels), _retain_internal_weights(retain_internal_weights) + WeightsInfo(bool are_reshaped, + unsigned int kernel_width, + unsigned int kernel_height, + unsigned int num_kernels, + bool retain_internal_weights = false, + arm_compute::WeightFormat weight_format = arm_compute::WeightFormat::UNSPECIFIED) + : _are_reshaped(are_reshaped), + _kernel_width(kernel_width), + _kernel_height(kernel_height), + _num_kernels(num_kernels), + _retain_internal_weights(retain_internal_weights), + _weight_format(weight_format) { } /** Flag which specifies if the weights tensor has been reshaped. 
@@ -1739,21 +1731,39 @@ public: { return _retain_internal_weights; } + arm_compute::WeightFormat weight_format() const + { + return _weight_format; + } + void set_weight_format(arm_compute::WeightFormat weight_format) + { + _weight_format = weight_format; + } + + unsigned int kernel_width() const + { + return _kernel_width; + } + unsigned int kernel_height() const + { + return _kernel_height; + } private: - bool _are_reshaped; - unsigned int _kernel_width; - unsigned int _kernel_height; - unsigned int _num_kernels; - bool _retain_internal_weights; + bool _are_reshaped; + unsigned int _kernel_width; + unsigned int _kernel_height; + unsigned int _num_kernels; + bool _retain_internal_weights; + arm_compute::WeightFormat _weight_format; }; /** GEMM reshape information class. This class stores the necessary information about matrix A and matrix B reshape. * - * The matrix A can only be reshaped through @ref opencl::kernels::ClGemmReshapeLhsMatrixKernel or @ref NEGEMMInterleave4x4Kernel + * The matrix A can only be reshaped through @ref opencl::kernels::ClGemmReshapeLhsMatrixKernel or @ref cpu::kernels::CpuGemmInterleave4x4Kernel * Note: Optionally just for @ref opencl::kernels::ClGemmReshapeLhsMatrixKernel is it possible to set mult_interleave4x4_height, the multiplication factor for the height of the 4x4 interleaved block * - * The matrix B can only be reshaped through @ref opencl::kernels::ClGemmReshapeRhsMatrixKernel or @ref NEGEMMTranspose1xWKernel + * The matrix B can only be reshaped through @ref opencl::kernels::ClGemmReshapeRhsMatrixKernel or @ref cpu::kernels::CpuGemmTranspose1xWKernel * Note: Optionally just for @ref opencl::kernels::ClGemmReshapeRhsMatrixKernel is it possible to set mult_transpose1xW_width, the multiplication factor for the width of the 1xW transposed block * */ @@ -1762,7 +1772,14 @@ class GEMMReshapeInfo final public: /** Default constructor */ GEMMReshapeInfo() - : _m(1), _n(1), _k(1), _mult_transpose1xW_width(1), 
_mult_interleave4x4_height(1), _depth_output_gemm3d(0), _reinterpret_input_as_3d(false), _broadcast_bias(false) + : _m(1), + _n(1), + _k(1), + _mult_transpose1xW_width(1), + _mult_interleave4x4_height(1), + _depth_output_gemm3d(0), + _reinterpret_input_as_3d(false), + _broadcast_bias(false) { } /** Constructor @@ -1778,9 +1795,22 @@ public: * to perform 1x1 convolutions with the NHWC data layout) * @param[in] broadcast_bias (Optional) Broadcast the shape of the bias tensor from a vector to a matrix. */ - GEMMReshapeInfo(int m, int n, int k, int mult_transpose1xW_width = 1, int mult_interleave4x4_height = 1, int depth_output_gemm3d = 0, bool reinterpret_input_as_3d = false, bool broadcast_bias = false) - : _m(m), _n(n), _k(k), _mult_transpose1xW_width(mult_transpose1xW_width), _mult_interleave4x4_height(mult_interleave4x4_height), _depth_output_gemm3d(depth_output_gemm3d), - _reinterpret_input_as_3d(reinterpret_input_as_3d), _broadcast_bias(broadcast_bias) + GEMMReshapeInfo(int m, + int n, + int k, + int mult_transpose1xW_width = 1, + int mult_interleave4x4_height = 1, + int depth_output_gemm3d = 0, + bool reinterpret_input_as_3d = false, + bool broadcast_bias = false) + : _m(m), + _n(n), + _k(k), + _mult_transpose1xW_width(mult_transpose1xW_width), + _mult_interleave4x4_height(mult_interleave4x4_height), + _depth_output_gemm3d(depth_output_gemm3d), + _reinterpret_input_as_3d(reinterpret_input_as_3d), + _broadcast_bias(broadcast_bias) { } /** Number of matrix A rows @@ -1862,44 +1892,6 @@ private: bool _broadcast_bias; }; -struct ConvolutionInfo -{ - ConvolutionInfo() = default; - ConvolutionInfo(const PadStrideInfo &pad_stride_info, unsigned int depth_multiplier, const ActivationLayerInfo &act_info, const Size2D &dilation) - : pad_stride_info(pad_stride_info), depth_multiplier(depth_multiplier), act_info(act_info), dilation(dilation) - { - } - PadStrideInfo pad_stride_info{}; /**< Convolution info (Pads, strides,...) 
*/ - unsigned int depth_multiplier{ 1 }; /**< Multiplier to apply to input's depth to retrieve the output depth. Defaults to 1 */ - ActivationLayerInfo act_info{}; /**< Fused activation to apply after convolution. */ - Size2D dilation{ Size2D(1, 1) }; /**< Dilation, in elements, across x and y. Defaults to (1, 1). */ -}; - -/** GEMMLowp output stage type */ -enum class GEMMLowpOutputStageType -{ - NONE, /**< No quantization */ - QUANTIZE_DOWN, /**< Quantize using an integer multiplication */ - QUANTIZE_DOWN_FIXEDPOINT, /**< Quantize using a fixed point multiplication */ - QUANTIZE_DOWN_FLOAT /**< Quantize using a floating point multiplication */ -}; - -/** GEMMLowp output stage info */ -struct GEMMLowpOutputStageInfo -{ - GEMMLowpOutputStageType type{ GEMMLowpOutputStageType::NONE }; /**< GEMMLowp output stage type */ - int32_t gemmlowp_offset{ 0 }; /**< GEMMLowp output stage offset used for quantizing to QASYMM8 */ - int32_t gemmlowp_multiplier{ 0 }; /**< GEMMLowp output stage multiplier used for quantizing to QASYMM8 */ - int32_t gemmlowp_shift{ 0 }; /**< GEMMLowp output stage shift used for quantizing to uint8 */ - int32_t gemmlowp_min_bound{ std::numeric_limits<int32_t>::lowest() }; /**< GEMMLowp min value used to saturate down the output result before converting back to QASYMM8 */ - int32_t gemmlowp_max_bound{ std::numeric_limits<int32_t>::max() }; /**< GEMMLowp max value used to saturate down the output result before converting back to QASYMM8 */ - std::vector<int32_t> gemmlowp_multipliers{}; /**< GEMMLowp output stage multiplier used for quantizing to QASYMM8 */ - std::vector<int32_t> gemmlowp_shifts{}; /**< GEMMLowp output stage multiplier used for quantizing to QASYMM8 */ - float gemmlowp_real_multiplier{ 0 }; /**< GEMMLowp output stage real multiplier used for quantizing to QASYMM8 */ - bool is_quantized_per_channel{ false }; /**< GEMMLowp quantized per-channel flag */ - DataType output_data_type{ DataType::UNKNOWN }; /**< Output tensor data type to use 
if the output is not initialized */ -}; - /** GEMM LHS (Left Hand Side) matrix information */ struct GEMMLHSMatrixInfo { @@ -1908,11 +1900,11 @@ struct GEMMLHSMatrixInfo : m0(m), k0(k), v0(v), transpose(trans), interleave(inter) { } - unsigned int m0{ 1 }; /**< Number of rows processed by the matrix multiplication */ - unsigned int k0{ 1 }; /**< Number of partial accumulations performed by the matrix multiplication */ - unsigned int v0{ 1 }; /**< Number of vertical blocks of size (m0xk0) stored on the same output row */ - bool transpose{ true }; /**< True if the (m0xk0) block has to be transposed before been stored */ - bool interleave{ true }; /**< True if the v0 (m0xk0) blocks have to be interleaved in the output row */ + unsigned int m0{1}; /**< Number of rows processed by the matrix multiplication */ + unsigned int k0{1}; /**< Number of partial accumulations performed by the matrix multiplication */ + unsigned int v0{1}; /**< Number of vertical blocks of size (m0xk0) stored on the same output row */ + bool transpose{true}; /**< True if the (m0xk0) block has to be transposed before been stored */ + bool interleave{true}; /**< True if the v0 (m0xk0) blocks have to be interleaved in the output row */ }; /** GEMM RHS (Right Hand Side) matrix information */ @@ -1923,208 +1915,16 @@ struct GEMMRHSMatrixInfo : n0(n), k0(k), h0(h), transpose(trans), interleave(inter), export_to_cl_image(export_to_cl_img) { } - unsigned int n0{ 1 }; /**< Number of columns processed by the matrix multiplication */ - unsigned int k0{ 1 }; /**< Number of partial accumulations performed by the matrix multiplication */ - unsigned int h0{ 1 }; /**< Number of horizontal blocks of size (k0xn0) stored on the same output row */ - bool transpose{ true }; /**< True if the (k0xn0) block has to be transposed before been stored */ - bool interleave{ true }; /**< True if the h0 (k0xn0) blocks have to be interleaved in the output row */ - bool export_to_cl_image{ false }; /**< True if the reshaped rhs 
has to be exported to cl_image. n0 must be equal to 4 */ + unsigned int n0{1}; /**< Number of columns processed by the matrix multiplication */ + unsigned int k0{1}; /**< Number of partial accumulations performed by the matrix multiplication */ + unsigned int h0{1}; /**< Number of horizontal blocks of size (k0xn0) stored on the same output row */ + bool transpose{true}; /**< True if the (k0xn0) block has to be transposed before been stored */ + bool interleave{true}; /**< True if the h0 (k0xn0) blocks have to be interleaved in the output row */ + bool export_to_cl_image{ + false}; /**< True if the reshaped rhs has to be exported to cl_image. n0 must be equal to 4 */ }; -/** GEMM information class. This class stores the necessary information to compute GEMM functions - * - * This object also contains the information about how matrix A and matrix B have been reshaped - * - */ -class GEMMInfo -{ -public: - /** Default constructor */ - GEMMInfo() noexcept - : _is_a_reshaped(false), - _is_b_reshaped(false), - _reshape_b_only_on_first_run(true), - _depth_output_gemm3d(0), - _reinterpret_input_as_3d(false), - _retain_internal_weights(false), - _gemmlowp_output_stage(), - _fp_mixed_precision(false), - _broadcast_bias(false), - _pretranpose_B(true), - _activation_info(), - _constant_weights(true) - { - } - /** Constructor - * - * @param[in] is_a_reshaped True if the matrix A has been reshaped - * @param[in] is_b_reshaped True if the matrix B has been reshaped - * @param[in] reshape_b_only_on_first_run Reshape matrix B only for the first run - * @param[in] depth_output_gemm3d (Optional) Depth (third dimension) of the output tensor to be used with the GEMM3D kernel - * If 0 the output will not be reinterpreted as 3D. Default 0 - * @param[in] reinterpret_input_as_3d (Optional) Reinterpret the input as 3D tensor. (i.e. 
this flag should be set to true when GEMM is used - * to perform 1x1 convolutions with the NHWC data layout) - * @param[in] retain_internal_weights (Optional) Retain the weights tensor from previous run - * @param[in] gemmlowp_output_stage (Optional) GEMMLowp Output stage info - * @param[in] fp_mixed_precision (Optional) Use wider accumulators (32 bit instead of 16 for FP16) to improve accuracy. - * @param[in] broadcast_bias (Optional) Broadcast the shape of the bias tensor from a vector to a matrix. - * @param[in] activation_info (Optional) Activation to apply after the matrix multiplication - * @param[in] constant_weights (Optional) Weights have constant values throughout multiple executions - */ - GEMMInfo(bool is_a_reshaped, bool is_b_reshaped, bool reshape_b_only_on_first_run, int depth_output_gemm3d = 0, bool reinterpret_input_as_3d = false, bool retain_internal_weights = false, - GEMMLowpOutputStageInfo gemmlowp_output_stage = GEMMLowpOutputStageInfo(), bool fp_mixed_precision = false, bool broadcast_bias = false, - const ActivationLayerInfo &activation_info = ActivationLayerInfo(), bool constant_weights = true) noexcept - : _is_a_reshaped(is_a_reshaped), - _is_b_reshaped(is_b_reshaped), - _reshape_b_only_on_first_run(reshape_b_only_on_first_run), - _depth_output_gemm3d(depth_output_gemm3d), - _reinterpret_input_as_3d(reinterpret_input_as_3d), - _retain_internal_weights(retain_internal_weights), - _gemmlowp_output_stage(gemmlowp_output_stage), - _fp_mixed_precision(fp_mixed_precision), - _broadcast_bias(broadcast_bias), - _pretranpose_B(reshape_b_only_on_first_run), - _activation_info(activation_info), - _constant_weights(constant_weights) - { - } - /** Flag which specifies if the matrix A has been reshaped - * - * @return True if the matrix A has been reshaped - */ - bool is_a_reshaped() const - { - return _is_a_reshaped; - }; - /** Flag which specifies if the matrix B has been reshaped - * - * @return True if the matrix B has been reshaped - */ - bool 
is_b_reshaped() const - { - return _is_b_reshaped; - }; - /** Flag which specifies if the reshape of matrix B should executed only for the first - * - * @note This flag could be set to TRUE when GEMM is used to accelerate convolution layer - * - * @return True if the reshaped of matrix B happens only for the first run - */ - bool reshape_b_only_on_first_run() const - { - return _reshape_b_only_on_first_run; - }; - /** Depth of the output when GEMM output is reinterpreted as 3D tensor - * - * @return the depth of the output tensor - */ - int depth_output_gemm3d() const - { - return _depth_output_gemm3d; - }; - /** Flag which specifies if the input tensor has to be reinterpreted as 3D - * - * @return True if the input tensor has to be reinterpreted as 3D tensor - */ - bool reinterpret_input_as_3d() const - { - return _reinterpret_input_as_3d; - }; - /** Flag which specifies if the weights tensor has to be retained from previous run - * - * @return True if the weights tensor has to be retained - */ - bool retain_internal_weights() const - { - return _retain_internal_weights; - }; - /** GEMMLowp output stage - * - * @return the GEMMLowp output stage info - */ - GEMMLowpOutputStageInfo gemmlowp_output_stage() const - { - return _gemmlowp_output_stage; - }; - /** Sets GEMMLowp output stage - * - * @param[in] output_stage Output stage to set - */ - void set_gemmlowp_output_stage(GEMMLowpOutputStageInfo &output_stage) - { - _gemmlowp_output_stage = output_stage; - }; - /** Flag which specifies if a wider accumulator should be used. - * - * @return True if a wider accumulator has to be used - */ - bool fp_mixed_precision() const - { - return _fp_mixed_precision; - }; - /** Flag which specifies whether to broadcast the shape of the bias tensor. - * - * @return True if the shape of the bias tensor is to be broadcasted. - */ - bool broadcast_bias() const - { - return _broadcast_bias; - }; - /** Flag which specifies whether b should be pre-transposed if supported. 
- * - * @return True if b should be pre-transposed else false. - */ - bool pretranpose_B() const - { - return _pretranpose_B; - }; - /** Set pre-transpose b flag - * - * @param[in] flag Flag to set - */ - void set_pretranpose_B(bool flag) - { - _pretranpose_B = flag; - } - /** Activation layer to apply after the matrix multiplication - * - * @return ActivationLayerInfo object - */ - ActivationLayerInfo activation_info() const - { - return _activation_info; - } - /** Set activation layer info - * - * @param[in] activation_info ActivationLayerInfo object to set - */ - void set_activation_info(const ActivationLayerInfo &activation_info) - { - _activation_info = activation_info; - } - /** Flag which specifies if the values of the weights tensor are constant throughout multiple executions or not - * - * @return True if the weights tensor is constant - */ - bool constant_weights() const - { - return _constant_weights; - }; - -private: - bool _is_a_reshaped; - bool _is_b_reshaped; - bool _reshape_b_only_on_first_run; - int _depth_output_gemm3d; - bool _reinterpret_input_as_3d; - bool _retain_internal_weights; - GEMMLowpOutputStageInfo _gemmlowp_output_stage; - bool _fp_mixed_precision; - bool _broadcast_bias; - bool _pretranpose_B; - ActivationLayerInfo _activation_info; - bool _constant_weights; -}; +class ITensorInfo; /** Winograd information */ struct WinogradInfo @@ -2137,16 +1937,23 @@ struct WinogradInfo * @param[in] conv_info Convolution info (Pads, strides) * @param[in] data_layout Data layout to use for the output tensor once the convolution has been applied */ - WinogradInfo(Size2D output_tile_sz, Size2D kernel_sz, Size2D input_dims, PadStrideInfo conv_info, DataLayout data_layout) - : output_tile_size(output_tile_sz), kernel_size(kernel_sz), input_dimensions(input_dims), convolution_info(conv_info), output_data_layout(data_layout) - { - } - - Size2D output_tile_size{}; /**< Width and height of the output tile */ - Size2D kernel_size{}; /**< Width and height of 
the kernel*/ - Size2D input_dimensions{}; /**< Width and height of the input tensor before the convolution is applied */ - PadStrideInfo convolution_info{}; /**< Convolution info (Pads, strides,...) */ - DataLayout output_data_layout{ DataLayout::NCHW }; /**< Data layout to use for the output tensor once the convolution has been applied (NCHW or NHWC) */ + WinogradInfo( + Size2D output_tile_sz, Size2D kernel_sz, Size2D input_dims, PadStrideInfo conv_info, DataLayout data_layout) + : output_tile_size(output_tile_sz), + kernel_size(kernel_sz), + input_dimensions(input_dims), + convolution_info(conv_info), + output_data_layout(data_layout) + { + } + + Size2D output_tile_size{}; /**< Width and height of the output tile */ + Size2D kernel_size{}; /**< Width and height of the kernel*/ + Size2D input_dimensions{}; /**< Width and height of the input tensor before the convolution is applied */ + PadStrideInfo convolution_info{}; /**< Convolution info (Pads, strides,...) */ + DataLayout output_data_layout{ + DataLayout:: + NCHW}; /**< Data layout to use for the output tensor once the convolution has been applied (NCHW or NHWC) */ }; /** IO formatting information class*/ @@ -2205,5 +2012,8 @@ struct IOFormatInfo /** Align columns */ bool align_columns; }; + +/** Class for holding information related to cropping */ +using CropInfo = Padding2D; } // namespace arm_compute -#endif /* ARM_COMPUTE_TYPES_H */ +#endif // ACL_ARM_COMPUTE_CORE_TYPES_H diff --git a/arm_compute/core/Utils.h b/arm_compute/core/Utils.h index af9a777a0c..a2146522f7 100644 --- a/arm_compute/core/Utils.h +++ b/arm_compute/core/Utils.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2021 Arm Limited. + * Copyright (c) 2016-2023 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -26,67 +26,29 @@ #include "arm_compute/core/Error.h" #include "arm_compute/core/PixelValue.h" -#include "arm_compute/core/Rounding.h" #include "arm_compute/core/Types.h" -#include "arm_compute/core/Version.h" -#include <algorithm> -#include <cstdint> -#include <cstdlib> -#include <iomanip> +#include <cmath> #include <numeric> #include <sstream> #include <string> #include <type_traits> #include <unordered_map> #include <utility> -#include <vector> + +/* Convenience / backwards compatibility includes */ +#include "arm_compute/core/utils/ActivationFunctionUtils.h" +#include "arm_compute/core/utils/DataLayoutUtils.h" +#include "arm_compute/core/utils/DataTypeUtils.h" +#include "arm_compute/core/utils/FormatUtils.h" +#include "arm_compute/core/utils/InterpolationPolicyUtils.h" +#include "arm_compute/core/utils/StringUtils.h" namespace arm_compute { class ITensor; class ITensorInfo; - -/** Calculate the rounded up quotient of val / m. - * - * @param[in] val Value to divide and round up. - * @param[in] m Value to divide by. - * - * @return the result. - */ -template <typename S, typename T> -constexpr auto DIV_CEIL(S val, T m) -> decltype((val + m - 1) / m) -{ - return (val + m - 1) / m; -} - -/** Computes the smallest number larger or equal to value that is a multiple of divisor. - * - * @param[in] value Lower bound value - * @param[in] divisor Value to compute multiple of. - * - * @return the result. - */ -template <typename S, typename T> -inline auto ceil_to_multiple(S value, T divisor) -> decltype(((value + divisor - 1) / divisor) * divisor) -{ - ARM_COMPUTE_ERROR_ON(value < 0 || divisor <= 0); - return DIV_CEIL(value, divisor) * divisor; -} - -/** Computes the largest number smaller or equal to value that is a multiple of divisor. - * - * @param[in] value Upper bound value - * @param[in] divisor Value to compute multiple of. - * - * @return the result. 
- */ -template <typename S, typename T> -inline auto floor_to_multiple(S value, T divisor) -> decltype((value / divisor) * divisor) -{ - ARM_COMPUTE_ERROR_ON(value < 0 || divisor <= 0); - return (value / divisor) * divisor; -} +class ActivationLayerInfo; /** Load an entire file in memory * @@ -97,627 +59,6 @@ inline auto floor_to_multiple(S value, T divisor) -> decltype((value / divisor) */ std::string read_file(const std::string &filename, bool binary); -/** The size in bytes of the data type - * - * @param[in] data_type Input data type - * - * @return The size in bytes of the data type - */ -inline size_t data_size_from_type(DataType data_type) -{ - switch(data_type) - { - case DataType::U8: - case DataType::S8: - case DataType::QSYMM8: - case DataType::QASYMM8: - case DataType::QASYMM8_SIGNED: - case DataType::QSYMM8_PER_CHANNEL: - return 1; - case DataType::U16: - case DataType::S16: - case DataType::QSYMM16: - case DataType::QASYMM16: - case DataType::BFLOAT16: - case DataType::F16: - return 2; - case DataType::F32: - case DataType::U32: - case DataType::S32: - return 4; - case DataType::F64: - case DataType::U64: - case DataType::S64: - return 8; - case DataType::SIZET: - return sizeof(size_t); - default: - ARM_COMPUTE_ERROR("Invalid data type"); - return 0; - } -} - -/** The size in bytes of the pixel format - * - * @param[in] format Input format - * - * @return The size in bytes of the pixel format - */ -inline size_t pixel_size_from_format(Format format) -{ - switch(format) - { - case Format::U8: - return 1; - case Format::U16: - case Format::S16: - case Format::BFLOAT16: - case Format::F16: - case Format::UV88: - case Format::YUYV422: - case Format::UYVY422: - return 2; - case Format::RGB888: - return 3; - case Format::RGBA8888: - return 4; - case Format::U32: - case Format::S32: - case Format::F32: - return 4; - //Doesn't make sense for planar formats: - case Format::NV12: - case Format::NV21: - case Format::IYUV: - case Format::YUV444: - default: - 
ARM_COMPUTE_ERROR("Undefined pixel size for given format"); - return 0; - } -} - -/** The size in bytes of the data type - * - * @param[in] dt Input data type - * - * @return The size in bytes of the data type - */ -inline size_t element_size_from_data_type(DataType dt) -{ - switch(dt) - { - case DataType::S8: - case DataType::U8: - case DataType::QSYMM8: - case DataType::QASYMM8: - case DataType::QASYMM8_SIGNED: - case DataType::QSYMM8_PER_CHANNEL: - return 1; - case DataType::U16: - case DataType::S16: - case DataType::QSYMM16: - case DataType::QASYMM16: - case DataType::BFLOAT16: - case DataType::F16: - return 2; - case DataType::U32: - case DataType::S32: - case DataType::F32: - return 4; - default: - ARM_COMPUTE_ERROR("Undefined element size for given data type"); - return 0; - } -} - -/** Return the data type used by a given single-planar pixel format - * - * @param[in] format Input format - * - * @return The size in bytes of the pixel format - */ -inline DataType data_type_from_format(Format format) -{ - switch(format) - { - case Format::U8: - case Format::UV88: - case Format::RGB888: - case Format::RGBA8888: - case Format::YUYV422: - case Format::UYVY422: - return DataType::U8; - case Format::U16: - return DataType::U16; - case Format::S16: - return DataType::S16; - case Format::U32: - return DataType::U32; - case Format::S32: - return DataType::S32; - case Format::BFLOAT16: - return DataType::BFLOAT16; - case Format::F16: - return DataType::F16; - case Format::F32: - return DataType::F32; - //Doesn't make sense for planar formats: - case Format::NV12: - case Format::NV21: - case Format::IYUV: - case Format::YUV444: - default: - ARM_COMPUTE_ERROR("Not supported data_type for given format"); - return DataType::UNKNOWN; - } -} - -/** Return the plane index of a given channel given an input format. 
- * - * @param[in] format Input format - * @param[in] channel Input channel - * - * @return The plane index of the specific channel of the specific format - */ -inline int plane_idx_from_channel(Format format, Channel channel) -{ - switch(format) - { - // Single planar formats have a single plane - case Format::U8: - case Format::U16: - case Format::S16: - case Format::U32: - case Format::S32: - case Format::BFLOAT16: - case Format::F16: - case Format::F32: - case Format::UV88: - case Format::RGB888: - case Format::RGBA8888: - case Format::YUYV422: - case Format::UYVY422: - return 0; - // Multi planar formats - case Format::NV12: - case Format::NV21: - { - // Channel U and V share the same plane of format UV88 - switch(channel) - { - case Channel::Y: - return 0; - case Channel::U: - case Channel::V: - return 1; - default: - ARM_COMPUTE_ERROR("Not supported channel"); - return 0; - } - } - case Format::IYUV: - case Format::YUV444: - { - switch(channel) - { - case Channel::Y: - return 0; - case Channel::U: - return 1; - case Channel::V: - return 2; - default: - ARM_COMPUTE_ERROR("Not supported channel"); - return 0; - } - } - default: - ARM_COMPUTE_ERROR("Not supported format"); - return 0; - } -} - -/** Return the channel index of a given channel given an input format. 
- * - * @param[in] format Input format - * @param[in] channel Input channel - * - * @return The channel index of the specific channel of the specific format - */ -inline int channel_idx_from_format(Format format, Channel channel) -{ - switch(format) - { - case Format::RGB888: - { - switch(channel) - { - case Channel::R: - return 0; - case Channel::G: - return 1; - case Channel::B: - return 2; - default: - ARM_COMPUTE_ERROR("Not supported channel"); - return 0; - } - } - case Format::RGBA8888: - { - switch(channel) - { - case Channel::R: - return 0; - case Channel::G: - return 1; - case Channel::B: - return 2; - case Channel::A: - return 3; - default: - ARM_COMPUTE_ERROR("Not supported channel"); - return 0; - } - } - case Format::YUYV422: - { - switch(channel) - { - case Channel::Y: - return 0; - case Channel::U: - return 1; - case Channel::V: - return 3; - default: - ARM_COMPUTE_ERROR("Not supported channel"); - return 0; - } - } - case Format::UYVY422: - { - switch(channel) - { - case Channel::Y: - return 1; - case Channel::U: - return 0; - case Channel::V: - return 2; - default: - ARM_COMPUTE_ERROR("Not supported channel"); - return 0; - } - } - case Format::NV12: - { - switch(channel) - { - case Channel::Y: - return 0; - case Channel::U: - return 0; - case Channel::V: - return 1; - default: - ARM_COMPUTE_ERROR("Not supported channel"); - return 0; - } - } - case Format::NV21: - { - switch(channel) - { - case Channel::Y: - return 0; - case Channel::U: - return 1; - case Channel::V: - return 0; - default: - ARM_COMPUTE_ERROR("Not supported channel"); - return 0; - } - } - case Format::YUV444: - case Format::IYUV: - { - switch(channel) - { - case Channel::Y: - return 0; - case Channel::U: - return 0; - case Channel::V: - return 0; - default: - ARM_COMPUTE_ERROR("Not supported channel"); - return 0; - } - } - default: - ARM_COMPUTE_ERROR("Not supported format"); - return 0; - } -} - -/** Return the number of planes for a given format - * - * @param[in] format Input 
format - * - * @return The number of planes for a given image format. - */ -inline size_t num_planes_from_format(Format format) -{ - switch(format) - { - case Format::U8: - case Format::S16: - case Format::U16: - case Format::S32: - case Format::U32: - case Format::BFLOAT16: - case Format::F16: - case Format::F32: - case Format::RGB888: - case Format::RGBA8888: - case Format::YUYV422: - case Format::UYVY422: - return 1; - case Format::NV12: - case Format::NV21: - return 2; - case Format::IYUV: - case Format::YUV444: - return 3; - default: - ARM_COMPUTE_ERROR("Not supported format"); - return 0; - } -} - -/** Return the number of channels for a given single-planar pixel format - * - * @param[in] format Input format - * - * @return The number of channels for a given image format. - */ -inline size_t num_channels_from_format(Format format) -{ - switch(format) - { - case Format::U8: - case Format::U16: - case Format::S16: - case Format::U32: - case Format::S32: - case Format::BFLOAT16: - case Format::F16: - case Format::F32: - return 1; - // Because the U and V channels are subsampled - // these formats appear like having only 2 channels: - case Format::YUYV422: - case Format::UYVY422: - return 2; - case Format::UV88: - return 2; - case Format::RGB888: - return 3; - case Format::RGBA8888: - return 4; - //Doesn't make sense for planar formats: - case Format::NV12: - case Format::NV21: - case Format::IYUV: - case Format::YUV444: - default: - return 0; - } -} - -/** Return the promoted data type of a given data type. - * - * @note If promoted data type is not supported an error will be thrown - * - * @param[in] dt Data type to get the promoted type of. 
- * - * @return Promoted data type - */ -inline DataType get_promoted_data_type(DataType dt) -{ - switch(dt) - { - case DataType::U8: - return DataType::U16; - case DataType::S8: - return DataType::S16; - case DataType::U16: - return DataType::U32; - case DataType::S16: - return DataType::S32; - case DataType::QSYMM8: - case DataType::QASYMM8: - case DataType::QASYMM8_SIGNED: - case DataType::QSYMM8_PER_CHANNEL: - case DataType::QSYMM16: - case DataType::QASYMM16: - case DataType::BFLOAT16: - case DataType::F16: - case DataType::U32: - case DataType::S32: - case DataType::F32: - ARM_COMPUTE_ERROR("Unsupported data type promotions!"); - default: - ARM_COMPUTE_ERROR("Undefined data type!"); - } - return DataType::UNKNOWN; -} - -/** Compute the mininum and maximum values a data type can take - * - * @param[in] dt Data type to get the min/max bounds of - * - * @return A tuple (min,max) with the minimum and maximum values respectively wrapped in PixelValue. - */ -inline std::tuple<PixelValue, PixelValue> get_min_max(DataType dt) -{ - PixelValue min{}; - PixelValue max{}; - switch(dt) - { - case DataType::U8: - case DataType::QASYMM8: - { - min = PixelValue(static_cast<int32_t>(std::numeric_limits<uint8_t>::lowest())); - max = PixelValue(static_cast<int32_t>(std::numeric_limits<uint8_t>::max())); - break; - } - case DataType::S8: - case DataType::QSYMM8: - case DataType::QASYMM8_SIGNED: - case DataType::QSYMM8_PER_CHANNEL: - { - min = PixelValue(static_cast<int32_t>(std::numeric_limits<int8_t>::lowest())); - max = PixelValue(static_cast<int32_t>(std::numeric_limits<int8_t>::max())); - break; - } - case DataType::U16: - case DataType::QASYMM16: - { - min = PixelValue(static_cast<int32_t>(std::numeric_limits<uint16_t>::lowest())); - max = PixelValue(static_cast<int32_t>(std::numeric_limits<uint16_t>::max())); - break; - } - case DataType::S16: - case DataType::QSYMM16: - { - min = PixelValue(static_cast<int32_t>(std::numeric_limits<int16_t>::lowest())); - max = 
PixelValue(static_cast<int32_t>(std::numeric_limits<int16_t>::max())); - break; - } - case DataType::U32: - { - min = PixelValue(std::numeric_limits<uint32_t>::lowest()); - max = PixelValue(std::numeric_limits<uint32_t>::max()); - break; - } - case DataType::S32: - { - min = PixelValue(std::numeric_limits<int32_t>::lowest()); - max = PixelValue(std::numeric_limits<int32_t>::max()); - break; - } - case DataType::BFLOAT16: - { - min = PixelValue(bfloat16::lowest()); - max = PixelValue(bfloat16::max()); - break; - } - case DataType::F16: - { - min = PixelValue(std::numeric_limits<half>::lowest()); - max = PixelValue(std::numeric_limits<half>::max()); - break; - } - case DataType::F32: - { - min = PixelValue(std::numeric_limits<float>::lowest()); - max = PixelValue(std::numeric_limits<float>::max()); - break; - } - default: - ARM_COMPUTE_ERROR("Undefined data type!"); - } - return std::make_tuple(min, max); -} - -/** Return true if the given format has horizontal subsampling. - * - * @param[in] format Format to determine subsampling. - * - * @return True if the format can be subsampled horizontaly. - */ -inline bool has_format_horizontal_subsampling(Format format) -{ - return (format == Format::YUYV422 || format == Format::UYVY422 || format == Format::NV12 || format == Format::NV21 || format == Format::IYUV || format == Format::UV88) ? true : false; -} - -/** Return true if the given format has vertical subsampling. - * - * @param[in] format Format to determine subsampling. - * - * @return True if the format can be subsampled verticaly. - */ -inline bool has_format_vertical_subsampling(Format format) -{ - return (format == Format::NV12 || format == Format::NV21 || format == Format::IYUV || format == Format::UV88) ? true : false; -} - -/** Adjust tensor shape size if width or height are odd for a given multi-planar format. No modification is done for other formats. 
- * - * @note Adding here a few links discussing the issue of odd size and sharing the same solution: - * <a href="https://android.googlesource.com/platform/frameworks/base/+/refs/heads/master/graphics/java/android/graphics/YuvImage.java">Android Source</a> - * <a href="https://groups.google.com/a/webmproject.org/forum/#!topic/webm-discuss/LaCKpqiDTXM">WebM</a> - * <a href="https://bugs.chromium.org/p/libyuv/issues/detail?id=198&can=1&q=odd%20width">libYUV</a> - * <a href="https://sourceforge.net/p/raw-yuvplayer/bugs/1/">YUVPlayer</a> * - * - * @param[in, out] shape Tensor shape of 2D size - * @param[in] format Format of the tensor - * - * @return The adjusted tensor shape. - */ -inline TensorShape adjust_odd_shape(const TensorShape &shape, Format format) -{ - TensorShape output{ shape }; - - // Force width to be even for formats which require subsampling of the U and V channels - if(has_format_horizontal_subsampling(format)) - { - output.set(0, (output.x() + 1) & ~1U); - } - - // Force height to be even for formats which require subsampling of the U and V channels - if(has_format_vertical_subsampling(format)) - { - output.set(1, (output.y() + 1) & ~1U); - } - - return output; -} - -/** Calculate subsampled shape for a given format and channel - * - * @param[in] shape Shape of the tensor to calculate the extracted channel. - * @param[in] format Format of the tensor. - * @param[in] channel Channel to create tensor shape to be extracted. - * - * @return The subsampled tensor shape. 
- */ -inline TensorShape calculate_subsampled_shape(const TensorShape &shape, Format format, Channel channel = Channel::UNKNOWN) -{ - TensorShape output{ shape }; - - // Subsample shape only for U or V channel - if(Channel::U == channel || Channel::V == channel || Channel::UNKNOWN == channel) - { - // Subsample width for the tensor shape when channel is U or V - if(has_format_horizontal_subsampling(format)) - { - output.set(0, output.x() / 2U); - } - - // Subsample height for the tensor shape when channel is U or V - if(has_format_vertical_subsampling(format)) - { - output.set(1, output.y() / 2U); - } - } - - return output; -} - /** Permutes the given dimensions according the permutation vector * * @param[in,out] dimensions Dimensions to be permuted. @@ -728,7 +69,7 @@ template <typename T> inline void permute_strides(Dimensions<T> &dimensions, const PermutationVector &perm) { const auto old_dim = utility::make_array<Dimensions<T>::num_max_dimensions>(dimensions.begin(), dimensions.end()); - for(unsigned int i = 0; i < perm.num_dimensions(); ++i) + for (unsigned int i = 0; i < perm.num_dimensions(); ++i) { T dimension_val = old_dim[i]; dimensions.set(perm[i], dimension_val); @@ -746,7 +87,11 @@ inline void permute_strides(Dimensions<T> &dimensions, const PermutationVector & * * @return PadStrideInfo for SAME padding */ -PadStrideInfo calculate_same_pad(TensorShape input_shape, TensorShape weights_shape, PadStrideInfo conv_info, DataLayout data_layout = DataLayout::NCHW, const Size2D &dilation = Size2D(1u, 1u), +PadStrideInfo calculate_same_pad(TensorShape input_shape, + TensorShape weights_shape, + PadStrideInfo conv_info, + DataLayout data_layout = DataLayout::NCHW, + const Size2D &dilation = Size2D(1u, 1u), const DimensionRoundingType &rounding_type = DimensionRoundingType::FLOOR); /** Returns expected width and height of the deconvolution's output tensor. 
@@ -759,8 +104,10 @@ PadStrideInfo calculate_same_pad(TensorShape input_shape, TensorShape weights_sh * * @return A pair with the new width in the first position and the new height in the second. */ -std::pair<unsigned int, unsigned int> deconvolution_output_dimensions(unsigned int in_width, unsigned int in_height, - unsigned int kernel_width, unsigned int kernel_height, +std::pair<unsigned int, unsigned int> deconvolution_output_dimensions(unsigned int in_width, + unsigned int in_height, + unsigned int kernel_width, + unsigned int kernel_height, const PadStrideInfo &pad_stride_info); /** Returns expected width and height of output scaled tensor depending on dimensions rounding mode. @@ -774,8 +121,10 @@ std::pair<unsigned int, unsigned int> deconvolution_output_dimensions(unsigned i * * @return A pair with the new width in the first position and the new height in the second. */ -std::pair<unsigned int, unsigned int> scaled_dimensions(int width, int height, - int kernel_width, int kernel_height, +std::pair<unsigned int, unsigned int> scaled_dimensions(int width, + int height, + int kernel_width, + int kernel_height, const PadStrideInfo &pad_stride_info, const Size2D &dilation = Size2D(1U, 1U)); @@ -789,9 +138,29 @@ std::pair<unsigned int, unsigned int> scaled_dimensions(int width, int height, * * @return A pair with the new width in the first position and the new height in the second, returned values can be < 1 */ -std::pair<int, int> scaled_dimensions_signed(int width, int height, - int kernel_width, int kernel_height, - const PadStrideInfo &pad_stride_info); +std::pair<int, int> scaled_dimensions_signed( + int width, int height, int kernel_width, int kernel_height, const PadStrideInfo &pad_stride_info); + +/** Returns calculated width, height and depth of output scaled tensor depending on dimensions rounding mode. 
+ * + * @param[in] width Width of input tensor + * @param[in] height Height of input tensor + * @param[in] depth Depth of input tensor + * @param[in] kernel_width Kernel width. + * @param[in] kernel_height Kernel height. + * @param[in] kernel_depth Kernel depth. + * @param[in] pool3d_info Pad and stride and round information for 3d pooling + * + * @return A tuple with the new width in the first position, the new height in the second, and the new depth in the third. + * Returned values can be < 1 + */ +std::tuple<int, int, int> scaled_3d_dimensions_signed(int width, + int height, + int depth, + int kernel_width, + int kernel_height, + int kernel_depth, + const Pooling3dLayerInfo &pool3d_info); /** Check if the given reduction operation should be handled in a serial way. * @@ -820,15 +189,9 @@ QuantizationInfo get_softmax_output_quantization_info(DataType input_type, bool * * @return The pair with minimum and maximum values */ -std::pair<int32_t, int32_t> get_quantized_activation_min_max(ActivationLayerInfo act_info, DataType data_type, UniformQuantizationInfo oq_info); - -/** Convert a tensor format into a string. - * - * @param[in] format @ref Format to be translated to string. - * - * @return The string describing the format. - */ -const std::string &string_from_format(Format format); +std::pair<int32_t, int32_t> get_quantized_activation_min_max(const ActivationLayerInfo &act_info, + DataType data_type, + UniformQuantizationInfo oq_info); /** Convert a channel identity into a string. * @@ -837,34 +200,7 @@ const std::string &string_from_format(Format format); * @return The string describing the channel. */ const std::string &string_from_channel(Channel channel); -/** Convert a data layout identity into a string. - * - * @param[in] dl @ref DataLayout to be translated to string. - * - * @return The string describing the data layout. - */ -const std::string &string_from_data_layout(DataLayout dl); -/** Convert a data type identity into a string. 
- * - * @param[in] dt @ref DataType to be translated to string. - * - * @return The string describing the data type. - */ -const std::string &string_from_data_type(DataType dt); -/** Translates a given activation function to a string. - * - * @param[in] act @ref ActivationLayerInfo::ActivationFunction to be translated to string. - * - * @return The string describing the activation function. - */ -const std::string &string_from_activation_func(ActivationLayerInfo::ActivationFunction act); -/** Translates a given interpolation policy to a string. - * - * @param[in] policy @ref InterpolationPolicy to be translated to string. - * - * @return The string describing the interpolation policy. - */ -const std::string &string_from_interpolation_policy(InterpolationPolicy policy); + /** Translates a given border mode policy to a string. * * @param[in] border_mode @ref BorderMode to be translated to string. @@ -886,6 +222,30 @@ const std::string &string_from_norm_type(NormType type); * @return The string describing the pooling type. */ const std::string &string_from_pooling_type(PoolingType type); +/** Check if the pool region is entirely outside the input tensor + * + * @param[in] info @ref PoolingLayerInfo to be checked. + * + * @return True if the pool region is entirely outside the input tensor, False otherwise. + */ +bool is_pool_region_entirely_outside_input(const PoolingLayerInfo &info); +/** Check if the 3d pool region is entirely outside the input tensor + * + * @param[in] info @ref Pooling3dLayerInfo to be checked. + * + * @return True if the pool region is entirely outside the input tensor, False otherwise. + */ +bool is_pool_3d_region_entirely_outside_input(const Pooling3dLayerInfo &info); +/** Check if the 3D padding is symmetric i.e. 
padding in each opposite sides are euqal (left=right, top=bottom and front=back) + * + * @param[in] info @ref Padding3D input 3D padding object to check if it is symmetric + * + * @return True if padding is symmetric + */ +inline bool is_symmetric(const Padding3D &info) +{ + return ((info.left == info.right) && (info.top == info.bottom) && (info.front == info.back)); +} /** Translates a given GEMMLowp output stage to a string. * * @param[in] output_stage @ref GEMMLowpOutputStageInfo to be translated to string. @@ -901,13 +261,7 @@ const std::string &string_from_gemmlowp_output_stage(GEMMLowpOutputStageType out * @return String representation of the PixelValue through the given data type. */ std::string string_from_pixel_value(const PixelValue &value, const DataType data_type); -/** Convert a string to DataType - * - * @param[in] name The name of the data type - * - * @return DataType - */ -DataType data_type_from_name(const std::string &name); + /** Stores padding information before configuring a kernel * * @param[in] infos list of tensor infos to store the padding info for @@ -930,162 +284,6 @@ std::unordered_map<const ITensorInfo *, PaddingSize> get_padding_info(std::initi */ bool has_padding_changed(const std::unordered_map<const ITensorInfo *, PaddingSize> &padding_map); -/** Input Stream operator for @ref DataType - * - * @param[in] stream Stream to parse - * @param[out] data_type Output data type - * - * @return Updated stream - */ -inline ::std::istream &operator>>(::std::istream &stream, DataType &data_type) -{ - std::string value; - stream >> value; - data_type = data_type_from_name(value); - return stream; -} -/** Lower a given string. - * - * @param[in] val Given string to lower. - * - * @return The lowered string - */ -std::string lower_string(const std::string &val); - -/** Check if a given data type is of floating point type - * - * @param[in] dt Input data type. - * - * @return True if data type is of floating point type, else false. 
- */ -inline bool is_data_type_float(DataType dt) -{ - switch(dt) - { - case DataType::F16: - case DataType::F32: - return true; - default: - return false; - } -} - -/** Check if a given data type is of quantized type - * - * @note Quantized is considered a super-set of fixed-point and asymmetric data types. - * - * @param[in] dt Input data type. - * - * @return True if data type is of quantized type, else false. - */ -inline bool is_data_type_quantized(DataType dt) -{ - switch(dt) - { - case DataType::QSYMM8: - case DataType::QASYMM8: - case DataType::QASYMM8_SIGNED: - case DataType::QSYMM8_PER_CHANNEL: - case DataType::QSYMM16: - case DataType::QASYMM16: - return true; - default: - return false; - } -} - -/** Check if a given data type is of asymmetric quantized type - * - * @param[in] dt Input data type. - * - * @return True if data type is of asymmetric quantized type, else false. - */ -inline bool is_data_type_quantized_asymmetric(DataType dt) -{ - switch(dt) - { - case DataType::QASYMM8: - case DataType::QASYMM8_SIGNED: - case DataType::QASYMM16: - return true; - default: - return false; - } -} - -/** Check if a given data type is of asymmetric quantized signed type - * - * @param[in] dt Input data type. - * - * @return True if data type is of asymmetric quantized signed type, else false. - */ -inline bool is_data_type_quantized_asymmetric_signed(DataType dt) -{ - switch(dt) - { - case DataType::QASYMM8_SIGNED: - return true; - default: - return false; - } -} - -/** Check if a given data type is of symmetric quantized type - * - * @param[in] dt Input data type. - * - * @return True if data type is of symmetric quantized type, else false. - */ -inline bool is_data_type_quantized_symmetric(DataType dt) -{ - switch(dt) - { - case DataType::QSYMM8: - case DataType::QSYMM8_PER_CHANNEL: - case DataType::QSYMM16: - return true; - default: - return false; - } -} - -/** Check if a given data type is of per channel type - * - * @param[in] dt Input data type. 
- * - * @return True if data type is of per channel type, else false. - */ -inline bool is_data_type_quantized_per_channel(DataType dt) -{ - switch(dt) - { - case DataType::QSYMM8_PER_CHANNEL: - return true; - default: - return false; - } -} - -/** Create a string with the float in full precision. - * - * @param val Floating point value - * - * @return String with the floating point value. - */ -inline std::string float_to_string_with_full_precision(float val) -{ - std::stringstream ss; - ss.precision(std::numeric_limits<float>::max_digits10); - ss << val; - - if(val != static_cast<int>(val)) - { - ss << "f"; - } - - return ss.str(); -} - /** Returns the number of elements required to go from start to end with the wanted step * * @param[in] start start value @@ -1100,91 +298,6 @@ inline size_t num_of_elements_in_range(const float start, const float end, const return size_t(std::ceil((end - start) / step)); } -/** Returns true if the value can be represented by the given data type - * - * @param[in] val value to be checked - * @param[in] dt data type that is checked - * @param[in] qinfo (Optional) quantization info if the data type is QASYMM8 - * - * @return true if the data type can hold the value. 
- */ -template <typename T> -bool check_value_range(T val, DataType dt, QuantizationInfo qinfo = QuantizationInfo()) -{ - switch(dt) - { - case DataType::U8: - { - const auto val_u8 = static_cast<uint8_t>(val); - return ((val_u8 == val) && val >= std::numeric_limits<uint8_t>::lowest() && val <= std::numeric_limits<uint8_t>::max()); - } - case DataType::QASYMM8: - { - double min = static_cast<double>(dequantize_qasymm8(0, qinfo)); - double max = static_cast<double>(dequantize_qasymm8(std::numeric_limits<uint8_t>::max(), qinfo)); - return ((double)val >= min && (double)val <= max); - } - case DataType::S8: - { - const auto val_s8 = static_cast<int8_t>(val); - return ((val_s8 == val) && val >= std::numeric_limits<int8_t>::lowest() && val <= std::numeric_limits<int8_t>::max()); - } - case DataType::U16: - { - const auto val_u16 = static_cast<uint16_t>(val); - return ((val_u16 == val) && val >= std::numeric_limits<uint16_t>::lowest() && val <= std::numeric_limits<uint16_t>::max()); - } - case DataType::S16: - { - const auto val_s16 = static_cast<int16_t>(val); - return ((val_s16 == val) && val >= std::numeric_limits<int16_t>::lowest() && val <= std::numeric_limits<int16_t>::max()); - } - case DataType::U32: - { - const auto val_u32 = static_cast<uint32_t>(val); - return ((val_u32 == val) && val >= std::numeric_limits<uint32_t>::lowest() && val <= std::numeric_limits<uint32_t>::max()); - } - case DataType::S32: - { - const auto val_s32 = static_cast<int32_t>(val); - return ((val_s32 == val) && val >= std::numeric_limits<int32_t>::lowest() && val <= std::numeric_limits<int32_t>::max()); - } - case DataType::BFLOAT16: - return (val >= bfloat16::lowest() && val <= bfloat16::max()); - case DataType::F16: - return (val >= std::numeric_limits<half>::lowest() && val <= std::numeric_limits<half>::max()); - case DataType::F32: - return (val >= std::numeric_limits<float>::lowest() && val <= std::numeric_limits<float>::max()); - default: - ARM_COMPUTE_ERROR("Data type not 
supported"); - return false; - } -} - -/** Returns the adjusted vector size in case it is less than the input's first dimension, getting rounded down to its closest valid vector size - * - * @param[in] vec_size vector size to be adjusted - * @param[in] dim0 size of the first dimension - * - * @return the number of element processed along the X axis per thread - */ -inline unsigned int adjust_vec_size(unsigned int vec_size, size_t dim0) -{ - ARM_COMPUTE_ERROR_ON(vec_size > 16); - - if((vec_size >= dim0) && (dim0 == 3)) - { - return dim0; - } - - while(vec_size > dim0) - { - vec_size >>= 1; - } - - return vec_size; -} - #ifdef ARM_COMPUTE_ASSERTS_ENABLED /** Print consecutive elements to an output stream. * @@ -1195,26 +308,27 @@ inline unsigned int adjust_vec_size(unsigned int vec_size, size_t dim0) * @param[in] element_delim (Optional) Delimeter among the consecutive elements. Defaults to space delimeter */ template <typename T> -void print_consecutive_elements_impl(std::ostream &s, const T *ptr, unsigned int n, int stream_width = 0, const std::string &element_delim = " ") +void print_consecutive_elements_impl( + std::ostream &s, const T *ptr, unsigned int n, int stream_width = 0, const std::string &element_delim = " ") { using print_type = typename std::conditional<std::is_floating_point<T>::value, T, int>::type; std::ios stream_status(nullptr); stream_status.copyfmt(s); - for(unsigned int i = 0; i < n; ++i) + for (unsigned int i = 0; i < n; ++i) { // Set stream width as it is not a "sticky" stream manipulator - if(stream_width != 0) + if (stream_width != 0) { s.width(stream_width); } - if(std::is_same<typename std::decay<T>::type, half>::value) + if (std::is_same<typename std::decay<T>::type, half>::value) { // We use T instead of print_type here is because the std::is_floating_point<half> returns false and then the print_type becomes int. 
s << std::right << static_cast<T>(ptr[i]) << element_delim; } - else if(std::is_same<typename std::decay<T>::type, bfloat16>::value) + else if (std::is_same<typename std::decay<T>::type, bfloat16>::value) { // We use T instead of print_type here is because the std::is_floating_point<bfloat16> returns false and then the print_type becomes int. s << std::right << float(ptr[i]) << element_delim; @@ -1243,17 +357,17 @@ int max_consecutive_elements_display_width_impl(std::ostream &s, const T *ptr, u using print_type = typename std::conditional<std::is_floating_point<T>::value, T, int>::type; int max_width = -1; - for(unsigned int i = 0; i < n; ++i) + for (unsigned int i = 0; i < n; ++i) { std::stringstream ss; ss.copyfmt(s); - if(std::is_same<typename std::decay<T>::type, half>::value) + if (std::is_same<typename std::decay<T>::type, half>::value) { // We use T instead of print_type here is because the std::is_floating_point<half> returns false and then the print_type becomes int. ss << static_cast<T>(ptr[i]); } - else if(std::is_same<typename std::decay<T>::type, bfloat16>::value) + else if (std::is_same<typename std::decay<T>::type, bfloat16>::value) { // We use T instead of print_type here is because the std::is_floating_point<bfloat> returns false and then the print_type becomes int. ss << float(ptr[i]); @@ -1277,7 +391,12 @@ int max_consecutive_elements_display_width_impl(std::ostream &s, const T *ptr, u * @param[in] stream_width (Optional) Width of the stream. If set to 0 the element's width is used. Defaults to 0. * @param[in] element_delim (Optional) Delimeter among the consecutive elements. 
Defaults to space delimeter */ -void print_consecutive_elements(std::ostream &s, DataType dt, const uint8_t *ptr, unsigned int n, int stream_width, const std::string &element_delim = " "); +void print_consecutive_elements(std::ostream &s, + DataType dt, + const uint8_t *ptr, + unsigned int n, + int stream_width, + const std::string &element_delim = " "); /** Identify the maximum width of n consecutive elements. * @@ -1290,5 +409,5 @@ void print_consecutive_elements(std::ostream &s, DataType dt, const uint8_t *ptr */ int max_consecutive_elements_display_width(std::ostream &s, DataType dt, const uint8_t *ptr, unsigned int n); #endif /* ARM_COMPUTE_ASSERTS_ENABLED */ -} +} // namespace arm_compute #endif /*ARM_COMPUTE_UTILS_H */ diff --git a/arm_compute/core/Validate.h b/arm_compute/core/Validate.h index e755cacae6..5550560aff 100644 --- a/arm_compute/core/Validate.h +++ b/arm_compute/core/Validate.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2021 Arm Limited. + * Copyright (c) 2016-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -27,6 +27,10 @@ #include "arm_compute/core/Error.h" #include "arm_compute/core/IKernel.h" #include "arm_compute/core/ITensor.h" +#include "arm_compute/core/QuantizationInfo.h" +#include "arm_compute/core/utils/DataLayoutUtils.h" +#include "arm_compute/core/utils/DataTypeUtils.h" +#include "arm_compute/core/utils/FormatUtils.h" #include "arm_compute/core/Window.h" #include <algorithm> @@ -46,9 +50,9 @@ namespace detail template <typename T> inline bool have_different_dimensions(const Dimensions<T> &dim1, const Dimensions<T> &dim2, unsigned int upper_dim) { - for(unsigned int i = upper_dim; i < arm_compute::Dimensions<T>::num_max_dimensions; ++i) + for (unsigned int i = upper_dim; i < arm_compute::Dimensions<T>::num_max_dimensions; ++i) { - if(dim1[i] != dim2[i]) + if (dim1[i] != dim2[i]) { return true; } @@ -76,7 +80,7 @@ public: * @param[in] line Source code line. Used for error reporting. 
*/ compare_dimension(const Dimensions<T> &dim, const char *function, const char *file, int line) - : _dim{ dim }, _function{ function }, _file{ file }, _line{ line } + : _dim{dim}, _function{function}, _file{file}, _line{line} { } @@ -107,7 +111,7 @@ inline arm_compute::Status for_each_error(F &&) } template <typename F, typename T, typename... Ts> -inline arm_compute::Status for_each_error(F &&func, T &&arg, Ts &&... args) +inline arm_compute::Status for_each_error(F &&func, T &&arg, Ts &&...args) { ARM_COMPUTE_RETURN_ON_ERROR(func(arg)); ARM_COMPUTE_RETURN_ON_ERROR(for_each_error(func, args...)); @@ -144,13 +148,11 @@ struct get_tensor_info_t<ITensorInfo *> * @return Status */ template <typename... Ts> -inline arm_compute::Status error_on_nullptr(const char *function, const char *file, const int line, Ts &&... pointers) +inline arm_compute::Status error_on_nullptr(const char *function, const char *file, const int line, Ts &&...pointers) { - const std::array<const void *, sizeof...(Ts)> pointers_array{ { std::forward<Ts>(pointers)... 
} }; - bool has_nullptr = std::any_of(pointers_array.begin(), pointers_array.end(), [&](const void *ptr) - { - return (ptr == nullptr); - }); + const std::array<const void *, sizeof...(Ts)> pointers_array{{std::forward<Ts>(pointers)...}}; + bool has_nullptr = + std::any_of(pointers_array.begin(), pointers_array.end(), [&](const void *ptr) { return (ptr == nullptr); }); ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG(has_nullptr, function, file, line, "Nullptr object!"); return arm_compute::Status{}; } @@ -174,8 +176,8 @@ inline arm_compute::Status error_on_nullptr(const char *function, const char *fi * * @return Status */ -arm_compute::Status error_on_mismatching_windows(const char *function, const char *file, const int line, - const Window &full, const Window &win); +arm_compute::Status error_on_mismatching_windows( + const char *function, const char *file, const int line, const Window &full, const Window &win); #define ARM_COMPUTE_ERROR_ON_MISMATCHING_WINDOWS(f, w) \ ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_mismatching_windows(__func__, __FILE__, __LINE__, f, w)) #define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_WINDOWS(f, w) \ @@ -196,8 +198,8 @@ arm_compute::Status error_on_mismatching_windows(const char *function, const cha * * @return Status */ -arm_compute::Status error_on_invalid_subwindow(const char *function, const char *file, const int line, - const Window &full, const Window &sub); +arm_compute::Status error_on_invalid_subwindow( + const char *function, const char *file, const int line, const Window &full, const Window &sub); #define ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(f, s) \ ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_invalid_subwindow(__func__, __FILE__, __LINE__, f, s)) #define ARM_COMPUTE_RETURN_ERROR_ON_INVALID_SUBWINDOW(f, s) \ @@ -216,12 +218,14 @@ arm_compute::Status error_on_invalid_subwindow(const char *function, const char * * @return Status */ -arm_compute::Status error_on_window_not_collapsable_at_dimension(const char *function, 
const char *file, const int line, - const Window &full, const Window &window, const int dim); +arm_compute::Status error_on_window_not_collapsable_at_dimension( + const char *function, const char *file, const int line, const Window &full, const Window &window, const int dim); #define ARM_COMPUTE_ERROR_ON_WINDOW_NOT_COLLAPSABLE_AT_DIMENSION(f, w, d) \ - ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_window_not_collapsable_at_dimension(__func__, __FILE__, __LINE__, f, w, d)) + ARM_COMPUTE_ERROR_THROW_ON( \ + ::arm_compute::error_on_window_not_collapsable_at_dimension(__func__, __FILE__, __LINE__, f, w, d)) #define ARM_COMPUTE_RETURN_ERROR_ON_WINDOW_NOT_COLLAPSABLE_AT_DIMENSION(f, w, d) \ - ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_window_not_collapsable_at_dimension(__func__, __FILE__, __LINE__, f, w, d)) + ARM_COMPUTE_RETURN_ON_ERROR( \ + ::arm_compute::error_on_window_not_collapsable_at_dimension(__func__, __FILE__, __LINE__, f, w, d)) /** Return an error if the passed coordinates have too many dimensions. 
* @@ -235,8 +239,8 @@ arm_compute::Status error_on_window_not_collapsable_at_dimension(const char *fun * * @return Status */ -arm_compute::Status error_on_coordinates_dimensions_gte(const char *function, const char *file, const int line, - const Coordinates &pos, unsigned int max_dim); +arm_compute::Status error_on_coordinates_dimensions_gte( + const char *function, const char *file, const int line, const Coordinates &pos, unsigned int max_dim); #define ARM_COMPUTE_ERROR_ON_COORDINATES_DIMENSIONS_GTE(p, md) \ ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_coordinates_dimensions_gte(__func__, __FILE__, __LINE__, p, md)) #define ARM_COMPUTE_RETURN_ERROR_ON_COORDINATES_DIMENSIONS_GTE(p, md) \ @@ -254,8 +258,8 @@ arm_compute::Status error_on_coordinates_dimensions_gte(const char *function, co * * @return Status */ -arm_compute::Status error_on_window_dimensions_gte(const char *function, const char *file, const int line, - const Window &win, unsigned int max_dim); +arm_compute::Status error_on_window_dimensions_gte( + const char *function, const char *file, const int line, const Window &win, unsigned int max_dim); #define ARM_COMPUTE_ERROR_ON_WINDOW_DIMENSIONS_GTE(w, md) \ ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_window_dimensions_gte(__func__, __FILE__, __LINE__, w, md)) #define ARM_COMPUTE_RETURN_ERROR_ON_WINDOW_DIMENSIONS_GTE(w, md) \ @@ -273,16 +277,82 @@ arm_compute::Status error_on_window_dimensions_gte(const char *function, const c * @return Status */ template <typename T, typename... Ts> -arm_compute::Status error_on_mismatching_dimensions(const char *function, const char *file, int line, - const Dimensions<T> &dim1, const Dimensions<T> &dim2, Ts &&... 
dims) +arm_compute::Status error_on_mismatching_dimensions(const char *function, + const char *file, + int line, + const Dimensions<T> &dim1, + const Dimensions<T> &dim2, + Ts &&...dims) { - ARM_COMPUTE_RETURN_ON_ERROR(detail::for_each_error(detail::compare_dimension<T>(dim1, function, file, line), dim2, std::forward<Ts>(dims)...)); + ARM_COMPUTE_RETURN_ON_ERROR(detail::for_each_error(detail::compare_dimension<T>(dim1, function, file, line), dim2, + std::forward<Ts>(dims)...)); return arm_compute::Status{}; } #define ARM_COMPUTE_ERROR_ON_MISMATCHING_DIMENSIONS(...) \ - ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_mismatching_dimensions(__func__, __FILE__, __LINE__, __VA_ARGS__)) + ARM_COMPUTE_ERROR_THROW_ON( \ + ::arm_compute::error_on_mismatching_dimensions(__func__, __FILE__, __LINE__, __VA_ARGS__)) #define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(...) \ - ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_mismatching_dimensions(__func__, __FILE__, __LINE__, __VA_ARGS__)) + ARM_COMPUTE_RETURN_ON_ERROR( \ + ::arm_compute::error_on_mismatching_dimensions(__func__, __FILE__, __LINE__, __VA_ARGS__)) + +/** Return true if the given format has horizontal subsampling. + * + * @param[in] format Format to determine subsampling. + * + * @return True if the format can be subsampled horizontally. + */ +inline bool has_format_horizontal_subsampling(Format format) +{ + return (format == Format::YUYV422 || format == Format::UYVY422 || format == Format::NV12 || + format == Format::NV21 || format == Format::IYUV || format == Format::UV88) + ? true + : false; +} + +/** Return true if the given format has vertical subsampling. + * + * @param[in] format Format to determine subsampling. + * + * @return True if the format can be subsampled vertically. + */ +inline bool has_format_vertical_subsampling(Format format) +{ + return (format == Format::NV12 || format == Format::NV21 || format == Format::IYUV || format == Format::UV88) + ? 
true + : false; +} + +/** Adjust tensor shape size if width or height are odd for a given multi-planar format. No modification is done for other formats. + * + * @note Adding here a few links discussing the issue of odd size and sharing the same solution: + * <a href="https://android.googlesource.com/platform/frameworks/base/+/refs/heads/master/graphics/java/android/graphics/YuvImage.java">Android Source</a> + * <a href="https://groups.google.com/a/webmproject.org/forum/#!topic/webm-discuss/LaCKpqiDTXM">WebM</a> + * <a href="https://bugs.chromium.org/p/libyuv/issues/detail?id=198&can=1&q=odd%20width">libYUV</a> + * <a href="https://sourceforge.net/p/raw-yuvplayer/bugs/1/">YUVPlayer</a> + * + * @param[in, out] shape Tensor shape of 2D size + * @param[in] format Format of the tensor + * + * @return The adjusted tensor shape. + */ +inline TensorShape adjust_odd_shape(const TensorShape &shape, Format format) +{ + TensorShape output{shape}; + + // Force width to be even for formats which require subsampling of the U and V channels + if (has_format_horizontal_subsampling(format)) + { + output.set(0, (output.x() + 1) & ~1U); + } + + // Force height to be even for formats which require subsampling of the U and V channels + if (has_format_vertical_subsampling(format)) + { + output.set(1, (output.y() + 1) & ~1U); + } + + return output; +} /** Return an error if the passed tensor objects are not even. * @@ -296,18 +366,20 @@ arm_compute::Status error_on_mismatching_dimensions(const char *function, const * @return Status */ template <typename... Ts> -arm_compute::Status error_on_tensors_not_even(const char *function, const char *file, int line, - const Format &format, const ITensor *tensor1, Ts... tensors) +arm_compute::Status error_on_tensors_not_even( + const char *function, const char *file, int line, const Format &format, const ITensor *tensor1, Ts... 
tensors) { ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor1 == nullptr, function, file, line); ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_nullptr(function, file, line, std::forward<Ts>(tensors)...)); - const std::array < const ITensor *, 1 + sizeof...(Ts) > tensors_info_array{ { tensor1, std::forward<Ts>(tensors)... } }; - ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG(std::any_of(tensors_info_array.cbegin(), tensors_info_array.cend(), [&](const ITensor * tensor) - { - const TensorShape correct_shape = adjust_odd_shape(tensor->info()->tensor_shape(), format); - return detail::have_different_dimensions(tensor->info()->tensor_shape(), correct_shape, 2); - }), - function, file, line, "Tensor shape has odd dimensions"); + const std::array<const ITensor *, 1 + sizeof...(Ts)> tensors_info_array{{tensor1, std::forward<Ts>(tensors)...}}; + ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG( + std::any_of(tensors_info_array.cbegin(), tensors_info_array.cend(), + [&](const ITensor *tensor) + { + const TensorShape correct_shape = adjust_odd_shape(tensor->info()->tensor_shape(), format); + return detail::have_different_dimensions(tensor->info()->tensor_shape(), correct_shape, 2); + }), + function, file, line, "Tensor shape has odd dimensions"); return arm_compute::Status{}; } @@ -316,6 +388,38 @@ arm_compute::Status error_on_tensors_not_even(const char *function, const char * #define ARM_COMPUTE_RETURN_ERROR_ON_TENSORS_NOT_EVEN(...) \ ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_tensors_not_even(__func__, __FILE__, __LINE__, __VA_ARGS__)) +/** Calculate subsampled shape for a given format and channel + * + * @param[in] shape Shape of the tensor to calculate the extracted channel. + * @param[in] format Format of the tensor. + * @param[in] channel Channel to create tensor shape to be extracted. + * + * @return The subsampled tensor shape. 
+ */ +inline TensorShape +calculate_subsampled_shape(const TensorShape &shape, Format format, Channel channel = Channel::UNKNOWN) +{ + TensorShape output{shape}; + + // Subsample shape only for U or V channel + if (Channel::U == channel || Channel::V == channel || Channel::UNKNOWN == channel) + { + // Subsample width for the tensor shape when channel is U or V + if (has_format_horizontal_subsampling(format)) + { + output.set(0, output.x() / 2U); + } + + // Subsample height for the tensor shape when channel is U or V + if (has_format_vertical_subsampling(format)) + { + output.set(1, output.y() / 2U); + } + } + + return output; +} + /** Return an error if the passed tensor objects are not sub-sampled. * * @param[in] function Function in which the error occurred. @@ -329,25 +433,32 @@ arm_compute::Status error_on_tensors_not_even(const char *function, const char * * @return Status */ template <typename... Ts> -arm_compute::Status error_on_tensors_not_subsampled(const char *function, const char *file, int line, - const Format &format, const TensorShape &shape, const ITensor *tensor1, Ts... tensors) +arm_compute::Status error_on_tensors_not_subsampled(const char *function, + const char *file, + int line, + const Format &format, + const TensorShape &shape, + const ITensor *tensor1, + Ts... tensors) { ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor1 == nullptr, function, file, line); ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_nullptr(function, file, line, std::forward<Ts>(tensors)...)); - const TensorShape sub2_shape = calculate_subsampled_shape(shape, format); - const std::array < const ITensor *, 1 + sizeof...(Ts) > tensors_info_array{ { tensor1, std::forward<Ts>(tensors)... 
} }; - ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG(std::any_of(tensors_info_array.cbegin(), tensors_info_array.cend(), [&](const ITensor * tensor) - { - return detail::have_different_dimensions(tensor->info()->tensor_shape(), sub2_shape, 2); - }), - function, file, line, "Tensor shape has mismatch dimensions for sub-sampling"); + const TensorShape sub2_shape = calculate_subsampled_shape(shape, format); + const std::array<const ITensor *, 1 + sizeof...(Ts)> tensors_info_array{{tensor1, std::forward<Ts>(tensors)...}}; + ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG( + std::any_of(tensors_info_array.cbegin(), tensors_info_array.cend(), + [&](const ITensor *tensor) + { return detail::have_different_dimensions(tensor->info()->tensor_shape(), sub2_shape, 2); }), + function, file, line, "Tensor shape has mismatch dimensions for sub-sampling"); return arm_compute::Status{}; } #define ARM_COMPUTE_ERROR_ON_TENSORS_NOT_SUBSAMPLED(...) \ - ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_tensors_not_subsampled(__func__, __FILE__, __LINE__, __VA_ARGS__)) + ARM_COMPUTE_ERROR_THROW_ON( \ + ::arm_compute::error_on_tensors_not_subsampled(__func__, __FILE__, __LINE__, __VA_ARGS__)) #define ARM_COMPUTE_RETURN_ERROR_ON_TENSORS_NOT_SUBSAMPLED(...) \ - ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_tensors_not_subsampled(__func__, __FILE__, __LINE__, __VA_ARGS__)) + ARM_COMPUTE_RETURN_ON_ERROR( \ + ::arm_compute::error_on_tensors_not_subsampled(__func__, __FILE__, __LINE__, __VA_ARGS__)) /** Return an error if the passed two tensor infos have different shapes from the given dimension * @@ -361,10 +472,15 @@ arm_compute::Status error_on_tensors_not_subsampled(const char *function, const * @return Status */ template <typename... Ts> -inline arm_compute::Status error_on_mismatching_shapes(const char *function, const char *file, const int line, - const ITensorInfo *tensor_info_1, const ITensorInfo *tensor_info_2, Ts... 
tensor_infos) +inline arm_compute::Status error_on_mismatching_shapes(const char *function, + const char *file, + const int line, + const ITensorInfo *tensor_info_1, + const ITensorInfo *tensor_info_2, + Ts... tensor_infos) { - return error_on_mismatching_shapes(function, file, line, 0U, tensor_info_1, tensor_info_2, std::forward<Ts>(tensor_infos)...); + return error_on_mismatching_shapes(function, file, line, 0U, tensor_info_1, tensor_info_2, + std::forward<Ts>(tensor_infos)...); } /** Return an error if the passed two tensors have different shapes from the given dimension * @@ -378,8 +494,12 @@ inline arm_compute::Status error_on_mismatching_shapes(const char *function, con * @return Status */ template <typename... Ts> -inline arm_compute::Status error_on_mismatching_shapes(const char *function, const char *file, const int line, - const ITensor *tensor_1, const ITensor *tensor_2, Ts... tensors) +inline arm_compute::Status error_on_mismatching_shapes(const char *function, + const char *file, + const int line, + const ITensor *tensor_1, + const ITensor *tensor_2, + Ts... tensors) { return error_on_mismatching_shapes(function, file, line, 0U, tensor_1, tensor_2, std::forward<Ts>(tensors)...); } @@ -396,19 +516,28 @@ inline arm_compute::Status error_on_mismatching_shapes(const char *function, con * @return Status */ template <typename... Ts> -inline arm_compute::Status error_on_mismatching_shapes(const char *function, const char *file, const int line, - unsigned int upper_dim, const ITensorInfo *tensor_info_1, const ITensorInfo *tensor_info_2, Ts... tensor_infos) +inline arm_compute::Status error_on_mismatching_shapes(const char *function, + const char *file, + const int line, + unsigned int upper_dim, + const ITensorInfo *tensor_info_1, + const ITensorInfo *tensor_info_2, + Ts... 
tensor_infos) { ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor_info_1 == nullptr, function, file, line); ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor_info_2 == nullptr, function, file, line); - ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_nullptr(function, file, line, std::forward<Ts>(tensor_infos)...)); + ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_nullptr(function, file, line, tensor_infos...)); - const std::array < const ITensorInfo *, 2 + sizeof...(Ts) > tensors_info_array{ { tensor_info_1, tensor_info_2, std::forward<Ts>(tensor_infos)... } }; - ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG(std::any_of(std::next(tensors_info_array.cbegin()), tensors_info_array.cend(), [&](const ITensorInfo * tensor_info) - { - return detail::have_different_dimensions((*tensors_info_array.cbegin())->tensor_shape(), tensor_info->tensor_shape(), upper_dim); - }), - function, file, line, "Tensors have different shapes"); + const std::array<const ITensorInfo *, 2 + sizeof...(Ts)> tensors_info_array{ + {tensor_info_1, tensor_info_2, tensor_infos...}}; + ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG(std::any_of(std::next(tensors_info_array.cbegin()), tensors_info_array.cend(), + [&](const ITensorInfo *tensor_info) + { + return detail::have_different_dimensions( + (*tensors_info_array.cbegin())->tensor_shape(), + tensor_info->tensor_shape(), upper_dim); + }), + function, file, line, "Tensors have different shapes"); return arm_compute::Status{}; } /** Return an error if the passed two tensors have different shapes from the given dimension @@ -424,14 +553,20 @@ inline arm_compute::Status error_on_mismatching_shapes(const char *function, con * @return Status */ template <typename... Ts> -inline arm_compute::Status error_on_mismatching_shapes(const char *function, const char *file, const int line, - unsigned int upper_dim, const ITensor *tensor_1, const ITensor *tensor_2, Ts... 
tensors) +inline arm_compute::Status error_on_mismatching_shapes(const char *function, + const char *file, + const int line, + unsigned int upper_dim, + const ITensor *tensor_1, + const ITensor *tensor_2, + Ts... tensors) { ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor_1 == nullptr, function, file, line); ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor_2 == nullptr, function, file, line); - ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_nullptr(function, file, line, std::forward<Ts>(tensors)...)); - ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_mismatching_shapes(function, file, line, upper_dim, tensor_1->info(), tensor_2->info(), - detail::get_tensor_info_t<ITensorInfo *>()(tensors)...)); + ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_nullptr(function, file, line, tensors...)); + ARM_COMPUTE_RETURN_ON_ERROR( + ::arm_compute::error_on_mismatching_shapes(function, file, line, upper_dim, tensor_1->info(), tensor_2->info(), + detail::get_tensor_info_t<ITensorInfo *>()(tensors)...)); return arm_compute::Status{}; } #define ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(...) \ @@ -450,19 +585,18 @@ inline arm_compute::Status error_on_mismatching_shapes(const char *function, con * @return Status */ template <typename... Ts> -inline arm_compute::Status error_on_mismatching_data_layouts(const char *function, const char *file, const int line, - const ITensorInfo *tensor_info, Ts... tensor_infos) +inline arm_compute::Status error_on_mismatching_data_layouts( + const char *function, const char *file, const int line, const ITensorInfo *tensor_info, Ts... 
tensor_infos) { ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor_info == nullptr, function, file, line); - ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_nullptr(function, file, line, std::forward<Ts>(tensor_infos)...)); + ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_nullptr(function, file, line, tensor_infos...)); - DataLayout &&tensor_data_layout = tensor_info->data_layout(); - const std::array<const ITensorInfo *, sizeof...(Ts)> tensors_infos_array{ { std::forward<Ts>(tensor_infos)... } }; - ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG(std::any_of(tensors_infos_array.begin(), tensors_infos_array.end(), [&](const ITensorInfo * tensor_info_obj) - { - return tensor_info_obj->data_layout() != tensor_data_layout; - }), - function, file, line, "Tensors have different data layouts"); + DataLayout &&tensor_data_layout = tensor_info->data_layout(); + const std::array<const ITensorInfo *, sizeof...(Ts)> tensors_infos_array{{tensor_infos...}}; + ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG(std::any_of(tensors_infos_array.begin(), tensors_infos_array.end(), + [&](const ITensorInfo *tensor_info_obj) + { return tensor_info_obj->data_layout() != tensor_data_layout; }), + function, file, line, "Tensors have different data layouts"); return arm_compute::Status{}; } /** Return an error if the passed tensors have different data layouts @@ -476,19 +610,21 @@ inline arm_compute::Status error_on_mismatching_data_layouts(const char *functio * @return Status */ template <typename... Ts> -inline arm_compute::Status error_on_mismatching_data_layouts(const char *function, const char *file, const int line, - const ITensor *tensor, Ts... tensors) +inline arm_compute::Status error_on_mismatching_data_layouts( + const char *function, const char *file, const int line, const ITensor *tensor, Ts... 
tensors) { ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor == nullptr, function, file, line); ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_nullptr(function, file, line, std::forward<Ts>(tensors)...)); - ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_mismatching_data_layouts(function, file, line, tensor->info(), - detail::get_tensor_info_t<ITensorInfo *>()(tensors)...)); + ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_mismatching_data_layouts( + function, file, line, tensor->info(), detail::get_tensor_info_t<ITensorInfo *>()(tensors)...)); return arm_compute::Status{}; } #define ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_LAYOUT(...) \ - ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_mismatching_data_layouts(__func__, __FILE__, __LINE__, __VA_ARGS__)) + ARM_COMPUTE_ERROR_THROW_ON( \ + ::arm_compute::error_on_mismatching_data_layouts(__func__, __FILE__, __LINE__, __VA_ARGS__)) #define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(...) \ - ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_mismatching_data_layouts(__func__, __FILE__, __LINE__, __VA_ARGS__)) + ARM_COMPUTE_RETURN_ON_ERROR( \ + ::arm_compute::error_on_mismatching_data_layouts(__func__, __FILE__, __LINE__, __VA_ARGS__)) /** Return an error if the passed two tensor infos have different data types * @@ -501,19 +637,18 @@ inline arm_compute::Status error_on_mismatching_data_layouts(const char *functio * @return Status */ template <typename... Ts> -inline arm_compute::Status error_on_mismatching_data_types(const char *function, const char *file, const int line, - const ITensorInfo *tensor_info, Ts... tensor_infos) +inline arm_compute::Status error_on_mismatching_data_types( + const char *function, const char *file, const int line, const ITensorInfo *tensor_info, Ts... 
tensor_infos) { ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor_info == nullptr, function, file, line); - ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_nullptr(function, file, line, std::forward<Ts>(tensor_infos)...)); + ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_nullptr(function, file, line, tensor_infos...)); - DataType &&tensor_data_type = tensor_info->data_type(); - const std::array<const ITensorInfo *, sizeof...(Ts)> tensors_infos_array{ { std::forward<Ts>(tensor_infos)... } }; - ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG(std::any_of(tensors_infos_array.begin(), tensors_infos_array.end(), [&](const ITensorInfo * tensor_info_obj) - { - return tensor_info_obj->data_type() != tensor_data_type; - }), - function, file, line, "Tensors have different data types"); + DataType &&tensor_data_type = tensor_info->data_type(); + const std::array<const ITensorInfo *, sizeof...(Ts)> tensors_infos_array{{tensor_infos...}}; + ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG(std::any_of(tensors_infos_array.begin(), tensors_infos_array.end(), + [&](const ITensorInfo *tensor_info_obj) + { return tensor_info_obj->data_type() != tensor_data_type; }), + function, file, line, "Tensors have different data types"); return arm_compute::Status{}; } /** Return an error if the passed two tensors have different data types @@ -527,19 +662,21 @@ inline arm_compute::Status error_on_mismatching_data_types(const char *function, * @return Status */ template <typename... Ts> -inline arm_compute::Status error_on_mismatching_data_types(const char *function, const char *file, const int line, - const ITensor *tensor, Ts... tensors) +inline arm_compute::Status error_on_mismatching_data_types( + const char *function, const char *file, const int line, const ITensor *tensor, Ts... 
tensors) { ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor == nullptr, function, file, line); - ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_nullptr(function, file, line, std::forward<Ts>(tensors)...)); - ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_mismatching_data_types(function, file, line, tensor->info(), - detail::get_tensor_info_t<ITensorInfo *>()(tensors)...)); + ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_nullptr(function, file, line, tensors...)); + ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_mismatching_data_types( + function, file, line, tensor->info(), detail::get_tensor_info_t<ITensorInfo *>()(tensors)...)); return arm_compute::Status{}; } #define ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(...) \ - ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_mismatching_data_types(__func__, __FILE__, __LINE__, __VA_ARGS__)) + ARM_COMPUTE_ERROR_THROW_ON( \ + ::arm_compute::error_on_mismatching_data_types(__func__, __FILE__, __LINE__, __VA_ARGS__)) #define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(...) \ - ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_mismatching_data_types(__func__, __FILE__, __LINE__, __VA_ARGS__)) + ARM_COMPUTE_RETURN_ON_ERROR( \ + ::arm_compute::error_on_mismatching_data_types(__func__, __FILE__, __LINE__, __VA_ARGS__)) /** Return an error if the passed tensor infos have different asymmetric quantized data types or different quantization info * @@ -555,28 +692,32 @@ inline arm_compute::Status error_on_mismatching_data_types(const char *function, * @return Status */ template <typename... Ts> -inline arm_compute::Status error_on_mismatching_quantization_info(const char *function, const char *file, const int line, - const ITensorInfo *tensor_info_1, const ITensorInfo *tensor_info_2, Ts... tensor_infos) +inline arm_compute::Status error_on_mismatching_quantization_info(const char *function, + const char *file, + const int line, + const ITensorInfo *tensor_info_1, + const ITensorInfo *tensor_info_2, + Ts... 
tensor_infos) { DataType &&first_data_type = tensor_info_1->data_type(); const QuantizationInfo first_quantization_info = tensor_info_1->quantization_info(); - if(!is_data_type_quantized(first_data_type)) + if (!is_data_type_quantized(first_data_type)) { return arm_compute::Status{}; } - const std::array < const ITensorInfo *, 1 + sizeof...(Ts) > tensor_infos_array{ { tensor_info_2, std::forward<Ts>(tensor_infos)... } }; - ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG(std::any_of(tensor_infos_array.begin(), tensor_infos_array.end(), [&](const ITensorInfo * tensor_info) - { - return tensor_info->data_type() != first_data_type; - }), - function, file, line, "Tensors have different asymmetric quantized data types"); - ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG(std::any_of(tensor_infos_array.begin(), tensor_infos_array.end(), [&](const ITensorInfo * tensor_info) - { - return tensor_info->quantization_info() != first_quantization_info; - }), - function, file, line, "Tensors have different quantization information"); + const std::array<const ITensorInfo *, 1 + sizeof...(Ts)> tensor_infos_array{ + {tensor_info_2, std::forward<Ts>(tensor_infos)...}}; + ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG(std::any_of(tensor_infos_array.begin(), tensor_infos_array.end(), + [&](const ITensorInfo *tensor_info) + { return tensor_info->data_type() != first_data_type; }), + function, file, line, "Tensors have different asymmetric quantized data types"); + ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG( + std::any_of(tensor_infos_array.begin(), tensor_infos_array.end(), + [&](const ITensorInfo *tensor_info) + { return tensor_info->quantization_info() != first_quantization_info; }), + function, file, line, "Tensors have different quantization information"); return arm_compute::Status{}; } @@ -594,17 +735,24 @@ inline arm_compute::Status error_on_mismatching_quantization_info(const char *fu * @return Status */ template <typename... 
Ts> -inline arm_compute::Status error_on_mismatching_quantization_info(const char *function, const char *file, const int line, - const ITensor *tensor_1, const ITensor *tensor_2, Ts... tensors) +inline arm_compute::Status error_on_mismatching_quantization_info(const char *function, + const char *file, + const int line, + const ITensor *tensor_1, + const ITensor *tensor_2, + Ts... tensors) { - ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_mismatching_quantization_info(function, file, line, tensor_1->info(), tensor_2->info(), - detail::get_tensor_info_t<ITensorInfo *>()(tensors)...)); + ARM_COMPUTE_RETURN_ON_ERROR( + ::arm_compute::error_on_mismatching_quantization_info(function, file, line, tensor_1->info(), tensor_2->info(), + detail::get_tensor_info_t<ITensorInfo *>()(tensors)...)); return arm_compute::Status{}; } #define ARM_COMPUTE_ERROR_ON_MISMATCHING_QUANTIZATION_INFO(...) \ - ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_mismatching_quantization_info(__func__, __FILE__, __LINE__, __VA_ARGS__)) + ARM_COMPUTE_ERROR_THROW_ON( \ + ::arm_compute::error_on_mismatching_quantization_info(__func__, __FILE__, __LINE__, __VA_ARGS__)) #define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_QUANTIZATION_INFO(...) \ - ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_mismatching_quantization_info(__func__, __FILE__, __LINE__, __VA_ARGS__)) + ARM_COMPUTE_RETURN_ON_ERROR( \ + ::arm_compute::error_on_mismatching_quantization_info(__func__, __FILE__, __LINE__, __VA_ARGS__)) /** Throw an error if the format of the passed tensor/multi-image does not match any of the formats provided. * @@ -616,8 +764,8 @@ inline arm_compute::Status error_on_mismatching_quantization_info(const char *fu * @param[in] formats (Optional) Further allowed formats. */ template <typename T, typename F, typename... Fs> -void error_on_format_not_in(const char *function, const char *file, const int line, - const T *object, F &&format, Fs &&... 
formats) +void error_on_format_not_in( + const char *function, const char *file, const int line, const T *object, F &&format, Fs &&...formats) { ARM_COMPUTE_ERROR_ON_LOC(object == nullptr, function, file, line); @@ -626,17 +774,17 @@ void error_on_format_not_in(const char *function, const char *file, const int li ARM_COMPUTE_ERROR_ON_LOC(object_format == Format::UNKNOWN, function, file, line); - const std::array<F, sizeof...(Fs)> formats_array{ { std::forward<Fs>(formats)... } }; + const std::array<F, sizeof...(Fs)> formats_array{{std::forward<Fs>(formats)...}}; ARM_COMPUTE_UNUSED(formats_array); - ARM_COMPUTE_ERROR_ON_LOC_MSG(object_format != format && std::none_of(formats_array.begin(), formats_array.end(), [&](const F & f) - { - return f == object_format; - }), - function, file, line, "Format %s not supported by this kernel", string_from_format(object_format).c_str()); + ARM_COMPUTE_ERROR_ON_LOC_MSG( + object_format != format && + std::none_of(formats_array.begin(), formats_array.end(), [&](const F &f) { return f == object_format; }), + function, file, line, "Format %s not supported by this kernel", string_from_format(object_format).c_str()); ARM_COMPUTE_UNUSED(function, format, file, line); } -#define ARM_COMPUTE_ERROR_ON_FORMAT_NOT_IN(t, ...) ::arm_compute::error_on_format_not_in(__func__, __FILE__, __LINE__, t, __VA_ARGS__) +#define ARM_COMPUTE_ERROR_ON_FORMAT_NOT_IN(t, ...) \ + ::arm_compute::error_on_format_not_in(__func__, __FILE__, __LINE__, t, __VA_ARGS__) /** Return an error if the data type of the passed tensor info does not match any of the data types provided. * @@ -650,20 +798,19 @@ void error_on_format_not_in(const char *function, const char *file, const int li * @return Status */ template <typename T, typename... Ts> -inline arm_compute::Status error_on_data_type_not_in(const char *function, const char *file, const int line, - const ITensorInfo *tensor_info, T &&dt, Ts &&... 
dts) +inline arm_compute::Status error_on_data_type_not_in( + const char *function, const char *file, const int line, const ITensorInfo *tensor_info, T &&dt, Ts &&...dts) { ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor_info == nullptr, function, file, line); const DataType &tensor_dt = tensor_info->data_type(); //NOLINT ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor_dt == DataType::UNKNOWN, function, file, line); - const std::array<T, sizeof...(Ts)> dts_array{ { std::forward<Ts>(dts)... } }; - ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG_VAR(tensor_dt != dt && std::none_of(dts_array.begin(), dts_array.end(), [&](const T & d) - { - return d == tensor_dt; - }), - function, file, line, "ITensor data type %s not supported by this kernel", string_from_data_type(tensor_dt).c_str()); + const std::array<T, sizeof...(Ts)> dts_array{{std::forward<Ts>(dts)...}}; + ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG_VAR( + tensor_dt != dt && std::none_of(dts_array.begin(), dts_array.end(), [&](const T &d) { return d == tensor_dt; }), + function, file, line, "ITensor data type %s not supported by this kernel", + string_from_data_type(tensor_dt).c_str()); return arm_compute::Status{}; } /** Return an error if the data type of the passed tensor does not match any of the data types provided. @@ -678,11 +825,12 @@ inline arm_compute::Status error_on_data_type_not_in(const char *function, const * @return Status */ template <typename T, typename... Ts> -inline arm_compute::Status error_on_data_type_not_in(const char *function, const char *file, const int line, - const ITensor *tensor, T &&dt, Ts &&... 
dts) +inline arm_compute::Status error_on_data_type_not_in( + const char *function, const char *file, const int line, const ITensor *tensor, T &&dt, Ts &&...dts) { ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor == nullptr, function, file, line); - ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_data_type_not_in(function, file, line, tensor->info(), std::forward<T>(dt), std::forward<Ts>(dts)...)); + ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_data_type_not_in( + function, file, line, tensor->info(), std::forward<T>(dt), std::forward<Ts>(dts)...)); return arm_compute::Status{}; } #define ARM_COMPUTE_ERROR_ON_DATA_TYPE_NOT_IN(t, ...) \ @@ -702,20 +850,19 @@ inline arm_compute::Status error_on_data_type_not_in(const char *function, const * @return Status */ template <typename T, typename... Ts> -inline arm_compute::Status error_on_data_layout_not_in(const char *function, const char *file, const int line, - const ITensorInfo *tensor_info, T &&dl, Ts &&... dls) +inline arm_compute::Status error_on_data_layout_not_in( + const char *function, const char *file, const int line, const ITensorInfo *tensor_info, T &&dl, Ts &&...dls) { ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor_info == nullptr, function, file, line); const DataLayout &tensor_dl = tensor_info->data_layout(); //NOLINT ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor_dl == DataLayout::UNKNOWN, function, file, line); - const std::array<T, sizeof...(Ts)> dls_array{ { std::forward<Ts>(dls)... 
} }; - ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG_VAR(tensor_dl != dl && std::none_of(dls_array.begin(), dls_array.end(), [&](const T & l) - { - return l == tensor_dl; - }), - function, file, line, "ITensor data layout %s not supported by this kernel", string_from_data_layout(tensor_dl).c_str()); + const std::array<T, sizeof...(Ts)> dls_array{{std::forward<Ts>(dls)...}}; + ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG_VAR( + tensor_dl != dl && std::none_of(dls_array.begin(), dls_array.end(), [&](const T &l) { return l == tensor_dl; }), + function, file, line, "ITensor data layout %s not supported by this kernel", + string_from_data_layout(tensor_dl).c_str()); return arm_compute::Status{}; } /** Return an error if the data layout of the passed tensor does not match any of the data layout provided. @@ -730,17 +877,19 @@ inline arm_compute::Status error_on_data_layout_not_in(const char *function, con * @return Status */ template <typename T, typename... Ts> -inline arm_compute::Status error_on_data_layout_not_in(const char *function, const char *file, const int line, - const ITensor *tensor, T &&dl, Ts &&... dls) +inline arm_compute::Status error_on_data_layout_not_in( + const char *function, const char *file, const int line, const ITensor *tensor, T &&dl, Ts &&...dls) { ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor == nullptr, function, file, line); - ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_data_layout_not_in(function, file, line, tensor->info(), std::forward<T>(dl), std::forward<Ts>(dls)...)); + ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_data_layout_not_in( + function, file, line, tensor->info(), std::forward<T>(dl), std::forward<Ts>(dls)...)); return arm_compute::Status{}; } #define ARM_COMPUTE_ERROR_ON_DATA_LAYOUT_NOT_IN(t, ...) \ ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_data_layout_not_in(__func__, __FILE__, __LINE__, t, __VA_ARGS__)) #define ARM_COMPUTE_RETURN_ERROR_ON_DATA_LAYOUT_NOT_IN(t, ...) 
\ - ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_data_layout_not_in(__func__, __FILE__, __LINE__, t, __VA_ARGS__)) + ARM_COMPUTE_RETURN_ON_ERROR( \ + ::arm_compute::error_on_data_layout_not_in(__func__, __FILE__, __LINE__, t, __VA_ARGS__)) /** Return an error if the data type or the number of channels of the passed tensor info does not match any of the data types and number of channels provided. * @@ -755,12 +904,20 @@ inline arm_compute::Status error_on_data_layout_not_in(const char *function, con * @return Status */ template <typename T, typename... Ts> -inline arm_compute::Status error_on_data_type_channel_not_in(const char *function, const char *file, const int line, - const ITensorInfo *tensor_info, size_t num_channels, T &&dt, Ts &&... dts) +inline arm_compute::Status error_on_data_type_channel_not_in(const char *function, + const char *file, + const int line, + const ITensorInfo *tensor_info, + size_t num_channels, + T &&dt, + Ts &&...dts) { - ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_data_type_not_in(function, file, line, tensor_info, std::forward<T>(dt), std::forward<Ts>(dts)...)); + ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_data_type_not_in( + function, file, line, tensor_info, std::forward<T>(dt), std::forward<Ts>(dts)...)); const size_t tensor_nc = tensor_info->num_channels(); - ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG_VAR(tensor_nc != num_channels, function, file, line, "Number of channels %zu. Required number of channels %zu", tensor_nc, num_channels); + ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG_VAR(tensor_nc != num_channels, function, file, line, + "Number of channels %zu. Required number of channels %zu", tensor_nc, + num_channels); return arm_compute::Status{}; } /** Return an error if the data type or the number of channels of the passed tensor does not match any of the data types and number of channels provided. 
@@ -776,17 +933,25 @@ inline arm_compute::Status error_on_data_type_channel_not_in(const char *functio * @return Status */ template <typename T, typename... Ts> -inline arm_compute::Status error_on_data_type_channel_not_in(const char *function, const char *file, const int line, - const ITensor *tensor, size_t num_channels, T &&dt, Ts &&... dts) +inline arm_compute::Status error_on_data_type_channel_not_in(const char *function, + const char *file, + const int line, + const ITensor *tensor, + size_t num_channels, + T &&dt, + Ts &&...dts) { ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor == nullptr, function, file, line); - ARM_COMPUTE_RETURN_ON_ERROR(error_on_data_type_channel_not_in(function, file, line, tensor->info(), num_channels, std::forward<T>(dt), std::forward<Ts>(dts)...)); + ARM_COMPUTE_RETURN_ON_ERROR(error_on_data_type_channel_not_in(function, file, line, tensor->info(), num_channels, + std::forward<T>(dt), std::forward<Ts>(dts)...)); return arm_compute::Status{}; } #define ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(t, c, ...) \ - ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_data_type_channel_not_in(__func__, __FILE__, __LINE__, t, c, __VA_ARGS__)) + ARM_COMPUTE_ERROR_THROW_ON( \ + ::arm_compute::error_on_data_type_channel_not_in(__func__, __FILE__, __LINE__, t, c, __VA_ARGS__)) #define ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(t, c, ...) \ - ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_data_type_channel_not_in(__func__, __FILE__, __LINE__, t, c, __VA_ARGS__)) + ARM_COMPUTE_RETURN_ON_ERROR( \ + ::arm_compute::error_on_data_type_channel_not_in(__func__, __FILE__, __LINE__, t, c, __VA_ARGS__)) /** Return an error if the data type of the passed tensor info is FP16 and FP16 extension is not supported by the device. 
* @@ -798,12 +963,12 @@ inline arm_compute::Status error_on_data_type_channel_not_in(const char *functio * * @return Status */ -inline arm_compute::Status error_on_unsupported_fp16(const char *function, const char *file, const int line, - const ITensorInfo *tensor_info, bool is_fp16_supported) +inline arm_compute::Status error_on_unsupported_fp16( + const char *function, const char *file, const int line, const ITensorInfo *tensor_info, bool is_fp16_supported) { ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor_info == nullptr, function, file, line); - ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG((tensor_info->data_type() == DataType::F16 && !is_fp16_supported), - function, file, line, "FP16 not supported by the device"); + ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG((tensor_info->data_type() == DataType::F16 && !is_fp16_supported), function, + file, line, "FP16 not supported by the device"); return arm_compute::Status{}; } @@ -817,11 +982,12 @@ inline arm_compute::Status error_on_unsupported_fp16(const char *function, const * * @return Status */ -inline arm_compute::Status error_on_unsupported_fp16(const char *function, const char *file, const int line, - const ITensor *tensor, bool is_fp16_supported) +inline arm_compute::Status error_on_unsupported_fp16( + const char *function, const char *file, const int line, const ITensor *tensor, bool is_fp16_supported) { ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor == nullptr, function, file, line); - ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_unsupported_fp16(function, file, line, tensor->info(), is_fp16_supported)); + ARM_COMPUTE_RETURN_ON_ERROR( + ::arm_compute::error_on_unsupported_fp16(function, file, line, tensor->info(), is_fp16_supported)); return arm_compute::Status{}; } @@ -834,8 +1000,8 @@ inline arm_compute::Status error_on_unsupported_fp16(const char *function, const * * @return Status */ -arm_compute::Status error_on_tensor_not_2d(const char *function, const char *file, const int line, - const ITensor *tensor); +arm_compute::Status 
+error_on_tensor_not_2d(const char *function, const char *file, const int line, const ITensor *tensor); /** Return an error if the tensor info is not 2D. * @@ -846,8 +1012,8 @@ arm_compute::Status error_on_tensor_not_2d(const char *function, const char *fil * * @return Status */ -arm_compute::Status error_on_tensor_not_2d(const char *function, const char *file, const int line, - const ITensorInfo *tensor); +arm_compute::Status +error_on_tensor_not_2d(const char *function, const char *file, const int line, const ITensorInfo *tensor); #define ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(t) \ ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_tensor_not_2d(__func__, __FILE__, __LINE__, t)) @@ -866,17 +1032,15 @@ arm_compute::Status error_on_tensor_not_2d(const char *function, const char *fil * @return Status */ template <typename T, typename... Ts> -inline arm_compute::Status error_on_channel_not_in(const char *function, const char *file, const int line, - T cn, T &&channel, Ts &&... channels) +inline arm_compute::Status +error_on_channel_not_in(const char *function, const char *file, const int line, T cn, T &&channel, Ts &&...channels) { ARM_COMPUTE_RETURN_ERROR_ON_LOC(cn == Channel::UNKNOWN, function, file, line); - const std::array<T, sizeof...(Ts)> channels_array{ { std::forward<Ts>(channels)... } }; - ARM_COMPUTE_RETURN_ERROR_ON_LOC(channel != cn && std::none_of(channels_array.begin(), channels_array.end(), [&](const T & f) - { - return f == cn; - }), - function, file, line); + const std::array<T, sizeof...(Ts)> channels_array{{std::forward<Ts>(channels)...}}; + ARM_COMPUTE_RETURN_ERROR_ON_LOC(channel != cn && std::none_of(channels_array.begin(), channels_array.end(), + [&](const T &f) { return f == cn; }), + function, file, line); return arm_compute::Status{}; } #define ARM_COMPUTE_ERROR_ON_CHANNEL_NOT_IN(c, ...) 
\ @@ -894,8 +1058,8 @@ inline arm_compute::Status error_on_channel_not_in(const char *function, const c * * @return Status */ -arm_compute::Status error_on_channel_not_in_known_format(const char *function, const char *file, const int line, - Format fmt, Channel cn); +arm_compute::Status +error_on_channel_not_in_known_format(const char *function, const char *file, const int line, Format fmt, Channel cn); #define ARM_COMPUTE_ERROR_ON_CHANNEL_NOT_IN_KNOWN_FORMAT(f, c) \ ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_channel_not_in_known_format(__func__, __FILE__, __LINE__, f, c)) #define ARM_COMPUTE_RETURN_ERROR_ON_CHANNEL_NOT_IN_KNOWN_FORMAT(f, c) \ @@ -910,8 +1074,8 @@ arm_compute::Status error_on_channel_not_in_known_format(const char *function, c * * @return Status */ -arm_compute::Status error_on_unconfigured_kernel(const char *function, const char *file, const int line, - const IKernel *kernel); +arm_compute::Status +error_on_unconfigured_kernel(const char *function, const char *file, const int line, const IKernel *kernel); #define ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(k) \ ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_unconfigured_kernel(__func__, __FILE__, __LINE__, k)) #define ARM_COMPUTE_RETURN_ERROR_ON_UNCONFIGURED_KERNEL(k) \ @@ -928,8 +1092,12 @@ arm_compute::Status error_on_unconfigured_kernel(const char *function, const cha * * @return Status */ -arm_compute::Status error_on_invalid_subtensor(const char *function, const char *file, const int line, - const TensorShape &parent_shape, const Coordinates &coords, const TensorShape &shape); +arm_compute::Status error_on_invalid_subtensor(const char *function, + const char *file, + const int line, + const TensorShape &parent_shape, + const Coordinates &coords, + const TensorShape &shape); #define ARM_COMPUTE_ERROR_ON_INVALID_SUBTENSOR(p, c, s) \ ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_invalid_subtensor(__func__, __FILE__, __LINE__, p, c, s)) #define 
ARM_COMPUTE_RETURN_ERROR_ON_INVALID_SUBTENSOR(p, c, s) \ @@ -945,11 +1113,16 @@ arm_compute::Status error_on_invalid_subtensor(const char *function, const char * * @return Status */ -arm_compute::Status error_on_invalid_subtensor_valid_region(const char *function, const char *file, const int line, - const ValidRegion &parent_valid_region, const ValidRegion &valid_region); +arm_compute::Status error_on_invalid_subtensor_valid_region(const char *function, + const char *file, + const int line, + const ValidRegion &parent_valid_region, + const ValidRegion &valid_region); #define ARM_COMPUTE_ERROR_ON_INVALID_SUBTENSOR_VALID_REGION(pv, sv) \ - ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_invalid_subtensor_valid_region(__func__, __FILE__, __LINE__, pv, sv)) + ARM_COMPUTE_ERROR_THROW_ON( \ + ::arm_compute::error_on_invalid_subtensor_valid_region(__func__, __FILE__, __LINE__, pv, sv)) #define ARM_COMPUTE_RETURN_ERROR_ON_INVALID_SUBTENSOR_VALID_REGION(pv, sv) \ - ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_invalid_subtensor_valid_region(__func__, __FILE__, __LINE__, pv, sv)) -} + ARM_COMPUTE_RETURN_ON_ERROR( \ + ::arm_compute::error_on_invalid_subtensor_valid_region(__func__, __FILE__, __LINE__, pv, sv)) +} // namespace arm_compute #endif /* ARM_COMPUTE_VALIDATE_H*/ diff --git a/arm_compute/core/Version.h b/arm_compute/core/Version.h index a4d307950a..44d400bad8 100644 --- a/arm_compute/core/Version.h +++ b/arm_compute/core/Version.h @@ -28,7 +28,7 @@ /* Macro utilities */ #define ARM_COMPUTE_STRINGIFY2(s) #s -#define ARM_COMPUTE_STRINGIFY(s) ARM_COMPUTE_STRINGIFY2(s) +#define ARM_COMPUTE_STRINGIFY(s) ARM_COMPUTE_STRINGIFY2(s) #define ARM_COMPUTE_VERSION_STR \ ARM_COMPUTE_STRINGIFY(ARM_COMPUTE_VERSION_MAJOR) \ diff --git a/arm_compute/core/Window.h b/arm_compute/core/Window.h index 150320a90e..e93d2863c9 100644 --- a/arm_compute/core/Window.h +++ b/arm_compute/core/Window.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020 Arm Limited. 
+ * Copyright (c) 2016-2020, 2022-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,17 +21,17 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_WINDOW_H -#define ARM_COMPUTE_WINDOW_H - -#include <algorithm> -#include <array> -#include <cstddef> +#ifndef ACL_ARM_COMPUTE_CORE_WINDOW_H +#define ACL_ARM_COMPUTE_CORE_WINDOW_H #include "arm_compute/core/Coordinates.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/ITensorInfo.h" -#include "arm_compute/core/Utils.h" +#include "arm_compute/core/utils/math/Math.h" + +#include <algorithm> +#include <array> +#include <cstddef> namespace arm_compute { @@ -47,6 +47,8 @@ public: static constexpr size_t DimZ = 2; /** Alias for dimension 3 also known as W dimension */ static constexpr size_t DimW = 3; + /** Alias for dimension 4 also known as V dimension */ + static constexpr size_t DimV = 4; /** Default constructor: create a window containing a single element. */ constexpr Window() @@ -84,10 +86,10 @@ public: * @param[in] step Step between two elements of the dimension when iterating. * */ - constexpr Dimension(int start = 0, int end = 1, int step = 1) - : _start(start), _end(end), _step(step) + constexpr Dimension(int start = 0, int end = 1, int step = 1) : _start(start), _end(end), _step(step) { } + Dimension(const Dimension &d) = default; /** Default assignment operator to allow dimensions to be copied */ Dimension &operator=(const Dimension &d) = default; /** Return the start of the dimension */ @@ -121,6 +123,17 @@ public: { _end = end; } + /** Check whether two Dimensions are equal. + * + * @param[in] lhs LHS Dimensions + * @param[in] rhs RHS Dimensions + * + * @return True if the Dimensions are the same. 
+ */ + friend bool operator==(const Dimension &lhs, const Dimension &rhs) + { + return (lhs._start == rhs._start) && (lhs._end == rhs._end) && (lhs._step == rhs._step); + } private: int _start; /**< Start of the dimension */ @@ -200,15 +213,17 @@ public: */ void shift(size_t dimension, int shift_value); - /** Shift down all the dimensions of a window + /** Shift down all the dimensions of a window starting from the specified dimension. * - * i.e new_dims[n] = old_dims[n+shift_value]. + * new_dims[i] = old_dims[i] for all i < start_dim. + * new_dims[i] = old_dims[i+shift_value] for all i >= start_dim. * * @param[in] shift_value Number of dimensions to shift the window by. + * @param[in] start_dim The dimension from which the dimensions start to shift. * * @return The window with the shifted dimensions. */ - Window shift_dimensions(unsigned int shift_value) const; + Window shift_dimensions(unsigned int shift_value, unsigned int start_dim = 0) const; /** Adjust the start or end of a given dimension by the given value * @@ -348,7 +363,6 @@ public: { return slide_window_slice<4>(slice); } - /** Collapse the dimensions between @p first and @p last if possible. * * A dimension is collapsable if it starts from 0 and matches the corresponding dimension in the full_window @@ -360,7 +374,8 @@ public: * * @return Collapsed window. */ - Window collapse_if_possible(const Window &full_window, size_t first, size_t last, bool *has_collapsed = nullptr) const; + Window + collapse_if_possible(const Window &full_window, size_t first, size_t last, bool *has_collapsed = nullptr) const; /** Collapse the dimensions higher than @p first if possible. * @@ -413,6 +428,14 @@ public: * @param[in] rhs Second window to swap. */ friend void swap(Window &lhs, Window &rhs); + /** Check whether two Windows are equal. + * + * @param[in] lhs LHS window + * @param[in] rhs RHS window + * + * @return True if the given windows are the same. 
+ */ + friend bool operator==(const Window &lhs, const Window &rhs); private: /** First slice of the window @@ -420,7 +443,7 @@ private: * @return The first slice of the window. */ template <unsigned int window_dimension> - Window first_slice_window() const; + Window first_slice_window() const; /** Slide the passed window slice. * @@ -439,4 +462,4 @@ private: }; } // namespace arm_compute #include "Window.inl" -#endif /*ARM_COMPUTE_WINDOW_H */ +#endif // ACL_ARM_COMPUTE_CORE_WINDOW_H diff --git a/arm_compute/core/Window.inl b/arm_compute/core/Window.inl index 6100d09a1c..0f7c4fbdd7 100644 --- a/arm_compute/core/Window.inl +++ b/arm_compute/core/Window.inl @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020 Arm Limited. + * Copyright (c) 2016-2020, 2022-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,12 +21,16 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ + +#ifndef ACL_ARM_COMPUTE_CORE_WINDOW_INL +#define ACL_ARM_COMPUTE_CORE_WINDOW_INL + namespace arm_compute { inline Window::Window(const Window &src) : _dims(), _is_broadcasted(utility::generate_array<bool, Coordinates::num_max_dimensions, false>::value) { - for(size_t i = 0; i < Coordinates::num_max_dimensions; ++i) + for (size_t i = 0; i < Coordinates::num_max_dimensions; ++i) { set(i, src[i]); _is_broadcasted[i] = src.is_broadcasted(i); @@ -65,32 +69,34 @@ inline bool Window::is_broadcasted(size_t dimension) const return _is_broadcasted[dimension]; } -inline Window Window::collapse_if_possible(const Window &full_window, const size_t first, - const size_t last, bool *has_collapsed) const +inline Window Window::collapse_if_possible(const Window &full_window, + const size_t first, + const size_t last, + bool *has_collapsed) const { Window collapsed(*this); bool is_collapsable = true; int collapsed_end = _dims[first].end(); - for(size_t d = first + 1; is_collapsable && (d < last); ++d) + for (size_t d = first + 1; is_collapsable && (d < last); ++d) { // The 
_dims's dimension must match the full _dims dimension to be collapsable: - is_collapsable = (_dims[d].start() == 0) && (full_window[d].start() == 0) && (_dims[d].step() <= 1) - && (full_window[d].end() == _dims[d].end()); + is_collapsable = (_dims[d].start() == 0) && (full_window[d].start() == 0) && (_dims[d].step() <= 1) && + (full_window[d].end() == _dims[d].end()); collapsed_end *= _dims[d].end(); } - if(is_collapsable) + if (is_collapsable) { collapsed._dims.at(first).set_end(collapsed_end); - for(size_t d = first + 1; is_collapsable && (d < last); ++d) + for (size_t d = first + 1; is_collapsable && (d < last); ++d) { collapsed.set(d, Dimension()); } } - if(has_collapsed != nullptr) + if (has_collapsed != nullptr) { *has_collapsed = is_collapsable; } @@ -98,13 +104,21 @@ inline Window Window::collapse_if_possible(const Window &full_window, const size return collapsed; } -inline Window Window::shift_dimensions(unsigned int shift_value) const +inline Window Window::shift_dimensions(unsigned int shift_value, unsigned int start_dim) const { Window shifted_window; - for(size_t n = 0; n < (Coordinates::num_max_dimensions - shift_value); n++) + size_t n = 0; + + for (; n < start_dim; ++n) + { + shifted_window.set(n, _dims[n]); + } + + for (; n < (Coordinates::num_max_dimensions - shift_value); n++) { shifted_window.set(n, _dims[n + shift_value]); } + return shifted_window; } @@ -120,9 +134,9 @@ inline Window Window::collapse(const Window &full_window, const size_t first, co inline Window Window::broadcast_if_dimension_le_one(const TensorShape &shape) const { Window broadcastWin(*this); - for(size_t d = 0; d < TensorShape::num_max_dimensions; ++d) + for (size_t d = 0; d < TensorShape::num_max_dimensions; ++d) { - if(shape[d] <= 1) + if (shape[d] <= 1) { broadcastWin.set_broadcasted(d); } @@ -142,7 +156,7 @@ inline void Window::adjust(size_t dimension, int adjust_value, bool is_at_start) ARM_COMPUTE_ERROR_ON(dimension >= Coordinates::num_max_dimensions); 
Window::Dimension &d = _dims[dimension]; - if(is_at_start) + if (is_at_start) { d = Window::Dimension(d.start() + adjust_value, d.end(), d.step()); } @@ -172,7 +186,7 @@ inline void Window::set_dimension_step(size_t dimension, int step) inline void Window::validate() const { - for(size_t i = 0; i < Coordinates::num_max_dimensions; ++i) + for (size_t i = 0; i < Coordinates::num_max_dimensions; ++i) { ARM_COMPUTE_ERROR_ON(_dims[i].end() < _dims[i].start()); ARM_COMPUTE_ERROR_ON((_dims[i].step() != 0) && (((_dims[i].end() - _dims[i].start()) % _dims[i].step()) != 0)); @@ -193,9 +207,9 @@ inline Window Window::split_window(size_t dimension, size_t id, size_t total) co Window out; - for(size_t d = 0; d < Coordinates::num_max_dimensions; ++d) + for (size_t d = 0; d < Coordinates::num_max_dimensions; ++d) { - if(d == dimension) + if (d == dimension) { int start = _dims[d].start(); int end = _dims[d].end(); @@ -207,7 +221,7 @@ inline Window Window::split_window(size_t dimension, size_t id, size_t total) co int it_start = work * id; - if(int(id) < rem) + if (int(id) < rem) { ++work; it_start += id; @@ -234,18 +248,18 @@ inline Window Window::split_window(size_t dimension, size_t id, size_t total) co template <unsigned int window_dimension> inline bool Window::slide_window_slice(Window &slice) const { - for(unsigned int n = window_dimension; n < Coordinates::num_max_dimensions; ++n) + for (unsigned int n = window_dimension; n < Coordinates::num_max_dimensions; ++n) { // Did we reach the end of this dimension? 
const int v = slice._dims[n].start() + 1; - if(v < _dims[n].end()) + if (v < _dims[n].end()) { // No: increment slice._dims[n] = Dimension(v, v + 1, 1); // Reset lower dimensions: - for(unsigned int lower = window_dimension; lower < n; ++lower) + for (unsigned int lower = window_dimension; lower < n; ++lower) { slice._dims[lower] = Dimension(_dims[lower].start(), _dims[lower].start() + 1, 1); } @@ -258,14 +272,14 @@ inline bool Window::slide_window_slice(Window &slice) const } template <unsigned int window_dimension> -inline Window Window::first_slice_window() const +inline Window Window::first_slice_window() const { Window slice; std::copy_n(_dims.begin(), window_dimension, slice._dims.begin()); //Initialise higher dimensions to be the first slice. - for(unsigned int n = window_dimension; n < Coordinates::num_max_dimensions; ++n) + for (unsigned int n = window_dimension; n < Coordinates::num_max_dimensions; ++n) { slice._dims[n] = Dimension(_dims[n].start(), _dims[n].start() + 1, 1); } @@ -275,7 +289,7 @@ inline Window Window::first_slice_window() const inline void Window::use_tensor_dimensions(const TensorShape &shape, size_t first_dimension) { - for(unsigned int n = first_dimension; n < shape.num_dimensions(); ++n) + for (unsigned int n = first_dimension; n < shape.num_dimensions(); ++n) { set(n, Window::Dimension(0, std::max(shape[n], static_cast<size_t>(1)))); } @@ -284,7 +298,7 @@ inline void Window::use_tensor_dimensions(const TensorShape &shape, size_t first inline TensorShape Window::shape() const { TensorShape shape; - for(size_t d = 0; d < TensorShape::num_max_dimensions; ++d) + for (size_t d = 0; d < TensorShape::num_max_dimensions; ++d) { shape.set(d, (_dims[d].end() - _dims[d].start()) / _dims[d].step()); } @@ -294,7 +308,7 @@ inline TensorShape Window::shape() const inline size_t Window::num_iterations_total() const { size_t total = 1; - for(size_t d = 0; d < Coordinates::num_max_dimensions; ++d) + for (size_t d = 0; d < 
Coordinates::num_max_dimensions; ++d) { total *= num_iterations(d); } @@ -305,4 +319,11 @@ inline void swap(Window &lhs, Window &rhs) { lhs._dims.swap(rhs._dims); } + +inline bool operator==(const Window &lhs, const Window &rhs) +{ + return (lhs._dims == rhs._dims) && (lhs._is_broadcasted == rhs._is_broadcasted); +} } // namespace arm_compute + +#endif // ACL_ARM_COMPUTE_CORE_WINDOW_INL diff --git a/arm_compute/core/WindowIterator.h b/arm_compute/core/WindowIterator.h index c15a50cf47..29302c410a 100644 --- a/arm_compute/core/WindowIterator.h +++ b/arm_compute/core/WindowIterator.h @@ -28,10 +28,6 @@ #include "arm_compute/core/ITensor.h" #include "arm_compute/core/Window.h" -//FIXME: Delete the "ARM_COMPUTE_PRINTF" before the release. In the meantime it's probably going to be useful to debug -//#define ARM_COMPUTE_PRINTF printf -#define ARM_COMPUTE_PRINTF(...) - namespace arm_compute { /** Convert an offset in window steps into absolute coordinates. @@ -44,7 +40,7 @@ namespace arm_compute inline Coordinates convert_window_coord_to_position(const Window &w, const Coordinates &offset) { Coordinates position; - for(unsigned int i = 0; i < Coordinates::num_max_dimensions; ++i) + for (unsigned int i = 0; i < Coordinates::num_max_dimensions; ++i) { position.set(i, w[i].start() + offset[i] * w[i].step()); } @@ -168,16 +164,14 @@ public: template <typename M> void iterate_3D(M &&on_new_row_size) { - while(_end.z() != _position.z()) + while (_end.z() != _position.z()) { - ARM_COMPUTE_PRINTF("New slice %d\n", _position.z()); iterate_2D_internal(on_new_row_size, _w.x().end() - _w.x().step(), _w.y().end() - _w.y().step()); _position[2] += _w.z().step(); _position[1] = _w.y().start(); _position[0] = _w.x().start(); } // Left over: - ARM_COMPUTE_PRINTF("Left over slice\n"); iterate_2D(on_new_row_size); } @@ -217,29 +211,25 @@ private: void iterate_2D_internal(M &&on_new_row_size, int end_x, int end_y) { //Is there more than one row to process ? 
- if(end_y == _position.y()) + if (end_y == _position.y()) { - // Single row: - ARM_COMPUTE_PRINTF("Partial row only\n"); // Both start and end belong to the same row: iterate_over_dim0(end_x + _w.x().step(), on_new_row_size); } else { // Do we start from the beginning of the row ? - if(_w.x().start() != _position.x()) + if (_w.x().start() != _position.x()) { //Start in the middle of a row: process left-over X - ARM_COMPUTE_PRINTF("Partial row first\n"); iterate_over_dim0(_w.x().end(), on_new_row_size); _position[1] += _w.y().step(); } //Middle rows bool no_leftover = end_x + _w.x().step() == _w.x().end(); - if(no_leftover) + if (no_leftover) { - ARM_COMPUTE_PRINTF("no left over\n"); //Switch to full row size: on_new_row_size(_w[0].start(), _w.x().end()); // Shouldn't be possible to reach that point and not have at least one entire row to process @@ -249,17 +239,14 @@ private: } else { - ARM_COMPUTE_PRINTF("with left over\n"); // Are there full rows to process ? - if(_position[1] != end_y) + if (_position[1] != end_y) { - ARM_COMPUTE_PRINTF("full rows\n"); //Switch to full row size: on_new_row_size(_w[0].start(), _w.x().end()); iterate_over_dim1(end_y); } - ARM_COMPUTE_PRINTF("Final leftover\n"); //Leftover end x _position[0] = _w.x().start(); iterate_over_dim0(end_x + _w.x().step(), on_new_row_size); @@ -273,7 +260,7 @@ private: */ void iterate_over_dim1(int end) { - for(; _position[1] != end; _position[1] += _w[1].step()) + for (; _position[1] != end; _position[1] += _w[1].step()) { _position[0] = _w[0].start(); iterate_over_dim0(_w[0].end()); @@ -298,10 +285,9 @@ private: */ void iterate_over_dim0(int end) { - ARM_COMPUTE_PRINTF("X [%d, %d, %d]\n", _position.x(), end, _w[0].step()); // Both start and end belong to the same row: ARM_COMPUTE_ERROR_ON(_position[0] > end); - for(; _position.x() < end; _position[0] += _w[0].step()) + for (; _position.x() < end; _position[0] += _w[0].step()) { _lambda_function(_position); } @@ -323,9 +309,10 @@ private: * @return A 
WindowIterator object. */ template <typename L> -WindowIterator<L> create_window_iterator(const Window &w, const Coordinates &start, const Coordinates &end, L &&lambda_function) +WindowIterator<L> +create_window_iterator(const Window &w, const Coordinates &start, const Coordinates &end, L &&lambda_function) { return WindowIterator<L>(w, start, end, std::move(lambda_function)); } -} +} // namespace arm_compute #endif /*ARM_COMPUTE_WINDOW_ITERATOR_H*/ diff --git a/arm_compute/core/experimental/Types.h b/arm_compute/core/experimental/Types.h index 92ece460dc..63a3a1a1ec 100644 --- a/arm_compute/core/experimental/Types.h +++ b/arm_compute/core/experimental/Types.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021 Arm Limited. + * Copyright (c) 2020-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_EXPERIMENTAL_TYPES_H -#define ARM_COMPUTE_EXPERIMENTAL_TYPES_H +#ifndef ACL_ARM_COMPUTE_CORE_EXPERIMENTAL_TYPES_H +#define ACL_ARM_COMPUTE_CORE_EXPERIMENTAL_TYPES_H #include "arm_compute/core/ITensorPack.h" #include "arm_compute/core/TensorShape.h" @@ -39,15 +39,26 @@ enum TensorType : int32_t { ACL_UNKNOWN = -1, ACL_SRC_DST = 0, + + // Src ACL_SRC = 0, ACL_SRC_0 = 0, ACL_SRC_1 = 1, ACL_SRC_2 = 2, + ACL_SRC_3 = 3, + ACL_SRC_4 = 4, + ACL_SRC_5 = 5, + ACL_SRC_6 = 6, + ACL_SRC_END = 6, + + // Dst ACL_DST = 30, ACL_DST_0 = 30, ACL_DST_1 = 31, ACL_DST_2 = 32, - ACL_BIAS = ACL_SRC_2, + ACL_DST_END = 32, + + // Aux ACL_INT = 50, ACL_INT_0 = 50, ACL_INT_1 = 51, @@ -56,7 +67,17 @@ enum TensorType : int32_t ACL_INT_4 = 54, ACL_SRC_VEC = 256, ACL_DST_VEC = 512, - ACL_INT_VEC = 1024 + ACL_INT_VEC = 1024, + + // Aliasing Types + // Conv etc + ACL_BIAS = ACL_SRC_2, + + // Gemm + ACL_VEC_ROW_SUM = ACL_SRC_3, + ACL_VEC_COL_SUM = ACL_SRC_4, + ACL_SHIFTS = ACL_SRC_5, + ACL_MULTIPLIERS = ACL_SRC_6, }; namespace experimental @@ -71,27 +92,35 @@ struct MemoryInfo { 
MemoryInfo() = default; - MemoryInfo(int slot, size_t size, size_t alignment = 0) noexcept - : slot(slot), - size(size), - alignment(alignment) + MemoryInfo(int slot, size_t size, size_t alignment = 0) noexcept : slot(slot), size(size), alignment(alignment) { } MemoryInfo(int slot, MemoryLifetime lifetime, size_t size, size_t alignment = 0) noexcept - : slot(slot), - lifetime(lifetime), - size(size), - alignment(alignment) + : slot(slot), lifetime(lifetime), size(size), alignment(alignment) { } - int slot{ ACL_UNKNOWN }; - MemoryLifetime lifetime{ MemoryLifetime::Temporary }; - size_t size{ 0 }; - size_t alignment{ 64 }; + + bool merge(int slot, size_t new_size, size_t new_alignment = 0) noexcept + { + if (slot != this->slot) + { + return false; + } + + size = std::max(size, new_size); + alignment = std::max(alignment, new_alignment); + + return true; + } + + int slot{ACL_UNKNOWN}; + MemoryLifetime lifetime{MemoryLifetime::Temporary}; + size_t size{0}; + size_t alignment{64}; }; using MemoryRequirements = std::vector<MemoryInfo>; } // namespace experimental } // namespace arm_compute -#endif /* ARM_COMPUTE_EXPERIMENTAL_TYPES_H */ +#endif // ACL_ARM_COMPUTE_CORE_EXPERIMENTAL_TYPES_H diff --git a/arm_compute/core/utils/ActivationFunctionUtils.h b/arm_compute/core/utils/ActivationFunctionUtils.h new file mode 100644 index 0000000000..c988efa256 --- /dev/null +++ b/arm_compute/core/utils/ActivationFunctionUtils.h @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2016-2023 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CORE_UTILS_ACTIVATIONFUNCTIONUTILS_H +#define ARM_COMPUTE_CORE_UTILS_ACTIVATIONFUNCTIONUTILS_H + +#include "arm_compute/core/Types.h" + +#include <string> + +namespace arm_compute +{ +/** Translates a given activation function to a string. + * + * @param[in] act @ref ActivationLayerInfo::ActivationFunction to be translated to string. + * + * @return The string describing the activation function. + */ +const std::string &string_from_activation_func(const ActivationFunction &act); +} // namespace arm_compute +#endif /*ARM_COMPUTE_CORE_UTILS_ACTIVATIONFUNCTIONUTILS_H */ diff --git a/arm_compute/core/utils/DataLayoutUtils.h b/arm_compute/core/utils/DataLayoutUtils.h new file mode 100644 index 0000000000..61839c9f91 --- /dev/null +++ b/arm_compute/core/utils/DataLayoutUtils.h @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2016-2023 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CORE_UTILS_DATALAYOUTUTILS_H +#define ARM_COMPUTE_CORE_UTILS_DATALAYOUTUTILS_H +#include "arm_compute/core/Types.h" + +#include <string> + +namespace arm_compute +{ +/** Convert a data layout identity into a string. + * + * @param[in] dl @ref DataLayout to be translated to string. + * + * @return The string describing the data layout. + */ +const std::string &string_from_data_layout(DataLayout dl); +} // namespace arm_compute +#endif /*ARM_COMPUTE_CORE_UTILS_DATALAYOUTUTILS_H */ diff --git a/arm_compute/core/utils/DataTypeUtils.h b/arm_compute/core/utils/DataTypeUtils.h new file mode 100644 index 0000000000..6fabb19b64 --- /dev/null +++ b/arm_compute/core/utils/DataTypeUtils.h @@ -0,0 +1,549 @@ +/* + * Copyright (c) 2016-2024 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ACL_ARM_COMPUTE_CORE_UTILS_DATATYPEUTILS_H +#define ACL_ARM_COMPUTE_CORE_UTILS_DATATYPEUTILS_H + +#include "arm_compute/core/PixelValue.h" +#include "arm_compute/core/Types.h" + +namespace arm_compute +{ +/** The size in bytes of the data type + * + * @param[in] data_type Input data type + * + * @return The size in bytes of the data type + */ +inline size_t data_size_from_type(DataType data_type) +{ + switch (data_type) + { + case DataType::U8: + case DataType::S8: + case DataType::QSYMM8: + case DataType::QASYMM8: + case DataType::QASYMM8_SIGNED: + case DataType::QSYMM8_PER_CHANNEL: + return 1; + case DataType::U16: + case DataType::S16: + case DataType::QSYMM16: + case DataType::QASYMM16: + case DataType::BFLOAT16: + case DataType::F16: + return 2; + case DataType::F32: + case DataType::U32: + case DataType::S32: + return 4; + case DataType::F64: + case DataType::U64: + case DataType::S64: + return 8; + case DataType::SIZET: + return sizeof(size_t); + default: + ARM_COMPUTE_ERROR("Invalid data type"); + return 0; + } +} + +/** The size in bytes of the data type + * + * @param[in] dt Input data type + * + * @return The size in bytes of the data type + */ +inline size_t element_size_from_data_type(DataType dt) +{ + switch (dt) + { + case DataType::S8: + case DataType::U8: + case DataType::QSYMM8: + case DataType::QASYMM8: + case DataType::QASYMM8_SIGNED: + case DataType::QSYMM8_PER_CHANNEL: + return 1; + case DataType::U16: + case DataType::S16: + case DataType::QSYMM16: + case DataType::QASYMM16: + case DataType::BFLOAT16: + case DataType::F16: + return 2; + case DataType::U32: + case DataType::S32: + case DataType::F32: + return 4; + case DataType::U64: + case DataType::S64: + return 8; + default: + ARM_COMPUTE_ERROR("Undefined element size for given data type"); + return 0; + } +} + +/** Return the data type used by a given single-planar pixel format + * + * @param[in] format Input format + * + * @return The size in bytes of the pixel format + */ 
+inline DataType data_type_from_format(Format format) +{ + switch (format) + { + case Format::U8: + case Format::UV88: + case Format::RGB888: + case Format::RGBA8888: + case Format::YUYV422: + case Format::UYVY422: + return DataType::U8; + case Format::U16: + return DataType::U16; + case Format::S16: + return DataType::S16; + case Format::U32: + return DataType::U32; + case Format::S32: + return DataType::S32; + case Format::BFLOAT16: + return DataType::BFLOAT16; + case Format::F16: + return DataType::F16; + case Format::F32: + return DataType::F32; + //Doesn't make sense for planar formats: + case Format::NV12: + case Format::NV21: + case Format::IYUV: + case Format::YUV444: + default: + ARM_COMPUTE_ERROR("Not supported data_type for given format"); + return DataType::UNKNOWN; + } +} + +/** Return the promoted data type of a given data type. + * + * @note If promoted data type is not supported an error will be thrown + * + * @param[in] dt Data type to get the promoted type of. + * + * @return Promoted data type + */ +inline DataType get_promoted_data_type(DataType dt) +{ + switch (dt) + { + case DataType::U8: + return DataType::U16; + case DataType::S8: + return DataType::S16; + case DataType::U16: + return DataType::U32; + case DataType::S16: + return DataType::S32; + case DataType::QSYMM8: + case DataType::QASYMM8: + case DataType::QASYMM8_SIGNED: + case DataType::QSYMM8_PER_CHANNEL: + case DataType::QSYMM16: + case DataType::QASYMM16: + case DataType::BFLOAT16: + case DataType::F16: + case DataType::U32: + case DataType::S32: + case DataType::F32: + ARM_COMPUTE_ERROR("Unsupported data type promotions!"); + default: + ARM_COMPUTE_ERROR("Undefined data type!"); + } + return DataType::UNKNOWN; +} + +/** Compute the mininum and maximum values a data type can take + * + * @param[in] dt Data type to get the min/max bounds of + * + * @return A tuple (min,max) with the minimum and maximum values respectively wrapped in PixelValue. 
+ */ +inline std::tuple<PixelValue, PixelValue> get_min_max(DataType dt) +{ + PixelValue min{}; + PixelValue max{}; + switch (dt) + { + case DataType::U8: + case DataType::QASYMM8: + { + min = PixelValue(static_cast<int32_t>(std::numeric_limits<uint8_t>::lowest())); + max = PixelValue(static_cast<int32_t>(std::numeric_limits<uint8_t>::max())); + break; + } + case DataType::S8: + case DataType::QSYMM8: + case DataType::QASYMM8_SIGNED: + case DataType::QSYMM8_PER_CHANNEL: + { + min = PixelValue(static_cast<int32_t>(std::numeric_limits<int8_t>::lowest())); + max = PixelValue(static_cast<int32_t>(std::numeric_limits<int8_t>::max())); + break; + } + case DataType::U16: + case DataType::QASYMM16: + { + min = PixelValue(static_cast<int32_t>(std::numeric_limits<uint16_t>::lowest())); + max = PixelValue(static_cast<int32_t>(std::numeric_limits<uint16_t>::max())); + break; + } + case DataType::S16: + case DataType::QSYMM16: + { + min = PixelValue(static_cast<int32_t>(std::numeric_limits<int16_t>::lowest())); + max = PixelValue(static_cast<int32_t>(std::numeric_limits<int16_t>::max())); + break; + } + case DataType::U32: + { + min = PixelValue(std::numeric_limits<uint32_t>::lowest()); + max = PixelValue(std::numeric_limits<uint32_t>::max()); + break; + } + case DataType::S32: + { + min = PixelValue(std::numeric_limits<int32_t>::lowest()); + max = PixelValue(std::numeric_limits<int32_t>::max()); + break; + } + case DataType::BFLOAT16: + { + min = PixelValue(bfloat16::lowest()); + max = PixelValue(bfloat16::max()); + break; + } + case DataType::F16: + { + min = PixelValue(std::numeric_limits<half>::lowest()); + max = PixelValue(std::numeric_limits<half>::max()); + break; + } + case DataType::F32: + { + min = PixelValue(std::numeric_limits<float>::lowest()); + max = PixelValue(std::numeric_limits<float>::max()); + break; + } + default: + ARM_COMPUTE_ERROR("Undefined data type!"); + } + return std::make_tuple(min, max); +} + +/** Convert a data type identity into a string. 
+ * + * @param[in] dt @ref DataType to be translated to string. + * + * @return The string describing the data type. + */ +const std::string &string_from_data_type(DataType dt); + +/** Convert a string to DataType + * + * @param[in] name The name of the data type + * + * @return DataType + */ +DataType data_type_from_name(const std::string &name); + +/** Input Stream operator for @ref DataType + * + * @param[in] stream Stream to parse + * @param[out] data_type Output data type + * + * @return Updated stream + */ +inline ::std::istream &operator>>(::std::istream &stream, DataType &data_type) +{ + std::string value; + stream >> value; + data_type = data_type_from_name(value); + return stream; +} + +/** Check if a given data type is of floating point type + * + * @param[in] dt Input data type. + * + * @return True if data type is of floating point type, else false. + */ +inline bool is_data_type_float(DataType dt) +{ + switch (dt) + { + case DataType::F16: + case DataType::F32: + return true; + default: + return false; + } +} + +/** Check if a given data type is of quantized type + * + * @note Quantized is considered a super-set of fixed-point and asymmetric data types. + * + * @param[in] dt Input data type. + * + * @return True if data type is of quantized type, else false. + */ +inline bool is_data_type_quantized(DataType dt) +{ + switch (dt) + { + case DataType::QSYMM8: + case DataType::QASYMM8: + case DataType::QASYMM8_SIGNED: + case DataType::QSYMM8_PER_CHANNEL: + case DataType::QSYMM16: + case DataType::QASYMM16: + return true; + default: + return false; + } +} + +/** Check if a given data type is of asymmetric quantized type + * + * @param[in] dt Input data type. + * + * @return True if data type is of asymmetric quantized type, else false. 
+ */ +inline bool is_data_type_quantized_asymmetric(DataType dt) +{ + switch (dt) + { + case DataType::QASYMM8: + case DataType::QASYMM8_SIGNED: + case DataType::QASYMM16: + return true; + default: + return false; + } +} + +/** Check if a given data type is of asymmetric quantized signed type + * + * @param[in] dt Input data type. + * + * @return True if data type is of asymmetric quantized signed type, else false. + */ +inline bool is_data_type_quantized_asymmetric_signed(DataType dt) +{ + switch (dt) + { + case DataType::QASYMM8_SIGNED: + return true; + default: + return false; + } +} + +/** Check if a given data type is of 8-bit asymmetric quantized signed type + * + * @param[in] dt Input data type. + * + * @return True if data type is of 8-bit asymmetric quantized signed type, else false. + */ +inline bool is_data_type_quantized_asymmetric_char(DataType dt) +{ + switch (dt) + { + case DataType::QASYMM8_SIGNED: + case DataType::QASYMM8: + return true; + default: + return false; + } +} + +/** Check if a given data type is of symmetric quantized type + * + * @param[in] dt Input data type. + * + * @return True if data type is of symmetric quantized type, else false. + */ +inline bool is_data_type_quantized_symmetric(DataType dt) +{ + switch (dt) + { + case DataType::QSYMM8: + case DataType::QSYMM8_PER_CHANNEL: + case DataType::QSYMM16: + return true; + default: + return false; + } +} + +/** Check if a given data type is of per channel type + * + * @param[in] dt Input data type. + * + * @return True if data type is of per channel type, else false. 
+ */ +inline bool is_data_type_quantized_per_channel(DataType dt) +{ + switch (dt) + { + case DataType::QSYMM8_PER_CHANNEL: + return true; + default: + return false; + } +} + +/** Returns true if the value can be represented by the given data type + * + * @param[in] val value to be checked + * @param[in] dt data type that is checked + * @param[in] qinfo (Optional) quantization info if the data type is QASYMM8 + * + * @return true if the data type can hold the value. + */ +template <typename T> +bool check_value_range(T val, DataType dt, QuantizationInfo qinfo = QuantizationInfo()) +{ + switch (dt) + { + case DataType::U8: + { + const auto val_u8 = static_cast<uint8_t>(val); + return ((val_u8 == val) && val >= std::numeric_limits<uint8_t>::lowest() && + val <= std::numeric_limits<uint8_t>::max()); + } + case DataType::QASYMM8: + { + double min = static_cast<double>(dequantize_qasymm8(0, qinfo)); + double max = static_cast<double>(dequantize_qasymm8(std::numeric_limits<uint8_t>::max(), qinfo)); + return ((double)val >= min && (double)val <= max); + } + case DataType::S8: + { + const auto val_s8 = static_cast<int8_t>(val); + return ((val_s8 == val) && val >= std::numeric_limits<int8_t>::lowest() && + val <= std::numeric_limits<int8_t>::max()); + } + case DataType::U16: + { + const auto val_u16 = static_cast<uint16_t>(val); + return ((val_u16 == val) && val >= std::numeric_limits<uint16_t>::lowest() && + val <= std::numeric_limits<uint16_t>::max()); + } + case DataType::S16: + { + const auto val_s16 = static_cast<int16_t>(val); + return ((val_s16 == val) && val >= std::numeric_limits<int16_t>::lowest() && + val <= std::numeric_limits<int16_t>::max()); + } + case DataType::U32: + { + const auto val_d64 = static_cast<double>(val); + const auto val_u32 = static_cast<uint32_t>(val); + return ((val_u32 == val_d64) && val_d64 >= std::numeric_limits<uint32_t>::lowest() && + val_d64 <= std::numeric_limits<uint32_t>::max()); + } + case DataType::S32: + { + const auto val_d64 = 
static_cast<double>(val); + const auto val_s32 = static_cast<int32_t>(val); + return ((val_s32 == val_d64) && val_d64 >= std::numeric_limits<int32_t>::lowest() && + val_d64 <= std::numeric_limits<int32_t>::max()); + } + case DataType::BFLOAT16: + return (val >= bfloat16::lowest() && val <= bfloat16::max()); + case DataType::F16: + return (val >= std::numeric_limits<half>::lowest() && val <= std::numeric_limits<half>::max()); + case DataType::F32: + return (val >= std::numeric_limits<float>::lowest() && val <= std::numeric_limits<float>::max()); + default: + ARM_COMPUTE_ERROR("Data type not supported"); + return false; + } +} + +/** Returns the suffix string of CPU kernel implementation names based on the given data type + * + * @param[in] data_type The data type the CPU kernel implemetation uses + * + * @return the suffix string of CPU kernel implementations + */ +inline std::string cpu_impl_dt(const DataType &data_type) +{ + std::string ret = ""; + + switch (data_type) + { + case DataType::F32: + ret = "fp32"; + break; + case DataType::F16: + ret = "fp16"; + break; + case DataType::U8: + ret = "u8"; + break; + case DataType::S16: + ret = "s16"; + break; + case DataType::S32: + ret = "s32"; + break; + case DataType::QASYMM8: + ret = "qu8"; + break; + case DataType::QASYMM8_SIGNED: + ret = "qs8"; + break; + case DataType::QSYMM16: + ret = "qs16"; + break; + case DataType::QSYMM8_PER_CHANNEL: + ret = "qp8"; + break; + case DataType::BFLOAT16: + ret = "bf16"; + break; + default: + ARM_COMPUTE_ERROR("Unsupported."); + } + + return ret; +} + +} // namespace arm_compute +#endif // ACL_ARM_COMPUTE_CORE_UTILS_DATATYPEUTILS_H diff --git a/arm_compute/core/utils/FormatUtils.h b/arm_compute/core/utils/FormatUtils.h new file mode 100644 index 0000000000..a8e96bd361 --- /dev/null +++ b/arm_compute/core/utils/FormatUtils.h @@ -0,0 +1,344 @@ +/* + * Copyright (c) 2016-2023 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_CORE_UTILS_FORMATUTILS_H +#define ARM_COMPUTE_CORE_UTILS_FORMATUTILS_H + +#include "arm_compute/core/CoreTypes.h" +#include "arm_compute/core/Error.h" + +namespace arm_compute +{ +/** The size in bytes of the pixel format + * + * @param[in] format Input format + * + * @return The size in bytes of the pixel format + */ +inline size_t pixel_size_from_format(Format format) +{ + switch (format) + { + case Format::U8: + return 1; + case Format::U16: + case Format::S16: + case Format::BFLOAT16: + case Format::F16: + case Format::UV88: + case Format::YUYV422: + case Format::UYVY422: + return 2; + case Format::RGB888: + return 3; + case Format::RGBA8888: + return 4; + case Format::U32: + case Format::S32: + case Format::F32: + return 4; + //Doesn't make sense for planar formats: + case Format::NV12: + case Format::NV21: + case Format::IYUV: + case Format::YUV444: + default: + ARM_COMPUTE_ERROR("Undefined pixel size for given format"); + return 0; + } +} + +/** Return the plane index of a given channel given an input format. 
+ * + * @param[in] format Input format + * @param[in] channel Input channel + * + * @return The plane index of the specific channel of the specific format + */ +inline int plane_idx_from_channel(Format format, Channel channel) +{ + switch (format) + { + // Single planar formats have a single plane + case Format::U8: + case Format::U16: + case Format::S16: + case Format::U32: + case Format::S32: + case Format::BFLOAT16: + case Format::F16: + case Format::F32: + case Format::UV88: + case Format::RGB888: + case Format::RGBA8888: + case Format::YUYV422: + case Format::UYVY422: + return 0; + // Multi planar formats + case Format::NV12: + case Format::NV21: + { + // Channel U and V share the same plane of format UV88 + switch (channel) + { + case Channel::Y: + return 0; + case Channel::U: + case Channel::V: + return 1; + default: + ARM_COMPUTE_ERROR("Not supported channel"); + return 0; + } + } + case Format::IYUV: + case Format::YUV444: + { + switch (channel) + { + case Channel::Y: + return 0; + case Channel::U: + return 1; + case Channel::V: + return 2; + default: + ARM_COMPUTE_ERROR("Not supported channel"); + return 0; + } + } + default: + ARM_COMPUTE_ERROR("Not supported format"); + return 0; + } +} + +/** Return the channel index of a given channel given an input format. 
+ * + * @param[in] format Input format + * @param[in] channel Input channel + * + * @return The channel index of the specific channel of the specific format + */ +inline int channel_idx_from_format(Format format, Channel channel) +{ + switch (format) + { + case Format::RGB888: + { + switch (channel) + { + case Channel::R: + return 0; + case Channel::G: + return 1; + case Channel::B: + return 2; + default: + ARM_COMPUTE_ERROR("Not supported channel"); + return 0; + } + } + case Format::RGBA8888: + { + switch (channel) + { + case Channel::R: + return 0; + case Channel::G: + return 1; + case Channel::B: + return 2; + case Channel::A: + return 3; + default: + ARM_COMPUTE_ERROR("Not supported channel"); + return 0; + } + } + case Format::YUYV422: + { + switch (channel) + { + case Channel::Y: + return 0; + case Channel::U: + return 1; + case Channel::V: + return 3; + default: + ARM_COMPUTE_ERROR("Not supported channel"); + return 0; + } + } + case Format::UYVY422: + { + switch (channel) + { + case Channel::Y: + return 1; + case Channel::U: + return 0; + case Channel::V: + return 2; + default: + ARM_COMPUTE_ERROR("Not supported channel"); + return 0; + } + } + case Format::NV12: + { + switch (channel) + { + case Channel::Y: + return 0; + case Channel::U: + return 0; + case Channel::V: + return 1; + default: + ARM_COMPUTE_ERROR("Not supported channel"); + return 0; + } + } + case Format::NV21: + { + switch (channel) + { + case Channel::Y: + return 0; + case Channel::U: + return 1; + case Channel::V: + return 0; + default: + ARM_COMPUTE_ERROR("Not supported channel"); + return 0; + } + } + case Format::YUV444: + case Format::IYUV: + { + switch (channel) + { + case Channel::Y: + return 0; + case Channel::U: + return 0; + case Channel::V: + return 0; + default: + ARM_COMPUTE_ERROR("Not supported channel"); + return 0; + } + } + default: + ARM_COMPUTE_ERROR("Not supported format"); + return 0; + } +} + +/** Return the number of planes for a given format + * + * @param[in] 
format Input format + * + * @return The number of planes for a given image format. + */ +inline size_t num_planes_from_format(Format format) +{ + switch (format) + { + case Format::U8: + case Format::S16: + case Format::U16: + case Format::S32: + case Format::U32: + case Format::BFLOAT16: + case Format::F16: + case Format::F32: + case Format::RGB888: + case Format::RGBA8888: + case Format::YUYV422: + case Format::UYVY422: + return 1; + case Format::NV12: + case Format::NV21: + return 2; + case Format::IYUV: + case Format::YUV444: + return 3; + default: + ARM_COMPUTE_ERROR("Not supported format"); + return 0; + } +} + +/** Return the number of channels for a given single-planar pixel format + * + * @param[in] format Input format + * + * @return The number of channels for a given image format. + */ +inline size_t num_channels_from_format(Format format) +{ + switch (format) + { + case Format::U8: + case Format::U16: + case Format::S16: + case Format::U32: + case Format::S32: + case Format::BFLOAT16: + case Format::F16: + case Format::F32: + return 1; + // Because the U and V channels are subsampled + // these formats appear like having only 2 channels: + case Format::YUYV422: + case Format::UYVY422: + return 2; + case Format::UV88: + return 2; + case Format::RGB888: + return 3; + case Format::RGBA8888: + return 4; + //Doesn't make sense for planar formats: + case Format::NV12: + case Format::NV21: + case Format::IYUV: + case Format::YUV444: + default: + return 0; + } +} + +/** Convert a tensor format into a string. + * + * @param[in] format @ref Format to be translated to string. + * + * @return The string describing the format. 
+ */ +const std::string &string_from_format(Format format); +} // namespace arm_compute +#endif /*ARM_COMPUTE_CORE_UTILS_FORMATUTILS_H */ diff --git a/arm_compute/core/utils/InterpolationPolicyUtils.h b/arm_compute/core/utils/InterpolationPolicyUtils.h new file mode 100644 index 0000000000..8d4ae4321c --- /dev/null +++ b/arm_compute/core/utils/InterpolationPolicyUtils.h @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2016-2023 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CORE_UTILS_INTERPOLATIONPOLICYUTILS_H +#define ARM_COMPUTE_CORE_UTILS_INTERPOLATIONPOLICYUTILS_H + +#include "arm_compute/core/Types.h" + +#include <string> + +namespace arm_compute +{ +/** Translates a given interpolation policy to a string. + * + * @param[in] policy @ref InterpolationPolicy to be translated to string. + * + * @return The string describing the interpolation policy. 
+ */ +const std::string &string_from_interpolation_policy(InterpolationPolicy policy); +} // namespace arm_compute +#endif /*ARM_COMPUTE_CORE_UTILS_INTERPOLATIONPOLICYUTILS_H */ diff --git a/arm_compute/core/utils/StringUtils.h b/arm_compute/core/utils/StringUtils.h new file mode 100644 index 0000000000..c13cbaa334 --- /dev/null +++ b/arm_compute/core/utils/StringUtils.h @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2016-2023 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CORE_UTILS_STRINGUTILS_H +#define ARM_COMPUTE_CORE_UTILS_STRINGUTILS_H + +#include <string> +#include <vector> + +namespace arm_compute +{ +/** Lower a given string. + * + * @param[in] val Given string to lower. + * + * @return The lowered string + */ +std::string lower_string(const std::string &val); + +/** Raise a given string to upper case + * + * @param[in] val Given string to lower. 
+ * + * @return The upper case string + */ +std::string upper_string(const std::string &val); + +/** Create a string with the float in full precision. + * + * @param val Floating point value + * + * @return String with the floating point value. + */ +std::string float_to_string_with_full_precision(float val); + +/** Join a sequence of strings with separator @p sep + * + * @param[in] strings Strings to join + * @param[in] sep Separator to join consecutive strings in the sequence + * + * @return std::string + */ +std::string join(const std::vector<std::string> strings, const std::string &sep); +} // namespace arm_compute +#endif /*ARM_COMPUTE_CORE_UTILS_STRINGUTILS_H */ diff --git a/arm_compute/core/utils/helpers/AdjustVecSize.h b/arm_compute/core/utils/helpers/AdjustVecSize.h new file mode 100644 index 0000000000..842e3b57d6 --- /dev/null +++ b/arm_compute/core/utils/helpers/AdjustVecSize.h @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2016-2023 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_UTILS_ADJUSTVECSIZE_H +#define ARM_COMPUTE_UTILS_ADJUSTVECSIZE_H + +#include "arm_compute/core/Error.h" + +namespace arm_compute +{ +/** Returns the adjusted vector size in case it is less than the input's first dimension, getting rounded down to its closest valid vector size + * + * @param[in] vec_size vector size to be adjusted + * @param[in] dim0 size of the first dimension + * + * @return the number of element processed along the X axis per thread + */ +inline unsigned int adjust_vec_size(unsigned int vec_size, size_t dim0) +{ + ARM_COMPUTE_ERROR_ON(vec_size > 16); + + if ((vec_size >= dim0) && (dim0 == 3)) + { + return dim0; + } + + while (vec_size > dim0) + { + vec_size >>= 1; + } + + return vec_size; +} +} // namespace arm_compute +#endif /*ARM_COMPUTE_UTILS_H */ diff --git a/arm_compute/core/utils/helpers/tensor_transform.h b/arm_compute/core/utils/helpers/tensor_transform.h index faa5b4433c..7a61fa192a 100644 --- a/arm_compute/core/utils/helpers/tensor_transform.h +++ b/arm_compute/core/utils/helpers/tensor_transform.h @@ -52,7 +52,8 @@ int calculate_stride_on_index(int index, Coordinates strides); * * @return Absolute start position of a given index */ -int calculate_start_on_index(TensorShape input_shape, int index, Coordinates starts, Coordinates strides, int32_t begin_mask); +int calculate_start_on_index( + TensorShape input_shape, int index, Coordinates starts, Coordinates strides, int32_t begin_mask); /** Returns the absolute end position of a given index for a strided slice operation * @@ -68,8 +69,13 @@ int calculate_start_on_index(TensorShape input_shape, int index, Coordinates sta * * @return Absolute end position of a given index */ -int 
calculate_end_on_index(TensorShape input_shape, int index, int start_on_index, Coordinates ends, Coordinates strides, - int32_t end_mask = 0, int32_t shrink_axis_mask = 0); +int calculate_end_on_index(TensorShape input_shape, + int index, + int start_on_index, + Coordinates ends, + Coordinates strides, + int32_t end_mask = 0, + int32_t shrink_axis_mask = 0); /** Calculate start, end and stride coordinates for a strided slice * @@ -87,8 +93,12 @@ int calculate_end_on_index(TensorShape input_shape, int index, int start_on_inde * @return A tuple with <Start,End,Strides> */ std::tuple<Coordinates, Coordinates, Coordinates> calculate_strided_slice_coords(TensorShape input_shape, - Coordinates starts, Coordinates ends, Coordinates strides, - int32_t begin_mask = 0, int32_t end_mask = 0, int32_t shrink_axis_mask = 0); + Coordinates starts, + Coordinates ends, + Coordinates strides, + int32_t begin_mask = 0, + int32_t end_mask = 0, + int32_t shrink_axis_mask = 0); /** Computes output shape of strided slice * @@ -109,9 +119,14 @@ std::tuple<Coordinates, Coordinates, Coordinates> calculate_strided_slice_coords * * @return The output tensor shape */ -TensorShape compute_strided_slice_output_shape(TensorShape input_shape, Coordinates starts, Coordinates ends, Coordinates strides, - int32_t begin_mask = 0, int32_t end_mask = 0, int32_t shrink_axis_mask = 0, - bool return_unshrinked = false); +TensorShape compute_strided_slice_output_shape(TensorShape input_shape, + Coordinates starts, + Coordinates ends, + Coordinates strides, + int32_t begin_mask = 0, + int32_t end_mask = 0, + int32_t shrink_axis_mask = 0, + bool return_unshrinked = false); /** Constructs end mask in case we want to perform a slice operation using the strided slice interface * @@ -122,7 +137,7 @@ TensorShape compute_strided_slice_output_shape(TensorShape input_shape, Coordina * @return End mask */ int32_t construct_slice_end_mask(Coordinates ends); -} // namespace tensor_tranform +} // namespace 
tensor_transform } // namespace helpers } // namespace arm_compute #endif /* ARM_COMPUTE_UTILS_HELPERS_TENSOR_TRANSFORM_H */ diff --git a/arm_compute/core/utils/logging/FilePrinter.h b/arm_compute/core/utils/logging/FilePrinter.h index 0e5b84f084..a865aadddb 100644 --- a/arm_compute/core/utils/logging/FilePrinter.h +++ b/arm_compute/core/utils/logging/FilePrinter.h @@ -24,9 +24,8 @@ #ifndef ARM_COMPUTE_LOGGING_FILE_PRINTER_H #define ARM_COMPUTE_LOGGING_FILE_PRINTER_H -#include "arm_compute/core/utils/logging/IPrinter.h" - #include "arm_compute/core/utils/io/FileHandler.h" +#include "arm_compute/core/utils/logging/IPrinter.h" namespace arm_compute { diff --git a/arm_compute/core/utils/logging/Helpers.h b/arm_compute/core/utils/logging/Helpers.h index 5f8b948592..c3c2f0f0b8 100644 --- a/arm_compute/core/utils/logging/Helpers.h +++ b/arm_compute/core/utils/logging/Helpers.h @@ -25,6 +25,7 @@ #define ARM_COMPUTE_LOGGING_HELPERS_H #include "arm_compute/core/utils/logging/Types.h" + #include "support/ToolchainSupport.h" #include <cstddef> @@ -45,7 +46,7 @@ namespace logging * @return The formatted string */ template <typename... Ts> -inline std::string string_with_format(const std::string &fmt, Ts &&... args) +inline std::string string_with_format(const std::string &fmt, Ts &&...args) { size_t size = support::cpp11::snprintf(nullptr, 0, fmt.c_str(), args...) 
+ 1; auto char_str = std::make_unique<char[]>(size); diff --git a/arm_compute/core/utils/logging/IPrinter.h b/arm_compute/core/utils/logging/IPrinter.h index 42dca58ea1..7fde4d9302 100644 --- a/arm_compute/core/utils/logging/IPrinter.h +++ b/arm_compute/core/utils/logging/IPrinter.h @@ -35,8 +35,7 @@ class Printer { public: /** Default Constructor */ - Printer() noexcept - : _mtx() + Printer() noexcept : _mtx() { } /** Prevent instances of this class from being copied */ diff --git a/arm_compute/core/utils/logging/LogMsgDecorators.h b/arm_compute/core/utils/logging/LogMsgDecorators.h index 9c9e62740f..66a8180e21 100644 --- a/arm_compute/core/utils/logging/LogMsgDecorators.h +++ b/arm_compute/core/utils/logging/LogMsgDecorators.h @@ -63,8 +63,7 @@ public: * * @param str Sting to append */ - StringDecorator(const std::string &str) - : _str(str) + StringDecorator(const std::string &str) : _str(str) { _str = angle_wrap_value(str); } @@ -103,7 +102,7 @@ private: auto time = std::chrono::system_clock::to_time_t(now); // TODO: use put_time for gcc > 4.9 - char buf[100] = { 0 }; + char buf[100] = {0}; std::strftime(buf, sizeof(buf), "%d-%m-%Y %I:%M:%S", std::localtime(&time)); return buf; } diff --git a/arm_compute/core/utils/logging/Logger.h b/arm_compute/core/utils/logging/Logger.h index 4fc9bb7dbf..608db39138 100644 --- a/arm_compute/core/utils/logging/Logger.h +++ b/arm_compute/core/utils/logging/Logger.h @@ -88,7 +88,7 @@ public: * @param[in] args Message arguments */ template <typename... Ts> - void log(LogLevel log_level, const std::string &fmt, Ts &&... args); + void log(LogLevel log_level, const std::string &fmt, Ts &&...args); /** Sets log level of the logger * * @warning Not thread-safe @@ -159,11 +159,11 @@ private: }; template <typename... Ts> -inline void Logger::log(LogLevel log_level, const std::string &fmt, Ts &&... args) +inline void Logger::log(LogLevel log_level, const std::string &fmt, Ts &&...args) { // Return if message shouldn't be logged // i.e. 
if log level does not match the logger's - if(!is_loggable(log_level)) + if (!is_loggable(log_level)) { return; } diff --git a/arm_compute/core/utils/logging/LoggerRegistry.h b/arm_compute/core/utils/logging/LoggerRegistry.h index 7c9931a260..4e52a10935 100644 --- a/arm_compute/core/utils/logging/LoggerRegistry.h +++ b/arm_compute/core/utils/logging/LoggerRegistry.h @@ -27,6 +27,7 @@ #include "arm_compute/core/utils/logging/Logger.h" #include "arm_compute/core/utils/logging/Printers.h" #include "arm_compute/core/utils/logging/Types.h" + #include "support/Mutex.h" #include <memory> @@ -54,8 +55,9 @@ public: * @param[in] log_level Logger's log level. Defaults to INFO * @param[in] printers Printers to attach to the system loggers. Defaults with a @ref StdPrinter. */ - void create_logger(const std::string &name, LogLevel log_level = LogLevel::INFO, - const std::vector<std::shared_ptr<Printer>> &printers = { std::make_shared<StdPrinter>() }); + void create_logger(const std::string &name, + LogLevel log_level = LogLevel::INFO, + const std::vector<std::shared_ptr<Printer>> &printers = {std::make_shared<StdPrinter>()}); /** Remove a logger * * @param name Logger's name @@ -74,16 +76,17 @@ public: * @param[in] printers (Optional) Printers to attach to the system loggers. Defaults with a @ref StdPrinter. 
*/ void create_reserved_loggers(LogLevel log_level = LogLevel::INFO, - const std::vector<std::shared_ptr<Printer>> &printers = { std::make_shared<StdPrinter>() }); + const std::vector<std::shared_ptr<Printer>> &printers = { + std::make_shared<StdPrinter>()}); private: /** Default constructor */ LoggerRegistry(); private: - arm_compute::Mutex _mtx; + arm_compute::Mutex _mtx; std::unordered_map<std::string, std::shared_ptr<Logger>> _loggers; - static std::set<std::string> _reserved_loggers; + static std::set<std::string> _reserved_loggers; }; } // namespace logging } // namespace arm_compute diff --git a/arm_compute/core/utils/logging/Macros.h b/arm_compute/core/utils/logging/Macros.h index 1108dd3800..4d5aa5fe2c 100644 --- a/arm_compute/core/utils/logging/Macros.h +++ b/arm_compute/core/utils/logging/Macros.h @@ -30,52 +30,66 @@ #ifdef ARM_COMPUTE_LOGGING_ENABLED +#ifdef __GNUC__ +inline std::string signature_name(const std::string &pretty_func) +{ + const auto scope_op = pretty_func.find("::"); + const auto begin = pretty_func.substr(0, scope_op).rfind(" ") + 1; + const auto end = pretty_func.rfind("(") - begin; + + return pretty_func.substr(begin, end) + "()"; +} +#define ARM_COMPUTE_SIGNATURE_NAME signature_name(__PRETTY_FUNCTION__) +#else /* __GNUC__ */ +#define ARM_COMPUTE_SIGNATURE_NAME (__func__) +#endif /* __GNUC__ */ + #define ARM_COMPUTE_LOG_MSG(logger_name, log_level, msg) \ do \ { \ auto __logger = arm_compute::logging::LoggerRegistry::get().logger(logger_name); \ - if(__logger != nullptr) \ + if (__logger != nullptr) \ { \ __logger->log(log_level, msg); \ } \ - } while(false) + } while (false) #define ARM_COMPUTE_LOG_MSG_WITH_FUNCNAME(logger_name, log_level, msg) \ do \ { \ auto __logger = arm_compute::logging::LoggerRegistry::get().logger(logger_name); \ - if(__logger != nullptr) \ + if (__logger != nullptr) \ { \ std::ostringstream s; \ - s << __func__ << ":" << msg; \ + s << ARM_COMPUTE_SIGNATURE_NAME << " : " << msg; \ __logger->log(log_level, 
s.str()); \ } \ - } while(false) + } while (false) #define ARM_COMPUTE_LOG_MSG_WITH_FORMAT(logger_name, log_level, fmt, ...) \ do \ { \ auto __logger = arm_compute::logging::LoggerRegistry::get().logger(logger_name); \ - if(__logger != nullptr) \ + if (__logger != nullptr) \ { \ size_t size = ::snprintf(nullptr, 0, fmt, __VA_ARGS__) + 1; \ auto char_str = std::make_unique<char[]>(size); \ - ::snprintf(char_str.get(), size, #fmt, __VA_ARGS__); \ + ::snprintf(char_str.get(), size, fmt, __VA_ARGS__); \ __logger->log(log_level, std::string(char_str.get(), char_str.get() + size - 1)); \ } \ - } while(false) + } while (false) #define ARM_COMPUTE_LOG_STREAM(logger_name, log_level, stream) \ do \ { \ auto __logger = arm_compute::logging::LoggerRegistry::get().logger(logger_name); \ - if(__logger != nullptr) \ + if (__logger != nullptr) \ { \ std::ostringstream s; \ s << stream; \ __logger->log(log_level, s.str()); \ } \ - } while(false) + } while (false) #else /* ARM_COMPUTE_LOGGING_ENABLED */ diff --git a/arm_compute/core/utils/logging/Types.h b/arm_compute/core/utils/logging/Types.h index f0ddae6c84..64c567b984 100644 --- a/arm_compute/core/utils/logging/Types.h +++ b/arm_compute/core/utils/logging/Types.h @@ -44,8 +44,7 @@ enum class LogLevel struct LogMsg { /** Default constructor */ - LogMsg() - : raw_(), log_level_(LogLevel::OFF) + LogMsg() : raw_(), log_level_(LogLevel::OFF) { } /** Construct a log message @@ -53,8 +52,7 @@ struct LogMsg * @param[in] msg Message to log. * @param[in] log_level Logging level. 
Default: OFF */ - LogMsg(std::string msg, LogLevel log_level = LogLevel::OFF) - : raw_(msg), log_level_(log_level) + LogMsg(std::string msg, LogLevel log_level = LogLevel::OFF) : raw_(msg), log_level_(log_level) { } diff --git a/arm_compute/core/utils/math/Math.h b/arm_compute/core/utils/math/Math.h new file mode 100644 index 0000000000..e70337ba0f --- /dev/null +++ b/arm_compute/core/utils/math/Math.h @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2017-2018, 2023 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_UTILS_MATH_H +#define ARM_COMPUTE_UTILS_MATH_H + +namespace arm_compute +{ +/** Calculate the rounded up quotient of val / m. + * + * @param[in] val Value to divide and round up. + * @param[in] m Value to divide by. + * + * @return the result. 
+ */ +template <typename S, typename T> +constexpr auto DIV_CEIL(S val, T m) -> decltype((val + m - 1) / m) +{ + return (val + m - 1) / m; +} + +/** Computes the smallest number larger or equal to value that is a multiple of divisor. + * + * @param[in] value Lower bound value + * @param[in] divisor Value to compute multiple of. + * + * @return the result. + */ +template <typename S, typename T> +inline auto ceil_to_multiple(S value, T divisor) -> decltype(((value + divisor - 1) / divisor) * divisor) +{ + ARM_COMPUTE_ERROR_ON(value < 0 || divisor <= 0); + return DIV_CEIL(value, divisor) * divisor; +} + +/** Computes the largest number smaller or equal to value that is a multiple of divisor. + * + * @param[in] value Upper bound value + * @param[in] divisor Value to compute multiple of. + * + * @return the result. + */ +template <typename S, typename T> +inline auto floor_to_multiple(S value, T divisor) -> decltype((value / divisor) * divisor) +{ + ARM_COMPUTE_ERROR_ON(value < 0 || divisor <= 0); + return (value / divisor) * divisor; +} + +} // namespace arm_compute +#endif /*ARM_COMPUTE_UTILS_MATH_H */ diff --git a/arm_compute/core/utils/math/SafeOps.h b/arm_compute/core/utils/math/SafeOps.h index f0d76a3d02..ef8bcf7e14 100644 --- a/arm_compute/core/utils/math/SafeOps.h +++ b/arm_compute/core/utils/math/SafeOps.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021 Arm Limited. + * Copyright (c) 2019-2021, 2023 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -25,7 +25,8 @@ #define ARM_COMPUTE_UTILS_MATH_SAFE_OPS #include "arm_compute/core/Error.h" -#include "support/Requires.h" + +#include "support/AclRequires.h" #include <limits> @@ -51,11 +52,11 @@ T safe_integer_add(T val_a, T val_b) { T result = 0; - if((val_b > 0) && (val_a > std::numeric_limits<T>::max() - val_b)) + if ((val_b > 0) && (val_a > std::numeric_limits<T>::max() - val_b)) { result = std::numeric_limits<T>::max(); } - else if((val_b < 0) && (val_a < std::numeric_limits<T>::min() - val_b)) + else if ((val_b < 0) && (val_a < std::numeric_limits<T>::min() - val_b)) { result = std::numeric_limits<T>::min(); } @@ -83,11 +84,11 @@ T safe_integer_sub(T val_a, T val_b) { T result = 0; - if((val_b < 0) && (val_a > std::numeric_limits<T>::max() + val_b)) + if ((val_b < 0) && (val_a > std::numeric_limits<T>::max() + val_b)) { result = std::numeric_limits<T>::max(); } - else if((val_b > 0) && (val_a < std::numeric_limits<T>::min() + val_b)) + else if ((val_b > 0) && (val_a < std::numeric_limits<T>::min() + val_b)) { result = std::numeric_limits<T>::min(); } @@ -115,13 +116,13 @@ T safe_integer_mul(T val_a, T val_b) { T result = 0; - if(val_a > 0) + if (val_a > 0) { - if((val_b > 0) && (val_a > (std::numeric_limits<T>::max() / val_b))) + if ((val_b > 0) && (val_a > (std::numeric_limits<T>::max() / val_b))) { result = std::numeric_limits<T>::max(); } - else if(val_b < (std::numeric_limits<T>::min() / val_a)) + else if (val_b < (std::numeric_limits<T>::min() / val_a)) { result = std::numeric_limits<T>::min(); } @@ -132,11 +133,11 @@ T safe_integer_mul(T val_a, T val_b) } else { - if((val_b > 0) && (val_a < (std::numeric_limits<T>::min() / val_b))) + if ((val_b > 0) && (val_a < (std::numeric_limits<T>::min() / val_b))) { result = std::numeric_limits<T>::max(); } - else if((val_a != 0) && (val_b < (std::numeric_limits<T>::max() / val_a))) + else if ((val_a != 0) && (val_b < (std::numeric_limits<T>::max() / val_a))) { result = 
std::numeric_limits<T>::min(); } @@ -165,7 +166,7 @@ T safe_integer_div(T val_a, T val_b) { T result = 0; - if((val_b == 0) || ((val_a == std::numeric_limits<T>::min()) && (val_b == -1))) + if ((val_b == 0) || ((val_a == std::numeric_limits<T>::min()) && (val_b == -1))) { result = std::numeric_limits<T>::min(); } @@ -176,7 +177,7 @@ T safe_integer_div(T val_a, T val_b) return result; } -} // namespace cast +} // namespace math } // namespace utils } // namespace arm_compute #endif /* ARM_COMPUTE_UTILS_MATH_SAFE_OPS */ diff --git a/arm_compute/core/utils/misc/InfoHelpers.h b/arm_compute/core/utils/misc/InfoHelpers.h index ced0d24b56..1d1b4ea8d7 100644 --- a/arm_compute/core/utils/misc/InfoHelpers.h +++ b/arm_compute/core/utils/misc/InfoHelpers.h @@ -53,10 +53,12 @@ inline bool is_relu(ActivationLayerInfo activation_info) */ inline bool is_relu6(ActivationLayerInfo activation_info) { - const bool is_lu_bounded_relu = activation_info.activation() == ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU - && activation_info.a() == 6.f && activation_info.b() == 0.f; - const bool is_bounded_relu = activation_info.activation() == ActivationLayerInfo::ActivationFunction::BOUNDED_RELU - && activation_info.a() == 6.f; + const bool is_lu_bounded_relu = + activation_info.activation() == ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU && + activation_info.a() == 6.f && activation_info.b() == 0.f; + const bool is_bounded_relu = + activation_info.activation() == ActivationLayerInfo::ActivationFunction::BOUNDED_RELU && + activation_info.a() == 6.f; return activation_info.enabled() && (is_lu_bounded_relu || is_bounded_relu); } @@ -68,34 +70,37 @@ inline bool is_relu6(ActivationLayerInfo activation_info) * */ template <typename T> -inline void build_lstm_params_tensor_info(const LSTMParams<T> &lstm_params, - LSTMParams<ITensorInfo> *lstm_params_info) +inline void build_lstm_params_tensor_info(const LSTMParams<T> &lstm_params, LSTMParams<ITensorInfo> *lstm_params_info) { 
- if(lstm_params.has_peephole_opt()) + if (lstm_params.has_peephole_opt()) { ARM_COMPUTE_ERROR_ON_NULLPTR(lstm_params.cell_to_forget_weights(), lstm_params.cell_to_output_weights()); - lstm_params_info->set_peephole_params(lstm_params.cell_to_forget_weights()->info(), lstm_params.cell_to_output_weights()->info()); + lstm_params_info->set_peephole_params(lstm_params.cell_to_forget_weights()->info(), + lstm_params.cell_to_output_weights()->info()); } - if(lstm_params.has_projection()) + if (lstm_params.has_projection()) { ARM_COMPUTE_ERROR_ON_NULLPTR(lstm_params.projection_weights()); - lstm_params_info->set_projection_params(lstm_params.projection_weights()->info(), - lstm_params.projection_bias() != nullptr ? lstm_params.projection_bias()->info() : nullptr); + lstm_params_info->set_projection_params( + lstm_params.projection_weights()->info(), + lstm_params.projection_bias() != nullptr ? lstm_params.projection_bias()->info() : nullptr); } - if(!lstm_params.has_cifg_opt()) + if (!lstm_params.has_cifg_opt()) { - ARM_COMPUTE_ERROR_ON_NULLPTR(lstm_params.input_to_input_weights(), lstm_params.recurrent_to_input_weights(), lstm_params.input_gate_bias()); + ARM_COMPUTE_ERROR_ON_NULLPTR(lstm_params.input_to_input_weights(), lstm_params.recurrent_to_input_weights(), + lstm_params.input_gate_bias()); - ITensorInfo *cell_to_input_weights_info = (lstm_params.has_peephole_opt()) ? lstm_params.cell_to_input_weights()->info() : nullptr; - lstm_params_info->set_cifg_params(lstm_params.input_to_input_weights()->info(), lstm_params.recurrent_to_input_weights()->info(), - cell_to_input_weights_info, lstm_params.input_gate_bias()->info()); + ITensorInfo *cell_to_input_weights_info = + (lstm_params.has_peephole_opt()) ? 
lstm_params.cell_to_input_weights()->info() : nullptr; + lstm_params_info->set_cifg_params(lstm_params.input_to_input_weights()->info(), + lstm_params.recurrent_to_input_weights()->info(), cell_to_input_weights_info, + lstm_params.input_gate_bias()->info()); } - if(lstm_params.use_layer_norm()) + if (lstm_params.use_layer_norm()) { - ARM_COMPUTE_ERROR_ON_NULLPTR(lstm_params.forget_layer_norm_weights(), - lstm_params.output_layer_norm_weights(), + ARM_COMPUTE_ERROR_ON_NULLPTR(lstm_params.forget_layer_norm_weights(), lstm_params.output_layer_norm_weights(), lstm_params.cell_layer_norm_weights()); - if(!lstm_params.has_cifg_opt()) + if (!lstm_params.has_cifg_opt()) { ARM_COMPUTE_ERROR_ON_NULLPTR(lstm_params.input_layer_norm_weights()); } @@ -103,15 +108,14 @@ inline void build_lstm_params_tensor_info(const LSTMParams<T> &lstm_params, ITensorInfo *forget_info = lstm_params.forget_layer_norm_weights()->info(); ITensorInfo *cell_info = lstm_params.cell_layer_norm_weights()->info(); ITensorInfo *output_info = lstm_params.output_layer_norm_weights()->info(); - ITensorInfo *input_info = lstm_params.has_cifg_opt() ? nullptr : lstm_params.input_layer_norm_weights()->info(); + ITensorInfo *input_info = lstm_params.has_cifg_opt() ? 
nullptr : lstm_params.input_layer_norm_weights()->info(); lstm_params_info->set_layer_normalization_params(input_info, forget_info, cell_info, output_info); } - lstm_params_info->set_matmul_scale_params(lstm_params.input_intermediate_scale(), - lstm_params.forget_intermediate_scale(), - lstm_params.cell_intermediate_scale(), - lstm_params.output_intermediate_scale()); + lstm_params_info->set_matmul_scale_params( + lstm_params.input_intermediate_scale(), lstm_params.forget_intermediate_scale(), + lstm_params.cell_intermediate_scale(), lstm_params.output_intermediate_scale()); lstm_params_info->set_hidden_state_params(lstm_params.hidden_state_zero(), lstm_params.hidden_state_scale()); } diff --git a/arm_compute/core/utils/misc/MMappedFile.h b/arm_compute/core/utils/misc/MMappedFile.h index b3e0994b5b..3efdbc5bda 100644 --- a/arm_compute/core/utils/misc/MMappedFile.h +++ b/arm_compute/core/utils/misc/MMappedFile.h @@ -24,7 +24,7 @@ #ifndef ARM_COMPUTE_MISC_MMAPPED_FILE_H #define ARM_COMPUTE_MISC_MMAPPED_FILE_H -#if !defined(BARE_METAL) +#if !defined(_WIN64) && !defined(BARE_METAL) #include <string> #include <utility> @@ -105,6 +105,6 @@ private: } // namespace mmap_io } // namespace utils } // namespace arm_compute -#endif // !defined(BARE_METAL) +#endif // !defined(_WIN64) &&!defined(BARE_METAL) #endif /* ARM_COMPUTE_MISC_MMAPPED_FILE_H */ diff --git a/arm_compute/core/utils/misc/Macros.h b/arm_compute/core/utils/misc/Macros.h index de66b6a52f..fa861fa442 100644 --- a/arm_compute/core/utils/misc/Macros.h +++ b/arm_compute/core/utils/misc/Macros.h @@ -26,15 +26,16 @@ #if defined(__cplusplus) && (__cplusplus >= 201402L) -#define ARM_COMPUTE_DEPRECATED [[deprecated]] -#define ARM_COMPUTE_DEPRECATED_REL(rel) [[deprecated("Deprecated in : " #rel)]] +#define ARM_COMPUTE_DEPRECATED [[deprecated]] +#define ARM_COMPUTE_DEPRECATED_REL(rel) [[deprecated("Deprecated in : " #rel)]] #define ARM_COMPUTE_DEPRECATED_REL_REPLACE(rel, replace) [[deprecated("Deprecated in : " #rel " - 
Use : " #replace)]] #elif defined(__GNUC__) || defined(__clang__) -#define ARM_COMPUTE_DEPRECATED __attribute__((deprecated)) +#define ARM_COMPUTE_DEPRECATED __attribute__((deprecated)) #define ARM_COMPUTE_DEPRECATED_REL(rel) __attribute__((deprecated("Deprecated in : " #rel))) -#define ARM_COMPUTE_DEPRECATED_REL_REPLACE(rel, replace) __attribute__((deprecated("Deprecated in : " #rel " - Use : " #replace))) +#define ARM_COMPUTE_DEPRECATED_REL_REPLACE(rel, replace) \ + __attribute__((deprecated("Deprecated in : " #rel " - Use : " #replace))) #else // defined(__cplusplus) && (__cplusplus >= 201402L) diff --git a/arm_compute/core/utils/misc/ShapeCalculator.h b/arm_compute/core/utils/misc/ShapeCalculator.h index d0dc202f91..e97d81390e 100644 --- a/arm_compute/core/utils/misc/ShapeCalculator.h +++ b/arm_compute/core/utils/misc/ShapeCalculator.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021 Arm Limited. + * Copyright (c) 2017-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,15 +21,16 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#ifndef ARM_COMPUTE_MISC_SHAPE_CALCULATOR_H -#define ARM_COMPUTE_MISC_SHAPE_CALCULATOR_H +#ifndef ACL_ARM_COMPUTE_CORE_UTILS_MISC_SHAPECALCULATOR_H +#define ACL_ARM_COMPUTE_CORE_UTILS_MISC_SHAPECALCULATOR_H #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensorInfo.h" #include "arm_compute/core/KernelDescriptors.h" #include "arm_compute/core/Utils.h" - #include "arm_compute/core/utils/helpers/tensor_transform.h" +#include "arm_compute/function_info/ConvolutionInfo.h" +#include "arm_compute/runtime/FunctionDescriptors.h" #include <cmath> @@ -55,20 +56,27 @@ inline TensorShape calculate_reduce_mean_shape(ITensorInfo *input, const Coordin convert_negative_axis(axis_local, input_dims); TensorShape out_shape = input->tensor_shape(); // Configure reshape layer if we want to drop the dimensions - if(!keep_dims) + if (!keep_dims) { // We have to sort the reduction axis vectors in order for remove_dimension // to work properly + +// Suppress warning produced by a compiler bug in GCC +// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=104165 +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Warray-bounds" std::sort(axis_local.begin(), axis_local.begin() + reduction_ops); - for(int i = 0; i < reduction_ops; ++i) +#pragma GCC diagnostic pop + + for (int i = 0; i < reduction_ops; ++i) { - out_shape.remove_dimension(axis_local[i] - i); + out_shape.remove_dimension(axis_local[i] - i, false); } return out_shape; } else { - for(int i = 0; i < reduction_ops; ++i) + for (int i = 0; i < reduction_ops; ++i) { out_shape.set(axis_local[i], 1); } @@ -84,7 +92,10 @@ inline TensorShape calculate_reduce_mean_shape(ITensorInfo *input, const Coordin * * @return the calculated shape */ -inline TensorShape compute_vector_to_tensor_output_shape(const TensorShape &input, size_t conv_w, size_t conv_h, const DataLayout &data_layout) +inline TensorShape compute_vector_to_tensor_output_shape(const TensorShape &input, + size_t conv_w, + size_t conv_h, + const DataLayout 
&data_layout) { const size_t idx_w = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); const size_t idx_h = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); @@ -126,10 +137,12 @@ inline TensorShape compute_reorg_output_shape(const ITensorInfo &input, int32_t const size_t idx_channel = get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::CHANNEL); ARM_COMPUTE_ERROR_ON(stride <= 0); - ARM_COMPUTE_ERROR_ON_MSG((input.tensor_shape()[idx_width] % stride != 0), "The width of the input tensor must be a multiple of stride"); - ARM_COMPUTE_ERROR_ON_MSG((input.tensor_shape()[idx_height] % stride != 0), "The height of the input tensor must be a multiple of stride"); + ARM_COMPUTE_ERROR_ON_MSG((input.tensor_shape()[idx_width] % stride != 0), + "The width of the input tensor must be a multiple of stride"); + ARM_COMPUTE_ERROR_ON_MSG((input.tensor_shape()[idx_height] % stride != 0), + "The height of the input tensor must be a multiple of stride"); - TensorShape output_shape{ input.tensor_shape() }; + TensorShape output_shape{input.tensor_shape()}; output_shape.set(idx_width, output_shape[idx_width] / stride); output_shape.set(idx_height, output_shape[idx_height] / stride); @@ -146,7 +159,8 @@ inline TensorShape compute_reorg_output_shape(const ITensorInfo &input, int32_t * * @return the calculated shape of the reshaped weights */ -inline TensorShape compute_weights_reshaped_shape(const ITensorInfo &weights, bool has_bias = false, unsigned int num_groups = 1) +inline TensorShape +compute_weights_reshaped_shape(const ITensorInfo &weights, bool has_bias = false, unsigned int num_groups = 1) { // Number of groups greater than one are only supported for NCHW data layout, and the number of weights must be a multiple of it. 
ARM_COMPUTE_ERROR_ON(num_groups == 0); @@ -154,14 +168,14 @@ inline TensorShape compute_weights_reshaped_shape(const ITensorInfo &weights, bo ARM_COMPUTE_ERROR_ON((weights.dimension(3) % num_groups) != 0); // Calculate output shape - TensorShape weights_reshaped{ weights.tensor_shape() }; + TensorShape weights_reshaped{weights.tensor_shape()}; weights_reshaped.set(3, weights_reshaped[3] / num_groups); weights_reshaped.collapse(3); const size_t tmp_dim = weights_reshaped[0]; weights_reshaped.set(0, weights_reshaped[1]); weights_reshaped.set(1, tmp_dim + (has_bias ? 1 : 0)); - if(weights.num_dimensions() < 5) + if (weights.num_dimensions() < 5) { weights_reshaped.set(2, num_groups); } @@ -177,7 +191,9 @@ inline TensorShape compute_weights_reshaped_shape(const ITensorInfo &weights, bo * * @return the calculated shape */ -inline TensorShape compute_lhs_reshaped_shape(const ITensorInfo &a, const GEMMLHSMatrixInfo &lhs_info, bool reinterpret_input_as_3d = false) +inline TensorShape compute_lhs_reshaped_shape(const ITensorInfo &a, + const GEMMLHSMatrixInfo &lhs_info, + bool reinterpret_input_as_3d = false) { ARM_COMPUTE_ERROR_ON(lhs_info.m0 == 0); ARM_COMPUTE_ERROR_ON(lhs_info.k0 == 0); @@ -198,11 +214,11 @@ inline TensorShape compute_lhs_reshaped_shape(const ITensorInfo &a, const GEMMLH const unsigned int output_width = block_size * num_horiz_blocks * lhs_info.v0; const unsigned int output_height = std::ceil(num_vert_blocks / static_cast<float>(lhs_info.v0)); - TensorShape lhs_shape{ a.tensor_shape() }; + TensorShape lhs_shape{a.tensor_shape()}; lhs_shape.set(0, output_width); lhs_shape.set(1, output_height); - if((reinterpret_input_as_3d) && (lhs_shape.num_dimensions() > 2)) + if ((reinterpret_input_as_3d) && (lhs_shape.num_dimensions() > 2)) { // When the data format is NHWC and the shapes are Nx1x1 // the tensor shape num_dimensions is automatically set to 1 instead of 3. 
@@ -242,7 +258,7 @@ inline TensorShape compute_rhs_reshaped_shape(const ITensorInfo &a, const GEMMRH const unsigned int output_width = block_size * num_vert_blocks * rhs_info.h0; const unsigned int output_height = std::ceil(num_horiz_blocks / static_cast<float>(rhs_info.h0)); - TensorShape rhs_shape{ a.tensor_shape() }; + TensorShape rhs_shape{a.tensor_shape()}; rhs_shape.set(0, output_width); rhs_shape.set(1, output_height); @@ -257,14 +273,15 @@ inline TensorShape compute_rhs_reshaped_shape(const ITensorInfo &a, const GEMMRH * * @return the calculated shape */ -inline TensorShape compute_interleaved_shape(const ITensorInfo &a, int mult_interleave4x4_height = 1, bool reinterpret_input_as_3d = false) +inline TensorShape +compute_interleaved_shape(const ITensorInfo &a, int mult_interleave4x4_height = 1, bool reinterpret_input_as_3d = false) { // The interleaved output matrix will have the following shape: [ a_height * W, ceil(a_width / W) ] where W = 4 * mult_interleave4x4_height ARM_COMPUTE_ERROR_ON(mult_interleave4x4_height < 1); const int interleave_width = 4 * mult_interleave4x4_height; - TensorShape shape_interleaved_a{ a.tensor_shape() }; + TensorShape shape_interleaved_a{a.tensor_shape()}; shape_interleaved_a.set(0, a.dimension(0) * interleave_width); - if(reinterpret_input_as_3d) + if (reinterpret_input_as_3d) { const int M = a.dimension(1) * a.dimension(2); const int height = std::ceil(M / static_cast<float>(interleave_width)); @@ -274,7 +291,7 @@ inline TensorShape compute_interleaved_shape(const ITensorInfo &a, int mult_inte // the tensor shape num_dimensions is automatically set to 1 instead of 3. // To avoid failures by removing a dimension that doesn't exist // check if the number of dimensions is greater than 2. 
- if(shape_interleaved_a.num_dimensions() > 2) + if (shape_interleaved_a.num_dimensions() > 2) { shape_interleaved_a.remove_dimension(2); } @@ -296,7 +313,7 @@ inline TensorShape compute_interleaved_shape(const ITensorInfo &a, int mult_inte inline TensorShape compute_transpose1xW_shape(const ITensorInfo &b) { // The transpose1xW output matrix will have the following shape: [ b_height * 16, ceil(b_width / 16.0f) ] - TensorShape shape_transposed1xW_b{ b.tensor_shape() }; + TensorShape shape_transposed1xW_b{b.tensor_shape()}; shape_transposed1xW_b.set(0, b.dimension(1) * 16); shape_transposed1xW_b.set(1, std::ceil(b.dimension(0) / 16.f)); @@ -316,7 +333,7 @@ inline TensorShape compute_transpose1xW_with_element_size_shape(const ITensorInf // The transpose1xW output matrix will have the following shape: // [ b_height * W, ceil(b_width / W) ] where W = (16 / element size of the tensor) * mult_transpose1xW_width ARM_COMPUTE_ERROR_ON(mult_transpose1xW_width < 1); - TensorShape shape_transposed1xW_b{ b.tensor_shape() }; + TensorShape shape_transposed1xW_b{b.tensor_shape()}; const size_t transpose_width = (16 / b.element_size()) * mult_transpose1xW_width; shape_transposed1xW_b.set(0, b.dimension(1) * transpose_width); shape_transposed1xW_b.set(1, static_cast<size_t>(std::ceil(b.dimension(0) / static_cast<float>(transpose_width)))); @@ -332,8 +349,8 @@ inline TensorShape compute_transpose1xW_with_element_size_shape(const ITensorInf */ inline TensorShape compute_reductionA_shape(const ITensorInfo &b) { - TensorShape shape_vector_sum_col{ b.tensor_shape() }; - if(shape_vector_sum_col.num_dimensions() > 1) + TensorShape shape_vector_sum_col{b.tensor_shape()}; + if (shape_vector_sum_col.num_dimensions() > 1) { shape_vector_sum_col.remove_dimension(1); } @@ -349,9 +366,9 @@ inline TensorShape compute_reductionA_shape(const ITensorInfo &b) */ inline TensorShape compute_reductionB_shape(const ITensorInfo &a) { - TensorShape shape_vector_sum_row{ a.tensor_shape() }; + TensorShape 
shape_vector_sum_row{a.tensor_shape()}; shape_vector_sum_row.set(Window::DimX, a.dimension(1)); - if(shape_vector_sum_row.num_dimensions() > 1) + if (shape_vector_sum_row.num_dimensions() > 1) { shape_vector_sum_row.remove_dimension(1); } @@ -368,7 +385,10 @@ inline TensorShape compute_reductionB_shape(const ITensorInfo &a) * * @return the calculated shape */ -inline TensorShape compute_col2im_shape(const ITensorInfo &input, const Size2D &convolved_dims, bool batch_size_on_z, unsigned int num_groups = 1) +inline TensorShape compute_col2im_shape(const ITensorInfo &input, + const Size2D &convolved_dims, + bool batch_size_on_z, + unsigned int num_groups = 1) { ARM_COMPUTE_ERROR_ON(num_groups == 0); ARM_COMPUTE_ERROR_ON(input.tensor_shape()[1] != (convolved_dims.area())); @@ -379,10 +399,10 @@ inline TensorShape compute_col2im_shape(const ITensorInfo &input, const Size2D & const int height_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); const int channel_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL); - TensorShape col2im_shape{ input.tensor_shape() }; + TensorShape col2im_shape{input.tensor_shape()}; // If batches start on 3rd dimension shift dimensions right by 1 to retain upper tensor shape, // as first three will be override by H,W,C data - if(batch_size_on_z && num_groups == 1) + if (batch_size_on_z && num_groups == 1) { col2im_shape.shift_right(1); } @@ -401,10 +421,10 @@ inline TensorShape compute_col2im_shape(const ITensorInfo &input, const Size2D & */ inline TensorShape compute_transposed_shape(const ITensorInfo &input) { - TensorShape shape_transposed{ input.tensor_shape() }; + TensorShape shape_transposed{input.tensor_shape()}; - shape_transposed.set(0, input.dimension(1)); - shape_transposed.set(1, input.dimension(0)); + shape_transposed.set(0, input.dimension(1), false); + shape_transposed.set(1, input.dimension(0), false); return shape_transposed; } @@ -417,10 +437,11 @@ inline TensorShape 
compute_transposed_shape(const ITensorInfo &input) * * @return the calculated shape */ -inline TensorShape compute_depthwise_convolution_shape(const ITensorInfo &input, const ITensorInfo &weights, const ConvolutionInfo &info) +inline TensorShape +compute_depthwise_convolution_shape(const ITensorInfo &input, const ITensorInfo &weights, const ConvolutionInfo &info) { - const TensorShape input_shape{ input.tensor_shape() }; - const TensorShape weights_shape{ weights.tensor_shape() }; + const TensorShape input_shape{input.tensor_shape()}; + const TensorShape weights_shape{weights.tensor_shape()}; const DataLayout data_layout = input.data_layout(); const int width_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); @@ -428,16 +449,16 @@ inline TensorShape compute_depthwise_convolution_shape(const ITensorInfo &input, const int channel_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL); const DataLayout weights_data_layout = weights.data_layout(); - const int weights_width_idx = get_data_layout_dimension_index(weights_data_layout, DataLayoutDimension::WIDTH); - const int weights_height_idx = get_data_layout_dimension_index(weights_data_layout, DataLayoutDimension::HEIGHT); + const int weights_width_idx = get_data_layout_dimension_index(weights_data_layout, DataLayoutDimension::WIDTH); + const int weights_height_idx = get_data_layout_dimension_index(weights_data_layout, DataLayoutDimension::HEIGHT); unsigned int output_width = 0; unsigned int output_height = 0; - std::tie(output_width, output_height) = scaled_dimensions(input_shape[width_idx], input_shape[height_idx], - weights_shape[weights_width_idx], weights_shape[weights_height_idx], - info.pad_stride_info, info.dilation); + std::tie(output_width, output_height) = + scaled_dimensions(input_shape[width_idx], input_shape[height_idx], weights_shape[weights_width_idx], + weights_shape[weights_height_idx], info.pad_stride_info, info.dilation); - TensorShape 
output_shape{ input_shape }; + TensorShape output_shape{input_shape}; output_shape.set(width_idx, output_width); output_shape.set(height_idx, output_height); output_shape.set(channel_idx, input_shape[channel_idx] * info.depth_multiplier); @@ -445,6 +466,37 @@ inline TensorShape compute_depthwise_convolution_shape(const ITensorInfo &input, return output_shape; } +/** Calculate padding required for deconvolution + * + * @param[in] input Input tensor info + * @param[in] weights Weights tensor shape + * @param[in] sx Stride on x axis + * @param[in] sy Stride on y axis + * @param[in] out_dims Output shape dimensions + * + * @return the padding required + */ +inline std::pair<int32_t, int32_t> compute_deconvolution_padding(const ITensorInfo &input, + const ITensorInfo &weights, + int32_t sx, + int32_t sy, + std::pair<uint32_t, uint32_t> out_dims) +{ + const DataLayout data_layout = input.data_layout(); + const size_t idx_w = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); + const size_t idx_h = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); + + // Find the upsampled dimensions + int32_t out_x = (static_cast<int32_t>(input.dimension(idx_w)) - 1) * sx + 1; + int32_t out_y = (static_cast<int32_t>(input.dimension(idx_h)) - 1) * sy + 1; + + // Find the padding needed for the convolution with stride 1 in order to match output shape + int32_t padx = out_dims.first - (out_x - static_cast<int32_t>(weights.dimension(idx_w)) + 1); + int32_t pady = out_dims.second - (out_y - static_cast<int32_t>(weights.dimension(idx_h)) + 1); + + return std::make_pair(padx, pady); +} + /** Calculate the upsampled output shape used for deconvolution * * @param[in] input Input tensor info @@ -457,20 +509,28 @@ inline TensorShape compute_depthwise_convolution_shape(const ITensorInfo &input, * * @return the calculated shape */ -inline TensorShape compute_deconvolution_upsampled_shape(const ITensorInfo &input, const ITensorInfo &weights, unsigned int 
sx, unsigned int sy, - std::pair<unsigned int, unsigned int> &out_dims, uint32_t &padx, uint32_t &pady) +inline TensorShape compute_deconvolution_upsampled_shape(const ITensorInfo &input, + const ITensorInfo &weights, + unsigned int sx, + unsigned int sy, + std::pair<unsigned int, unsigned int> &out_dims, + uint32_t &padx, + uint32_t &pady) { + // Find the padding needed for the convolution with stride 1 in order to match output shape + const auto padxy = + compute_deconvolution_padding(input, weights, static_cast<int32_t>(sx), static_cast<int32_t>(sy), out_dims); + padx = static_cast<uint32_t>(padxy.first); + pady = static_cast<uint32_t>(padxy.second); + const DataLayout data_layout = input.data_layout(); const size_t idx_w = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); const size_t idx_h = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); // Find the upsampled dimensions - unsigned int out_x = (input.dimension(idx_w) - 1) * sx + 1; - unsigned int out_y = (input.dimension(idx_h) - 1) * sy + 1; + uint32_t out_x = (input.dimension(idx_w) - 1) * sx + 1; + uint32_t out_y = (input.dimension(idx_h) - 1) * sy + 1; - // Find the padding needed for the convolution with stride 1 in order to match output shape - padx = out_dims.first - (out_x - weights.dimension(idx_w) + 1); - pady = out_dims.second - (out_y - weights.dimension(idx_h) + 1); out_x += padx; out_y += pady; @@ -489,10 +549,12 @@ inline TensorShape compute_deconvolution_upsampled_shape(const ITensorInfo &inpu * * @return the calculated shape */ -inline TensorShape compute_deconvolution_output_shape(const std::pair<unsigned int, unsigned int> &out_dims, const ITensorInfo &input, const ITensorInfo &weights) +inline TensorShape compute_deconvolution_output_shape(const std::pair<unsigned int, unsigned int> &out_dims, + const ITensorInfo &input, + const ITensorInfo &weights) { - const TensorShape input_shape{ input.tensor_shape() }; - const TensorShape 
weights_shape{ weights.tensor_shape() }; + const TensorShape input_shape{input.tensor_shape()}; + const TensorShape weights_shape{weights.tensor_shape()}; const DataLayout data_layout = input.data_layout(); const int width_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); @@ -500,7 +562,7 @@ inline TensorShape compute_deconvolution_output_shape(const std::pair<unsigned i const int channel_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL); const int batch_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::BATCHES); - TensorShape out_shape{ input_shape }; + TensorShape out_shape{input_shape}; out_shape.set(width_idx, out_dims.first); out_shape.set(height_idx, out_dims.second); out_shape.set(channel_idx, weights_shape[batch_idx]); @@ -516,11 +578,18 @@ inline TensorShape compute_deconvolution_output_shape(const std::pair<unsigned i * @param[in] dilation Dilation, in elements, across x and y * @param[in] batch_size_on_z True if batch size is on z axis * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution + * @param[in] input_pad_right (Optional) When fast-math is selected, per element padding for the im2col matrix may be necessary * * @return the calculated shape */ -inline TensorShape compute_im2col_conv_shape(const ITensorInfo *input, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, const Size2D &dilation, bool batch_size_on_z, - unsigned int num_groups = 1) +inline TensorShape compute_im2col_conv_shape(const ITensorInfo *input, + const Size2D &kernel_dims, + const PadStrideInfo &conv_info, + bool has_bias, + const Size2D &dilation, + bool batch_size_on_z, + unsigned int num_groups = 1, + unsigned int input_pad_right = 0) { // The output shape will be the 3D shape [ out_channels * kernel_area, num_elems_per_out_channel, batches ] if batch_size_on_z == true // or the 4D shape [ out_channels * kernel_area / num_groups, 
num_elems_per_out_channel, num_groups, batches ] if batch_size_on_z == false @@ -529,17 +598,19 @@ inline TensorShape compute_im2col_conv_shape(const ITensorInfo *input, const Siz ARM_COMPUTE_ERROR_ON(num_groups > 1 && input->data_layout() != DataLayout::NCHW); ARM_COMPUTE_ERROR_ON(num_groups > 1 && batch_size_on_z); - TensorShape output_shape{ input->tensor_shape() }; + TensorShape output_shape{input->tensor_shape()}; const DataLayout data_layout = input->data_layout(); const int width_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); const int height_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); const int channel_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL); - std::pair<unsigned int, unsigned int> out_dims = scaled_dimensions(output_shape[width_idx], output_shape[height_idx], kernel_dims.width, kernel_dims.height, conv_info, dilation); - output_shape.set(0, (output_shape[channel_idx] / num_groups * kernel_dims.area() + (has_bias ? 1 : 0))); // NOLINT + std::pair<unsigned int, unsigned int> out_dims = scaled_dimensions( + output_shape[width_idx], output_shape[height_idx], kernel_dims.width, kernel_dims.height, conv_info, dilation); + output_shape.set(0, ((output_shape[channel_idx] + input_pad_right) / num_groups * kernel_dims.area() + + (has_bias ? 1 : 0))); // NOLINT output_shape.set(1, (out_dims.first * out_dims.second)); - if(batch_size_on_z && output_shape.num_dimensions() >= 3) + if (batch_size_on_z && output_shape.num_dimensions() >= 3) { output_shape.remove_dimension(2); } @@ -561,7 +632,7 @@ inline TensorShape compute_flatten_shape(const ITensorInfo *input) { // The output shape will be the flatten version of the input (i.e. [ width * height * channels, num_batches, ... ] ). Used for FlattenLayer and FullyConnectedLayer. 
- TensorShape output_shape{ input->tensor_shape() }; + TensorShape output_shape{input->tensor_shape()}; output_shape.collapse(3); @@ -583,7 +654,7 @@ inline TensorShape compute_softmax_shape(const ITensorInfo *input, size_t axis = // - [x,y,z,w] and axis 3 will return [x*y*z, w] TensorShape shape2D = input->tensor_shape(); - if(axis < input->num_dimensions()) + if (axis < input->num_dimensions()) { // Collapse from axis onward (this changes the shape) shape2D.collapse_from(axis); @@ -597,7 +668,7 @@ inline TensorShape compute_softmax_shape(const ITensorInfo *input, size_t axis = shape2D.collapse(shape2D.num_dimensions()); } - if(axis == 0) + if (axis == 0) { // If axis is zero the first dim should be one. Since // collapse is an inclusive operation we need to shift @@ -616,15 +687,17 @@ inline TensorShape compute_softmax_shape(const ITensorInfo *input, size_t axis = */ inline TensorShape compute_winograd_filter_transform_shape(const ITensorInfo &input, const WinogradInfo &winograd_info) { - TensorShape tensor_shape{ input.tensor_shape() }; + TensorShape tensor_shape{input.tensor_shape()}; const Size2D kernel_size = winograd_info.kernel_size; const Size2D output_tile_size = winograd_info.output_tile_size; - const Size2D input_tile_size = Size2D(output_tile_size.width + kernel_size.width - 1, output_tile_size.height + kernel_size.height - 1); + const Size2D input_tile_size = + Size2D(output_tile_size.width + kernel_size.width - 1, output_tile_size.height + kernel_size.height - 1); tensor_shape.remove_dimension(get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::WIDTH)); tensor_shape.set(Window::DimX, input.dimension(3)); - tensor_shape.set(Window::DimY, input.dimension(get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::CHANNEL))); + tensor_shape.set(Window::DimY, input.dimension(get_data_layout_dimension_index(input.data_layout(), + DataLayoutDimension::CHANNEL))); tensor_shape.set(Window::DimZ, input_tile_size.area()); 
return tensor_shape; @@ -642,23 +715,22 @@ inline TensorShape compute_winograd_input_transform_shape(const ITensorInfo &inp const PadStrideInfo conv_info = winograd_info.convolution_info; const Size2D kernel_size = winograd_info.kernel_size; const Size2D output_tile_size = winograd_info.output_tile_size; - const Size2D input_tile_size = Size2D(output_tile_size.width + kernel_size.width - 1, output_tile_size.height + kernel_size.height - 1); + const Size2D input_tile_size = + Size2D(output_tile_size.width + kernel_size.width - 1, output_tile_size.height + kernel_size.height - 1); const size_t idx_w = get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::WIDTH); const size_t idx_h = get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::HEIGHT); const size_t idx_c = get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::CHANNEL); // Compute the number of output tiles along the x and y direction of size "output_tile_size" - const Size2D num_tiles = compute_winograd_convolution_tiles(Size2D(input.tensor_shape()[idx_w], input.tensor_shape()[idx_h]), - kernel_size, - output_tile_size, - conv_info); + const Size2D num_tiles = compute_winograd_convolution_tiles( + Size2D(input.tensor_shape()[idx_w], input.tensor_shape()[idx_h]), kernel_size, output_tile_size, conv_info); const unsigned int width = input.tensor_shape()[idx_c]; const unsigned int height = num_tiles.area(); const unsigned int depth = input_tile_size.area(); - TensorShape output_shape{ input.tensor_shape() }; + TensorShape output_shape{input.tensor_shape()}; output_shape.set(0, width); output_shape.set(1, height); output_shape.set(2, depth); @@ -681,12 +753,12 @@ inline TensorShape compute_winograd_output_transform_shape(const ITensorInfo &in const DataLayout data_layout = winograd_info.output_data_layout; // Compute output shape - unsigned int output_width = 0; - unsigned int output_height = 0; + unsigned int output_width = 0; + unsigned int 
output_height = 0; std::tie(output_width, output_height) = scaled_dimensions(input_dimensions.width, input_dimensions.height, kernel_size.width, kernel_size.height, conv_info); - TensorShape tensor_shape{ input.tensor_shape() }; + TensorShape tensor_shape{input.tensor_shape()}; // Output dimension const unsigned int out_w = output_width; @@ -702,20 +774,21 @@ inline TensorShape compute_winograd_output_transform_shape(const ITensorInfo &in /** Calculate the deep convolution shape output shape of a tensor * - * @param[in] input Input tensor info - * @param[in] weights Weights tensor info - * @param[in] conv_info Contains padding and stride information + * @param[in] input_shape Input tensor shape + * @param[in] input_data_layout Input data layout + * @param[in] weights_shape Weights tensor shape + * @param[in] conv_info Contains padding and stride information * * @return the calculated shape */ -inline TensorShape compute_deep_convolution_shape(const ITensorInfo &input, const ITensorInfo &weights, PadStrideInfo conv_info) +inline TensorShape compute_deep_convolution_shape(const TensorShape &input_shape, + DataLayout input_data_layout, + const TensorShape &weights_shape, + const PadStrideInfo &conv_info) { - const TensorShape input_shape{ input.tensor_shape() }; - const TensorShape weights_shape{ weights.tensor_shape() }; - - const size_t idx_width = get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::WIDTH); - const size_t idx_height = get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::HEIGHT); - const size_t idx_channel = get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::CHANNEL); + const size_t idx_width = get_data_layout_dimension_index(input_data_layout, DataLayoutDimension::WIDTH); + const size_t idx_height = get_data_layout_dimension_index(input_data_layout, DataLayoutDimension::HEIGHT); + const size_t idx_channel = get_data_layout_dimension_index(input_data_layout, 
DataLayoutDimension::CHANNEL); const unsigned int input_width = input_shape[idx_width]; const unsigned int input_height = input_shape[idx_height]; @@ -724,9 +797,10 @@ inline TensorShape compute_deep_convolution_shape(const ITensorInfo &input, cons const unsigned int weights_out_channel = weights_shape[3]; unsigned int output_width = 0; unsigned int output_height = 0; - std::tie(output_width, output_height) = scaled_dimensions(input_width, input_height, weights_width, weights_height, conv_info); + std::tie(output_width, output_height) = + scaled_dimensions(input_width, input_height, weights_width, weights_height, conv_info); - TensorShape output_shape{ input_shape }; + TensorShape output_shape{input_shape}; output_shape.set(idx_width, output_width); output_shape.set(idx_height, output_height); output_shape.set(idx_channel, weights_out_channel); @@ -734,6 +808,53 @@ inline TensorShape compute_deep_convolution_shape(const ITensorInfo &input, cons return output_shape; } +/** Calculate the deep convolution shape output shape of a tensor + * + * @param[in] input Input tensor info + * @param[in] weights Weights tensor info + * @param[in] conv_info Contains padding and stride information + * + * @return the calculated shape + */ +inline TensorShape +compute_deep_convolution_shape(const ITensorInfo &input, const ITensorInfo &weights, const PadStrideInfo &conv_info) +{ + return compute_deep_convolution_shape(input.tensor_shape(), input.data_layout(), weights.tensor_shape(), conv_info); +} + +/** Calculate the indirect buffer output shape used by the indirect convolution function + * + * @param[in] input_shape Input tensor shape + * @param[in] input_data_layout Input data layout + * @param[in] weights_shape Weights tensor shape + * @param[in] conv_info Contains padding and stride information + * @param[in] desc Contains the direct/indirect convolution compute arguments, such as the tiling dimensions + * + * @return the calculated shape + */ +inline TensorShape 
compute_indirect_buffer_shape(const TensorShape &input_shape, + DataLayout input_data_layout, + const TensorShape &weights_shape, + const PadStrideInfo &conv_info, + const DirectConvComputeKernelInfo &desc) +{ + ARM_COMPUTE_ERROR_ON_MSG(input_data_layout != DataLayout::NHWC, "The data layout can only be NHWC"); + ARM_COMPUTE_ERROR_ON_MSG(desc.m0 <= 0 || desc.m0 > 8, "M0 can only be greater than 0 and less than or equal to 8"); + + const unsigned int m0 = desc.m0; + const unsigned int kw = weights_shape[1]; + const unsigned int kh = weights_shape[2]; + + TensorShape output_conv2d_shape = + compute_deep_convolution_shape(input_shape, input_data_layout, weights_shape, conv_info); + + const unsigned int output_w = m0 * kw * kh; + const unsigned int output_h = DIV_CEIL(output_conv2d_shape[1] * output_conv2d_shape[2], m0); + const unsigned int output_b = output_conv2d_shape[3]; + + return TensorShape(output_w, output_h, output_b); +} + /** Calculate the min/max shape output shape of a tensor * * @param[in] input Input tensor info @@ -742,7 +863,7 @@ inline TensorShape compute_deep_convolution_shape(const ITensorInfo &input, cons */ inline TensorShape compute_min_max_shape(const ITensorInfo *input) { - TensorShape output_shape{ input->tensor_shape() }; + TensorShape output_shape{input->tensor_shape()}; output_shape.set(Window::DimX, 2); output_shape.remove_dimension(1); output_shape.remove_dimension(1); @@ -762,7 +883,7 @@ inline TensorShape compute_pool_shape(const ITensorInfo &input, PoolingLayerInfo int pooled_w = 0; int pooled_h = 0; - TensorShape output_shape{ input.tensor_shape() }; + TensorShape output_shape{input.tensor_shape()}; const bool is_global_pooling = pool_info.is_global_pooling; const int idx_width = get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::WIDTH); @@ -772,9 +893,8 @@ inline TensorShape compute_pool_shape(const ITensorInfo &input, PoolingLayerInfo const int pool_size_x = is_global_pooling ? 
output_shape[idx_width] : pool_info.pool_size.width; const int pool_size_y = is_global_pooling ? output_shape[idx_height] : pool_info.pool_size.height; - std::tie(pooled_w, pooled_h) = scaled_dimensions_signed(input_width, input_height, - pool_size_x, pool_size_y, - pool_info.pad_stride_info); + std::tie(pooled_w, pooled_h) = + scaled_dimensions_signed(input_width, input_height, pool_size_x, pool_size_y, pool_info.pad_stride_info); ARM_COMPUTE_ERROR_ON_MSG((pooled_w < 1 || pooled_h < 1), "Calculated output dimension size is invalid"); @@ -807,8 +927,10 @@ inline TensorShape compute_unpool_shape(const ITensorInfo &input, PoolingLayerIn const int pad_bottom = pad_stride_info.pad_bottom(); TensorShape output_shape = input_shape; - const unsigned int out_width = (input_shape[idx_width] - 1) * stride_x - pad_left - pad_right + pool_info.pool_size.width; - const unsigned int out_height = (input_shape[idx_height] - 1) * stride_y - pad_top - pad_bottom + pool_info.pool_size.height; + const unsigned int out_width = + (input_shape[idx_width] - 1) * stride_x - pad_left - pad_right + pool_info.pool_size.width; + const unsigned int out_height = + (input_shape[idx_height] - 1) * stride_y - pad_top - pad_bottom + pool_info.pool_size.height; output_shape.set(idx_width, out_width); output_shape.set(idx_height, out_height); @@ -823,9 +945,10 @@ inline TensorShape compute_unpool_shape(const ITensorInfo &input, PoolingLayerIn * * @return the calculated shape */ -inline TensorShape compute_roi_align_shape(const ITensorInfo &input, const ITensorInfo &rois, ROIPoolingLayerInfo pool_info) +inline TensorShape +compute_roi_align_shape(const ITensorInfo &input, const ITensorInfo &rois, ROIPoolingLayerInfo pool_info) { - TensorShape output_shape{ input.tensor_shape() }; + TensorShape output_shape{input.tensor_shape()}; const unsigned int idx_width = get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::WIDTH); const unsigned int idx_height = 
get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::HEIGHT); @@ -846,7 +969,7 @@ inline TensorShape compute_roi_align_shape(const ITensorInfo &input, const ITens */ inline TensorShape compute_rnn_shape(const ITensorInfo *input, const unsigned int batch_size) { - TensorShape output_shape{ input->tensor_shape() }; + TensorShape output_shape{input->tensor_shape()}; output_shape.set(1, batch_size); return output_shape; @@ -861,15 +984,21 @@ inline TensorShape compute_rnn_shape(const ITensorInfo *input, const unsigned in * * @return the calculated shape */ -inline TensorShape compute_mm_shape(const ITensorInfo &input0, const ITensorInfo &input1, bool is_interleaved_transposed, const GEMMReshapeInfo &reshape_info) +inline TensorShape compute_mm_shape(const ITensorInfo &input0, + const ITensorInfo &input1, + bool is_interleaved_transposed, + const GEMMReshapeInfo &reshape_info) { ARM_COMPUTE_ERROR_ON_MSG(input0.num_dimensions() > 4, "The number of dimensions for the matrix A must be <= 4"); - ARM_COMPUTE_ERROR_ON_MSG(is_interleaved_transposed && reshape_info.reinterpret_input_as_3d(), "The first input tensor cannot be reinterpreted as 3D if is_interleaved_transposed is true"); + ARM_COMPUTE_ERROR_ON_MSG( + is_interleaved_transposed && reshape_info.reinterpret_input_as_3d(), + "The first input tensor cannot be reinterpreted as 3D if is_interleaved_transposed is true"); const bool reinterpret_input_as_3d = reshape_info.reinterpret_input_as_3d(); const bool reinterpret_output_as_3d = reshape_info.depth_output_gemm3d() != 0; const int depth_output_gemm3d = reinterpret_output_as_3d ? reshape_info.depth_output_gemm3d() : 1; - const int m = reshape_info.reinterpret_input_as_3d() ? input0.dimension(1) * input0.dimension(2) : input0.dimension(1); + const int m = + reshape_info.reinterpret_input_as_3d() ? 
input0.dimension(1) * input0.dimension(2) : input0.dimension(1); // If the output of GEMM has to be reinterpreted as 3D, the number of input0 rows (M) is obtained collapsing the second and third // dimension of the output tensor @@ -878,7 +1007,7 @@ inline TensorShape compute_mm_shape(const ITensorInfo &input0, const ITensorInfo const int dim2 = reinterpret_input_as_3d ? input0.tensor_shape()[3] : input0.tensor_shape()[2]; const int dim3 = reinterpret_input_as_3d ? 1 : input0.tensor_shape()[3]; - TensorShape output_shape{ input0.tensor_shape() }; + TensorShape output_shape{input0.tensor_shape()}; output_shape.set(0, dim0); output_shape.set(1, dim1); @@ -897,7 +1026,8 @@ inline TensorShape compute_mm_shape(const ITensorInfo &input0, const ITensorInfo * * @return the calculated shape */ -inline TensorShape compute_mm_shape(const ITensorInfo &input0, const ITensorInfo &input1, const GEMMReshapeInfo &gemm_info) +inline TensorShape +compute_mm_shape(const ITensorInfo &input0, const ITensorInfo &input1, const GEMMReshapeInfo &gemm_info) { ARM_COMPUTE_UNUSED(input1); ARM_COMPUTE_ERROR_ON_MSG(input0.num_dimensions() > 4, "The number of dimensions for the matrix A must be <= 4"); @@ -906,9 +1036,9 @@ inline TensorShape compute_mm_shape(const ITensorInfo &input0, const ITensorInfo const bool reinterpret_output_as_3d = gemm_info.depth_output_gemm3d() != 0; const int depth_output_gemm3d = reinterpret_output_as_3d ? 
gemm_info.depth_output_gemm3d() : 1; - TensorShape output_shape{ input0.tensor_shape() }; + TensorShape output_shape{input0.tensor_shape()}; - if(!reinterpret_input_as_3d && !reinterpret_output_as_3d) + if (!reinterpret_input_as_3d && !reinterpret_output_as_3d) { output_shape.set(0, gemm_info.n()); output_shape.set(1, gemm_info.m()); @@ -935,7 +1065,8 @@ inline TensorShape compute_mm_shape(const ITensorInfo &input0, const ITensorInfo * * @return the calculated shape */ -inline TensorShape compute_mm_shape(const ITensorInfo &input0, const ITensorInfo &input1, const GEMMKernelInfo &gemm_info) +inline TensorShape +compute_mm_shape(const ITensorInfo &input0, const ITensorInfo &input1, const GEMMKernelInfo &gemm_info) { ARM_COMPUTE_UNUSED(input1); ARM_COMPUTE_ERROR_ON_MSG(input0.num_dimensions() > 4, "The number of dimensions for the matrix A must be <= 4"); @@ -944,9 +1075,9 @@ inline TensorShape compute_mm_shape(const ITensorInfo &input0, const ITensorInfo const bool reinterpret_output_as_3d = gemm_info.depth_output_gemm3d != 0; const unsigned int depth_output_gemm3d = reinterpret_output_as_3d ? 
gemm_info.depth_output_gemm3d : 1; - TensorShape output_shape{ input0.tensor_shape() }; + TensorShape output_shape{input0.tensor_shape()}; - if(!reinterpret_input_as_3d && !reinterpret_output_as_3d) + if (!reinterpret_input_as_3d && !reinterpret_output_as_3d) { output_shape.set(0, gemm_info.n); output_shape.set(1, gemm_info.m); @@ -967,20 +1098,50 @@ inline TensorShape compute_mm_shape(const ITensorInfo &input0, const ITensorInfo /** Calculate the matrix multiplication output shape of two tensors * + * @param[in] input0 First input tensor info + * @param[in] input1 Second input tensor info + * @param[in] matmul_info Batch MatMul Kernel info to know which matrix is transposed + * + * @return the calculated shape + */ +inline TensorShape +compute_matmul_shape(const TensorShape &input0, const TensorShape &input1, const MatMulKernelInfo &matmul_info) +{ + TensorShape output_shape{input0}; + + if (matmul_info.adj_lhs) + { + output_shape.set(1, input0[0]); // The vertical (M) dimension + } + + if (matmul_info.adj_rhs) + { + output_shape.set(0, input1[1]); // The horizontal (N) dimension + } + else + { + output_shape.set(0, input1[0]); // The horizontal (N) dimension + } + + return output_shape; +} +/** Calculate the matrix multiplication output shape of two tensors + * * @param[in] input Input tensor info * @param[in] gemm_3d_depth (Optional) GEMM 3d depth * @param[in] batch_size_on_z (Optional) True if batch size is on z axis * * @return the calculated shape */ -inline TensorShape compute_output_stage_shape(const ITensorInfo &input, unsigned int gemm_3d_depth = 1, bool batch_size_on_z = false) +inline TensorShape +compute_output_stage_shape(const ITensorInfo &input, unsigned int gemm_3d_depth = 1, bool batch_size_on_z = false) { ARM_COMPUTE_ERROR_ON(input.data_layout() != DataLayout::NHWC && gemm_3d_depth > 1); TensorShape output_shape = input.tensor_shape(); - if(gemm_3d_depth > 1) + if (gemm_3d_depth > 1) { - if(batch_size_on_z) + if (batch_size_on_z) { 
output_shape.shift_right(1); } @@ -1005,11 +1166,16 @@ inline TensorShape compute_output_stage_shape(const ITensorInfo &input, unsigned * @return the calculated shape */ inline TensorShape compute_strided_slice_shape(const ITensorInfo &input, - const Coordinates &starts, const Coordinates &ends, const Coordinates &strides, - int32_t begin_mask, int32_t end_mask, int32_t shrink_axis_mask) + const Coordinates &starts, + const Coordinates &ends, + const Coordinates &strides, + int32_t begin_mask, + int32_t end_mask, + int32_t shrink_axis_mask) { using namespace arm_compute::helpers::tensor_transform; - return compute_strided_slice_output_shape(input.tensor_shape(), starts, ends, strides, begin_mask, end_mask, shrink_axis_mask); + return compute_strided_slice_output_shape(input.tensor_shape(), starts, ends, strides, begin_mask, end_mask, + shrink_axis_mask); } /** Calculate the slice output shape of a tensor @@ -1020,36 +1186,48 @@ inline TensorShape compute_strided_slice_shape(const ITensorInfo &input, * * @return the calculated shape */ -inline TensorShape compute_slice_shape(const TensorShape &input_shape, const Coordinates &starts, const Coordinates &ends) +inline TensorShape +compute_slice_shape(const TensorShape &input_shape, const Coordinates &starts, const Coordinates &ends) { using namespace arm_compute::helpers::tensor_transform; - return compute_strided_slice_output_shape(input_shape, - starts, ends, BiStrides(), - 0, construct_slice_end_mask(ends), 0); + return compute_strided_slice_output_shape(input_shape, starts, ends, BiStrides(), 0, construct_slice_end_mask(ends), + 0); } /** Calculate the batch to space output shape of a tensor * - * @param[in] input Input tensor info - * @param[in] block_x Block shape x value - * @param[in] block_y Block shape y value + * @param[in] data_layout Data layout + * @param[in] input Input tensor shape + * @param[in] block_x Block shape x value + * @param[in] block_y Block shape y value + * @param[in] crop_info Information 
about how the output shape is cropped after batch to space is performed * * @return the calculated shape */ -inline TensorShape compute_batch_to_space_shape(const ITensorInfo *input, const int block_x, const int block_y) +inline TensorShape compute_batch_to_space_shape( + DataLayout data_layout, const TensorShape &input, int block_x, int block_y, const CropInfo &crop_info = CropInfo{}) { - ARM_COMPUTE_ERROR_ON(block_x <= 0 || block_y <= 0); + ARM_COMPUTE_ERROR_ON(block_x < 1 || block_y < 1); - const DataLayout data_layout = input->data_layout(); - const int idx_width = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); - const int idx_height = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); - const int idx_batch = get_data_layout_dimension_index(data_layout, DataLayoutDimension::BATCHES); + const int idx_width = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); + const int idx_height = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); + const int idx_batch = get_data_layout_dimension_index(data_layout, DataLayoutDimension::BATCHES); + + TensorShape output_shape{input}; + + unsigned int new_width = input[idx_width] * static_cast<unsigned int>(block_x); + unsigned int new_height = input[idx_height] * static_cast<unsigned int>(block_y); + const unsigned int width_crop = crop_info.left + crop_info.right; + const unsigned int height_crop = crop_info.top + crop_info.bottom; + ARM_COMPUTE_ERROR_ON(new_width <= width_crop); + ARM_COMPUTE_ERROR_ON(new_height <= height_crop); + new_width -= width_crop; + new_height -= height_crop; - TensorShape output_shape{ input->tensor_shape() }; - output_shape.set(idx_width, input->tensor_shape()[idx_width] * block_x); - output_shape.set(idx_height, input->tensor_shape()[idx_height] * block_y); - output_shape.set(idx_batch, input->tensor_shape()[idx_batch] / (block_x * block_y)); + output_shape.set(idx_width, new_width); + 
output_shape.set(idx_height, new_height); + output_shape.set(idx_batch, input[idx_batch] / (block_x * block_y)); return output_shape; } @@ -1070,7 +1248,7 @@ inline TensorShape compute_depth_to_space_shape(const TensorShape &input_shape, const int idx_height = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); const int idx_channel = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL); - TensorShape output_shape{ input_shape }; + TensorShape output_shape{input_shape}; output_shape.set(idx_width, input_shape[idx_width] * block); output_shape.set(idx_height, input_shape[idx_height] * block); output_shape.set(idx_channel, input_shape[idx_channel] / (block * block)); @@ -1091,10 +1269,10 @@ inline TensorShape compute_split_shape(const ITensorInfo *input, unsigned int ax TensorShape empty_shape; empty_shape.set(0, 0); - TensorShape out_shape{ input->tensor_shape() }; + TensorShape out_shape{input->tensor_shape()}; // Return empty shape if axis is invalid - if(axis > input->tensor_shape().num_dimensions()) + if (axis > input->tensor_shape().num_dimensions()) { return empty_shape; } @@ -1102,7 +1280,7 @@ inline TensorShape compute_split_shape(const ITensorInfo *input, unsigned int ax size_t axis_size = out_shape[axis]; // Return empty shape if num_split is not valid - if(axis_size % num_splits) + if (axis_size % num_splits) { return empty_shape; } @@ -1121,9 +1299,10 @@ inline TensorShape compute_split_shape(const ITensorInfo *input, unsigned int ax * * @return the calculated shape */ -inline TensorShape compute_space_to_batch_shape(const ITensorInfo *input, const int block_x, const int block_y, const Size2D &padding_left, const Size2D &padding_right) +inline TensorShape compute_space_to_batch_shape( + const ITensorInfo *input, int block_x, int block_y, const Size2D &padding_left, const Size2D &padding_right) { - TensorShape output_shape{ input->tensor_shape() }; + TensorShape output_shape{input->tensor_shape()}; const 
DataLayout data_layout = input->data_layout(); const int idx_width = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); @@ -1149,16 +1328,16 @@ inline TensorShape compute_space_to_batch_shape(const ITensorInfo *input, const */ inline TensorShape compute_space_to_depth_shape(const ITensorInfo *input, int32_t block_shape) { - TensorShape output_shape{ input->tensor_shape() }; + TensorShape output_shape{input->tensor_shape()}; const DataLayout data_layout = input->data_layout(); const int idx_width = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); const int idx_height = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); const int idx_depth = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL); - output_shape.set(idx_width, input->tensor_shape()[idx_width] * block_shape); - output_shape.set(idx_height, input->tensor_shape()[idx_height] * block_shape); - output_shape.set(idx_depth, input->tensor_shape()[idx_depth] / (block_shape * block_shape)); + output_shape.set(idx_width, input->tensor_shape()[idx_width] / block_shape); + output_shape.set(idx_height, input->tensor_shape()[idx_height] / block_shape); + output_shape.set(idx_depth, input->tensor_shape()[idx_depth] * (block_shape * block_shape)); return output_shape; } @@ -1194,7 +1373,7 @@ inline TensorShape compute_prior_box_shape(const ITensorInfo &input, const Prior inline TensorShape compute_padded_shape(const TensorShape &input_shape, const PaddingList &padding) { TensorShape padded_shape = input_shape; - for(size_t dim = 0; dim < padding.size(); ++dim) + for (size_t dim = 0; dim < padding.size(); ++dim) { const auto &padding_pair = padding[dim]; const uint32_t shape_on_index = (padded_shape.num_dimensions() <= dim) ? 
1 : input_shape[dim]; @@ -1213,7 +1392,7 @@ inline TensorShape compute_padded_shape(const TensorShape &input_shape, const Pa inline TensorShape compute_tiled_shape(const TensorShape &input_shape, const Multiples &multiples) { TensorShape tiled_shape = input_shape; - for(size_t dim = 0; dim < multiples.size(); ++dim) + for (size_t dim = 0; dim < multiples.size(); ++dim) { tiled_shape.set(dim, input_shape[dim] * multiples[dim]); } @@ -1230,9 +1409,9 @@ inline TensorShape compute_tiled_shape(const TensorShape &input_shape, const Mul */ inline TensorShape compute_reduced_shape(const TensorShape &input, unsigned int axis, bool keep_dims = true) { - TensorShape output_shape{ input }; + TensorShape output_shape{input}; - if(!keep_dims) + if (!keep_dims) { output_shape.remove_dimension(axis); } @@ -1325,14 +1504,14 @@ inline TensorShape calculate_concatenate_shape(const std::vector<T *> &input, si #if defined(ARM_COMPUTE_ASSERTS_ENABLED) // All dimensions must match except the axis one - for(unsigned int i = 0; i < MAX_DIMS; ++i) + for (unsigned int i = 0; i < MAX_DIMS; ++i) { - if(i == axis) + if (i == axis) { continue; } - for(const auto &tensor : input) + for (const auto &tensor : input) { ARM_COMPUTE_ERROR_ON(tensor == nullptr); const TensorShape shape = extract_shape(tensor); @@ -1343,7 +1522,7 @@ inline TensorShape calculate_concatenate_shape(const std::vector<T *> &input, si // Calculate output shape size_t new_size = 0; - for(const auto &tensor : input) + for (const auto &tensor : input) { const TensorShape shape = extract_shape(tensor); new_size += shape[axis]; @@ -1366,14 +1545,14 @@ inline TensorShape compute_stack_shape(const ITensorInfo &a, unsigned int axis, ARM_COMPUTE_ERROR_ON(axis > a.num_dimensions()); ARM_COMPUTE_ERROR_ON(a.num_dimensions() > 4); - TensorShape shape_out{ a.tensor_shape() }; + TensorShape shape_out{a.tensor_shape()}; shape_out.set(axis, num_tensors); unsigned int i_shift = 0; - for(unsigned int i = 0; i < a.num_dimensions(); ++i) + for 
(unsigned int i = 0; i < a.num_dimensions(); ++i) { - if(i == axis) + if (i == axis) { i_shift++; } @@ -1383,18 +1562,177 @@ inline TensorShape compute_stack_shape(const ITensorInfo &a, unsigned int axis, return shape_out; } -inline TensorShape compute_gather_shape(const TensorShape &input_shape, const TensorShape &indices_shape, uint32_t actual_axis) +/** Calculate the output shape of 3d Convolution + * + * @param[in] src Input tensor shape + * @param[in] weights Weights tensor shape + * @param[in] conv3d_info 3d Convolution Parameters object + * + * @return the calculated shape + */ +inline TensorShape +compute_conv3d_shape(const TensorShape &src, const TensorShape &weights, const Conv3dInfo &conv3d_info) { - ARM_COMPUTE_ERROR_ON(indices_shape.num_dimensions() > 1); - ARM_COMPUTE_ERROR_ON(input_shape.num_dimensions() > 4); - ARM_COMPUTE_ERROR_ON(actual_axis >= input_shape.num_dimensions()); + // Weight tensor shape indices (D H W Cin Cout) + constexpr unsigned int weights_depth_dim = 4u; + constexpr unsigned int weights_height_dim = 3u; + constexpr unsigned int weights_width_dim = 2u; + constexpr unsigned int weights_CHout_dim = 0u; + + // Source/Destination Tensor shape indices (N D H W C) + constexpr unsigned int batch_dim = 4u; + constexpr unsigned int depth_dim = 3u; + constexpr unsigned int height_dim = 2u; + constexpr unsigned int width_dim = 1u; + constexpr unsigned int channel_dim = 0u; + + TensorShape output_shape{src}; + const size_t pad_left = conv3d_info.padding.left; + const size_t pad_right = conv3d_info.padding.right; + const size_t pad_top = conv3d_info.padding.top; + const size_t pad_bottom = conv3d_info.padding.bottom; + const size_t pad_front = conv3d_info.padding.front; + const size_t pad_back = conv3d_info.padding.back; + const size_t dilation_x = conv3d_info.dilation.width; + const size_t dilation_y = conv3d_info.dilation.height; + const size_t dilation_z = conv3d_info.dilation.depth; + const size_t stride_x = conv3d_info.stride.x(); + const 
size_t stride_y = conv3d_info.stride.y(); + const size_t stride_z = conv3d_info.stride.z(); + + int output_width_size = 0; + int output_height_size = 0; + int output_depth_size = 0; + + switch (conv3d_info.round_type) + { + case DimensionRoundingType::FLOOR: + output_width_size = + static_cast<int>(std::floor((static_cast<float>(src[width_dim] + pad_left + pad_right - + (dilation_x * (weights[weights_width_dim] - 1) + 1)) / + stride_x) + + 1)); + output_height_size = + static_cast<int>(std::floor((static_cast<float>(src[height_dim] + pad_top + pad_bottom - + (dilation_y * (weights[weights_height_dim] - 1) + 1)) / + stride_y) + + 1)); + output_depth_size = + static_cast<int>(std::floor((static_cast<float>(src[depth_dim] + pad_front + pad_back - + (dilation_z * (weights[weights_depth_dim] - 1) + 1)) / + stride_z) + + 1)); + break; + case DimensionRoundingType::CEIL: + output_width_size = + static_cast<int>(std::ceil((static_cast<float>(src[width_dim] + pad_left + pad_right - + (dilation_x * (weights[weights_width_dim] - 1) + 1)) / + stride_x) + + 1)); + output_height_size = + static_cast<int>(std::ceil((static_cast<float>(src[height_dim] + pad_top + pad_bottom - + (dilation_y * (weights[weights_height_dim] - 1) + 1)) / + stride_y) + + 1)); + output_depth_size = + static_cast<int>(std::ceil((static_cast<float>(src[depth_dim] + pad_front + pad_back - + (dilation_z * (weights[weights_depth_dim] - 1) + 1)) / + stride_z) + + 1)); + break; + default: + ARM_COMPUTE_ERROR("Unsupported rounding type"); + } + + output_shape.set(batch_dim, src[batch_dim]); + output_shape.set(width_dim, output_width_size); + output_shape.set(height_dim, output_height_size); + output_shape.set(depth_dim, output_depth_size); + output_shape.set(channel_dim, weights[weights_CHout_dim]); + return output_shape; +} + +/** Calculate the output pool3d shape of a tensor + * + * @param[in] src Input tensor info + * @param[in] pool3d_info Pooling layer info + * + * @return the calculated shape + */ +inline 
TensorShape compute_pool3d_shape(const TensorShape &src, Pooling3dLayerInfo pool3d_info) +{ + TensorShape output_shape{src}; + + const auto data_layout = DataLayout::NDHWC; + const int idx_width = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); + const int idx_height = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); + const int idx_depth = get_data_layout_dimension_index(data_layout, DataLayoutDimension::DEPTH); + const int pool_size_width = pool3d_info.is_global_pooling ? src[idx_width] : pool3d_info.pool_size.width; + const int pool_size_height = pool3d_info.is_global_pooling ? src[idx_height] : pool3d_info.pool_size.height; + const int pool_size_depth = pool3d_info.is_global_pooling ? src[idx_depth] : pool3d_info.pool_size.depth; + int output_width = 0; + int output_height = 0; + int output_depth = 0; + + std::tie(output_width, output_height, output_depth) = + scaled_3d_dimensions_signed(src[idx_width], src[idx_height], src[idx_depth], pool_size_width, pool_size_height, + pool_size_depth, pool3d_info); + + ARM_COMPUTE_ERROR_ON_MSG((output_width < 1 || output_height < 1 || output_depth < 1), + "Calculated output dimension size is invalid"); + + output_shape.set(idx_width, static_cast<size_t>(output_width)); + output_shape.set(idx_height, static_cast<size_t>(output_height)); + output_shape.set(idx_depth, static_cast<size_t>(output_depth)); + + return output_shape; +} + +/** Calculate the gather output shape of a tensor + * + * @param[in] input_shape Input tensor shape + * @param[in] indices_shape Indices tensor shape. 
Only supports for 2d and 3d indices + * @param[in] actual_axis Axis to be used in the computation + * + * @note Let input_shape be (X,Y,Z) and indices shape (W,O,P) and axis 1 + * the new shape is computed by replacing the axis in the input shape with + * the indice shape so the output shape will be (X,W,O,P,Z) + * + * @return the calculated shape + */ +inline TensorShape +compute_gather_shape(const TensorShape &input_shape, const TensorShape &indices_shape, uint32_t actual_axis) +{ + const auto input_num_dims = input_shape.num_dimensions(); + const auto indices_num_dims = indices_shape.num_dimensions(); + + ARM_COMPUTE_ERROR_ON(actual_axis >= input_num_dims); + ARM_COMPUTE_ERROR_ON(input_num_dims + indices_num_dims - 1 > Coordinates::num_max_dimensions); + + TensorShape output_shape; + size_t dim_no = 0; + + for (; dim_no < actual_axis; ++dim_no) + { + output_shape.set(dim_no, input_shape[dim_no]); + } + + for (; dim_no < actual_axis + indices_num_dims; ++dim_no) + { + output_shape.set(dim_no, indices_shape[dim_no - actual_axis]); + } + + for (; dim_no < input_num_dims + indices_num_dims - 1; ++dim_no) + { + output_shape.set(dim_no, input_shape[dim_no + 1 - indices_num_dims]); + } - TensorShape output_shape = input_shape; - output_shape[actual_axis] = indices_shape[0]; + ARM_COMPUTE_ERROR_ON(input_shape.total_size() * indices_shape.total_size() != + output_shape.total_size() * input_shape[actual_axis]); return output_shape; } } // namespace shape_calculator } // namespace misc } // namespace arm_compute -#endif /* ARM_COMPUTE_MISC_SHAPE_CALCULATOR_H */ +#endif // ACL_ARM_COMPUTE_CORE_UTILS_MISC_SHAPECALCULATOR_H diff --git a/arm_compute/core/utils/misc/Traits.h b/arm_compute/core/utils/misc/Traits.h index 933922f63c..944fcb95f9 100644 --- a/arm_compute/core/utils/misc/Traits.h +++ b/arm_compute/core/utils/misc/Traits.h @@ -25,6 +25,7 @@ #define ARM_COMPUTE_UTILS_TRAITS_TRAITS_H #include "arm_compute/core/Types.h" + #include <type_traits> namespace arm_compute diff 
--git a/arm_compute/core/utils/misc/Utility.h b/arm_compute/core/utils/misc/Utility.h index 648758ca07..22f10d74cc 100644 --- a/arm_compute/core/utils/misc/Utility.h +++ b/arm_compute/core/utils/misc/Utility.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021 Arm Limited. + * Copyright (c) 2017-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -28,6 +28,7 @@ #include <algorithm> #include <array> +#include <cstdint> #include <limits> #include <numeric> #include <vector> @@ -43,7 +44,7 @@ struct index_sequence }; template <std::size_t N, std::size_t... S> -struct index_sequence_generator : index_sequence_generator < N - 1, N - 1, S... > +struct index_sequence_generator : index_sequence_generator<N - 1, N - 1, S...> { }; @@ -57,17 +58,17 @@ template <std::size_t N> using index_sequence_t = typename index_sequence_generator<N>::type; template <typename T, std::size_t N, T val, T... vals> -struct generate_array : generate_array < T, N - 1, val, val, vals... > +struct generate_array : generate_array<T, N - 1, val, val, vals...> { }; template <typename T, T val, T... vals> struct generate_array<T, 0, val, vals...> { - static constexpr std::array<T, sizeof...(vals)> value{ vals... }; + static constexpr std::array<T, sizeof...(vals)> value{vals...}; }; -template <typename T, T val, T... vals> +template <typename T, T val, T... vals> constexpr std::array<T, sizeof...(vals)> generate_array<T, 0, val, vals...>::value; /** @endcond */ @@ -78,7 +79,7 @@ template <std::size_t... S, typename T = std::array<typename std::iterator_traits<Iterator>::value_type, sizeof...(S)>> T make_array(Iterator first, index_sequence<S...>) { - return T{ { first[S]... 
} }; + return T{{first[S]...}}; } } // namespace detail @@ -86,7 +87,7 @@ template <std::size_t N, typename Iterator> std::array<typename std::iterator_traits<Iterator>::value_type, N> make_array(Iterator first, Iterator last) { ARM_COMPUTE_UNUSED(last); - return detail::make_array(first, index_sequence_t<N> {}); + return detail::make_array(first, index_sequence_t<N>{}); } /** Performs clamping among a lower and upper value. @@ -118,7 +119,7 @@ inline void for_each(F &&) * @param[in] args Remaining arguments */ template <typename F, typename T, typename... Ts> -inline void for_each(F &&func, T &&arg, Ts &&... args) +inline void for_each(F &&func, T &&arg, Ts &&...args) { func(std::forward<T>(arg)); for_each(std::forward<F>(func), std::forward<Ts>(args)...); @@ -142,9 +143,11 @@ inline T &&foldl(F &&, T &&value) * @param[in] values Remaining arguments */ template <typename F, typename T, typename U, typename... Us> -inline auto foldl(F &&func, T &&initial, U &&value, Us &&... values) -> decltype(func(std::forward<T>(initial), std::forward<U>(value))) +inline auto foldl(F &&func, T &&initial, U &&value, Us &&...values) + -> decltype(func(std::forward<T>(initial), std::forward<U>(value))) { - return foldl(std::forward<F>(func), func(std::forward<T>(initial), std::forward<U>(value)), std::forward<Us>(values)...); + return foldl(std::forward<F>(func), func(std::forward<T>(initial), std::forward<U>(value)), + std::forward<Us>(values)...); } /** Perform an index sort of a given vector. 
@@ -159,11 +162,7 @@ std::vector<size_t> sort_indices(const std::vector<T> &v) std::vector<size_t> idx(v.size()); std::iota(idx.begin(), idx.end(), 0); - std::sort(idx.begin(), idx.end(), - [&v](size_t i1, size_t i2) - { - return v[i1] < v[i2]; - }); + std::sort(idx.begin(), idx.end(), [&v](size_t i1, size_t i2) { return v[i1] < v[i2]; }); return idx; } @@ -177,7 +176,7 @@ std::vector<size_t> sort_indices(const std::vector<T> &v) */ inline bool endswith(const std::string &str, const std::string &suffix) { - if(str.size() < suffix.size()) + if (str.size() < suffix.size()) { return false; } @@ -204,10 +203,7 @@ inline bool check_aligned(void *ptr, const size_t alignment) */ inline std::string tolower(std::string string) { - std::transform(string.begin(), string.end(), string.begin(), [](unsigned char c) - { - return std::tolower(c); - }); + std::transform(string.begin(), string.end(), string.begin(), [](unsigned char c) { return std::tolower(c); }); return string; } @@ -226,7 +222,7 @@ inline std::string getenv(const std::string &env_name) return std::string{}; #else // BARE_METAL const auto env_chr = std::getenv(env_name.c_str()); - return env_chr == nullptr ? std::string{} : std::string{ env_chr }; + return env_chr == nullptr ? std::string{} : std::string{env_chr}; #endif // BARE_METAL } } // namespace utility diff --git a/arm_compute/core/utils/quantization/AsymmHelpers.h b/arm_compute/core/utils/quantization/AsymmHelpers.h index cbf7559bc9..2324fe1838 100644 --- a/arm_compute/core/utils/quantization/AsymmHelpers.h +++ b/arm_compute/core/utils/quantization/AsymmHelpers.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2021, 2023 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -41,7 +41,10 @@ namespace quantization * * @return a status */ -Status calculate_quantized_multiplier(float multiplier, int32_t *quant_multiplier, int32_t *shift, bool ignore_epsilon = false); +Status calculate_quantized_multiplier(float multiplier, + int32_t *quant_multiplier, + int32_t *shift, + bool ignore_epsilon = false); /** Calculate quantized representation of multiplier with value less than one. * * @param[in] multiplier Real multiplier. @@ -51,7 +54,10 @@ Status calculate_quantized_multiplier(float multiplier, int32_t *quant_multiplie * * @return a status */ -Status calculate_quantized_multiplier_less_than_one(float multiplier, int32_t *quant_multiplier, int32_t *right_shift, bool ignore_epsilon = false); +Status calculate_quantized_multiplier_less_than_one(float multiplier, + int32_t *quant_multiplier, + int32_t *right_shift, + bool ignore_epsilon = false); /** Calculate quantized representation of multiplier having value greater than one. * * @param[in] multiplier Real multiplier. 
@@ -60,7 +66,8 @@ Status calculate_quantized_multiplier_less_than_one(float multiplier, int32_t *q * * @return a status */ -Status calculate_quantized_multiplier_greater_than_one(float multiplier, int32_t *quantized_multiplier, int32_t *left_shift); +Status +calculate_quantized_multiplier_greater_than_one(float multiplier, int32_t *quantized_multiplier, int32_t *left_shift); /** Calculate quantized representation of per-channel multipliers * @@ -71,9 +78,9 @@ Status calculate_quantized_multiplier_greater_than_one(float multiplier, int32_t * * @return a status */ -Status calculate_quantized_multipliers(const QuantizationInfo &iq_info, - const QuantizationInfo &wq_info, - const QuantizationInfo &oq_info, +Status calculate_quantized_multipliers(const QuantizationInfo &iq_info, + const QuantizationInfo &wq_info, + const QuantizationInfo &oq_info, GEMMLowpOutputStageInfo &stage_info); /** Get minimum and maximum values for the input quantized data type @@ -81,6 +88,7 @@ Status calculate_quantized_multipliers(const QuantizationInfo &iq_info, * @return min and max values for the quantized data type */ std::pair<int, int> get_min_max_values_from_quantized_data_type(DataType data_type); + /** Compute quantized per-channel multipliers and shifts. As many multipliers * and shifts as output channels are computed. If weights are not quantized * per-channel, multipliers and shifts will end up being the same for each @@ -89,16 +97,12 @@ std::pair<int, int> get_min_max_values_from_quantized_data_type(DataType data_ty * @param[in] input Input tensor info. * @param[in] weights Weights tensor info. * @param[in] output Output tensor info. - * @param[in] idx_ofms Dimension index to get OFMs from the weights tensor. * @param[out] output_multipliers_ptr Pointer to the buffer where to store per-channel multipliers. * @param[out] output_shifts_ptr Pointer to the buffer where to store per-channel shifts. 
- * - * @return min and max values for the quantized data type */ void compute_quantized_multipliers_and_shifts(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *output, - unsigned int idx_ofms, int32_t *output_multipliers_ptr, int32_t *output_shifts_ptr); @@ -150,7 +154,10 @@ int32_t saturating_rounding_multiply_by_pow2(int32_t exponent, int32_t v); * @param[out] output_shift Shift for inverse square root * */ -void get_invsqrt_quantized_multiplier_exp(int32_t input, int32_t reverse_shift, int32_t &output_inv_sqrt, int32_t &output_shift); +void get_invsqrt_quantized_multiplier_exp(int32_t input, + int32_t reverse_shift, + int32_t &output_inv_sqrt, + int32_t &output_shift); } // namespace quantization } // namespace arm_compute |