diff options
Diffstat (limited to 'arm_compute/core/CPP')
-rw-r--r-- | arm_compute/core/CPP/CPPKernels.h | 5 | ||||
-rw-r--r-- | arm_compute/core/CPP/CPPTypes.h | 256 | ||||
-rw-r--r-- | arm_compute/core/CPP/ICPPKernel.h | 41 | ||||
-rw-r--r-- | arm_compute/core/CPP/ICPPSimpleKernel.h | 76 | ||||
-rw-r--r-- | arm_compute/core/CPP/Validate.h | 117 | ||||
-rw-r--r-- | arm_compute/core/CPP/kernels/CPPBoxWithNonMaximaSuppressionLimitKernel.h | 26 | ||||
-rw-r--r-- | arm_compute/core/CPP/kernels/CPPCornerCandidatesKernel.h | 80 | ||||
-rw-r--r-- | arm_compute/core/CPP/kernels/CPPDetectionWindowNonMaximaSuppressionKernel.h | 75 | ||||
-rw-r--r-- | arm_compute/core/CPP/kernels/CPPNonMaximumSuppressionKernel.h | 20 | ||||
-rw-r--r-- | arm_compute/core/CPP/kernels/CPPPermuteKernel.h | 10 | ||||
-rw-r--r-- | arm_compute/core/CPP/kernels/CPPSortEuclideanDistanceKernel.h | 73 | ||||
-rw-r--r-- | arm_compute/core/CPP/kernels/CPPTopKVKernel.h | 9 | ||||
-rw-r--r-- | arm_compute/core/CPP/kernels/CPPUpsampleKernel.h | 6 |
13 files changed, 193 insertions, 601 deletions
diff --git a/arm_compute/core/CPP/CPPKernels.h b/arm_compute/core/CPP/CPPKernels.h index c7b40baf22..f6f36596c4 100644 --- a/arm_compute/core/CPP/CPPKernels.h +++ b/arm_compute/core/CPP/CPPKernels.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 ARM Limited. + * Copyright (c) 2017-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -26,11 +26,8 @@ /* Header regrouping all the CPP kernels */ #include "arm_compute/core/CPP/kernels/CPPBoxWithNonMaximaSuppressionLimitKernel.h" -#include "arm_compute/core/CPP/kernels/CPPCornerCandidatesKernel.h" -#include "arm_compute/core/CPP/kernels/CPPDetectionWindowNonMaximaSuppressionKernel.h" #include "arm_compute/core/CPP/kernels/CPPNonMaximumSuppressionKernel.h" #include "arm_compute/core/CPP/kernels/CPPPermuteKernel.h" -#include "arm_compute/core/CPP/kernels/CPPSortEuclideanDistanceKernel.h" #include "arm_compute/core/CPP/kernels/CPPTopKVKernel.h" #include "arm_compute/core/CPP/kernels/CPPUpsampleKernel.h" diff --git a/arm_compute/core/CPP/CPPTypes.h b/arm_compute/core/CPP/CPPTypes.h index d3f6fc944d..c97751bc0c 100644 --- a/arm_compute/core/CPP/CPPTypes.h +++ b/arm_compute/core/CPP/CPPTypes.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2022, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,111 +21,123 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_CPP_TYPES_H -#define ARM_COMPUTE_CPP_TYPES_H +#ifndef ACL_ARM_COMPUTE_CORE_CPP_CPPTYPES_H +#define ACL_ARM_COMPUTE_CORE_CPP_CPPTYPES_H #include "arm_compute/core/Error.h" -#include <array> -#include <string> -#include <vector> +#include <memory> namespace arm_compute { -/** CPU models - we only need to detect CPUs we have - * microarchitecture-specific code for. - * - * Architecture features are detected via HWCAPs. - */ -enum class CPUModel +namespace cpuinfo { - GENERIC, - GENERIC_FP16, - GENERIC_FP16_DOT, - A53, - A55r0, - A55r1 -}; - -/** Global memory policy. - * The functions in the runtime will use different strategies based on the policy currently set. - * - * MINIMIZE will try to reduce the amount allocated by the functions at the expense of performance normally. - * NORMAL won't try to save any memory and will favor speed over memory consumption +struct CpuIsaInfo; +} // namespace cpuinfo + +#define ARM_COMPUTE_CPU_MODEL_LIST \ + X(GENERIC) \ + X(GENERIC_FP16) \ + X(GENERIC_FP16_DOT) \ + X(A53) \ + X(A55r0) \ + X(A55r1) \ + X(A35) \ + X(A73) \ + X(A76) \ + X(A510) \ + X(X1) \ + X(V1) \ + X(A64FX) \ + X(N1) + +/** CPU models types * + * @note We only need to detect CPUs we have microarchitecture-specific code for. + * @note Architecture features are detected via HWCAPs. */ -enum class MemoryPolicy +enum class CPUModel { - MINIMIZE, - NORMAL +#define X(model) model, + ARM_COMPUTE_CPU_MODEL_LIST +#undef X }; -/** Convert a cpumodel value to a string - * - * @param val CPUModel value to be converted - * - * @return String representing the corresponding CPUModel. - */ -inline std::string cpu_model_to_string(CPUModel val) -{ - switch(val) - { - case CPUModel::GENERIC: - { - return std::string("GENERIC"); - } - case CPUModel::GENERIC_FP16: - { - return std::string("GENERIC_FP16"); - } - case CPUModel::GENERIC_FP16_DOT: - { - return std::string("GENERIC_FP16_DOT"); - } - case CPUModel::A53: - { - return std::string("A53"); - } - case CPUModel::A55r0: - { - return std::string("A55r0"); - } - case CPUModel::A55r1: - { - return std::string("A55r1"); - } - default: - { - ARM_COMPUTE_ERROR("Invalid CPUModel."); - return std::string("GENERIC"); - } - } -} - class CPUInfo final { -public: - /** Constructor */ +protected: CPUInfo(); + ~CPUInfo(); - /** Disable copy constructor and assignment operator to avoid copying the vector of CPUs each time - * CPUInfo is initialized once in the IScheduler and ThreadInfo will get a pointer to it. +public: + /** Access the KernelLibrary singleton. + * This method has been deprecated and will be removed in future releases + * @return The KernelLibrary instance. */ - CPUInfo &operator=(const CPUInfo &cpuinfo) = delete; - CPUInfo(const CPUInfo &cpuinfo) = delete; - CPUInfo &operator=(CPUInfo &&cpuinfo) = default; - CPUInfo(CPUInfo &&cpuinfo) = default; + static CPUInfo &get(); + + /* Delete move and copy constructors and assignment operator + s */ + CPUInfo(CPUInfo const &) = delete; // Copy construct + CPUInfo(CPUInfo &&) = delete; // Move construct + CPUInfo &operator=(CPUInfo const &) = delete; // Copy assign + CPUInfo &operator=(CPUInfo &&) = delete; // Move assign /** Checks if the cpu model supports fp16. * - * @return true of the cpu supports fp16, false otherwise + * @return true if the cpu supports fp16, false otherwise */ bool has_fp16() const; + /** Checks if the cpu model supports bf16. + * + * @return true if the cpu supports bf16, false otherwise + */ + bool has_bf16() const; + /** Checks if the cpu model supports bf16. + * + * @return true if the cpu supports bf16, false otherwise + */ + bool has_svebf16() const; /** Checks if the cpu model supports dot product. * - * @return true of the cpu supports dot product, false otherwise + * @return true if the cpu supports dot product, false otherwise */ bool has_dotprod() const; + /** Checks if the cpu model supports floating-point matrix multiplication. + * + * @return true if the cpu supports floating-point matrix multiplication, false otherwise + */ + bool has_svef32mm() const; + /** Checks if the cpu model supports integer matrix multiplication. + * + * @return true if the cpu supports integer matrix multiplication, false otherwise + */ + bool has_i8mm() const; + /** Checks if the cpu model supports integer matrix multiplication. + * + * @return true if the cpu supports integer matrix multiplication, false otherwise + */ + bool has_svei8mm() const; + /** Checks if the cpu model supports sve. + * + * @return true if the cpu supports sve, false otherwise + */ + bool has_sve() const; + /** Checks if the cpu model supports sve2. + * + * @return true if the cpu supports sve2, false otherwise + */ + bool has_sve2() const; + /** Checks if the cpu model supports sme. + * + * @return true if the cpu supports sme, false otherwise + */ + bool has_sme() const; + /** Checks if the cpu model supports sme2. + * + * @return true if the cpu supports sme2, false otherwise + */ + bool has_sme2() const; /** Gets the cpu model for a given cpuid. * * @param[in] cpuid the id of the cpu core to be retrieved, @@ -138,6 +150,11 @@ public: * @return Current thread's @ref CPUModel */ CPUModel get_cpu_model() const; + /** Gets the current cpu's ISA information + * + * @return Current cpu's ISA information + */ + cpuinfo::CpuIsaInfo get_isa() const; /** Gets the L1 cache size * * @return the size of the L1 cache @@ -148,85 +165,34 @@ public: * @return the size of the L1 cache */ unsigned int get_L2_cache_size() const; - /** Set the L1 cache size - * - * @param[in] size the new size to be set. - */ - void set_L1_cache_size(unsigned int size); - /** Set the L2 cache size - * - * @param[in] size the new size to be set. - */ - void set_L2_cache_size(unsigned int size); - /** Set fp16 support - * - * @param[in] fp16 whether the cpu supports fp16. - */ - void set_fp16(const bool fp16); - /** Set dot product support - * - * @param[in] dotprod whether the cpu supports dot product. - */ - void set_dotprod(const bool dotprod); - /** Set the cpumodel for a given cpu core - * - * @param[in] cpuid the id of the core to be set. - * @param[in] model the @ref CPUModel to be set. - */ - void set_cpu_model(unsigned int cpuid, CPUModel model); - /** Set max number of cpus - * - * @param[in] cpu_count the number of CPUs in the system. - */ - void set_cpu_num(unsigned int cpu_count); - /** Return the maximum number of CPUs present * * @return Number of CPUs */ unsigned int get_cpu_num() const; - -private: - std::vector<CPUModel> _percpu = {}; - bool _fp16 = false; - bool _dotprod = false; - unsigned int _L1_cache_size = 32768; - unsigned int _L2_cache_size = 262144; -}; - -class MEMInfo final -{ -public: - MEMInfo(); - - /** Return the total amount of RAM memory in the system expressed in KB. + /** Return the maximum number of CPUs present excluding the little cores + * in case of an Android device * - * @return Total memory + * @return Number of CPUs excluding little */ - size_t get_total_in_kb() const; - - static void set_policy(MemoryPolicy policy); - static MemoryPolicy get_policy(); - - /** Common memory sizes expressed in Kb to avoid having them - * duplicated throughout the code. + unsigned int get_cpu_num_excluding_little() const; + /** Return the vector length in bytes for sme2 + * + * @return Vector length if sme2 is enabled, otherwise returns 0. */ - static const size_t ONE_GB_IN_KB = { 1035842 }; - static const size_t TWO_GB_IN_KB = { ONE_GB_IN_KB * 2 }; + unsigned long get_sme2_vector_length() const; private: - size_t _total; - size_t _free; - size_t _buffer; - static MemoryPolicy _policy; + struct Impl; + std::unique_ptr<Impl> _impl; }; /** Information about executing thread and CPU. */ struct ThreadInfo { - int thread_id{ 0 }; - int num_threads{ 1 }; - const CPUInfo *cpu_info{ nullptr }; + int thread_id{0}; + int num_threads{1}; + const CPUInfo *cpu_info{nullptr}; }; } // namespace arm_compute -#endif /* ARM_COMPUTE_CPP_TYPES_H */ +#endif // ACL_ARM_COMPUTE_CORE_CPP_CPPTYPES_H diff --git a/arm_compute/core/CPP/ICPPKernel.h b/arm_compute/core/CPP/ICPPKernel.h index ec05af20bd..03967a536d 100644 --- a/arm_compute/core/CPP/ICPPKernel.h +++ b/arm_compute/core/CPP/ICPPKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020 ARM Limited. + * Copyright (c) 2016-2022 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -25,16 +25,21 @@ #define ARM_COMPUTE_ICPPKERNEL_H #include "arm_compute/core/CPP/CPPTypes.h" +#include "arm_compute/core/experimental/Types.h" #include "arm_compute/core/IKernel.h" +#include "arm_compute/core/Types.h" namespace arm_compute { class Window; +class ITensor; /** Common interface for all kernels implemented in C++ */ class ICPPKernel : public IKernel { public: + static constexpr size_t default_mws = 1; /* Default minimum workload size value - no impact */ + /** Default destructor */ virtual ~ICPPKernel() = default; @@ -51,8 +56,7 @@ public: */ virtual void run(const Window &window, const ThreadInfo &info) { - ARM_COMPUTE_UNUSED(window); - ARM_COMPUTE_UNUSED(info); + ARM_COMPUTE_UNUSED(window, info); ARM_COMPUTE_ERROR("default implementation of legacy run() virtual member function invoked"); } @@ -69,6 +73,37 @@ public: run(window, info); } + /** Execute the kernel on the passed window + * + * @warning If is_parallelisable() returns false then the passed window must be equal to window() + * + * @note The window has to be a region within the window returned by the window() method + * + * @note The width of the window has to be a multiple of num_elems_processed_per_iteration(). + * + * @param[in] tensors A vector containing the tensors to operate on. + * @param[in] window Region on which to execute the kernel. (Must be a region of the window returned by window()) + * @param[in] info Info about executing thread and CPU. + */ + virtual void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) + { + ARM_COMPUTE_UNUSED(tensors, window, info); + } + + /** Return minimum workload size of the relevant kernel + * + * @param[in] platform The CPU platform used to create the context. + * @param[in] thread_count Number of threads in the execution. + * + * @return Minimum workload size for requested configuration. + */ + virtual size_t get_mws(const CPUInfo &platform, size_t thread_count) const + { + ARM_COMPUTE_UNUSED(platform, thread_count); + + return default_mws; + } + /** Name of the kernel * * @return Kernel name diff --git a/arm_compute/core/CPP/ICPPSimpleKernel.h b/arm_compute/core/CPP/ICPPSimpleKernel.h deleted file mode 100644 index acdd054c0e..0000000000 --- a/arm_compute/core/CPP/ICPPSimpleKernel.h +++ /dev/null @@ -1,76 +0,0 @@ -/* - * Copyright (c) 2017-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_ICPPSIMPLEKERNEL_H -#define ARM_COMPUTE_ICPPSIMPLEKERNEL_H - -#include "arm_compute/core/CPP/ICPPKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** Interface for simple C++ kernels having 1 tensor input and 1 tensor output */ -class ICPPSimpleKernel : public ICPPKernel -{ -public: - /** Constructor */ - ICPPSimpleKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - ICPPSimpleKernel(const ICPPSimpleKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - ICPPSimpleKernel &operator=(const ICPPSimpleKernel &) = delete; - /** Allow instances of this class to be moved */ - ICPPSimpleKernel(ICPPSimpleKernel &&) = default; - /** Allow instances of this class to be moved */ - ICPPSimpleKernel &operator=(ICPPSimpleKernel &&) = default; - /** Default destructor */ - ~ICPPSimpleKernel() = default; - -protected: - /** Configure the kernel - * - * @param[in] input Source tensor. - * @param[out] output Destination tensor. - * @param[in] num_elems_processed_per_iteration Number of processed elements per iteration. - * @param[in] border_undefined (Optional) True if the border mode is undefined. False if it's replicate or constant. - * @param[in] border_size (Optional) Size of the border. - */ - void configure(const ITensor *input, ITensor *output, unsigned int num_elems_processed_per_iteration, bool border_undefined = false, const BorderSize &border_size = BorderSize()); - /** Static function to check if given info will lead to a valid configuration of @ref ICPPSimpleKernel. - * - * @param[in] input Source tensor info. - * @param[in] output Destination tensor info. - * @param[in] num_elems_processed_per_iteration Number of processed elements per iteration. - * @param[in] border_undefined (Optional) True if the border mode is undefined. False if it's replicate or constant. - * @param[in] border_size (Optional) Size of the border. - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, unsigned int num_elems_processed_per_iteration, - bool border_undefined = false, const BorderSize &border_size = BorderSize()); - -protected: - const ITensor *_input; - ITensor *_output; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_ICPPSIMPLEKERNEL_H */ diff --git a/arm_compute/core/CPP/Validate.h b/arm_compute/core/CPP/Validate.h deleted file mode 100644 index dfee9de86e..0000000000 --- a/arm_compute/core/CPP/Validate.h +++ /dev/null @@ -1,117 +0,0 @@ -/* - * Copyright (c) 2018-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CPP_VALIDATE_H -#define ARM_COMPUTE_CPP_VALIDATE_H - -#include "arm_compute/core/Validate.h" - -namespace arm_compute -{ -/** Return an error if the data type of the passed tensor info is FP16 and FP16 support is not compiled in. - * - * @param[in] function Function in which the error occurred. - * @param[in] file Name of the file where the error occurred. - * @param[in] line Line on which the error occurred. - * @param[in] tensor_info Tensor info to validate. - * - * @return Status - */ -inline Status error_on_unsupported_cpu_fp16(const char *function, const char *file, const int line, - const ITensorInfo *tensor_info) -{ - ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor_info == nullptr, function, file, line); -#ifndef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC - ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG(tensor_info->data_type() == DataType::F16, - function, file, line, "This CPU architecture does not support F16 data type, you need v8.2 or above"); -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ - return Status {}; -} - -/** Return an error if the data type of the passed tensor info is BFLOAT16 and BFLOAT16 support is not compiled in. - * - * @param[in] function Function in which the error occurred. - * @param[in] file Name of the file where the error occurred. - * @param[in] line Line on which the error occurred. - * @param[in] tensor_info Tensor info to validate. - * - * @return Status - */ -inline Status error_on_unsupported_cpu_bf16(const char *function, const char *file, const int line, - const ITensorInfo *tensor_info) -{ - ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor_info == nullptr, function, file, line); -#if !(defined(__ARM_FEATURE_BF16_VECTOR_ARITHMETIC) || defined(ARM_COMPUTE_FORCE_BF16)) - ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG(tensor_info->data_type() == DataType::BFLOAT16, - function, file, line, "This CPU architecture does not support BFloat16 data type, you need v8.6 or above"); -#endif /* !(defined(__ARM_FEATURE_BF16_VECTOR_ARITHMETIC) || defined(ARM_COMPUTE_FORCE_BF16)) */ - return Status {}; -} - -/** Return an error if the data type of the passed tensor is FP16 and FP16 support is not compiled in. - * - * @param[in] function Function in which the error occurred. - * @param[in] file Name of the file where the error occurred. - * @param[in] line Line on which the error occurred. - * @param[in] tensor Tensor to validate. - * - * @return Status - */ -inline Status error_on_unsupported_cpu_fp16(const char *function, const char *file, const int line, - const ITensor *tensor) -{ - ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor == nullptr, function, file, line); - ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_unsupported_cpu_fp16(function, file, line, tensor->info())); - return Status{}; -} - -/** Return an error if the data type of the passed tensor is BFLOAT16 and BFLOAT16 support is not compiled in. - * - * @param[in] function Function in which the error occurred. - * @param[in] file Name of the file where the error occurred. - * @param[in] line Line on which the error occurred. - * @param[in] tensor Tensor to validate. - * - * @return Status - */ -inline Status error_on_unsupported_cpu_bf16(const char *function, const char *file, const int line, - const ITensor *tensor) -{ - ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor == nullptr, function, file, line); - ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_unsupported_cpu_bf16(function, file, line, tensor->info())); - return Status{}; -} - -#define ARM_COMPUTE_ERROR_ON_CPU_F16_UNSUPPORTED(tensor) \ - ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_unsupported_cpu_fp16(__func__, __FILE__, __LINE__, tensor)) - -#define ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(tensor) \ - ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_unsupported_cpu_fp16(__func__, __FILE__, __LINE__, tensor)) - -#define ARM_COMPUTE_ERROR_ON_CPU_BF16_UNSUPPORTED(tensor) \ - ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_unsupported_cpu_bf16(__func__, __FILE__, __LINE__, tensor)) - -#define ARM_COMPUTE_RETURN_ERROR_ON_CPU_BF16_UNSUPPORTED(tensor) \ - ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_unsupported_cpu_bf16(__func__, __FILE__, __LINE__, tensor)) -} // namespace arm_compute -#endif /* ARM_COMPUTE_CPP_VALIDATE_H */ diff --git a/arm_compute/core/CPP/kernels/CPPBoxWithNonMaximaSuppressionLimitKernel.h b/arm_compute/core/CPP/kernels/CPPBoxWithNonMaximaSuppressionLimitKernel.h index 3fa83a6d6d..dd91595ea6 100644 --- a/arm_compute/core/CPP/kernels/CPPBoxWithNonMaximaSuppressionLimitKernel.h +++ b/arm_compute/core/CPP/kernels/CPPBoxWithNonMaximaSuppressionLimitKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 ARM Limited. + * Copyright (c) 2018-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,9 +24,7 @@ #ifndef ARM_COMPUTE_CPPBOXWITHNONMAXIMASUPPRESSIONLIMITKERNEL_H #define ARM_COMPUTE_CPPBOXWITHNONMAXIMASUPPRESSIONLIMITKERNEL_H -#include "arm_compute/core/IArray.h" -#include "arm_compute/core/IHOG.h" -#include "arm_compute/core/NEON/INEKernel.h" +#include "arm_compute/core/CPP/ICPPKernel.h" #include "arm_compute/core/Types.h" namespace arm_compute @@ -62,11 +60,19 @@ public: * @param[out] classes The classes output tensor of size [N]. Data types supported: Same as @p scores_in * @param[out] batch_splits_out (Optional) The batch splits output tensor [batch_size]. Data types supported: Same as @p scores_in * @param[out] keeps (Optional) The keeps output tensor of size [N]. Data types supported: Same as@p scores_in - * @param[out] keeps_size (Optional) Number of filtered indices per class tensor of size [num_classes]. Data types supported: Same as @p scores_in + * @param[out] keeps_size (Optional) Number of filtered indices per class tensor of size [num_classes]. Data types supported: U32 * @param[in] info (Optional) BoxNMSLimitInfo information. */ - void configure(const ITensor *scores_in, const ITensor *boxes_in, const ITensor *batch_splits_in, ITensor *scores_out, ITensor *boxes_out, ITensor *classes, - ITensor *batch_splits_out = nullptr, ITensor *keeps = nullptr, ITensor *keeps_size = nullptr, const BoxNMSLimitInfo info = BoxNMSLimitInfo()); + void configure(const ITensor *scores_in, + const ITensor *boxes_in, + const ITensor *batch_splits_in, + ITensor *scores_out, + ITensor *boxes_out, + ITensor *classes, + ITensor *batch_splits_out = nullptr, + ITensor *keeps = nullptr, + ITensor *keeps_size = nullptr, + const BoxNMSLimitInfo info = BoxNMSLimitInfo()); // Inherited methods overridden: void run(const Window &window, const ThreadInfo &info) override; @@ -76,9 +82,9 @@ public: void run_nmslimit(); private: - const ITensor *_scores_in; - const ITensor *_boxes_in; - const ITensor *_batch_splits_in; + const ITensor *_scores_in; + const ITensor *_boxes_in; + const ITensor *_batch_splits_in; ITensor *_scores_out; ITensor *_boxes_out; ITensor *_classes; diff --git a/arm_compute/core/CPP/kernels/CPPCornerCandidatesKernel.h b/arm_compute/core/CPP/kernels/CPPCornerCandidatesKernel.h deleted file mode 100644 index eeb6a65525..0000000000 --- a/arm_compute/core/CPP/kernels/CPPCornerCandidatesKernel.h +++ /dev/null @@ -1,80 +0,0 @@ -/* - * Copyright (c) 2017-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CPPCORNERCANDIDATESKERNEL_H -#define ARM_COMPUTE_CPPCORNERCANDIDATESKERNEL_H - -#include "arm_compute/core/IArray.h" -#include "arm_compute/core/NEON/INEKernel.h" - -#include "support/Mutex.h" - -#include <cstdint> - -namespace arm_compute -{ -class ITensor; -/** Interface for CPP Images. */ -using IImage = ITensor; - -/** CPP kernel to perform corner candidates - */ -class CPPCornerCandidatesKernel : public INEKernel -{ -public: - const char *name() const override - { - return "CPPCornerCandidatesKernel"; - } - /** Default constructor */ - CPPCornerCandidatesKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CPPCornerCandidatesKernel(const CPPCornerCandidatesKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CPPCornerCandidatesKernel &operator=(const CPPCornerCandidatesKernel &) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - CPPCornerCandidatesKernel(CPPCornerCandidatesKernel &&) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - CPPCornerCandidatesKernel &operator=(CPPCornerCandidatesKernel &&) = delete; - /** Default destructor */ - ~CPPCornerCandidatesKernel() = default; - - /** Setup the kernel parameters - * - * @param[in] input Source image (harris score). Format supported F32 - * @param[out] output Destination array of InternalKeypoint - * @param[out] num_corner_candidates Number of corner candidates - */ - void configure(const IImage *input, InternalKeypoint *output, int32_t *num_corner_candidates); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - int32_t *_num_corner_candidates; /**< Number of corner candidates */ - arm_compute::Mutex _corner_candidates_mutex; /**< Mutex to preventing race conditions */ - const IImage *_input; /**< Source image - Harris score */ - InternalKeypoint *_output; /**< Array of NEInternalKeypoint */ -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CPPCORNERCANDIDATESKERNEL_H */ diff --git a/arm_compute/core/CPP/kernels/CPPDetectionWindowNonMaximaSuppressionKernel.h b/arm_compute/core/CPP/kernels/CPPDetectionWindowNonMaximaSuppressionKernel.h deleted file mode 100644 index cf8e4f00b9..0000000000 --- a/arm_compute/core/CPP/kernels/CPPDetectionWindowNonMaximaSuppressionKernel.h +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2017-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CPPDETECTIONWINDOWNONMAXIMASUPPRESSIONKERNEL_H -#define ARM_COMPUTE_CPPDETECTIONWINDOWNONMAXIMASUPPRESSIONKERNEL_H - -#include "arm_compute/core/IArray.h" -#include "arm_compute/core/IHOG.h" -#include "arm_compute/core/NEON/INEKernel.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -/** CPP kernel to perform in-place computation of euclidean distance on IDetectionWindowArray - * - * @note This kernel is meant to be used alongside HOG or other object detection algorithms to perform a non-maxima suppression on a - * IDetectionWindowArray - */ -class CPPDetectionWindowNonMaximaSuppressionKernel : public ICPPKernel -{ -public: - const char *name() const override - { - return "CPPDetectionWindowNonMaximaSuppressionKernel"; - } - /** Default constructor */ - CPPDetectionWindowNonMaximaSuppressionKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CPPDetectionWindowNonMaximaSuppressionKernel(const CPPDetectionWindowNonMaximaSuppressionKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CPPDetectionWindowNonMaximaSuppressionKernel &operator=(const CPPDetectionWindowNonMaximaSuppressionKernel &) = delete; - /** Allow instances of this class to be moved */ - CPPDetectionWindowNonMaximaSuppressionKernel(CPPDetectionWindowNonMaximaSuppressionKernel &&) = default; - /** Allow instances of this class to be moved */ - CPPDetectionWindowNonMaximaSuppressionKernel &operator=(CPPDetectionWindowNonMaximaSuppressionKernel &&) = default; - /** Initialise the kernel's input, output and the euclidean minimum distance - * - * @attention: If @ref IDetectionWindowArray is passed to the kernel, the map() and unmap() methods @ref IDetectionWindowArray must be called respectively before and after - * the run() method of @ref CPPDetectionWindowNonMaximaSuppressionKernel - * - * @param[in, out] input_output Input/Output array of @ref DetectionWindow - * @param[in] min_distance Radial Euclidean distance for non-maxima suppression - */ - void configure(IDetectionWindowArray *input_output, float min_distance); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - bool is_parallelisable() const override; - -private: - IDetectionWindowArray *_input_output; - float _min_distance; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CPPDETECTIONWINDOWNONMAXIMASUPPRESSIONKERNEL_H */ diff --git a/arm_compute/core/CPP/kernels/CPPNonMaximumSuppressionKernel.h b/arm_compute/core/CPP/kernels/CPPNonMaximumSuppressionKernel.h index cb416af070..d1f7f8670f 100644 --- a/arm_compute/core/CPP/kernels/CPPNonMaximumSuppressionKernel.h +++ b/arm_compute/core/CPP/kernels/CPPNonMaximumSuppressionKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 ARM Limited. + * Copyright (c) 2019 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,9 +24,8 @@ #ifndef ARM_COMPUTE_CPP_NONMAXIMUMSUPPRESSIONKERNEL_LAYER_H #define ARM_COMPUTE_CPP_NONMAXIMUMSUPPRESSIONKERNEL_LAYER_H -#include "arm_compute/runtime/CPP/ICPPSimpleFunction.h" - #include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CPP/ICPPSimpleFunction.h" namespace arm_compute { @@ -65,7 +64,12 @@ public: * @param[in] iou_threshold The threshold used in non maximum suppression. * */ - void configure(const ITensor *input_bboxes, const ITensor *input_scores, ITensor *output_indices, unsigned int max_output_size, const float score_threshold, const float iou_threshold); + void configure(const ITensor *input_bboxes, + const ITensor *input_scores, + ITensor *output_indices, + unsigned int max_output_size, + const float score_threshold, + const float iou_threshold); /** Static function to check if given arguments will lead to a valid configuration of @ref CPPNonMaximumSuppressionKernel * @@ -77,8 +81,12 @@ public: * @param[in] iou_threshold The threshold used in non maximum suppression. * */ - static Status validate(const ITensorInfo *input_bboxes, const ITensorInfo *input_scores, const ITensorInfo *output_indices, unsigned int max_output_size, - const float score_threshold, const float iou_threshold); + static Status validate(const ITensorInfo *input_bboxes, + const ITensorInfo *input_scores, + const ITensorInfo *output_indices, + unsigned int max_output_size, + const float score_threshold, + const float iou_threshold); // Inherited methods overridden: void run(const Window &window, const ThreadInfo &info) override; diff --git a/arm_compute/core/CPP/kernels/CPPPermuteKernel.h b/arm_compute/core/CPP/kernels/CPPPermuteKernel.h index e75152f4ea..d141c2fb70 100644 --- a/arm_compute/core/CPP/kernels/CPPPermuteKernel.h +++ b/arm_compute/core/CPP/kernels/CPPPermuteKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 ARM Limited. + * Copyright (c) 2017-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -56,15 +56,15 @@ public: /** Set the input and output of the kernel. * - * @param[in] input The input tensor to permute. Data types supported: U8/S8/QASYMM8/QASYMM8_SIGNED/U16/S16/F16/U32/S32/F32 - * @param[out] output The output tensor. Data types supported: Same as @p input + * @param[in] input The input tensor to permute. Data types supported: All. + * @param[out] output The output tensor. Data types supported: same as @p input * @param[in] perm Permutation vector */ void configure(const ITensor *input, ITensor *output, const PermutationVector &perm); /** Static function to check if given info will lead to a valid configuration of @ref CPPPermuteKernel * - * @param[in] input The input tensor to permute. Data types supported: U8/S8/QASYMM8/QASYMM8_SIGNED/U16/S16/F16/U32/S32/F32 - * @param[in] output The output tensor. Data types supported: Same as @p input + * @param[in] input The input tensor to permute. Data types supported: All. + * @param[in] output The output tensor. Data types supported: same as @p input * @param[in] perm Permutation vector * * @return a status diff --git a/arm_compute/core/CPP/kernels/CPPSortEuclideanDistanceKernel.h b/arm_compute/core/CPP/kernels/CPPSortEuclideanDistanceKernel.h deleted file mode 100644 index d127ef8d8a..0000000000 --- a/arm_compute/core/CPP/kernels/CPPSortEuclideanDistanceKernel.h +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Copyright (c) 2017-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CPPSORTEUCLIDEANDISTANCEKERNEL_H -#define ARM_COMPUTE_CPPSORTEUCLIDEANDISTANCEKERNEL_H - -#include "arm_compute/core/CPP/ICPPKernel.h" -#include "arm_compute/core/IArray.h" - -#include <cstdint> -#include <mutex> - -namespace arm_compute -{ -/** CPP kernel to perform sorting and euclidean distance */ -class CPPSortEuclideanDistanceKernel : public ICPPKernel -{ -public: - const char *name() const override - { - return "CPPSortEuclideanDistanceKernel"; - } - /** Default constructor */ - CPPSortEuclideanDistanceKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CPPSortEuclideanDistanceKernel(const CPPSortEuclideanDistanceKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CPPSortEuclideanDistanceKernel &operator=(const CPPSortEuclideanDistanceKernel &) = delete; - /** Allow instances of this class to be moved */ - CPPSortEuclideanDistanceKernel(CPPSortEuclideanDistanceKernel &&) = default; - /** Allow instances of this class to be moved */ - CPPSortEuclideanDistanceKernel &operator=(CPPSortEuclideanDistanceKernel &&) = default; - /** Initialise the kernel's source, destination and border mode. - * - * @param[in,out] in_out Input internal keypoints. Marked as out as the kernel writes 0 in the strength member. - * @param[out] output Output keypoints. - * @param[in] num_corner_candidates Pointer to the number of corner candidates in the input array - * @param[in] min_distance Radial Euclidean distance to use - */ - void configure(InternalKeypoint *in_out, IKeyPointArray *output, const int32_t *num_corner_candidates, float min_distance); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - bool is_parallelisable() const override; - -private: - const int32_t *_num_corner_candidates; /**< Number of corner candidates */ - float _min_distance; /**< Radial Euclidean distance */ - InternalKeypoint *_in_out; /**< Source array of InternalKeypoint */ - IKeyPointArray *_output; /**< Destination array of IKeyPointArray */ -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CPPSORTEUCLIDEANDISTANCEKERNEL_H */ diff --git a/arm_compute/core/CPP/kernels/CPPTopKVKernel.h b/arm_compute/core/CPP/kernels/CPPTopKVKernel.h index 4b9bfdd3c9..7326a10e2f 100644 --- a/arm_compute/core/CPP/kernels/CPPTopKVKernel.h +++ b/arm_compute/core/CPP/kernels/CPPTopKVKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020 ARM Limited. + * Copyright (c) 2019-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -54,7 +54,7 @@ public: /** Set the input and output of the kernel. * * @param[in] predictions A batch_size x classes tensor. Data types supported: F16/S32/F32/QASYMM8/QASYMM8_SIGNED - * @param[in] targets A batch_size 1D tensor of class ids. Data types supported: S32 + * @param[in] targets A batch_size 1D tensor of class ids. Data types supported: U32 * @param[out] output Computed precision at @p k as a bool 1D tensor. Data types supported: U8 * @param[in] k Number of top elements to look at for computing precision. */ @@ -63,13 +63,14 @@ public: /** Static function to check if given info will lead to a valid configuration of @ref CPPTopKVKernel * * @param[in] predictions A batch_size x classes tensor info. Data types supported: F16/S32/F32/QASYMM8/QASYMM8_SIGNED - * @param[in] targets A batch_size 1D tensor info of class ids. Data types supported: S32 + * @param[in] targets A batch_size 1D tensor info of class ids. Data types supported: U32 * @param[in] output Computed precision at @p k as a bool 1D tensor info. Data types supported: U8 * @param[in] k Number of top elements to look at for computing precision. * * @return a status */ - static Status validate(const ITensorInfo *predictions, const ITensorInfo *targets, ITensorInfo *output, const unsigned int k); + static Status + validate(const ITensorInfo *predictions, const ITensorInfo *targets, ITensorInfo *output, const unsigned int k); // Inherited methods overridden: void run(const Window &window, const ThreadInfo &info) override; diff --git a/arm_compute/core/CPP/kernels/CPPUpsampleKernel.h b/arm_compute/core/CPP/kernels/CPPUpsampleKernel.h index 9fbc9b697c..dd7e07c390 100644 --- a/arm_compute/core/CPP/kernels/CPPUpsampleKernel.h +++ b/arm_compute/core/CPP/kernels/CPPUpsampleKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 ARM Limited. + * Copyright (c) 2017-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -55,8 +55,8 @@ public: /** Set the input and output of the kernel. * - * @param[in] input The input tensor to upsample. Data types supported: F32/F16/QASYMM8/QASYMM8_SIGNED - * @param[out] output The output tensor. Data types supported: Same as @p input + * @param[in] input The input tensor to upsample. Data types supported: All. + * @param[out] output The output tensor. Data types supported: same as @p input. * @param[in] info Padding info. */ void configure(const ITensor *input, ITensor *output, const PadStrideInfo &info); |