diff options
Diffstat (limited to 'arm_compute/core/CPP')
-rw-r--r-- | arm_compute/core/CPP/CPPTypes.h | 123 | ||||
-rw-r--r-- | arm_compute/core/CPP/ICPPKernel.h | 20 | ||||
-rw-r--r-- | arm_compute/core/CPP/ICPPSimpleKernel.h | 76 | ||||
-rw-r--r-- | arm_compute/core/CPP/kernels/CPPBoxWithNonMaximaSuppressionLimitKernel.h | 18 | ||||
-rw-r--r-- | arm_compute/core/CPP/kernels/CPPNonMaximumSuppressionKernel.h | 18 | ||||
-rw-r--r-- | arm_compute/core/CPP/kernels/CPPTopKVKernel.h | 3 |
6 files changed, 138 insertions, 120 deletions
diff --git a/arm_compute/core/CPP/CPPTypes.h b/arm_compute/core/CPP/CPPTypes.h index 11891937d1..e5322bdcb1 100644 --- a/arm_compute/core/CPP/CPPTypes.h +++ b/arm_compute/core/CPP/CPPTypes.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021 Arm Limited. + * Copyright (c) 2017-2022, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_CPP_TYPES_H -#define ARM_COMPUTE_CPP_TYPES_H +#ifndef ACL_ARM_COMPUTE_CORE_CPP_CPPTYPES_H +#define ACL_ARM_COMPUTE_CORE_CPP_CPPTYPES_H #include "arm_compute/core/Error.h" @@ -30,17 +30,26 @@ namespace arm_compute { +namespace cpuinfo +{ +struct CpuIsaInfo; +} // namespace cpuinfo + #define ARM_COMPUTE_CPU_MODEL_LIST \ X(GENERIC) \ X(GENERIC_FP16) \ X(GENERIC_FP16_DOT) \ - X(A35) \ X(A53) \ X(A55r0) \ X(A55r1) \ + X(A35) \ X(A73) \ - X(KLEIN) \ - X(X1) + X(A76) \ + X(A510) \ + X(X1) \ + X(V1) \ + X(A64FX) \ + X(N1) /** CPU models types * @@ -56,39 +65,79 @@ enum class CPUModel class CPUInfo final { -public: - /** Constructor */ +protected: CPUInfo(); ~CPUInfo(); - /** Disable copy constructor and assignment operator to avoid copying the vector of CPUs each time - * CPUInfo is initialized once in the IScheduler and ThreadInfo will get a pointer to it. +public: + /** Access the KernelLibrary singleton. + * This method has been deprecated and will be removed in future releases + * @return The KernelLibrary instance. */ - CPUInfo &operator=(const CPUInfo &cpuinfo) = delete; - CPUInfo(const CPUInfo &cpuinfo) = delete; - CPUInfo &operator=(CPUInfo &&cpuinfo) = default; - CPUInfo(CPUInfo &&cpuinfo) = default; + static CPUInfo &get(); + + /* Delete move and copy constructors and assignment operator + s */ + CPUInfo(CPUInfo const &) = delete; // Copy construct + CPUInfo(CPUInfo &&) = delete; // Move construct + CPUInfo &operator=(CPUInfo const &) = delete; // Copy assign + CPUInfo &operator=(CPUInfo &&) = delete; // Move assign /** Checks if the cpu model supports fp16. * - * @return true of the cpu supports fp16, false otherwise + * @return true if the cpu supports fp16, false otherwise */ bool has_fp16() const; /** Checks if the cpu model supports bf16. * - * @return true of the cpu supports bf16, false otherwise + * @return true if the cpu supports bf16, false otherwise */ bool has_bf16() const; + /** Checks if the cpu model supports bf16. + * + * @return true if the cpu supports bf16, false otherwise + */ + bool has_svebf16() const; /** Checks if the cpu model supports dot product. * - * @return true of the cpu supports dot product, false otherwise + * @return true if the cpu supports dot product, false otherwise */ bool has_dotprod() const; + /** Checks if the cpu model supports floating-point matrix multiplication. + * + * @return true if the cpu supports floating-point matrix multiplication, false otherwise + */ + bool has_svef32mm() const; + /** Checks if the cpu model supports integer matrix multiplication. + * + * @return true if the cpu supports integer matrix multiplication, false otherwise + */ + bool has_i8mm() const; + /** Checks if the cpu model supports integer matrix multiplication. + * + * @return true if the cpu supports integer matrix multiplication, false otherwise + */ + bool has_svei8mm() const; /** Checks if the cpu model supports sve. * - * @return true of the cpu supports sve, false otherwise + * @return true if the cpu supports sve, false otherwise */ bool has_sve() const; + /** Checks if the cpu model supports sve2. + * + * @return true if the cpu supports sve2, false otherwise + */ + bool has_sve2() const; + /** Checks if the cpu model supports sme. + * + * @return true if the cpu supports sme, false otherwise + */ + bool has_sme() const; + /** Checks if the cpu model supports sme2. + * + * @return true if the cpu supports sme2, false otherwise + */ + bool has_sme2() const; /** Gets the cpu model for a given cpuid. * * @param[in] cpuid the id of the cpu core to be retrieved, @@ -101,6 +150,11 @@ public: * @return Current thread's @ref CPUModel */ CPUModel get_cpu_model() const; + /** Gets the current cpu's ISA information + * + * @return Current cpu's ISA information + */ + cpuinfo::CpuIsaInfo get_isa() const; /** Gets the L1 cache size * * @return the size of the L1 cache @@ -111,22 +165,29 @@ public: * @return the size of the L1 cache */ unsigned int get_L2_cache_size() const; - /** Set fp16 support + /** Return the maximum number of CPUs present + * + * @return Number of CPUs + */ + unsigned int get_cpu_num() const; + /** Return the maximum number of CPUs present excluding the little cores + * in case of an Android device * - * @param[in] fp16 whether the cpu supports fp16. + * @return Number of CPUs excluding little */ - void set_fp16(const bool fp16); - /** Set dot product support + unsigned int get_cpu_num_excluding_little() const; + /** Return whether the device has little, medium and big CPUs in case + * of an Android device, returns false otherwise * - * @param[in] dotprod whether the cpu supports dot product. + * @return Whether the device has little, medium and big CPUs */ - void set_dotprod(const bool dotprod); + bool cpu_has_little_mid_big() const; - /** Return the maximum number of CPUs present + /** Return the vector length in bytes for sme2 * - * @return Number of CPUs + * @return Vector length if sme2 is enabled, otherwise returns 0. */ - unsigned int get_cpu_num() const; + unsigned long get_sme2_vector_length() const; private: struct Impl; @@ -136,9 +197,9 @@ private: /** Information about executing thread and CPU. */ struct ThreadInfo { - int thread_id{ 0 }; - int num_threads{ 1 }; - const CPUInfo *cpu_info{ nullptr }; + int thread_id{0}; + int num_threads{1}; + const CPUInfo *cpu_info{nullptr}; }; } // namespace arm_compute -#endif /* ARM_COMPUTE_CPP_TYPES_H */ +#endif // ACL_ARM_COMPUTE_CORE_CPP_CPPTYPES_H diff --git a/arm_compute/core/CPP/ICPPKernel.h b/arm_compute/core/CPP/ICPPKernel.h index ab369ffe1d..03967a536d 100644 --- a/arm_compute/core/CPP/ICPPKernel.h +++ b/arm_compute/core/CPP/ICPPKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020 Arm Limited. + * Copyright (c) 2016-2022 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -25,9 +25,9 @@ #define ARM_COMPUTE_ICPPKERNEL_H #include "arm_compute/core/CPP/CPPTypes.h" +#include "arm_compute/core/experimental/Types.h" #include "arm_compute/core/IKernel.h" #include "arm_compute/core/Types.h" -#include "arm_compute/core/experimental/Types.h" namespace arm_compute { @@ -38,6 +38,8 @@ class ITensor; class ICPPKernel : public IKernel { public: + static constexpr size_t default_mws = 1; /* Default minimum workload size value - no impact */ + /** Default destructor */ virtual ~ICPPKernel() = default; @@ -88,6 +90,20 @@ public: ARM_COMPUTE_UNUSED(tensors, window, info); } + /** Return minimum workload size of the relevant kernel + * + * @param[in] platform The CPU platform used to create the context. + * @param[in] thread_count Number of threads in the execution. + * + * @return Minimum workload size for requested configuration. + */ + virtual size_t get_mws(const CPUInfo &platform, size_t thread_count) const + { + ARM_COMPUTE_UNUSED(platform, thread_count); + + return default_mws; + } + /** Name of the kernel * * @return Kernel name diff --git a/arm_compute/core/CPP/ICPPSimpleKernel.h b/arm_compute/core/CPP/ICPPSimpleKernel.h deleted file mode 100644 index c31d487a45..0000000000 --- a/arm_compute/core/CPP/ICPPSimpleKernel.h +++ /dev/null @@ -1,76 +0,0 @@ -/* - * Copyright (c) 2017-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_ICPPSIMPLEKERNEL_H -#define ARM_COMPUTE_ICPPSIMPLEKERNEL_H - -#include "arm_compute/core/CPP/ICPPKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** Interface for simple C++ kernels having 1 tensor input and 1 tensor output */ -class ICPPSimpleKernel : public ICPPKernel -{ -public: - /** Constructor */ - ICPPSimpleKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - ICPPSimpleKernel(const ICPPSimpleKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - ICPPSimpleKernel &operator=(const ICPPSimpleKernel &) = delete; - /** Allow instances of this class to be moved */ - ICPPSimpleKernel(ICPPSimpleKernel &&) = default; - /** Allow instances of this class to be moved */ - ICPPSimpleKernel &operator=(ICPPSimpleKernel &&) = default; - /** Default destructor */ - ~ICPPSimpleKernel() = default; - -protected: - /** Configure the kernel - * - * @param[in] input Source tensor. - * @param[out] output Destination tensor. - * @param[in] num_elems_processed_per_iteration Number of processed elements per iteration. - * @param[in] border_undefined (Optional) True if the border mode is undefined. False if it's replicate or constant. - * @param[in] border_size (Optional) Size of the border. - */ - void configure(const ITensor *input, ITensor *output, unsigned int num_elems_processed_per_iteration, bool border_undefined = false, const BorderSize &border_size = BorderSize()); - /** Static function to check if given info will lead to a valid configuration of @ref ICPPSimpleKernel. - * - * @param[in] input Source tensor info. - * @param[in] output Destination tensor info. - * @param[in] num_elems_processed_per_iteration Number of processed elements per iteration. - * @param[in] border_undefined (Optional) True if the border mode is undefined. False if it's replicate or constant. - * @param[in] border_size (Optional) Size of the border. - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, unsigned int num_elems_processed_per_iteration, - bool border_undefined = false, const BorderSize &border_size = BorderSize()); - -protected: - const ITensor *_input; - ITensor *_output; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_ICPPSIMPLEKERNEL_H */ diff --git a/arm_compute/core/CPP/kernels/CPPBoxWithNonMaximaSuppressionLimitKernel.h b/arm_compute/core/CPP/kernels/CPPBoxWithNonMaximaSuppressionLimitKernel.h index 068b37d80c..dd91595ea6 100644 --- a/arm_compute/core/CPP/kernels/CPPBoxWithNonMaximaSuppressionLimitKernel.h +++ b/arm_compute/core/CPP/kernels/CPPBoxWithNonMaximaSuppressionLimitKernel.h @@ -63,8 +63,16 @@ public: * @param[out] keeps_size (Optional) Number of filtered indices per class tensor of size [num_classes]. Data types supported: U32 * @param[in] info (Optional) BoxNMSLimitInfo information. */ - void configure(const ITensor *scores_in, const ITensor *boxes_in, const ITensor *batch_splits_in, ITensor *scores_out, ITensor *boxes_out, ITensor *classes, - ITensor *batch_splits_out = nullptr, ITensor *keeps = nullptr, ITensor *keeps_size = nullptr, const BoxNMSLimitInfo info = BoxNMSLimitInfo()); + void configure(const ITensor *scores_in, + const ITensor *boxes_in, + const ITensor *batch_splits_in, + ITensor *scores_out, + ITensor *boxes_out, + ITensor *classes, + ITensor *batch_splits_out = nullptr, + ITensor *keeps = nullptr, + ITensor *keeps_size = nullptr, + const BoxNMSLimitInfo info = BoxNMSLimitInfo()); // Inherited methods overridden: void run(const Window &window, const ThreadInfo &info) override; @@ -74,9 +82,9 @@ public: void run_nmslimit(); private: - const ITensor *_scores_in; - const ITensor *_boxes_in; - const ITensor *_batch_splits_in; + const ITensor *_scores_in; + const ITensor *_boxes_in; + const ITensor *_batch_splits_in; ITensor *_scores_out; ITensor *_boxes_out; ITensor *_classes; diff --git a/arm_compute/core/CPP/kernels/CPPNonMaximumSuppressionKernel.h b/arm_compute/core/CPP/kernels/CPPNonMaximumSuppressionKernel.h index e32b5d8f7b..d1f7f8670f 100644 --- a/arm_compute/core/CPP/kernels/CPPNonMaximumSuppressionKernel.h +++ b/arm_compute/core/CPP/kernels/CPPNonMaximumSuppressionKernel.h @@ -24,9 +24,8 @@ #ifndef ARM_COMPUTE_CPP_NONMAXIMUMSUPPRESSIONKERNEL_LAYER_H #define ARM_COMPUTE_CPP_NONMAXIMUMSUPPRESSIONKERNEL_LAYER_H -#include "arm_compute/runtime/CPP/ICPPSimpleFunction.h" - #include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CPP/ICPPSimpleFunction.h" namespace arm_compute { @@ -65,7 +64,12 @@ public: * @param[in] iou_threshold The threshold used in non maximum suppression. * */ - void configure(const ITensor *input_bboxes, const ITensor *input_scores, ITensor *output_indices, unsigned int max_output_size, const float score_threshold, const float iou_threshold); + void configure(const ITensor *input_bboxes, + const ITensor *input_scores, + ITensor *output_indices, + unsigned int max_output_size, + const float score_threshold, + const float iou_threshold); /** Static function to check if given arguments will lead to a valid configuration of @ref CPPNonMaximumSuppressionKernel * @@ -77,8 +81,12 @@ public: * @param[in] iou_threshold The threshold used in non maximum suppression. * */ - static Status validate(const ITensorInfo *input_bboxes, const ITensorInfo *input_scores, const ITensorInfo *output_indices, unsigned int max_output_size, - const float score_threshold, const float iou_threshold); + static Status validate(const ITensorInfo *input_bboxes, + const ITensorInfo *input_scores, + const ITensorInfo *output_indices, + unsigned int max_output_size, + const float score_threshold, + const float iou_threshold); // Inherited methods overridden: void run(const Window &window, const ThreadInfo &info) override; diff --git a/arm_compute/core/CPP/kernels/CPPTopKVKernel.h b/arm_compute/core/CPP/kernels/CPPTopKVKernel.h index 1245dbc14c..7326a10e2f 100644 --- a/arm_compute/core/CPP/kernels/CPPTopKVKernel.h +++ b/arm_compute/core/CPP/kernels/CPPTopKVKernel.h @@ -69,7 +69,8 @@ public: * * @return a status */ - static Status validate(const ITensorInfo *predictions, const ITensorInfo *targets, ITensorInfo *output, const unsigned int k); + static Status + validate(const ITensorInfo *predictions, const ITensorInfo *targets, ITensorInfo *output, const unsigned int k); // Inherited methods overridden: void run(const Window &window, const ThreadInfo &info) override; |