about summary refs log tree commit diff
path: root/arm_compute/core/CPP
diff options
context:
space:
mode:
Diffstat (limited to 'arm_compute/core/CPP')
-rw-r--r-- arm_compute/core/CPP/CPPTypes.h 123
-rw-r--r-- arm_compute/core/CPP/ICPPKernel.h 20
-rw-r--r-- arm_compute/core/CPP/ICPPSimpleKernel.h 76
-rw-r--r-- arm_compute/core/CPP/kernels/CPPBoxWithNonMaximaSuppressionLimitKernel.h 18
-rw-r--r-- arm_compute/core/CPP/kernels/CPPNonMaximumSuppressionKernel.h 18
-rw-r--r-- arm_compute/core/CPP/kernels/CPPTopKVKernel.h 3
6 files changed, 138 insertions, 120 deletions
diff --git a/arm_compute/core/CPP/CPPTypes.h b/arm_compute/core/CPP/CPPTypes.h
index 11891937d1..e5322bdcb1 100644
--- a/arm_compute/core/CPP/CPPTypes.h
+++ b/arm_compute/core/CPP/CPPTypes.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2022, 2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,8 +21,8 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef ARM_COMPUTE_CPP_TYPES_H
-#define ARM_COMPUTE_CPP_TYPES_H
+#ifndef ACL_ARM_COMPUTE_CORE_CPP_CPPTYPES_H
+#define ACL_ARM_COMPUTE_CORE_CPP_CPPTYPES_H
#include "arm_compute/core/Error.h"
@@ -30,17 +30,26 @@
namespace arm_compute
{
+namespace cpuinfo
+{
+struct CpuIsaInfo;
+} // namespace cpuinfo
+
#define ARM_COMPUTE_CPU_MODEL_LIST \
X(GENERIC) \
X(GENERIC_FP16) \
X(GENERIC_FP16_DOT) \
- X(A35) \
X(A53) \
X(A55r0) \
X(A55r1) \
+ X(A35) \
X(A73) \
- X(KLEIN) \
- X(X1)
+ X(A76) \
+ X(A510) \
+ X(X1) \
+ X(V1) \
+ X(A64FX) \
+ X(N1)
/** CPU models types
*
@@ -56,39 +65,79 @@ enum class CPUModel
class CPUInfo final
{
-public:
- /** Constructor */
+protected:
CPUInfo();
~CPUInfo();
- /** Disable copy constructor and assignment operator to avoid copying the vector of CPUs each time
- * CPUInfo is initialized once in the IScheduler and ThreadInfo will get a pointer to it.
+public:
+ /** Access the CPUInfo singleton.
+ * This method has been deprecated and will be removed in future releases
+ * @return The CPUInfo instance.
*/
- CPUInfo &operator=(const CPUInfo &cpuinfo) = delete;
- CPUInfo(const CPUInfo &cpuinfo) = delete;
- CPUInfo &operator=(CPUInfo &&cpuinfo) = default;
- CPUInfo(CPUInfo &&cpuinfo) = default;
+ static CPUInfo &get();
+
+ /* Delete move and copy constructors and assignment
+ * operators */
+ CPUInfo(CPUInfo const &) = delete; // Copy construct
+ CPUInfo(CPUInfo &&) = delete; // Move construct
+ CPUInfo &operator=(CPUInfo const &) = delete; // Copy assign
+ CPUInfo &operator=(CPUInfo &&) = delete; // Move assign
/** Checks if the cpu model supports fp16.
*
- * @return true of the cpu supports fp16, false otherwise
+ * @return true if the cpu supports fp16, false otherwise
*/
bool has_fp16() const;
/** Checks if the cpu model supports bf16.
*
- * @return true of the cpu supports bf16, false otherwise
+ * @return true if the cpu supports bf16, false otherwise
*/
bool has_bf16() const;
+ /** Checks if the cpu model supports sve bf16.
+ *
+ * @return true if the cpu supports sve bf16, false otherwise
+ */
+ bool has_svebf16() const;
/** Checks if the cpu model supports dot product.
*
- * @return true of the cpu supports dot product, false otherwise
+ * @return true if the cpu supports dot product, false otherwise
*/
bool has_dotprod() const;
+ /** Checks if the cpu model supports sve fp32 matrix multiplication.
+ *
+ * @return true if the cpu supports sve fp32 matrix multiplication, false otherwise
+ */
+ bool has_svef32mm() const;
+ /** Checks if the cpu model supports integer matrix multiplication.
+ *
+ * @return true if the cpu supports integer matrix multiplication, false otherwise
+ */
+ bool has_i8mm() const;
+ /** Checks if the cpu model supports sve integer matrix multiplication.
+ *
+ * @return true if the cpu supports sve integer matrix multiplication, false otherwise
+ */
+ bool has_svei8mm() const;
/** Checks if the cpu model supports sve.
*
- * @return true of the cpu supports sve, false otherwise
+ * @return true if the cpu supports sve, false otherwise
*/
bool has_sve() const;
+ /** Checks if the cpu model supports sve2.
+ *
+ * @return true if the cpu supports sve2, false otherwise
+ */
+ bool has_sve2() const;
+ /** Checks if the cpu model supports sme.
+ *
+ * @return true if the cpu supports sme, false otherwise
+ */
+ bool has_sme() const;
+ /** Checks if the cpu model supports sme2.
+ *
+ * @return true if the cpu supports sme2, false otherwise
+ */
+ bool has_sme2() const;
/** Gets the cpu model for a given cpuid.
*
* @param[in] cpuid the id of the cpu core to be retrieved,
@@ -101,6 +150,11 @@ public:
* @return Current thread's @ref CPUModel
*/
CPUModel get_cpu_model() const;
+ /** Gets the current cpu's ISA information
+ *
+ * @return Current cpu's ISA information
+ */
+ cpuinfo::CpuIsaInfo get_isa() const;
/** Gets the L1 cache size
*
* @return the size of the L1 cache
@@ -111,22 +165,29 @@ public:
* @return the size of the L2 cache
*/
unsigned int get_L2_cache_size() const;
- /** Set fp16 support
+ /** Return the maximum number of CPUs present
+ *
+ * @return Number of CPUs
+ */
+ unsigned int get_cpu_num() const;
+ /** Return the maximum number of CPUs present excluding the little cores
+ * in case of an Android device
*
- * @param[in] fp16 whether the cpu supports fp16.
+ * @return Number of CPUs excluding little
*/
- void set_fp16(const bool fp16);
- /** Set dot product support
+ unsigned int get_cpu_num_excluding_little() const;
+ /** Return whether the device has little, medium and big CPUs in case
+ * of an Android device, returns false otherwise
*
- * @param[in] dotprod whether the cpu supports dot product.
+ * @return Whether the device has little, medium and big CPUs
*/
- void set_dotprod(const bool dotprod);
+ bool cpu_has_little_mid_big() const;
- /** Return the maximum number of CPUs present
+ /** Return the vector length in bytes for sme2
*
- * @return Number of CPUs
+ * @return Vector length if sme2 is enabled, otherwise returns 0.
*/
- unsigned int get_cpu_num() const;
+ unsigned long get_sme2_vector_length() const;
private:
struct Impl;
@@ -136,9 +197,9 @@ private:
/** Information about executing thread and CPU. */
struct ThreadInfo
{
- int thread_id{ 0 };
- int num_threads{ 1 };
- const CPUInfo *cpu_info{ nullptr };
+ int thread_id{0};
+ int num_threads{1};
+ const CPUInfo *cpu_info{nullptr};
};
} // namespace arm_compute
-#endif /* ARM_COMPUTE_CPP_TYPES_H */
+#endif // ACL_ARM_COMPUTE_CORE_CPP_CPPTYPES_H
diff --git a/arm_compute/core/CPP/ICPPKernel.h b/arm_compute/core/CPP/ICPPKernel.h
index ab369ffe1d..03967a536d 100644
--- a/arm_compute/core/CPP/ICPPKernel.h
+++ b/arm_compute/core/CPP/ICPPKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2020 Arm Limited.
+ * Copyright (c) 2016-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -25,9 +25,9 @@
#define ARM_COMPUTE_ICPPKERNEL_H
#include "arm_compute/core/CPP/CPPTypes.h"
+#include "arm_compute/core/experimental/Types.h"
#include "arm_compute/core/IKernel.h"
#include "arm_compute/core/Types.h"
-#include "arm_compute/core/experimental/Types.h"
namespace arm_compute
{
@@ -38,6 +38,8 @@ class ITensor;
class ICPPKernel : public IKernel
{
public:
+ static constexpr size_t default_mws = 1; /* Default minimum workload size value - no impact */
+
/** Default destructor */
virtual ~ICPPKernel() = default;
@@ -88,6 +90,20 @@ public:
ARM_COMPUTE_UNUSED(tensors, window, info);
}
+ /** Return minimum workload size of the relevant kernel
+ *
+ * @param[in] platform The CPU platform used to create the context.
+ * @param[in] thread_count Number of threads in the execution.
+ *
+ * @return Minimum workload size for requested configuration.
+ */
+ virtual size_t get_mws(const CPUInfo &platform, size_t thread_count) const
+ {
+ ARM_COMPUTE_UNUSED(platform, thread_count);
+
+ return default_mws;
+ }
+
/** Name of the kernel
*
* @return Kernel name
diff --git a/arm_compute/core/CPP/ICPPSimpleKernel.h b/arm_compute/core/CPP/ICPPSimpleKernel.h
deleted file mode 100644
index c31d487a45..0000000000
--- a/arm_compute/core/CPP/ICPPSimpleKernel.h
+++ /dev/null
@@ -1,76 +0,0 @@
-/*
- * Copyright (c) 2017-2019 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_ICPPSIMPLEKERNEL_H
-#define ARM_COMPUTE_ICPPSIMPLEKERNEL_H
-
-#include "arm_compute/core/CPP/ICPPKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Interface for simple C++ kernels having 1 tensor input and 1 tensor output */
-class ICPPSimpleKernel : public ICPPKernel
-{
-public:
- /** Constructor */
- ICPPSimpleKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- ICPPSimpleKernel(const ICPPSimpleKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- ICPPSimpleKernel &operator=(const ICPPSimpleKernel &) = delete;
- /** Allow instances of this class to be moved */
- ICPPSimpleKernel(ICPPSimpleKernel &&) = default;
- /** Allow instances of this class to be moved */
- ICPPSimpleKernel &operator=(ICPPSimpleKernel &&) = default;
- /** Default destructor */
- ~ICPPSimpleKernel() = default;
-
-protected:
- /** Configure the kernel
- *
- * @param[in] input Source tensor.
- * @param[out] output Destination tensor.
- * @param[in] num_elems_processed_per_iteration Number of processed elements per iteration.
- * @param[in] border_undefined (Optional) True if the border mode is undefined. False if it's replicate or constant.
- * @param[in] border_size (Optional) Size of the border.
- */
- void configure(const ITensor *input, ITensor *output, unsigned int num_elems_processed_per_iteration, bool border_undefined = false, const BorderSize &border_size = BorderSize());
- /** Static function to check if given info will lead to a valid configuration of @ref ICPPSimpleKernel.
- *
- * @param[in] input Source tensor info.
- * @param[in] output Destination tensor info.
- * @param[in] num_elems_processed_per_iteration Number of processed elements per iteration.
- * @param[in] border_undefined (Optional) True if the border mode is undefined. False if it's replicate or constant.
- * @param[in] border_size (Optional) Size of the border.
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, unsigned int num_elems_processed_per_iteration,
- bool border_undefined = false, const BorderSize &border_size = BorderSize());
-
-protected:
- const ITensor *_input;
- ITensor *_output;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_ICPPSIMPLEKERNEL_H */
diff --git a/arm_compute/core/CPP/kernels/CPPBoxWithNonMaximaSuppressionLimitKernel.h b/arm_compute/core/CPP/kernels/CPPBoxWithNonMaximaSuppressionLimitKernel.h
index 068b37d80c..dd91595ea6 100644
--- a/arm_compute/core/CPP/kernels/CPPBoxWithNonMaximaSuppressionLimitKernel.h
+++ b/arm_compute/core/CPP/kernels/CPPBoxWithNonMaximaSuppressionLimitKernel.h
@@ -63,8 +63,16 @@ public:
* @param[out] keeps_size (Optional) Number of filtered indices per class tensor of size [num_classes]. Data types supported: U32
* @param[in] info (Optional) BoxNMSLimitInfo information.
*/
- void configure(const ITensor *scores_in, const ITensor *boxes_in, const ITensor *batch_splits_in, ITensor *scores_out, ITensor *boxes_out, ITensor *classes,
- ITensor *batch_splits_out = nullptr, ITensor *keeps = nullptr, ITensor *keeps_size = nullptr, const BoxNMSLimitInfo info = BoxNMSLimitInfo());
+ void configure(const ITensor *scores_in,
+ const ITensor *boxes_in,
+ const ITensor *batch_splits_in,
+ ITensor *scores_out,
+ ITensor *boxes_out,
+ ITensor *classes,
+ ITensor *batch_splits_out = nullptr,
+ ITensor *keeps = nullptr,
+ ITensor *keeps_size = nullptr,
+ const BoxNMSLimitInfo info = BoxNMSLimitInfo());
// Inherited methods overridden:
void run(const Window &window, const ThreadInfo &info) override;
@@ -74,9 +82,9 @@ public:
void run_nmslimit();
private:
- const ITensor *_scores_in;
- const ITensor *_boxes_in;
- const ITensor *_batch_splits_in;
+ const ITensor *_scores_in;
+ const ITensor *_boxes_in;
+ const ITensor *_batch_splits_in;
ITensor *_scores_out;
ITensor *_boxes_out;
ITensor *_classes;
diff --git a/arm_compute/core/CPP/kernels/CPPNonMaximumSuppressionKernel.h b/arm_compute/core/CPP/kernels/CPPNonMaximumSuppressionKernel.h
index e32b5d8f7b..d1f7f8670f 100644
--- a/arm_compute/core/CPP/kernels/CPPNonMaximumSuppressionKernel.h
+++ b/arm_compute/core/CPP/kernels/CPPNonMaximumSuppressionKernel.h
@@ -24,9 +24,8 @@
#ifndef ARM_COMPUTE_CPP_NONMAXIMUMSUPPRESSIONKERNEL_LAYER_H
#define ARM_COMPUTE_CPP_NONMAXIMUMSUPPRESSIONKERNEL_LAYER_H
-#include "arm_compute/runtime/CPP/ICPPSimpleFunction.h"
-
#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CPP/ICPPSimpleFunction.h"
namespace arm_compute
{
@@ -65,7 +64,12 @@ public:
* @param[in] iou_threshold The threshold used in non maximum suppression.
*
*/
- void configure(const ITensor *input_bboxes, const ITensor *input_scores, ITensor *output_indices, unsigned int max_output_size, const float score_threshold, const float iou_threshold);
+ void configure(const ITensor *input_bboxes,
+ const ITensor *input_scores,
+ ITensor *output_indices,
+ unsigned int max_output_size,
+ const float score_threshold,
+ const float iou_threshold);
/** Static function to check if given arguments will lead to a valid configuration of @ref CPPNonMaximumSuppressionKernel
*
@@ -77,8 +81,12 @@ public:
* @param[in] iou_threshold The threshold used in non maximum suppression.
*
*/
- static Status validate(const ITensorInfo *input_bboxes, const ITensorInfo *input_scores, const ITensorInfo *output_indices, unsigned int max_output_size,
- const float score_threshold, const float iou_threshold);
+ static Status validate(const ITensorInfo *input_bboxes,
+ const ITensorInfo *input_scores,
+ const ITensorInfo *output_indices,
+ unsigned int max_output_size,
+ const float score_threshold,
+ const float iou_threshold);
// Inherited methods overridden:
void run(const Window &window, const ThreadInfo &info) override;
diff --git a/arm_compute/core/CPP/kernels/CPPTopKVKernel.h b/arm_compute/core/CPP/kernels/CPPTopKVKernel.h
index 1245dbc14c..7326a10e2f 100644
--- a/arm_compute/core/CPP/kernels/CPPTopKVKernel.h
+++ b/arm_compute/core/CPP/kernels/CPPTopKVKernel.h
@@ -69,7 +69,8 @@ public:
*
* @return a status
*/
- static Status validate(const ITensorInfo *predictions, const ITensorInfo *targets, ITensorInfo *output, const unsigned int k);
+ static Status
+ validate(const ITensorInfo *predictions, const ITensorInfo *targets, ITensorInfo *output, const unsigned int k);
// Inherited methods overridden:
void run(const Window &window, const ThreadInfo &info) override;