Diffstat (limited to 'arm_compute/core')
-rw-r--r--  arm_compute/core/CL/CLCompileContext.h | 33
-rw-r--r--  arm_compute/core/CL/CLDevice.h | 38
-rw-r--r--  arm_compute/core/CL/CLHelpers.h | 68
-rw-r--r--  arm_compute/core/CL/CLTypes.h | 22
-rw-r--r--  arm_compute/core/CL/ICLArray.h | 9
-rw-r--r--  arm_compute/core/CL/ICLTensor.h | 11
-rw-r--r--  arm_compute/core/CL/OpenCL.h | 40
-rw-r--r--  arm_compute/core/CPP/CPPTypes.h | 123
-rw-r--r--  arm_compute/core/CPP/ICPPKernel.h | 20
-rw-r--r--  arm_compute/core/CPP/ICPPSimpleKernel.h | 76
-rw-r--r--  arm_compute/core/CPP/kernels/CPPBoxWithNonMaximaSuppressionLimitKernel.h | 18
-rw-r--r--  arm_compute/core/CPP/kernels/CPPNonMaximumSuppressionKernel.h | 18
-rw-r--r--  arm_compute/core/CPP/kernels/CPPTopKVKernel.h | 3
-rw-r--r--  arm_compute/core/Coordinates.h | 5
-rw-r--r--  arm_compute/core/CoreTypes.h | 352
-rw-r--r--  arm_compute/core/Dimensions.h | 15
-rw-r--r--  arm_compute/core/Error.h | 145
-rw-r--r--  arm_compute/core/GPUTarget.h | 59
-rw-r--r--  arm_compute/core/Helpers.h | 74
-rw-r--r--  arm_compute/core/Helpers.inl | 130
-rw-r--r--  arm_compute/core/IAccessWindow.h | 15
-rw-r--r--  arm_compute/core/IArray.h | 11
-rw-r--r--  arm_compute/core/IKernel.h | 2
-rw-r--r--  arm_compute/core/ITensor.h | 6
-rw-r--r--  arm_compute/core/ITensorInfo.h | 65
-rw-r--r--  arm_compute/core/ITensorPack.h | 12
-rw-r--r--  arm_compute/core/KernelDescriptors.h | 189
-rw-r--r--  arm_compute/core/Log.h | 16
-rw-r--r--  arm_compute/core/PixelValue.h | 84
-rw-r--r--  arm_compute/core/QuantizationInfo.h | 125
-rw-r--r--  arm_compute/core/Rounding.h | 2
-rw-r--r--  arm_compute/core/Size2D.h | 7
-rw-r--r--  arm_compute/core/Size3D.h | 97
-rw-r--r--  arm_compute/core/Steps.h | 5
-rw-r--r--  arm_compute/core/Strides.h | 6
-rw-r--r--  arm_compute/core/SubTensorInfo.h | 40
-rw-r--r--  arm_compute/core/TensorInfo.h | 100
-rw-r--r--  arm_compute/core/TensorShape.h | 41
-rw-r--r--  arm_compute/core/Types.h | 1134
-rw-r--r--  arm_compute/core/Utils.h | 1067
-rw-r--r--  arm_compute/core/Validate.h | 555
-rw-r--r--  arm_compute/core/Version.h | 2
-rw-r--r--  arm_compute/core/Window.h | 57
-rw-r--r--  arm_compute/core/Window.inl | 75
-rw-r--r--  arm_compute/core/WindowIterator.h | 35
-rw-r--r--  arm_compute/core/experimental/Types.h | 65
-rw-r--r--  arm_compute/core/utils/ActivationFunctionUtils.h | 41
-rw-r--r--  arm_compute/core/utils/DataLayoutUtils.h | 40
-rw-r--r--  arm_compute/core/utils/DataTypeUtils.h | 549
-rw-r--r--  arm_compute/core/utils/FormatUtils.h | 344
-rw-r--r--  arm_compute/core/utils/InterpolationPolicyUtils.h | 41
-rw-r--r--  arm_compute/core/utils/StringUtils.h | 65
-rw-r--r--  arm_compute/core/utils/helpers/AdjustVecSize.h | 55
-rw-r--r--  arm_compute/core/utils/helpers/tensor_transform.h | 33
-rw-r--r--  arm_compute/core/utils/logging/FilePrinter.h | 3
-rw-r--r--  arm_compute/core/utils/logging/Helpers.h | 3
-rw-r--r--  arm_compute/core/utils/logging/IPrinter.h | 3
-rw-r--r--  arm_compute/core/utils/logging/LogMsgDecorators.h | 5
-rw-r--r--  arm_compute/core/utils/logging/Logger.h | 6
-rw-r--r--  arm_compute/core/utils/logging/LoggerRegistry.h | 13
-rw-r--r--  arm_compute/core/utils/logging/Macros.h | 34
-rw-r--r--  arm_compute/core/utils/logging/Types.h | 6
-rw-r--r--  arm_compute/core/utils/math/Math.h | 71
-rw-r--r--  arm_compute/core/utils/math/SafeOps.h | 27
-rw-r--r--  arm_compute/core/utils/misc/InfoHelpers.h | 54
-rw-r--r--  arm_compute/core/utils/misc/MMappedFile.h | 4
-rw-r--r--  arm_compute/core/utils/misc/Macros.h | 9
-rw-r--r--  arm_compute/core/utils/misc/ShapeCalculator.h | 656
-rw-r--r--  arm_compute/core/utils/misc/Traits.h | 1
-rw-r--r--  arm_compute/core/utils/misc/Utility.h | 38
-rw-r--r--  arm_compute/core/utils/quantization/AsymmHelpers.h | 31
71 files changed, 4375 insertions, 2829 deletions
diff --git a/arm_compute/core/CL/CLCompileContext.h b/arm_compute/core/CL/CLCompileContext.h
index 46a8c9b341..dcd3b45670 100644
--- a/arm_compute/core/CL/CLCompileContext.h
+++ b/arm_compute/core/CL/CLCompileContext.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020-2021 Arm Limited.
+ * Copyright (c) 2020-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -77,6 +77,8 @@ public:
*/
const StringSet &options() const;
+ bool operator==(const CLBuildOptions &other) const;
+
private:
StringSet _build_opts; /**< Build options set */
};
@@ -248,8 +250,12 @@ public:
*
* @return The created kernel.
*/
- Kernel create_kernel(const std::string &kernel_name, const std::string &program_name, const std::string &program_source,
- const std::string &kernel_path, const StringSet &build_options_set, bool is_binary) const;
+ Kernel create_kernel(const std::string &kernel_name,
+ const std::string &program_name,
+ const std::string &program_source,
+ const std::string &kernel_path,
+ const StringSet &build_options_set,
+ bool is_binary) const;
/** Clear the library's cache of binary programs
*/
@@ -302,6 +308,18 @@ public:
*/
bool is_wbsm_supported() const;
+ /** Return the DDK version. If the DDK version cannot be detected, return -1.
+ *
+ * @return The DDK version.
+ */
+ int32_t get_ddk_version() const;
+
+ /** Return the Gpu target of the associated device
+ *
+ * @return GPUTarget
+ */
+ GPUTarget get_gpu_target() const;
+
private:
/** Load program and its dependencies.
*
@@ -309,7 +327,8 @@ private:
* @param[in] program_source Source of the program.
* @param[in] is_binary Flag to indicate if the program source is binary.
*/
- const Program &load_program(const std::string &program_name, const std::string &program_source, bool is_binary) const;
+ const Program &
+ load_program(const std::string &program_name, const std::string &program_source, bool is_binary) const;
/** Generates the build options given a string of user defined ones
*
@@ -329,11 +348,11 @@ private:
*/
std::string stringify_set(const StringSet &s, const std::string &kernel_path) const;
- cl::Context _context; /**< Underlying CL context. */
- CLDevice _device; /**< Underlying CL device. */
+ cl::Context _context; /**< Underlying CL context. */
+ CLDevice _device; /**< Underlying CL device. */
mutable std::map<std::string, const Program> _programs_map; /**< Map with all already loaded program data. */
mutable std::map<std::string, cl::Program> _built_programs_map; /**< Map with all already built program data. */
- bool _is_wbsm_supported; /**< Support of worksize batch size modifier support boolean*/
+ bool _is_wbsm_supported; /**< True if the device supports the workgroup batch size modifier */
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_CLCOMPILECONTEXT_H */
diff --git a/arm_compute/core/CL/CLDevice.h b/arm_compute/core/CL/CLDevice.h
index 06aaac88f4..ded6bb8493 100644
--- a/arm_compute/core/CL/CLDevice.h
+++ b/arm_compute/core/CL/CLDevice.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020-2021 Arm Limited.
+ * Copyright (c) 2020-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -44,8 +44,7 @@ class CLDevice : public IDevice
{
public:
/** Default Constructor */
- CLDevice()
- : _device(cl::Device()), _options()
+ CLDevice() : _device(cl::Device()), _options()
{
}
@@ -53,8 +52,7 @@ public:
*
* @param[in] cl_device OpenCL device
*/
- CLDevice(const cl::Device &cl_device)
- : _device(), _options()
+ CLDevice(const cl::Device &cl_device) : _device(), _options()
{
_device = cl_device;
@@ -66,13 +64,13 @@ public:
std::string extensions = _device.getInfo<CL_DEVICE_EXTENSIONS>();
std::istringstream iss(extensions);
- for(std::string s; iss >> s;)
+ for (std::string s; iss >> s;)
{
_options.extensions.insert(s);
}
// SW workaround for G76
- if(_options.gpu_target == GPUTarget::G76)
+ if (_options.gpu_target == GPUTarget::G76)
{
_options.extensions.insert("cl_arm_integer_dot_product_int8");
}
@@ -143,6 +141,32 @@ public:
return _options.extensions.count(extension) != 0;
}
+ /** Returns whether non-uniform workgroups are supported, together with
+ * the build option required to enable them.
+ *
+ * @return A tuple (supported, build_options) indicating whether the feature
+ * is supported and the corresponding build options to enable it.
+ */
+ std::tuple<bool, std::string> is_non_uniform_workgroup_supported() const
+ {
+ if (version() == CLVersion::CL30 && get_cl_non_uniform_work_group_supported(_device))
+ {
+ return {true, " -cl-std=CL3.0 "};
+ }
+ else if (version() == CLVersion::CL20)
+ {
+ return {true, " -cl-std=CL2.0 "};
+ }
+ else if (supported("cl_arm_non_uniform_work_group_size"))
+ {
+ return {true, " -cl-arm-non-uniform-work-group-size "};
+ }
+
+ return {false, ""};
+ }
+
private:
cl::Device _device; /**< OpenCL device. */
struct CLDeviceOptions _options; /**< OpenCL device options */
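
A minimal sketch of how a caller might consume the new tuple-returning query; the helper name and base options are illustrative, not part of the patch:

    #include <string>
    #include <tuple>

    #include "arm_compute/core/CL/CLDevice.h"

    // Build the option string for a kernel, enabling non-uniform workgroups
    // only when the device reports support for them.
    std::string make_build_options(const arm_compute::CLDevice &device)
    {
        bool        supported = false;
        std::string nuwg_flag;
        std::tie(supported, nuwg_flag) = device.is_non_uniform_workgroup_supported();
        std::string opts = "-DDATA_TYPE=float"; // illustrative base options
        if (supported)
        {
            opts += nuwg_flag; // e.g. " -cl-std=CL3.0 " or " -cl-std=CL2.0 "
        }
        return opts;
    }
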
diff --git a/arm_compute/core/CL/CLHelpers.h b/arm_compute/core/CL/CLHelpers.h
index a9ac6a5933..1a639e47f9 100644
--- a/arm_compute/core/CL/CLHelpers.h
+++ b/arm_compute/core/CL/CLHelpers.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2021 Arm Limited.
+ * Copyright (c) 2016-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -31,11 +31,6 @@
#include <set>
#include <string>
-/* CL Device capabilities */
-#define ARM_COMPUTE_LIBRARY_OPENCL_DEVICE_CAPABILITIES_ARM 0x41E4
-/* Workgroup Batch Size Modifier */
-#define ARM_COMPUTE_LIBRARY_OPENCL_EXEC_WBSM_ARM 0x41E6
-
namespace arm_compute
{
class CLCompileContext;
@@ -46,6 +41,9 @@ enum class DataType;
/** Max vector width of an OpenCL vector */
static constexpr unsigned int max_cl_vector_width = 16;
+/** Max number of manual loop unrolling */
+static constexpr int max_manual_loop_unrolling = 128;
+
/** Translates a tensor data type to the appropriate OpenCL type.
*
* @param[in] dt @ref DataType to be translated to OpenCL type.
@@ -126,6 +124,14 @@ CLVersion get_cl_version(const cl::Device &device);
*/
size_t get_cl_image_pitch_alignment(const cl::Device &device);
+/** Helper function to check whether non-uniform work group is supported
+ *
+ * @param[in] device A CL device
+ *
+ * @return True if the feature is supported
+ */
+bool get_cl_non_uniform_work_group_supported(const cl::Device &device);
+
/** Helper function to check whether a given extension is supported
*
* @param[in] device A CL device
@@ -173,7 +179,9 @@ bool dot8_acc_supported(const cl::Device &device);
*
* @return True if the configuration is supported
*/
-bool cl_winograd_convolution_layer_supported(const Size2D &output_tile, const Size2D &kernel_size, DataLayout data_layout);
+bool cl_winograd_convolution_layer_supported(const Size2D &output_tile,
+ const Size2D &kernel_size,
+ DataLayout data_layout);
/** Helper function to get the preferred native vector width size for built-in scalar types that can be put into vectors
*
@@ -209,7 +217,9 @@ bool image2d_from_buffer_supported(const cl::Device &device);
*
* @return An opencl kernel
*/
-cl::Kernel create_kernel(const CLCompileContext &ctx, const std::string &kernel_name, const std::set<std::string> &build_opts = std::set<std::string>());
+cl::Kernel create_kernel(const CLCompileContext &ctx,
+ const std::string &kernel_name,
+ const std::set<std::string> &build_opts = std::set<std::string>());
/** Creates a suitable LWS hint object for parallel implementations. Sets the number of WG based on the input size.
* If input width is smaller than 128 we can use fewer threads than 8.
@@ -236,5 +246,47 @@ bool get_wbsm_support_info(const cl::Device &device);
*/
void set_wbsm(cl::Kernel &kernel, cl_int wbsm_hint);
+/** Helper function to check if we can export the tensor to cl_image
+ *
+ * @param[in] tensor Tensor to check
+ *
+ * @return true if we can export the tensor to cl_image
+ */
+bool export_to_cl_image(const ITensorInfo *tensor);
+
+/** Helper function to force unroll with pragma when any of the input values (iterations) are greater than @ref max_manual_loop_unrolling
+ *
+ * This function passes UNROLL_WITH_PRAGMA at compile time when any of the input values are greater than @ref max_manual_loop_unrolling
+ *
+ * @param[in] built_opts OpenCL kernel build options
+ * @param[in] values Input values (iterations)
+ *
+ */
+void set_unroll_with_pragma(CLBuildOptions &built_opts, std::initializer_list<int> values);
+
+/** Helper function to check whether the cl_arm_matrix_multiply extension is supported
+ *
+ * @param[in] device A CL device
+ *
+ * @return True if the extension is supported
+ */
+bool arm_matrix_multiply_supported(const cl::Device &device);
+
+/** Check whether cl_khr_command_buffer extension is supported by the specified CL device.
+ *
+ * @param[in] device The CL device
+ *
+ * @return True if the extension is supported by the CL device.
+ */
+bool command_buffer_supported(const cl::Device &device);
+
+/** Check whether cl_khr_command_buffer_mutable_dispatch extension is supported by the specified CL device.
+ *
+ * @param[in] device The CL device
+ *
+ * @return True if the extension is supported by the CL device.
+ */
+bool command_buffer_mutable_dispatch_supported(const cl::Device &device);
+
} // namespace arm_compute
#endif /* ARM_COMPUTE_CLHELPERS_H */
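
A hedged sketch of gating optional paths on the new capability queries; the dispatch decisions in the branches are illustrative only:

    #include "arm_compute/core/CL/CLHelpers.h"

    // Gate optional fast paths on the queries declared in this header.
    void select_optional_paths(const cl::Device &device)
    {
        if (arm_compute::arm_matrix_multiply_supported(device))
        {
            // ... pick a kernel variant built on cl_arm_matrix_multiply ...
        }
        if (arm_compute::command_buffer_supported(device) &&
            arm_compute::command_buffer_mutable_dispatch_supported(device))
        {
            // ... record dispatches into a cl_khr_command_buffer and update
            //     them later via the mutable-dispatch extension ...
        }
    }
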
diff --git a/arm_compute/core/CL/CLTypes.h b/arm_compute/core/CL/CLTypes.h
index 2ba2e8dd62..0f088e2b10 100644
--- a/arm_compute/core/CL/CLTypes.h
+++ b/arm_compute/core/CL/CLTypes.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -41,7 +41,8 @@ enum class CLVersion
CL10, /* the OpenCL 1.0 */
CL11, /* the OpenCL 1.1 */
CL12, /* the OpenCL 1.2 */
- CL20, /* the OpenCL 2.0 and above */
+ CL20, /* the OpenCL 2.x */
+ CL30, /* the OpenCL 3.x */
 UNKNOWN /* unknown version */
};
@@ -62,18 +63,27 @@ struct CLDeviceOptions
struct CLQuantization
{
/** Default Constructor */
- CLQuantization()
- : scale(nullptr), offset(nullptr) {};
+ CLQuantization() : scale(nullptr), offset(nullptr){};
/** Constructor
*
* @param[in] scale OpenCL scale array
* @param[in] offset OpenCL offset array
*/
- CLQuantization(const ICLFloatArray *scale, const ICLInt32Array *offset)
- : scale(scale), offset(offset) {};
+ CLQuantization(const ICLFloatArray *scale, const ICLInt32Array *offset) : scale(scale), offset(offset){};
const ICLFloatArray *scale; /**< Quantization scale array */
const ICLInt32Array *offset; /**< Quantization offset array */
};
+
+enum CLKernelType
+{
+ UNKNOWN, /**< Unknown CL kernel type */
+ DEPTHWISE, /**< Depthwise CL kernel type */
+ DIRECT, /**< Direct Convolution CL kernel type */
+ ELEMENTWISE, /**< Elementwise CL kernel type */
+ GEMM, /**< GEMM CL kernel type */
+ POOL, /**< Pool CL kernel type */
+ WINOGRAD /**< Winograd CL kernel type */
+};
} // namespace arm_compute
#endif /* ARM_COMPUTE_CL_TYPES_H */
diff --git a/arm_compute/core/CL/ICLArray.h b/arm_compute/core/CL/ICLArray.h
index b900117724..a2b2baa5b3 100644
--- a/arm_compute/core/CL/ICLArray.h
+++ b/arm_compute/core/CL/ICLArray.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2021 Arm Limited.
+ * Copyright (c) 2016-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -40,8 +40,7 @@ public:
* @param[in] max_num_values Maximum size of the array.
*
*/
- explicit ICLArray(size_t max_num_values)
- : IArray<T>(max_num_values), _mapping(nullptr)
+ explicit ICLArray(size_t max_num_values) : IArray<T>(max_num_values), _mapping(nullptr)
{
}
@@ -66,8 +65,6 @@ public:
* @param[in] blocking If true, then the mapping will be ready to use by the time
* this method returns, else it is the caller's responsibility
* to flush the queue and wait for the mapping operation to have completed before using the returned mapping pointer.
- *
- * @return The mapping address.
*/
void map(cl::CommandQueue &q, bool blocking = true)
{
@@ -127,5 +124,5 @@ using ICLInt16Array = ICLArray<cl_short>;
using ICLInt32Array = ICLArray<cl_int>;
/** Interface for OpenCL Array of floats. */
using ICLFloatArray = ICLArray<cl_float>;
-}
+} // namespace arm_compute
#endif /*ARM_COMPUTE_ICLARRAY_H*/
diff --git a/arm_compute/core/CL/ICLTensor.h b/arm_compute/core/CL/ICLTensor.h
index fd05e64732..8de5423762 100644
--- a/arm_compute/core/CL/ICLTensor.h
+++ b/arm_compute/core/CL/ICLTensor.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2019 Arm Limited.
+ * Copyright (c) 2016-2019, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,9 +24,8 @@
#ifndef ARM_COMPUTE_ICLTENSOR_H
#define ARM_COMPUTE_ICLTENSOR_H
-#include "arm_compute/core/ITensor.h"
-
#include "arm_compute/core/CL/CLTypes.h"
+#include "arm_compute/core/ITensor.h"
#include <cstdint>
@@ -34,7 +33,7 @@ namespace cl
{
class Buffer;
class CommandQueue;
-}
+} // namespace cl
namespace arm_compute
{
@@ -71,8 +70,6 @@ public:
* @param[in] blocking If true, then the mapping will be ready to use by the time
* this method returns, else it is the caller's responsibility
* to flush the queue and wait for the mapping operation to have completed before using the returned mapping pointer.
- *
- * @return The mapping address.
*/
void map(cl::CommandQueue &q, bool blocking = true);
/** Enqueue an unmap operation of the allocated and mapped buffer on the given queue.
@@ -115,5 +112,5 @@ private:
};
using ICLImage = ICLTensor;
-}
+} // namespace arm_compute
#endif /*ARM_COMPUTE_ICLTENSOR_H */
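
With the stray @return line gone, map() is documented as returning nothing; the host pointer comes from the inherited buffer() accessor while the tensor is mapped. A small sketch, assuming an already-allocated tensor and queue:

    #include <cstring>

    #include "arm_compute/core/CL/ICLTensor.h"

    // Zero a tensor through a blocking map; buffer() is only valid while mapped.
    void fill_with_zeroes(arm_compute::ICLTensor &tensor, cl::CommandQueue &queue)
    {
        tensor.map(queue, /* blocking */ true);
        std::memset(tensor.buffer(), 0, tensor.info()->total_size());
        tensor.unmap(queue);
    }
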
diff --git a/arm_compute/core/CL/OpenCL.h b/arm_compute/core/CL/OpenCL.h
index bbe469f1a8..8b5bf97099 100644
--- a/arm_compute/core/CL/OpenCL.h
+++ b/arm_compute/core/CL/OpenCL.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2021 Arm Limited.
+ * Copyright (c) 2016-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,8 +21,8 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef ARM_COMPUTE_OPENCL_H
-#define ARM_COMPUTE_OPENCL_H
+#ifndef ACL_ARM_COMPUTE_CORE_CL_OPENCL_H
+#define ACL_ARM_COMPUTE_CORE_CL_OPENCL_H
#include <string>
#include <utility>
@@ -31,8 +31,8 @@
#ifndef ARM_COMPUTE_NO_EXCEPTIONS
#define CL_HPP_ENABLE_EXCEPTIONS
#endif // ARM_COMPUTE_NO_EXCEPTIONS
-#define CL_TARGET_OPENCL_VERSION 200
-#define CL_HPP_TARGET_OPENCL_VERSION 110
+#define CL_TARGET_OPENCL_VERSION 300
+#define CL_HPP_TARGET_OPENCL_VERSION 110
#define CL_HPP_MINIMUM_OPENCL_VERSION 110
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Weffc++"
@@ -40,8 +40,8 @@
#pragma GCC diagnostic ignored "-Wunused-parameter"
#if defined(__GNUG__) && __GNUG__ >= 8
#pragma GCC diagnostic ignored "-Wcatch-value"
-#endif // defined(__GNUG__) && __GNUG__ >= 8
-#include <CL/cl2.hpp>
+#endif // defined(__GNUG__) && __GNUG__ >= 8
+#include <CL/opencl.hpp> // include new hpp header instead of cl2.hpp
#pragma GCC diagnostic pop
namespace cl
@@ -73,21 +73,22 @@ public:
* @return The static instance of CLSymbols.
*/
static CLSymbols &get();
- /** Load symbols from the given OpenCL library path.
+ /** Attempt to load the OpenCL symbols from the first of the provided libraries that loads successfully.
*
- * @param[in] library Path to the OpenCL library.
+ * @param[in] libraries_filenames Vector containing the filenames of the libraries to be loaded.
+ * @param[in] use_loader Use symbol loader function loadOpenCLPointer.
*
- * @return True if loading the library is successful.
+ * @return True if loading the library is successful. False if none of the provided libraries could be loaded.
*/
- bool load(const std::string &library);
+ bool load(const std::vector<std::string> &libraries_filenames, bool use_loader = false);
/** Load symbols from any of the default OpenCL library names.
+ * If none of the default libraries can be loaded, this method will print a warning message and return false.
*
* @return True if loading any library is successful.
*/
bool load_default();
-#define DECLARE_FUNCTION_PTR(func_name) \
- std::function<decltype(func_name)> func_name##_ptr = nullptr
+#define DECLARE_FUNCTION_PTR(func_name) std::function<decltype(func_name)> func_name##_ptr = nullptr
DECLARE_FUNCTION_PTR(clCreateContext);
DECLARE_FUNCTION_PTR(clCreateContextFromType);
@@ -138,6 +139,17 @@ public:
DECLARE_FUNCTION_PTR(clWaitForEvents);
DECLARE_FUNCTION_PTR(clCreateImage);
DECLARE_FUNCTION_PTR(clSetKernelExecInfo);
+ DECLARE_FUNCTION_PTR(clGetExtensionFunctionAddressForPlatform);
+
+ // Command buffer and mutable dispatch command buffer extensions
+ DECLARE_FUNCTION_PTR(clCreateCommandBufferKHR);
+ DECLARE_FUNCTION_PTR(clRetainCommandBufferKHR);
+ DECLARE_FUNCTION_PTR(clReleaseCommandBufferKHR);
+ DECLARE_FUNCTION_PTR(clFinalizeCommandBufferKHR);
+ DECLARE_FUNCTION_PTR(clEnqueueCommandBufferKHR);
+ DECLARE_FUNCTION_PTR(clCommandNDRangeKernelKHR);
+
+ DECLARE_FUNCTION_PTR(clUpdateMutableCommandsKHR);
// Third-party extensions
DECLARE_FUNCTION_PTR(clImportMemoryARM);
@@ -148,4 +160,4 @@ private:
std::pair<bool, bool> _loaded;
};
} // namespace arm_compute
-#endif /* ARM_COMPUTE_OPENCL_H */
+#endif // ACL_ARM_COMPUTE_CORE_CL_OPENCL_H
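
A minimal sketch of the new vector-based load(); the library filenames are illustrative candidates, not values mandated by the API:

    #include "arm_compute/core/CL/OpenCL.h"

    // Try an explicit candidate list first, then the built-in defaults.
    bool init_opencl_symbols()
    {
        auto &symbols = arm_compute::CLSymbols::get();
        if (symbols.load({"libOpenCL.so", "libOpenCL.so.1"}, /* use_loader */ false))
        {
            return true;
        }
        return symbols.load_default();
    }
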
diff --git a/arm_compute/core/CPP/CPPTypes.h b/arm_compute/core/CPP/CPPTypes.h
index 11891937d1..e5322bdcb1 100644
--- a/arm_compute/core/CPP/CPPTypes.h
+++ b/arm_compute/core/CPP/CPPTypes.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2022, 2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,8 +21,8 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef ARM_COMPUTE_CPP_TYPES_H
-#define ARM_COMPUTE_CPP_TYPES_H
+#ifndef ACL_ARM_COMPUTE_CORE_CPP_CPPTYPES_H
+#define ACL_ARM_COMPUTE_CORE_CPP_CPPTYPES_H
#include "arm_compute/core/Error.h"
@@ -30,17 +30,26 @@
namespace arm_compute
{
+namespace cpuinfo
+{
+struct CpuIsaInfo;
+} // namespace cpuinfo
+
#define ARM_COMPUTE_CPU_MODEL_LIST \
X(GENERIC) \
X(GENERIC_FP16) \
X(GENERIC_FP16_DOT) \
- X(A35) \
X(A53) \
X(A55r0) \
X(A55r1) \
+ X(A35) \
X(A73) \
- X(KLEIN) \
- X(X1)
+ X(A76) \
+ X(A510) \
+ X(X1) \
+ X(V1) \
+ X(A64FX) \
+ X(N1)
/** CPU models types
*
@@ -56,39 +65,79 @@ enum class CPUModel
class CPUInfo final
{
-public:
- /** Constructor */
+protected:
CPUInfo();
~CPUInfo();
- /** Disable copy constructor and assignment operator to avoid copying the vector of CPUs each time
- * CPUInfo is initialized once in the IScheduler and ThreadInfo will get a pointer to it.
+public:
+ /** Access the CPUInfo singleton.
+ *
+ * @return The CPUInfo instance.
+ */
- CPUInfo &operator=(const CPUInfo &cpuinfo) = delete;
- CPUInfo(const CPUInfo &cpuinfo) = delete;
- CPUInfo &operator=(CPUInfo &&cpuinfo) = default;
- CPUInfo(CPUInfo &&cpuinfo) = default;
+ static CPUInfo &get();
+
+ /* Delete move and copy constructors and assignment operators */
+ CPUInfo(CPUInfo const &) = delete; // Copy construct
+ CPUInfo(CPUInfo &&) = delete; // Move construct
+ CPUInfo &operator=(CPUInfo const &) = delete; // Copy assign
+ CPUInfo &operator=(CPUInfo &&) = delete; // Move assign
/** Checks if the cpu model supports fp16.
*
- * @return true of the cpu supports fp16, false otherwise
+ * @return true if the cpu supports fp16, false otherwise
*/
bool has_fp16() const;
/** Checks if the cpu model supports bf16.
*
- * @return true of the cpu supports bf16, false otherwise
+ * @return true if the cpu supports bf16, false otherwise
*/
bool has_bf16() const;
+ /** Checks if the cpu model supports sve bf16.
+ *
+ * @return true if the cpu supports sve bf16, false otherwise
+ */
+ bool has_svebf16() const;
/** Checks if the cpu model supports dot product.
*
- * @return true of the cpu supports dot product, false otherwise
+ * @return true if the cpu supports dot product, false otherwise
*/
bool has_dotprod() const;
+ /** Checks if the cpu model supports sve floating-point matrix multiplication.
+ *
+ * @return true if the cpu supports sve floating-point matrix multiplication, false otherwise
+ */
+ bool has_svef32mm() const;
+ /** Checks if the cpu model supports integer matrix multiplication.
+ *
+ * @return true if the cpu supports integer matrix multiplication, false otherwise
+ */
+ bool has_i8mm() const;
+ /** Checks if the cpu model supports sve integer matrix multiplication.
+ *
+ * @return true if the cpu supports sve integer matrix multiplication, false otherwise
+ */
+ bool has_svei8mm() const;
/** Checks if the cpu model supports sve.
*
- * @return true of the cpu supports sve, false otherwise
+ * @return true if the cpu supports sve, false otherwise
*/
bool has_sve() const;
+ /** Checks if the cpu model supports sve2.
+ *
+ * @return true if the cpu supports sve2, false otherwise
+ */
+ bool has_sve2() const;
+ /** Checks if the cpu model supports sme.
+ *
+ * @return true if the cpu supports sme, false otherwise
+ */
+ bool has_sme() const;
+ /** Checks if the cpu model supports sme2.
+ *
+ * @return true if the cpu supports sme2, false otherwise
+ */
+ bool has_sme2() const;
/** Gets the cpu model for a given cpuid.
*
* @param[in] cpuid the id of the cpu core to be retrieved,
@@ -101,6 +150,11 @@ public:
* @return Current thread's @ref CPUModel
*/
CPUModel get_cpu_model() const;
+ /** Gets the current cpu's ISA information
+ *
+ * @return Current cpu's ISA information
+ */
+ cpuinfo::CpuIsaInfo get_isa() const;
/** Gets the L1 cache size
*
* @return the size of the L1 cache
@@ -111,22 +165,29 @@ public:
 * @return the size of the L2 cache
*/
unsigned int get_L2_cache_size() const;
- /** Set fp16 support
+ /** Return the maximum number of CPUs present
+ *
+ * @return Number of CPUs
+ */
+ unsigned int get_cpu_num() const;
+ /** Return the maximum number of CPUs present excluding the little cores
+ * in case of an Android device
*
- * @param[in] fp16 whether the cpu supports fp16.
+ * @return Number of CPUs excluding little
*/
- void set_fp16(const bool fp16);
- /** Set dot product support
+ unsigned int get_cpu_num_excluding_little() const;
+ /** Return whether the device has little, medium and big CPUs in case
+ * of an Android device, returns false otherwise
*
- * @param[in] dotprod whether the cpu supports dot product.
+ * @return Whether the device has little, medium and big CPUs
*/
- void set_dotprod(const bool dotprod);
+ bool cpu_has_little_mid_big() const;
- /** Return the maximum number of CPUs present
+ /** Return the vector length in bytes for sme2
*
- * @return Number of CPUs
+ * @return Vector length if sme2 is enabled, otherwise returns 0.
*/
- unsigned int get_cpu_num() const;
+ unsigned long get_sme2_vector_length() const;
private:
struct Impl;
@@ -136,9 +197,9 @@ private:
/** Information about executing thread and CPU. */
struct ThreadInfo
{
- int thread_id{ 0 };
- int num_threads{ 1 };
- const CPUInfo *cpu_info{ nullptr };
+ int thread_id{0};
+ int num_threads{1};
+ const CPUInfo *cpu_info{nullptr};
};
} // namespace arm_compute
-#endif /* ARM_COMPUTE_CPP_TYPES_H */
+#endif // ACL_ARM_COMPUTE_CORE_CPP_CPPTYPES_H
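
A short sketch of consuming the new CPUInfo singleton and feature queries; the thread-count policy shown is illustrative only:

    #include "arm_compute/core/CPP/CPPTypes.h"

    // Choose a worker count from the new CPUInfo queries.
    unsigned int pick_thread_count()
    {
        const arm_compute::CPUInfo &info = arm_compute::CPUInfo::get();
        // On Android big.LITTLE systems, leave the little cores alone.
        return info.cpu_has_little_mid_big() ? info.get_cpu_num_excluding_little()
                                             : info.get_cpu_num();
    }
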
diff --git a/arm_compute/core/CPP/ICPPKernel.h b/arm_compute/core/CPP/ICPPKernel.h
index ab369ffe1d..03967a536d 100644
--- a/arm_compute/core/CPP/ICPPKernel.h
+++ b/arm_compute/core/CPP/ICPPKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2020 Arm Limited.
+ * Copyright (c) 2016-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -25,9 +25,9 @@
#define ARM_COMPUTE_ICPPKERNEL_H
#include "arm_compute/core/CPP/CPPTypes.h"
+#include "arm_compute/core/experimental/Types.h"
#include "arm_compute/core/IKernel.h"
#include "arm_compute/core/Types.h"
-#include "arm_compute/core/experimental/Types.h"
namespace arm_compute
{
@@ -38,6 +38,8 @@ class ITensor;
class ICPPKernel : public IKernel
{
public:
+ static constexpr size_t default_mws = 1; /* Default minimum workload size value - no impact */
+
/** Default destructor */
virtual ~ICPPKernel() = default;
@@ -88,6 +90,20 @@ public:
ARM_COMPUTE_UNUSED(tensors, window, info);
}
+ /** Return minimum workload size of the relevant kernel
+ *
+ * @param[in] platform The CPU platform used to create the context.
+ * @param[in] thread_count Number of threads in the execution.
+ *
+ * @return Minimum workload size for requested configuration.
+ */
+ virtual size_t get_mws(const CPUInfo &platform, size_t thread_count) const
+ {
+ ARM_COMPUTE_UNUSED(platform, thread_count);
+
+ return default_mws;
+ }
+
/** Name of the kernel
*
* @return Kernel name
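
A sketch of a kernel opting in to the new minimum-workload-size hook; the class name and the returned figure are illustrative, and kernels that do not override get_mws() inherit default_mws (1), i.e. no constraint:

    #include "arm_compute/core/CPP/ICPPKernel.h"

    class MyKernel : public arm_compute::ICPPKernel
    {
    public:
        const char *name() const override
        {
            return "MyKernel";
        }
        size_t get_mws(const arm_compute::CPUInfo &platform, size_t thread_count) const override
        {
            ARM_COMPUTE_UNUSED(platform, thread_count);
            return 1024; // never hand a thread fewer than 1024 elements of work
        }
    };
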
diff --git a/arm_compute/core/CPP/ICPPSimpleKernel.h b/arm_compute/core/CPP/ICPPSimpleKernel.h
deleted file mode 100644
index c31d487a45..0000000000
--- a/arm_compute/core/CPP/ICPPSimpleKernel.h
+++ /dev/null
@@ -1,76 +0,0 @@
-/*
- * Copyright (c) 2017-2019 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_ICPPSIMPLEKERNEL_H
-#define ARM_COMPUTE_ICPPSIMPLEKERNEL_H
-
-#include "arm_compute/core/CPP/ICPPKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Interface for simple C++ kernels having 1 tensor input and 1 tensor output */
-class ICPPSimpleKernel : public ICPPKernel
-{
-public:
- /** Constructor */
- ICPPSimpleKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- ICPPSimpleKernel(const ICPPSimpleKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- ICPPSimpleKernel &operator=(const ICPPSimpleKernel &) = delete;
- /** Allow instances of this class to be moved */
- ICPPSimpleKernel(ICPPSimpleKernel &&) = default;
- /** Allow instances of this class to be moved */
- ICPPSimpleKernel &operator=(ICPPSimpleKernel &&) = default;
- /** Default destructor */
- ~ICPPSimpleKernel() = default;
-
-protected:
- /** Configure the kernel
- *
- * @param[in] input Source tensor.
- * @param[out] output Destination tensor.
- * @param[in] num_elems_processed_per_iteration Number of processed elements per iteration.
- * @param[in] border_undefined (Optional) True if the border mode is undefined. False if it's replicate or constant.
- * @param[in] border_size (Optional) Size of the border.
- */
- void configure(const ITensor *input, ITensor *output, unsigned int num_elems_processed_per_iteration, bool border_undefined = false, const BorderSize &border_size = BorderSize());
- /** Static function to check if given info will lead to a valid configuration of @ref ICPPSimpleKernel.
- *
- * @param[in] input Source tensor info.
- * @param[in] output Destination tensor info.
- * @param[in] num_elems_processed_per_iteration Number of processed elements per iteration.
- * @param[in] border_undefined (Optional) True if the border mode is undefined. False if it's replicate or constant.
- * @param[in] border_size (Optional) Size of the border.
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, unsigned int num_elems_processed_per_iteration,
- bool border_undefined = false, const BorderSize &border_size = BorderSize());
-
-protected:
- const ITensor *_input;
- ITensor *_output;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_ICPPSIMPLEKERNEL_H */
diff --git a/arm_compute/core/CPP/kernels/CPPBoxWithNonMaximaSuppressionLimitKernel.h b/arm_compute/core/CPP/kernels/CPPBoxWithNonMaximaSuppressionLimitKernel.h
index 068b37d80c..dd91595ea6 100644
--- a/arm_compute/core/CPP/kernels/CPPBoxWithNonMaximaSuppressionLimitKernel.h
+++ b/arm_compute/core/CPP/kernels/CPPBoxWithNonMaximaSuppressionLimitKernel.h
@@ -63,8 +63,16 @@ public:
* @param[out] keeps_size (Optional) Number of filtered indices per class tensor of size [num_classes]. Data types supported: U32
* @param[in] info (Optional) BoxNMSLimitInfo information.
*/
- void configure(const ITensor *scores_in, const ITensor *boxes_in, const ITensor *batch_splits_in, ITensor *scores_out, ITensor *boxes_out, ITensor *classes,
- ITensor *batch_splits_out = nullptr, ITensor *keeps = nullptr, ITensor *keeps_size = nullptr, const BoxNMSLimitInfo info = BoxNMSLimitInfo());
+ void configure(const ITensor *scores_in,
+ const ITensor *boxes_in,
+ const ITensor *batch_splits_in,
+ ITensor *scores_out,
+ ITensor *boxes_out,
+ ITensor *classes,
+ ITensor *batch_splits_out = nullptr,
+ ITensor *keeps = nullptr,
+ ITensor *keeps_size = nullptr,
+ const BoxNMSLimitInfo info = BoxNMSLimitInfo());
// Inherited methods overridden:
void run(const Window &window, const ThreadInfo &info) override;
@@ -74,9 +82,9 @@ public:
void run_nmslimit();
private:
- const ITensor *_scores_in;
- const ITensor *_boxes_in;
- const ITensor *_batch_splits_in;
+ const ITensor *_scores_in;
+ const ITensor *_boxes_in;
+ const ITensor *_batch_splits_in;
ITensor *_scores_out;
ITensor *_boxes_out;
ITensor *_classes;
diff --git a/arm_compute/core/CPP/kernels/CPPNonMaximumSuppressionKernel.h b/arm_compute/core/CPP/kernels/CPPNonMaximumSuppressionKernel.h
index e32b5d8f7b..d1f7f8670f 100644
--- a/arm_compute/core/CPP/kernels/CPPNonMaximumSuppressionKernel.h
+++ b/arm_compute/core/CPP/kernels/CPPNonMaximumSuppressionKernel.h
@@ -24,9 +24,8 @@
#ifndef ARM_COMPUTE_CPP_NONMAXIMUMSUPPRESSIONKERNEL_LAYER_H
#define ARM_COMPUTE_CPP_NONMAXIMUMSUPPRESSIONKERNEL_LAYER_H
-#include "arm_compute/runtime/CPP/ICPPSimpleFunction.h"
-
#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CPP/ICPPSimpleFunction.h"
namespace arm_compute
{
@@ -65,7 +64,12 @@ public:
* @param[in] iou_threshold The threshold used in non maximum suppression.
*
*/
- void configure(const ITensor *input_bboxes, const ITensor *input_scores, ITensor *output_indices, unsigned int max_output_size, const float score_threshold, const float iou_threshold);
+ void configure(const ITensor *input_bboxes,
+ const ITensor *input_scores,
+ ITensor *output_indices,
+ unsigned int max_output_size,
+ const float score_threshold,
+ const float iou_threshold);
/** Static function to check if given arguments will lead to a valid configuration of @ref CPPNonMaximumSuppressionKernel
*
@@ -77,8 +81,12 @@ public:
* @param[in] iou_threshold The threshold used in non maximum suppression.
*
*/
- static Status validate(const ITensorInfo *input_bboxes, const ITensorInfo *input_scores, const ITensorInfo *output_indices, unsigned int max_output_size,
- const float score_threshold, const float iou_threshold);
+ static Status validate(const ITensorInfo *input_bboxes,
+ const ITensorInfo *input_scores,
+ const ITensorInfo *output_indices,
+ unsigned int max_output_size,
+ const float score_threshold,
+ const float iou_threshold);
// Inherited methods overridden:
void run(const Window &window, const ThreadInfo &info) override;
diff --git a/arm_compute/core/CPP/kernels/CPPTopKVKernel.h b/arm_compute/core/CPP/kernels/CPPTopKVKernel.h
index 1245dbc14c..7326a10e2f 100644
--- a/arm_compute/core/CPP/kernels/CPPTopKVKernel.h
+++ b/arm_compute/core/CPP/kernels/CPPTopKVKernel.h
@@ -69,7 +69,8 @@ public:
*
* @return a status
*/
- static Status validate(const ITensorInfo *predictions, const ITensorInfo *targets, ITensorInfo *output, const unsigned int k);
+ static Status
+ validate(const ITensorInfo *predictions, const ITensorInfo *targets, ITensorInfo *output, const unsigned int k);
// Inherited methods overridden:
void run(const Window &window, const ThreadInfo &info) override;
diff --git a/arm_compute/core/Coordinates.h b/arm_compute/core/Coordinates.h
index f6e1f4d282..d1240bb10a 100644
--- a/arm_compute/core/Coordinates.h
+++ b/arm_compute/core/Coordinates.h
@@ -42,8 +42,7 @@ public:
* @param[in] coords Values to initialize the dimensions.
*/
template <typename... Ts>
- constexpr Coordinates(Ts... coords)
- : Dimensions{ coords... }
+ constexpr Coordinates(Ts... coords) : Dimensions{coords...}
{
}
/** Allow instances of this class to be copy constructed */
@@ -57,5 +56,5 @@ public:
/** Default destructor */
~Coordinates() = default;
};
-}
+} // namespace arm_compute
#endif /*ARM_COMPUTE_COORDINATES_H*/
diff --git a/arm_compute/core/CoreTypes.h b/arm_compute/core/CoreTypes.h
new file mode 100644
index 0000000000..1a9db1937c
--- /dev/null
+++ b/arm_compute/core/CoreTypes.h
@@ -0,0 +1,352 @@
+/*
+ * Copyright (c) 2016-2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ACL_ARM_COMPUTE_CORE_CORETYPES
+#define ACL_ARM_COMPUTE_CORE_CORETYPES
+
+#include "arm_compute/core/Strides.h"
+
+#include "support/Half.h"
+
+/** CoreTypes.h groups together essential small types that are used across functions */
+
+namespace arm_compute
+{
+/** 16-bit floating point type */
+using half = half_float::half;
+/** Permutation vector */
+using PermutationVector = Strides;
+
+/** Available channels */
+enum class Channel
+{
+    UNKNOWN, /**< Unknown channel format */
+ C0, /**< First channel (used by formats with unknown channel types). */
+ C1, /**< Second channel (used by formats with unknown channel types). */
+ C2, /**< Third channel (used by formats with unknown channel types). */
+ C3, /**< Fourth channel (used by formats with unknown channel types). */
+ R, /**< Red channel. */
+ G, /**< Green channel. */
+ B, /**< Blue channel. */
+ A, /**< Alpha channel. */
+ Y, /**< Luma channel. */
+ U, /**< Cb/U channel. */
+ V /**< Cr/V/Value channel. */
+};
+
+/** Image colour formats */
+enum class Format
+{
+ UNKNOWN, /**< Unknown image format */
+ U8, /**< 1 channel, 1 U8 per channel */
+ S16, /**< 1 channel, 1 S16 per channel */
+ U16, /**< 1 channel, 1 U16 per channel */
+ S32, /**< 1 channel, 1 S32 per channel */
+ U32, /**< 1 channel, 1 U32 per channel */
+ S64, /**< 1 channel, 1 S64 per channel */
+ U64, /**< 1 channel, 1 U64 per channel */
+ BFLOAT16, /**< 16-bit brain floating-point number */
+ F16, /**< 1 channel, 1 F16 per channel */
+ F32, /**< 1 channel, 1 F32 per channel */
+ UV88, /**< 2 channel, 1 U8 per channel */
+ RGB888, /**< 3 channels, 1 U8 per channel */
+ RGBA8888, /**< 4 channels, 1 U8 per channel */
+ YUV444, /**< A 3 plane of 8 bit 4:4:4 sampled Y, U, V planes */
+ YUYV422, /**< A single plane of 32-bit macro pixel of Y0, U0, Y1, V0 bytes */
+ NV12, /**< A 2 plane YUV format of Luma (Y) and interleaved UV data at 4:2:0 sampling */
+ NV21, /**< A 2 plane YUV format of Luma (Y) and interleaved VU data at 4:2:0 sampling */
+ IYUV, /**< A 3 plane of 8-bit 4:2:0 sampled Y, U, V planes */
+    UYVY422 /**< A single plane of 32-bit macro pixel of U0, Y0, V0, Y1 bytes */
+};
+
+/** Available data types */
+enum class DataType
+{
+ UNKNOWN, /**< Unknown data type */
+ U8, /**< unsigned 8-bit number */
+ S8, /**< signed 8-bit number */
+ QSYMM8, /**< quantized, symmetric fixed-point 8-bit number */
+ QASYMM8, /**< quantized, asymmetric fixed-point 8-bit number unsigned */
+ QASYMM8_SIGNED, /**< quantized, asymmetric fixed-point 8-bit number signed */
+ QSYMM8_PER_CHANNEL, /**< quantized, symmetric per channel fixed-point 8-bit number */
+ U16, /**< unsigned 16-bit number */
+ S16, /**< signed 16-bit number */
+ QSYMM16, /**< quantized, symmetric fixed-point 16-bit number */
+ QASYMM16, /**< quantized, asymmetric fixed-point 16-bit number */
+ U32, /**< unsigned 32-bit number */
+ S32, /**< signed 32-bit number */
+ U64, /**< unsigned 64-bit number */
+ S64, /**< signed 64-bit number */
+ BFLOAT16, /**< 16-bit brain floating-point number */
+ F16, /**< 16-bit floating-point number */
+ F32, /**< 32-bit floating-point number */
+ F64, /**< 64-bit floating-point number */
+ SIZET /**< size_t */
+};
+
+/** [DataLayout enum definition] **/
+
+/** Supported tensor data layouts */
+enum class DataLayout
+{
+ UNKNOWN, /**< Unknown data layout */
+ NCHW, /**< Num samples, channels, height, width */
+ NHWC, /**< Num samples, height, width, channels */
+ NCDHW, /**< Num samples, channels, depth, height, width */
+ NDHWC /**< Num samples, depth, height, width, channels */
+};
+/** [DataLayout enum definition] **/
+
+/** Supported tensor data layout dimensions */
+enum class DataLayoutDimension
+{
+ CHANNEL, /**< channel */
+ HEIGHT, /**< height */
+ WIDTH, /**< width */
+ DEPTH, /**< depth */
+ BATCHES /**< batches */
+};
+
+/** Dimension rounding type when down-scaling on CNNs
+ * @note Used in pooling and convolution layer
+ */
+enum class DimensionRoundingType
+{
+ FLOOR, /**< Floor rounding */
+ CEIL /**< Ceil rounding */
+};
+
+class PadStrideInfo
+{
+public:
+ /** Constructor
+ *
+ * @param[in] stride_x (Optional) Stride, in elements, across x. Defaults to 1.
+ * @param[in] stride_y (Optional) Stride, in elements, across y. Defaults to 1.
+ * @param[in] pad_x (Optional) Padding, in elements, across x. Defaults to 0.
+ * @param[in] pad_y (Optional) Padding, in elements, across y. Defaults to 0.
+ * @param[in] round (Optional) Dimensions rounding. Defaults to @ref DimensionRoundingType::FLOOR.
+ */
+ PadStrideInfo(unsigned int stride_x = 1,
+ unsigned int stride_y = 1,
+ unsigned int pad_x = 0,
+ unsigned int pad_y = 0,
+ DimensionRoundingType round = DimensionRoundingType::FLOOR)
+ : _stride(std::make_pair(stride_x, stride_y)),
+ _pad_left(pad_x),
+ _pad_top(pad_y),
+ _pad_right(pad_x),
+ _pad_bottom(pad_y),
+ _round_type(round)
+ {
+ }
+ /** Constructor
+ *
+ * @param[in] stride_x Stride, in elements, across x.
+ * @param[in] stride_y Stride, in elements, across y.
+ * @param[in] pad_left Padding across x on the left, in elements.
+ * @param[in] pad_right Padding across x on the right, in elements.
+ * @param[in] pad_top Padding across y on the top, in elements.
+ * @param[in] pad_bottom Padding across y on the bottom, in elements.
+ * @param[in] round Dimensions rounding.
+ */
+ PadStrideInfo(unsigned int stride_x,
+ unsigned int stride_y,
+ unsigned int pad_left,
+ unsigned int pad_right,
+ unsigned int pad_top,
+ unsigned int pad_bottom,
+ DimensionRoundingType round)
+ : _stride(std::make_pair(stride_x, stride_y)),
+ _pad_left(pad_left),
+ _pad_top(pad_top),
+ _pad_right(pad_right),
+ _pad_bottom(pad_bottom),
+ _round_type(round)
+ {
+ }
+ /** Get the stride.
+ *
+ * @return a pair: stride x, stride y.
+ */
+ std::pair<unsigned int, unsigned int> stride() const
+ {
+ return _stride;
+ }
+ /** Check whether the padding is symmetric.
+ *
+ * @return True if the padding is symmetric.
+ */
+ bool padding_is_symmetric() const
+ {
+ return (_pad_left == _pad_right) && (_pad_top == _pad_bottom);
+ }
+ /** Get the padding.
+ *
+ * @note This should only be used when the padding is symmetric.
+ *
+ * @return a pair: padding left/right, padding top/bottom
+ */
+ std::pair<unsigned int, unsigned int> pad() const
+ {
+ //this accessor should be used only when padding is symmetric
+ ARM_COMPUTE_ERROR_ON(!padding_is_symmetric());
+ return std::make_pair(_pad_left, _pad_top);
+ }
+
+ /** Get the left padding */
+ unsigned int pad_left() const
+ {
+ return _pad_left;
+ }
+ /** Get the right padding */
+ unsigned int pad_right() const
+ {
+ return _pad_right;
+ }
+ /** Get the top padding */
+ unsigned int pad_top() const
+ {
+ return _pad_top;
+ }
+ /** Get the bottom padding */
+ unsigned int pad_bottom() const
+ {
+ return _pad_bottom;
+ }
+
+ /** Get the rounding type */
+ DimensionRoundingType round() const
+ {
+ return _round_type;
+ }
+
+ /** Check whether this has any padding */
+ bool has_padding() const
+ {
+ return (_pad_left != 0 || _pad_top != 0 || _pad_right != 0 || _pad_bottom != 0);
+ }
+
+private:
+ std::pair<unsigned int, unsigned int> _stride;
+ unsigned int _pad_left;
+ unsigned int _pad_top;
+ unsigned int _pad_right;
+ unsigned int _pad_bottom;
+
+ DimensionRoundingType _round_type;
+};
+
+/** Memory layouts for the weights tensor.
+ *
+ * * UNSPECIFIED is used to select kernels that do not run in
+ * variable weights mode.
+ *
+ * * ANY is used to query the kernel database to retrieve any of the
+ * kernels that runs in variable weights mode. Once a kernel is
+ * found, the specific format expected by the kernel can be
+ * retrieved by the user for reordering the weights tensor
+ * accordingly.
+ *
+ * The other values OHWIo{interleave_by}i{block_by} describe the
+ * memory layout of a 4D tensor with layout OHWI that has been
+ * transformed into a 4D tensor with dimensions O'HWI' where:
+ *
+ * O' = first multiple of {interleave_by} s.t. O<=O'
+ * I' = first multiple of {block_by} s.t. I<=I'
+ *
+ * The total size of the dst tensor is O' x H x W x I'
+ *
+ * The access function of the tensor with layout
+ * OHWIo{interleave_by}i{block_by} and size O'HWI' is a 6-parameter
+ * access function, where the 6 parameters are computed as follows:
+ *
+ * x5 = floor(o/{interleave_by}) RANGE [0, O'/{interleave_by} -1] SIZE: O'/{interleave_by}
+ *
+ * x4 = h RANGE [0, H-1] SIZE: H
+ * x3 = w RANGE [0, W-1] SIZE: W
+ * x2 = floor(i/{block_by}) RANGE [0, I'/{block_by} -1] SIZE: I'/{block_by}
+ * x1 = o%{interleave_by} RANGE [0, {interleave_by} -1] SIZE: {interleave_by}
+ * x0 = i%{block_by} RANGE [0, {block_by} -1] SIZE: {block_by}
+ * TOTAL SIZE: O' * H * W * I'
+ *
+ * 4D 6D
+ * ----------------- -----------------------------------
+ * value(o, h, w, i) = x5 * H * W * I' * {interleave_by}
+ * + x4 * W * I' * {interleave_by}
+ * + x3 * I' * {interleave_by}
+ * + x2 * {interleave_by} * {block_by}
+ * + x1 * {block_by}
+ * + x0
+ *
+ * Notice that in arm_gemm the 4D tensor of dimension O'HWI' created
+ * for the OHWIo{interleave_by}i{block_by} format is in reality seen
+ * as a 2D tensor, where the number of rows is O'/{interleave_by}
+ * and the number of columns is {interleave_by} * H * W * I'.
+ *
+ * The postfix *_bf16 is for the memory layout needed for the
+ * fast-mode kernels, in which the weights are passed in bfloat16
+ * format.
+ */
+enum class WeightFormat
+{
+ UNSPECIFIED = 0x1,
+ ANY = 0x2,
+ OHWI = 0x100100,
+ OHWIo2 = 0x100200,
+ OHWIo4 = 0x100400,
+ OHWIo8 = 0x100800,
+ OHWIo16 = 0x101000,
+ OHWIo32 = 0x102000,
+ OHWIo64 = 0x104000,
+ OHWIo128 = 0x108000,
+ OHWIo4i2 = 0x200400,
+ OHWIo4i2_bf16 = 0x200410,
+ OHWIo8i2 = 0x200800,
+ OHWIo8i2_bf16 = 0x200810,
+ OHWIo16i2 = 0x201000,
+ OHWIo16i2_bf16 = 0x201010,
+ OHWIo32i2 = 0x202000,
+ OHWIo32i2_bf16 = 0x202010,
+ OHWIo64i2 = 0x204000,
+ OHWIo64i2_bf16 = 0x204010,
+ OHWIo4i4 = 0x400400,
+ OHWIo4i4_bf16 = 0x400410,
+ OHWIo8i4 = 0x400800,
+ OHWIo8i4_bf16 = 0x400810,
+ OHWIo16i4 = 0x401000,
+ OHWIo16i4_bf16 = 0x401010,
+ OHWIo32i4 = 0x402000,
+ OHWIo32i4_bf16 = 0x402010,
+ OHWIo64i4 = 0x404000,
+ OHWIo64i4_bf16 = 0x404010,
+ OHWIo2i8 = 0x800200,
+ OHWIo4i8 = 0x800400,
+ OHWIo8i8 = 0x800800,
+ OHWIo16i8 = 0x801000,
+ OHWIo32i8 = 0x802000,
+ OHWIo64i8 = 0x804000
+};
+
+} // namespace arm_compute
+#endif /* ACL_ARM_COMPUTE_CORE_CORETYPES */
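
The access function documented for OHWIo{interleave_by}i{block_by} above can be written out directly; a worked sketch with illustrative names, where I_prime is I rounded up to a multiple of block_by:

    #include <cstddef>

    // Linear offset of logical element (o, h, w, i) in the transformed
    // O'HWI' buffer, following the x5..x0 decomposition documented above.
    std::size_t ohwi_block_offset(std::size_t o, std::size_t h, std::size_t w, std::size_t i,
                                  std::size_t H, std::size_t W, std::size_t I_prime,
                                  std::size_t interleave_by, std::size_t block_by)
    {
        const std::size_t x5 = o / interleave_by; // outer output-channel block
        const std::size_t x4 = h;
        const std::size_t x3 = w;
        const std::size_t x2 = i / block_by; // outer input-channel block
        const std::size_t x1 = o % interleave_by;
        const std::size_t x0 = i % block_by;
        return x5 * H * W * I_prime * interleave_by + x4 * W * I_prime * interleave_by +
               x3 * I_prime * interleave_by + x2 * interleave_by * block_by + x1 * block_by + x0;
    }
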
diff --git a/arm_compute/core/Dimensions.h b/arm_compute/core/Dimensions.h
index 2ebfcd7f83..bb8692d70a 100644
--- a/arm_compute/core/Dimensions.h
+++ b/arm_compute/core/Dimensions.h
@@ -50,8 +50,7 @@ public:
* @param[in] dims Values to initialize the dimensions.
*/
template <typename... Ts>
- explicit Dimensions(Ts... dims)
- : _id{ { static_cast<T>(dims)... } }, _num_dimensions{ sizeof...(dims) }
+ explicit Dimensions(Ts... dims) : _id{{static_cast<T>(dims)...}}, _num_dimensions{sizeof...(dims)}
{
}
@@ -78,7 +77,7 @@ public:
ARM_COMPUTE_ERROR_ON(dimension >= num_max_dimensions);
_id[dimension] = value;
// Don't increase the number of dimensions if the new dimension is 1
- if(increase_dim_unit || value != 1)
+ if (increase_dim_unit || value != 1)
{
_num_dimensions = std::max(_num_dimensions, dimension + 1);
}
@@ -108,7 +107,7 @@ public:
void increment(size_t dim, T step = 1)
{
ARM_COMPUTE_ERROR_ON(dim >= _num_dimensions);
- if((std::numeric_limits<T>::max() - _id[dim]) >= step)
+ if ((std::numeric_limits<T>::max() - _id[dim]) >= step)
{
_id[dim] += step;
}
@@ -162,7 +161,7 @@ public:
const size_t last = std::min(_num_dimensions, first + n);
- if(last > (first + 1))
+ if (last > (first + 1))
{
// Collapse dimensions into the first
_id[first] = std::accumulate(&_id[first], &_id[last], 1, std::multiplies<T>());
@@ -196,7 +195,7 @@ public:
void remove(size_t idx)
{
ARM_COMPUTE_ERROR_ON(_num_dimensions < 1);
- if(idx >= _num_dimensions)
+ if (idx >= _num_dimensions)
{
return;
}
@@ -262,7 +261,7 @@ protected:
~Dimensions() = default;
std::array<T, num_max_dimensions> _id;
- size_t _num_dimensions{ 0 };
+ size_t _num_dimensions{0};
};
/** Check that given dimensions are equal.
@@ -289,5 +288,5 @@ inline bool operator!=(const Dimensions<T> &lhs, const Dimensions<T> &rhs)
{
return !(lhs == rhs);
}
-}
+} // namespace arm_compute
#endif /*ARM_COMPUTE_DIMENSIONS_H*/
diff --git a/arm_compute/core/Error.h b/arm_compute/core/Error.h
index 992d6bc71f..7a7033805a 100644
--- a/arm_compute/core/Error.h
+++ b/arm_compute/core/Error.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2019 Arm Limited.
+ * Copyright (c) 2016-2019, 2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -53,8 +53,7 @@ class Status
{
public:
/** Default Constructor **/
- Status()
- : _code(ErrorCode::OK), _error_description(" ")
+ Status() : _code(ErrorCode::OK), _error_description(" ")
{
}
/** Default Constructor
@@ -101,7 +100,7 @@ public:
/** Throws a runtime exception in case it contains a valid error status */
void throw_if_error() const
{
- if(!bool(*this))
+ if (!bool(*this))
{
internal_throw_on_error();
}
@@ -119,7 +118,7 @@ private:
/** Creates an error containing the error message
*
* @param[in] error_code Error code
- * @param[in] msg Message to display before aborting.
+ * @param[in] msg Message to display before abandoning.
*
* @return status containing the error
*/
@@ -131,7 +130,7 @@ Status create_error(ErrorCode error_code, std::string msg);
* @param[in] func Function in which the error occurred.
* @param[in] file File in which the error occurred.
* @param[in] line Line in which the error occurred.
- * @param[in] msg Message to display before aborting.
+ * @param[in] msg Message to display before abandoning.
*
* @return status containing the error
*/
@@ -141,7 +140,7 @@ Status create_error_msg(ErrorCode error_code, const char *func, const char *file
* @param[in] err Error status
*/
[[noreturn]] void throw_error(Status err);
-}
+} // namespace arm_compute
/** To avoid unused variables warnings
*
* This is useful if for example a variable is only used
@@ -156,7 +155,8 @@ Status create_error_msg(ErrorCode error_code, const char *func, const char *file
* @param[in] error_code Error code.
* @param[in] msg Message to encapsulate.
*/
-#define ARM_COMPUTE_CREATE_ERROR(error_code, msg) arm_compute::create_error_msg(error_code, __func__, __FILE__, __LINE__, msg)
+#define ARM_COMPUTE_CREATE_ERROR(error_code, msg) \
+ arm_compute::create_error_msg(error_code, __func__, __FILE__, __LINE__, msg)
/** Creates an error on location with a given message
*
@@ -164,9 +164,10 @@ Status create_error_msg(ErrorCode error_code, const char *func, const char *file
* @param[in] func Function in which the error occurred.
* @param[in] file File in which the error occurred.
* @param[in] line Line in which the error occurred.
- * @param[in] msg Message to display before aborting.
+ * @param[in] msg Message to display before abandoning.
*/
-#define ARM_COMPUTE_CREATE_ERROR_LOC(error_code, func, file, line, msg) arm_compute::create_error_msg(error_code, func, file, line, msg)
+#define ARM_COMPUTE_CREATE_ERROR_LOC(error_code, func, file, line, msg) \
+ arm_compute::create_error_msg(error_code, func, file, line, msg)
/** Creates an error on location with a given message. Accepts a message format
* and a variable list of arguments matching the format description.
@@ -178,14 +179,14 @@ Status create_error_msg(ErrorCode error_code, const char *func, const char *file
* @param[in] msg Error description message format.
* @param[in] ... List of arguments matching the format description.
*/
-#define ARM_COMPUTE_CREATE_ERROR_LOC_VAR(error_code, func, file, line, msg, ...) \
- do \
- { \
- std::array<char, 512> out{ 0 }; \
- int offset = snprintf(out.data(), out.size(), "in %s %s:%d: ", func, file, line); \
- snprintf(out.data() + offset, out.size() - offset, msg, __VA_ARGS__); \
- arm_compute::create_error(error_code, std::string(out.data())); \
- } while(false)
+#define ARM_COMPUTE_CREATE_ERROR_LOC_VAR(error_code, func, file, line, msg, ...) \
+ do \
+ { \
+ std::array<char, 512> out{0}; \
+ int offset = snprintf(out.data(), out.size(), "in %s %s:%d: ", func, file, line); \
+ snprintf(out.data() + offset, out.size() - offset, msg, __VA_ARGS__); \
+ arm_compute::create_error(error_code, std::string(out.data())); \
+ } while (false)
/** An error is returned with the given description.
*
@@ -195,7 +196,7 @@ Status create_error_msg(ErrorCode error_code, const char *func, const char *file
do \
{ \
return ARM_COMPUTE_CREATE_ERROR(arm_compute::ErrorCode::RUNTIME_ERROR, __VA_ARGS__); \
- } while(false)
+ } while (false)
/** Checks if a status contains an error and returns it
*
@@ -204,18 +205,18 @@ Status create_error_msg(ErrorCode error_code, const char *func, const char *file
#define ARM_COMPUTE_RETURN_ON_ERROR(status) \
do \
{ \
- if(!bool(status)) \
+ const auto s = status; \
+ if (!bool(s)) \
{ \
- return status; \
+ return s; \
} \
- } while(false)
+ } while (false)
/** Checks if an error value is valid if not throws an exception with the error
*
* @param[in] error Error value to check.
*/
-#define ARM_COMPUTE_THROW_ON_ERROR(error) \
- error.throw_if_error();
+#define ARM_COMPUTE_THROW_ON_ERROR(error) error.throw_if_error();
/** If the condition is true, an error is returned. Accepts a message format
* and a variable list of arguments matching the format description.
@@ -227,28 +228,29 @@ Status create_error_msg(ErrorCode error_code, const char *func, const char *file
#define ARM_COMPUTE_RETURN_ERROR_ON_MSG_VAR(cond, msg, ...) \
do \
{ \
- if(cond) \
+ if (cond) \
{ \
- std::array<char, 512> out{ 0 }; \
+ std::array<char, 512> out{0}; \
int offset = snprintf(out.data(), out.size(), "in %s %s:%d: ", __func__, __FILE__, __LINE__); \
snprintf(out.data() + offset, out.size() - offset, msg, __VA_ARGS__); \
return arm_compute::create_error(arm_compute::ErrorCode::RUNTIME_ERROR, std::string(out.data())); \
} \
- } while(false)
+ } while (false)
/** If the condition is true, an error is returned
*
* @param[in] cond Condition to evaluate.
* @param[in] msg Error description message
*/
-#define ARM_COMPUTE_RETURN_ERROR_ON_MSG(cond, msg) \
- do \
- { \
- if(cond) \
- { \
- return arm_compute::create_error_msg(arm_compute::ErrorCode::RUNTIME_ERROR, __func__, __FILE__, __LINE__, msg); \
- } \
- } while(false)
+#define ARM_COMPUTE_RETURN_ERROR_ON_MSG(cond, msg) \
+ do \
+ { \
+ if (cond) \
+ { \
+ return arm_compute::create_error_msg(arm_compute::ErrorCode::RUNTIME_ERROR, __func__, __FILE__, __LINE__, \
+ msg); \
+ } \
+ } while (false)
/** If the condition is true, an error is thrown. Accepts a message format
* and a variable list of arguments matching the format description.
@@ -260,17 +262,17 @@ Status create_error_msg(ErrorCode error_code, const char *func, const char *file
* @param[in] msg Error description message format.
* @param[in] ... List of arguments matching the format description.
*/
-#define ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG_VAR(cond, func, file, line, msg, ...) \
- do \
- { \
- if(cond) \
- { \
- std::array<char, 512> out{ 0 }; \
- int offset = snprintf(out.data(), out.size(), "in %s %s:%d: ", func, file, line); \
- snprintf(out.data() + offset, out.size() - offset, msg, __VA_ARGS__); \
- return arm_compute::create_error(ErrorCode::RUNTIME_ERROR, std::string(out.data())); \
- } \
- } while(false)
+#define ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG_VAR(cond, func, file, line, msg, ...) \
+ do \
+ { \
+ if (cond) \
+ { \
+ std::array<char, 512> out{0}; \
+ int offset = snprintf(out.data(), out.size(), "in %s %s:%d: ", func, file, line); \
+ snprintf(out.data() + offset, out.size() - offset, msg, __VA_ARGS__); \
+ return arm_compute::create_error(ErrorCode::RUNTIME_ERROR, std::string(out.data())); \
+ } \
+ } while (false)
/** If the condition is true, an error is returned.
*
@@ -283,18 +285,17 @@ Status create_error_msg(ErrorCode error_code, const char *func, const char *file
#define ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG(cond, func, file, line, msg) \
do \
{ \
- if(cond) \
+ if (cond) \
{ \
return arm_compute::create_error_msg(ErrorCode::RUNTIME_ERROR, func, file, line, msg); \
} \
- } while(false)
+ } while (false)
/** If the condition is true, an error is returned
*
* @param[in] cond Condition to evaluate
*/
-#define ARM_COMPUTE_RETURN_ERROR_ON(cond) \
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(cond, #cond)
+#define ARM_COMPUTE_RETURN_ERROR_ON(cond) ARM_COMPUTE_RETURN_ERROR_ON_MSG(cond, #cond)
/** If the condition is true, an error is returned
*
@@ -313,11 +314,12 @@ Status create_error_msg(ErrorCode error_code, const char *func, const char *file
* @param[in] line Line in which the error occurred.
* @param[in] msg Message to display.
*/
-#define ARM_COMPUTE_THROW_ERROR(func, file, line, msg) \
- do \
- { \
- arm_compute::throw_error(arm_compute::create_error_msg(arm_compute::ErrorCode::RUNTIME_ERROR, func, file, line, msg)); \
- } while(false)
+#define ARM_COMPUTE_THROW_ERROR(func, file, line, msg) \
+ do \
+ { \
+ arm_compute::throw_error( \
+ arm_compute::create_error_msg(arm_compute::ErrorCode::RUNTIME_ERROR, func, file, line, msg)); \
+ } while (false)
/** Print the given message then throw an std::runtime_error. Accepts a message format
* and a variable list of arguments matching the format description.
@@ -331,11 +333,11 @@ Status create_error_msg(ErrorCode error_code, const char *func, const char *file
#define ARM_COMPUTE_THROW_ERROR_VAR(func, file, line, msg, ...) \
do \
{ \
- std::array<char, 512> out{ 0 }; \
- int offset = snprintf(out.data(), out.size(), "in %s %s:%d: ", func, file, line); \
+ std::array<char, 512> out{0}; \
+ int offset = snprintf(out.data(), out.size(), "in %s %s:%d: ", func, file, line); \
snprintf(out.data() + offset, out.size() - offset, msg, __VA_ARGS__); \
arm_compute::throw_error(arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR, std::string(out.data()))); \
- } while(false)
+ } while (false)
/** Print the given message then throw an std::runtime_error. Accepts a message format
* and a variable list of arguments matching the format description.
@@ -360,7 +362,8 @@ Status create_error_msg(ErrorCode error_code, const char *func, const char *file
* @param[in] msg Error description message format.
* @param[in] ... List of arguments matching the format description.
*/
-#define ARM_COMPUTE_ERROR_LOC_VAR(func, file, line, msg, ...) ARM_COMPUTE_THROW_ERROR_VAR(func, file, line, msg, __VA_ARGS__) // NOLINT
+#define ARM_COMPUTE_ERROR_LOC_VAR(func, file, line, msg, ...) \
+ ARM_COMPUTE_THROW_ERROR_VAR(func, file, line, msg, __VA_ARGS__) // NOLINT
/** Print the given message then throw an std::runtime_error.
*
@@ -379,11 +382,11 @@ Status create_error_msg(ErrorCode error_code, const char *func, const char *file
#define ARM_COMPUTE_EXIT_ON_MSG(cond, msg) \
do \
{ \
- if(cond) \
+ if (cond) \
{ \
ARM_COMPUTE_ERROR(msg); \
} \
- } while(false)
+ } while (false)
/** If the condition is true, the given message is printed and the program exits. Accepts a message format
* and a variable list of arguments matching the format description.
@@ -395,27 +398,25 @@ Status create_error_msg(ErrorCode error_code, const char *func, const char *file
#define ARM_COMPUTE_EXIT_ON_MSG_VAR(cond, msg, ...) \
do \
{ \
- if(cond) \
+ if (cond) \
{ \
ARM_COMPUTE_ERROR_VAR(msg, __VA_ARGS__); \
} \
- } while(false)
+ } while (false)
#ifdef ARM_COMPUTE_ASSERTS_ENABLED
/** Checks if a status value is valid; if not, throws an exception with the error
*
* @param[in] status Status value to check.
*/
-#define ARM_COMPUTE_ERROR_THROW_ON(status) \
- status.throw_if_error()
+#define ARM_COMPUTE_ERROR_THROW_ON(status) status.throw_if_error()
/** If the condition is true, the given message is printed and an exception is thrown
*
* @param[in] cond Condition to evaluate.
* @param[in] msg Message to display.
*/
-#define ARM_COMPUTE_ERROR_ON_MSG(cond, msg) \
- ARM_COMPUTE_EXIT_ON_MSG(cond, msg)
+#define ARM_COMPUTE_ERROR_ON_MSG(cond, msg) ARM_COMPUTE_EXIT_ON_MSG(cond, msg)
/** If the condition is true, the given message is printed and an exception is thrown. Accepts a message format
* and a variable list of arguments matching the format description.
@@ -424,8 +425,7 @@ Status create_error_msg(ErrorCode error_code, const char *func, const char *file
* @param[in] msg Error description message format.
* @param[in] ... List of arguments matching the format description.
*/
-#define ARM_COMPUTE_ERROR_ON_MSG_VAR(cond, msg, ...) \
- ARM_COMPUTE_EXIT_ON_MSG_VAR(cond, msg, __VA_ARGS__)
+#define ARM_COMPUTE_ERROR_ON_MSG_VAR(cond, msg, ...) ARM_COMPUTE_EXIT_ON_MSG_VAR(cond, msg, __VA_ARGS__)
/** If the condition is true, the given message is printed and an exception is thrown.
*
@@ -438,11 +438,11 @@ Status create_error_msg(ErrorCode error_code, const char *func, const char *file
#define ARM_COMPUTE_ERROR_ON_LOC_MSG(cond, func, file, line, ...) \
do \
{ \
- if(cond) \
+ if (cond) \
{ \
ARM_COMPUTE_ERROR_LOC_VAR(func, file, line, __VA_ARGS__); \
} \
- } while(false)
+ } while (false)
/** If the condition is true, the given message is printed and an exception is thrown; otherwise the value is returned
*
@@ -463,8 +463,7 @@ Status create_error_msg(ErrorCode error_code, const char *func, const char *file
*
* @param[in] cond Condition to evaluate.
*/
-#define ARM_COMPUTE_ERROR_ON(cond) \
- ARM_COMPUTE_ERROR_ON_MSG(cond, #cond)
+#define ARM_COMPUTE_ERROR_ON(cond) ARM_COMPUTE_ERROR_ON_MSG(cond, #cond)
/** If the condition is true, an error message is printed and an exception is thrown
*
diff --git a/arm_compute/core/GPUTarget.h b/arm_compute/core/GPUTarget.h
index d9994b6cf0..b107a52d9f 100644
--- a/arm_compute/core/GPUTarget.h
+++ b/arm_compute/core/GPUTarget.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,8 +21,8 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef ARM_COMPUTE_GPUTARGET_H
-#define ARM_COMPUTE_GPUTARGET_H
+#ifndef ACL_ARM_COMPUTE_CORE_GPUTARGET_H
+#define ACL_ARM_COMPUTE_CORE_GPUTARGET_H
#include "support/Traits.h"
@@ -33,25 +33,38 @@ namespace arm_compute
/** Available GPU Targets */
enum class GPUTarget
{
- UNKNOWN = 0x101,
- GPU_ARCH_MASK = 0xF00,
- MIDGARD = 0x100,
- BIFROST = 0x200,
- VALHALL = 0x300,
- T600 = 0x110,
- T700 = 0x120,
- T800 = 0x130,
- G71 = 0x210,
- G72 = 0x220,
- G51 = 0x230,
- G51BIG = 0x231,
- G51LIT = 0x232,
- G52 = 0x240,
- G52LIT = 0x241,
- G76 = 0x250,
- G77 = 0x310,
- G78 = 0x320,
- TODX = 0x330,
+ UNKNOWN = 0x101,
+ GPU_ARCH_MASK = 0xF00,
+ GPU_GENERATION_MASK = 0x0F0,
+ MIDGARD = 0x100,
+ BIFROST = 0x200,
+ VALHALL = 0x300,
+    FIFTHGEN            = 0x400,
+ T600 = 0x110,
+ T700 = 0x120,
+ T800 = 0x130,
+ G71 = 0x210,
+ G72 = 0x220,
+ G51 = 0x221,
+ G51BIG = 0x222,
+ G51LIT = 0x223,
+ G31 = 0x224,
+ G76 = 0x230,
+ G52 = 0x231,
+ G52LIT = 0x232,
+ G77 = 0x310,
+ G57 = 0x311,
+ G78 = 0x320,
+ G68 = 0x321,
+ G78AE = 0x330,
+ G710 = 0x340,
+ G610 = 0x341,
+ G510 = 0x342,
+ G310 = 0x343,
+ G715 = 0x350,
+ G615 = 0x351,
+ G720 = 0x410,
+    G620                = 0x411
};
/** Enable bitwise operations on GPUTarget enumerations */
@@ -104,4 +117,4 @@ inline bool gpu_target_is_in(GPUTarget target_to_check, GPUTarget target)
return target_to_check == target;
}
} // namespace arm_compute
-#endif /* ARM_COMPUTE_GPUTARGET_H */
+#endif // ACL_ARM_COMPUTE_CORE_GPUTARGET_H
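
The reworked enum encodes the architecture in the high nibble and, with the new GPU_GENERATION_MASK, the generation in the middle nibble, so targets can be classified with the masks alone. A small sketch of how the masks compose (bitwise operators on GPUTarget are enabled just below the enum):

    GPUTarget target = GPUTarget::G720;                              // 0x410
    bool is_fifth_gen =
        (target & GPUTarget::GPU_ARCH_MASK) == GPUTarget::FIFTHGEN;  // 0x400 == 0x400 -> true
    bool same_gen_as_g620 =
        (target & GPUTarget::GPU_GENERATION_MASK) ==
        (GPUTarget::G620 & GPUTarget::GPU_GENERATION_MASK);          // 0x010 == 0x010 -> true
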
diff --git a/arm_compute/core/Helpers.h b/arm_compute/core/Helpers.h
index b6635aba6d..960201510a 100644
--- a/arm_compute/core/Helpers.h
+++ b/arm_compute/core/Helpers.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2020 Arm Limited.
+ * Copyright (c) 2016-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -55,6 +55,16 @@ public:
*/
Iterator(const ITensor *tensor, const Window &window);
+    /** Create a container iterator for the tensor with the specified number of dimensions, strides, buffer pointer and window.
+ *
+ * @param[in] num_dims The number of dimensions.
+ * @param[in] strides The strides in bytes.
+ * @param[in] buffer The data buffer.
+ * @param[in] offset The offset in bytes from the beginning of the buffer to the first element of the tensor.
+ * @param[in] window The window which will be used to iterate over the tensor.
+ */
+ Iterator(size_t num_dims, const Strides &strides, uint8_t *buffer, size_t offset, const Window &window);
+
/** Increment the iterator along the specified dimension of the step value associated to the dimension.
*
* @warning It is the caller's responsibility to call increment(dimension+1) when reaching the end of a dimension, the iterator will not check for overflow.
@@ -86,13 +96,22 @@ public:
void reset(size_t dimension);
private:
+    /** Initialize a container iterator for the tensor with the specified number of dimensions, strides, buffer pointer and window.
+ *
+ * @param[in] num_dims The number of dimensions.
+ * @param[in] strides The strides in bytes.
+ * @param[in] buffer The data buffer.
+ * @param[in] offset The offset in bytes from the beginning of the buffer to the first element of the tensor.
+ * @param[in] window The window which will be used to iterate over the tensor.
+ */
+ void initialize(size_t num_dims, const Strides &strides, uint8_t *buffer, size_t offset, const Window &window);
+
uint8_t *_ptr;
class Dimension
{
public:
- constexpr Dimension()
- : _dim_start(0), _stride(0)
+ constexpr Dimension() : _dim_start(0), _stride(0)
{
}
@@ -112,7 +131,7 @@ private:
* @param[in,out] iterators Tensor iterators which will be updated by this function before calling lambda_function.
*/
template <typename L, typename... Ts>
-inline void execute_window_loop(const Window &w, L &&lambda_function, Ts &&... iterators);
+inline void execute_window_loop(const Window &w, L &&lambda_function, Ts &&...iterators);
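
The new constructor above lets an Iterator walk a raw buffer without wrapping it in an ITensor, and pairs naturally with execute_window_loop(). A minimal sketch, assuming a contiguous 8x4 float buffer (all sizes illustrative):

    float buffer[8 * 4];                                  // 4 rows of 8 floats
    Strides strides(sizeof(float), 8 * sizeof(float));    // element and row strides in bytes
    Window  win;
    win.set(0, Window::Dimension(0, 8));
    win.set(1, Window::Dimension(0, 4));
    Iterator it(2, strides, reinterpret_cast<uint8_t *>(buffer), 0, win);
    execute_window_loop(
        win, [&](const Coordinates &) { *reinterpret_cast<float *>(it.ptr()) = 0.f; }, it);
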
/** Permutes given Dimensions according to a permutation vector
*
@@ -125,7 +144,7 @@ template <typename T>
inline void permute(Dimensions<T> &dimensions, const PermutationVector &perm)
{
auto dimensions_copy = utility::make_array<Dimensions<T>::num_max_dimensions>(dimensions.begin(), dimensions.end());
- for(unsigned int i = 0; i < perm.num_dimensions(); ++i)
+ for (unsigned int i = 0; i < perm.num_dimensions(); ++i)
{
T dimension_val = (perm[i] < dimensions.num_dimensions()) ? dimensions_copy[perm[i]] : 0;
dimensions.set(i, dimension_val);
@@ -142,7 +161,7 @@ inline void permute(Dimensions<T> &dimensions, const PermutationVector &perm)
inline void permute(TensorShape &shape, const PermutationVector &perm)
{
TensorShape shape_copy = shape;
- for(unsigned int i = 0; i < perm.num_dimensions(); ++i)
+ for (unsigned int i = 0; i < perm.num_dimensions(); ++i)
{
size_t dimension_val = (perm[i] < shape.num_dimensions()) ? shape_copy[perm[i]] : 1;
shape.set(i, dimension_val, false, false); // Avoid changes in _num_dimension
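
For reference, the permutation vector holds, at each destination index, the source index to copy from; the usual NCHW-to-NHWC vector is (2, 0, 1). A worked sketch:

    TensorShape       shape(7U, 5U, 3U, 2U); // [W=7, H=5, C=3, N=2] (NCHW order)
    PermutationVector perm(2U, 0U, 1U);      // dst[0]=src[2], dst[1]=src[0], dst[2]=src[1]
    permute(shape, perm);                    // shape is now [C=3, W=7, H=5, N=2] (NHWC order)
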
@@ -159,8 +178,11 @@ inline void permute(TensorShape &shape, const PermutationVector &perm)
*
* @return The corresponding valid region
*/
-ValidRegion calculate_valid_region_scale(const ITensorInfo &src_info, const TensorShape &dst_shape,
- InterpolationPolicy interpolate_policy, SamplingPolicy sampling_policy, bool border_undefined);
+ValidRegion calculate_valid_region_scale(const ITensorInfo &src_info,
+ const TensorShape &dst_shape,
+ InterpolationPolicy interpolate_policy,
+ SamplingPolicy sampling_policy,
+ bool border_undefined);
/** Convert a linear index into n-dimensional coordinates.
*
@@ -180,6 +202,22 @@ inline Coordinates index2coords(const TensorShape &shape, int index);
*/
inline int coords2index(const TensorShape &shape, const Coordinates &coord);
+/** Returns a static map used to find an index or dimension based on a data layout
+ *
+ * *** Layouts ***
+ *
+ * *** 4D ***
+ * [N C H W]
+ * [3 2 1 0]
+ * [N H W C]
+ *
+ * * *** 5D ***
+ * [N C D H W]
+ * [4 3 2 1 0]
+ * [N D H W C]
+ */
+const std::map<DataLayout, std::vector<DataLayoutDimension>> &get_layout_map();
+
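
With the map in place, both lookup helpers below reduce to a search of the per-layout dimension vector. For example:

    size_t w_nchw = get_data_layout_dimension_index(DataLayout::NCHW, DataLayoutDimension::WIDTH); // 0
    size_t w_nhwc = get_data_layout_dimension_index(DataLayout::NHWC, DataLayoutDimension::WIDTH); // 1
    DataLayoutDimension d0 = get_index_data_layout_dimension(DataLayout::NHWC, 0);                 // CHANNEL
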
/** Get the index of the given dimension.
*
* @param[in] data_layout The data layout.
@@ -187,7 +225,8 @@ inline int coords2index(const TensorShape &shape, const Coordinates &coord);
*
* @return The int conversion of the requested data layout index.
*/
-inline size_t get_data_layout_dimension_index(const DataLayout data_layout, const DataLayoutDimension data_layout_dimension);
+inline size_t get_data_layout_dimension_index(const DataLayout &data_layout,
+ const DataLayoutDimension &data_layout_dimension);
/** Get the DataLayoutDimension of a given index and layout.
*
@@ -196,7 +235,7 @@ inline size_t get_data_layout_dimension_index(const DataLayout data_layout, cons
*
* @return The dimension which this index is requested for.
*/
-inline DataLayoutDimension get_index_data_layout_dimension(const DataLayout data_layout, const size_t index);
+inline DataLayoutDimension get_index_data_layout_dimension(const DataLayout &data_layout, const size_t index);
/** Calculate the number of output tiles required by Winograd Convolution layer. This utility function can be used by the Winograd input transform
* to know the number of tiles on the x and y direction
@@ -208,10 +247,17 @@ inline DataLayoutDimension get_index_data_layout_dimension(const DataLayout data
*
* @return the number of output tiles along the x and y directions of size "output_tile_size"
*/
-inline Size2D compute_winograd_convolution_tiles(const Size2D &in_dims, const Size2D &kernel_size, const Size2D &output_tile_size, const PadStrideInfo &conv_info)
+inline Size2D compute_winograd_convolution_tiles(const Size2D &in_dims,
+ const Size2D &kernel_size,
+ const Size2D &output_tile_size,
+ const PadStrideInfo &conv_info)
{
- int num_tiles_x = std::ceil((in_dims.width - (kernel_size.width - 1) + conv_info.pad_left() + conv_info.pad_right()) / static_cast<float>(output_tile_size.width));
- int num_tiles_y = std::ceil((in_dims.height - (kernel_size.height - 1) + conv_info.pad_top() + conv_info.pad_bottom()) / static_cast<float>(output_tile_size.height));
+ int num_tiles_x =
+ std::ceil((in_dims.width - (kernel_size.width - 1) + conv_info.pad_left() + conv_info.pad_right()) /
+ static_cast<float>(output_tile_size.width));
+ int num_tiles_y =
+ std::ceil((in_dims.height - (kernel_size.height - 1) + conv_info.pad_top() + conv_info.pad_bottom()) /
+ static_cast<float>(output_tile_size.height));
    // Clamp in case paddings are provided but the convolution is 1D
num_tiles_x = std::min(num_tiles_x, static_cast<int>(in_dims.width));
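
A worked example of the formula above: a 14x14 input, a 3x3 kernel, a 2x2 output tile and no padding give ceil((14 - 2) / 2) = 6 tiles per axis, i.e. a 6x6 tile grid:

    const Size2D tiles = compute_winograd_convolution_tiles(
        Size2D(14U, 14U), Size2D(3U, 3U), Size2D(2U, 2U), PadStrideInfo()); // tiles == Size2D(6, 6)
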
@@ -240,7 +286,7 @@ inline T wrap_around(T x, T m)
*/
inline Coordinates &convert_negative_axis(Coordinates &coords, int max_value)
{
- for(unsigned int i = 0; i < coords.num_dimensions(); ++i)
+ for (unsigned int i = 0; i < coords.num_dimensions(); ++i)
{
coords[i] = wrap_around(coords[i], max_value);
}
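
wrap_around() maps a negative index into [0, m), e.g. wrap_around(-1, 4) == 3, so the conversion above behaves as:

    Coordinates axes(-1, 2);
    convert_negative_axis(axes, 4); // axes is now (3, 2)
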
diff --git a/arm_compute/core/Helpers.inl b/arm_compute/core/Helpers.inl
index a960876074..60a21e9418 100644
--- a/arm_compute/core/Helpers.inl
+++ b/arm_compute/core/Helpers.inl
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2020 Arm Limited.
+ * Copyright (c) 2016-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -32,12 +32,9 @@ template <size_t dimension>
struct IncrementIterators
{
template <typename T, typename... Ts>
- static void unroll(T &&it, Ts &&... iterators)
+ static void unroll(T &&it, Ts &&...iterators)
{
- auto increment = [](T && it)
- {
- it.increment(dimension);
- };
+ auto increment = [](T &&it) { it.increment(dimension); };
utility::for_each(increment, std::forward<T>(it), std::forward<Ts>(iterators)...);
}
static void unroll()
@@ -50,14 +47,14 @@ template <size_t dim>
struct ForEachDimension
{
template <typename L, typename... Ts>
- static void unroll(const Window &w, Coordinates &id, L &&lambda_function, Ts &&... iterators)
+ static void unroll(const Window &w, Coordinates &id, L &&lambda_function, Ts &&...iterators)
{
const auto &d = w[dim - 1];
- for(auto v = d.start(); v < d.end(); v += d.step(), IncrementIterators < dim - 1 >::unroll(iterators...))
+ for (auto v = d.start(); v < d.end(); v += d.step(), IncrementIterators<dim - 1>::unroll(iterators...))
{
id.set(dim - 1, v);
- ForEachDimension < dim - 1 >::unroll(w, id, lambda_function, iterators...);
+ ForEachDimension<dim - 1>::unroll(w, id, lambda_function, iterators...);
}
}
};
@@ -66,7 +63,7 @@ template <>
struct ForEachDimension<0>
{
template <typename L, typename... Ts>
- static void unroll(const Window &w, Coordinates &id, L &&lambda_function, Ts &&... iterators)
+ static void unroll(const Window &w, Coordinates &id, L &&lambda_function, Ts &&...iterators)
{
ARM_COMPUTE_UNUSED(w, iterators...);
lambda_function(id);
@@ -74,49 +71,60 @@ struct ForEachDimension<0>
};
template <typename L, typename... Ts>
-inline void execute_window_loop(const Window &w, L &&lambda_function, Ts &&... iterators)
+inline void execute_window_loop(const Window &w, L &&lambda_function, Ts &&...iterators)
{
w.validate();
- for(unsigned int i = 0; i < Coordinates::num_max_dimensions; ++i)
+ for (unsigned int i = 0; i < Coordinates::num_max_dimensions; ++i)
{
ARM_COMPUTE_ERROR_ON(w[i].step() == 0);
}
Coordinates id;
- ForEachDimension<Coordinates::num_max_dimensions>::unroll(w, id, std::forward<L>(lambda_function), std::forward<Ts>(iterators)...);
+ ForEachDimension<Coordinates::num_max_dimensions>::unroll(w, id, std::forward<L>(lambda_function),
+ std::forward<Ts>(iterators)...);
}
-inline constexpr Iterator::Iterator()
- : _ptr(nullptr), _dims()
+inline constexpr Iterator::Iterator() : _ptr(nullptr), _dims()
{
}
-inline Iterator::Iterator(const ITensor *tensor, const Window &win)
- : Iterator()
+inline Iterator::Iterator(const ITensor *tensor, const Window &win) : Iterator()
{
ARM_COMPUTE_ERROR_ON(tensor == nullptr);
ARM_COMPUTE_ERROR_ON(tensor->info() == nullptr);
- const ITensorInfo *info = tensor->info();
- const Strides &strides = info->strides_in_bytes();
+ initialize(tensor->info()->num_dimensions(), tensor->info()->strides_in_bytes(), tensor->buffer(),
+ tensor->info()->offset_first_element_in_bytes(), win);
+}
+
+inline Iterator::Iterator(size_t num_dims, const Strides &strides, uint8_t *buffer, size_t offset, const Window &win)
+ : Iterator()
+{
+ initialize(num_dims, strides, buffer, offset, win);
+}
- _ptr = tensor->buffer() + info->offset_first_element_in_bytes();
+inline void
+Iterator::initialize(size_t num_dims, const Strides &strides, uint8_t *buffer, size_t offset, const Window &win)
+{
+ ARM_COMPUTE_ERROR_ON(buffer == nullptr);
+
+ _ptr = buffer + offset;
//Initialize the stride for each dimension and calculate the position of the first element of the iteration:
- for(unsigned int n = 0; n < info->num_dimensions(); ++n)
+ for (unsigned int n = 0; n < num_dims; ++n)
{
_dims[n]._stride = win[n].step() * strides[n];
std::get<0>(_dims)._dim_start += static_cast<size_t>(strides[n]) * win[n].start();
}
//Copy the starting point to all the dimensions:
- for(unsigned int n = 1; n < Coordinates::num_max_dimensions; ++n)
+ for (unsigned int n = 1; n < Coordinates::num_max_dimensions; ++n)
{
_dims[n]._dim_start = std::get<0>(_dims)._dim_start;
}
- ARM_COMPUTE_ERROR_ON_WINDOW_DIMENSIONS_GTE(win, info->num_dimensions());
+ ARM_COMPUTE_ERROR_ON_WINDOW_DIMENSIONS_GTE(win, num_dims);
}
inline void Iterator::increment(const size_t dimension)
@@ -125,7 +133,7 @@ inline void Iterator::increment(const size_t dimension)
_dims[dimension]._dim_start += _dims[dimension]._stride;
- for(unsigned int n = 0; n < dimension; ++n)
+ for (unsigned int n = 0; n < dimension; ++n)
{
_dims[n]._dim_start = _dims[dimension]._dim_start;
}
@@ -147,7 +155,7 @@ inline void Iterator::reset(const size_t dimension)
_dims[dimension]._dim_start = _dims[dimension + 1]._dim_start;
- for(unsigned int n = 0; n < dimension; ++n)
+ for (unsigned int n = 0; n < dimension; ++n)
{
_dims[n]._dim_start = _dims[dimension]._dim_start;
}
@@ -160,9 +168,9 @@ inline Coordinates index2coords(const TensorShape &shape, int index)
    ARM_COMPUTE_ERROR_ON_MSG(index < 0 || index >= num_elements, "Index has to be in [0, num_elements)!");
ARM_COMPUTE_ERROR_ON_MSG(num_elements == 0, "Cannot create coordinate from empty shape!");
- Coordinates coord{ 0 };
+ Coordinates coord{0};
- for(int d = shape.num_dimensions() - 1; d >= 0; --d)
+ for (int d = shape.num_dimensions() - 1; d >= 0; --d)
{
num_elements /= shape[d];
coord.set(d, index / num_elements);
@@ -181,7 +189,7 @@ inline int coords2index(const TensorShape &shape, const Coordinates &coord)
int index = 0;
int stride = 1;
- for(unsigned int d = 0; d < coord.num_dimensions(); ++d)
+ for (unsigned int d = 0; d < coord.num_dimensions(); ++d)
{
index += coord[d] * stride;
stride *= shape[d];
@@ -190,61 +198,23 @@ inline int coords2index(const TensorShape &shape, const Coordinates &coord)
return index;
}
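
The two conversions are inverses of each other, with dimension 0 the fastest-moving. A worked example for a [4, 3] shape (12 elements):

    TensorShape shape(4U, 3U);
    int         idx = coords2index(shape, Coordinates(3, 1)); // 3 + 1 * 4 = 7
    Coordinates c   = index2coords(shape, 7);                 // (3, 1)
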
-inline size_t get_data_layout_dimension_index(const DataLayout data_layout, const DataLayoutDimension data_layout_dimension)
+inline size_t get_data_layout_dimension_index(const DataLayout &data_layout,
+ const DataLayoutDimension &data_layout_dimension)
{
- ARM_COMPUTE_ERROR_ON_MSG(data_layout == DataLayout::UNKNOWN, "Cannot retrieve the dimension index for an unknown layout!");
-
- /* Return the index based on the data layout
- * [N C H W]
- * [3 2 1 0]
- * [N H W C]
- */
- switch(data_layout_dimension)
- {
- case DataLayoutDimension::CHANNEL:
- return (data_layout == DataLayout::NCHW) ? 2 : 0;
- break;
- case DataLayoutDimension::HEIGHT:
- return (data_layout == DataLayout::NCHW) ? 1 : 2;
- break;
- case DataLayoutDimension::WIDTH:
- return (data_layout == DataLayout::NCHW) ? 0 : 1;
- break;
- case DataLayoutDimension::BATCHES:
- return 3;
- break;
- default:
- break;
- }
- ARM_COMPUTE_ERROR("Data layout index not supported!");
+ ARM_COMPUTE_ERROR_ON_MSG(data_layout == DataLayout::UNKNOWN,
+ "Cannot retrieve the dimension index for an unknown layout!");
+ const auto &dims = get_layout_map().at(data_layout);
+ const auto &it = std::find(dims.cbegin(), dims.cend(), data_layout_dimension);
+ ARM_COMPUTE_ERROR_ON_MSG(it == dims.cend(), "Invalid dimension for the given layout.");
+ return it - dims.cbegin();
}
-inline DataLayoutDimension get_index_data_layout_dimension(const DataLayout data_layout, const size_t index)
+inline DataLayoutDimension get_index_data_layout_dimension(const DataLayout &data_layout, const size_t index)
{
- ARM_COMPUTE_ERROR_ON_MSG(data_layout == DataLayout::UNKNOWN, "Cannot retrieve the dimension index for an unknown layout!");
-
- /* Return the index based on the data layout
- * [N C H W]
- * [3 2 1 0]
- * [N H W C]
- */
- switch(index)
- {
- case 0:
- return (data_layout == DataLayout::NCHW) ? DataLayoutDimension::WIDTH : DataLayoutDimension::CHANNEL;
- break;
- case 1:
- return (data_layout == DataLayout::NCHW) ? DataLayoutDimension::HEIGHT : DataLayoutDimension::WIDTH;
- break;
- case 2:
- return (data_layout == DataLayout::NCHW) ? DataLayoutDimension::CHANNEL : DataLayoutDimension::HEIGHT;
- break;
- case 3:
- return DataLayoutDimension::BATCHES;
- break;
- default:
- ARM_COMPUTE_ERROR("Index value not supported!");
- break;
- }
+ ARM_COMPUTE_ERROR_ON_MSG(data_layout == DataLayout::UNKNOWN,
+ "Cannot retrieve the layout dimension for an unknown layout!");
+ const auto &dims = get_layout_map().at(data_layout);
+ ARM_COMPUTE_ERROR_ON_MSG(index >= dims.size(), "Invalid index for the given layout.");
+ return dims[index];
}
} // namespace arm_compute
diff --git a/arm_compute/core/IAccessWindow.h b/arm_compute/core/IAccessWindow.h
index 880f6d6b27..9c9fb90915 100644
--- a/arm_compute/core/IAccessWindow.h
+++ b/arm_compute/core/IAccessWindow.h
@@ -100,7 +100,10 @@ public:
* @return a valid region.
*
*/
- virtual ValidRegion compute_valid_region(const Window &window, ValidRegion input_valid_region, bool border_undefined, BorderSize border_size) const = 0;
+ virtual ValidRegion compute_valid_region(const Window &window,
+ ValidRegion input_valid_region,
+ bool border_undefined,
+ BorderSize border_size) const = 0;
};
/** Implementation of a rectangular access pattern. */
@@ -161,7 +164,10 @@ public:
* @param[in] border_undefined (Optional) Undefined borders are excluded from the valid region.
* @param[in] border_size (Optional) Size of the border around the XY-plane of the tensor.
*/
- void set_valid_region(const Window &window, const ValidRegion &input_valid_region, bool border_undefined = false, const BorderSize &border_size = BorderSize(0));
+ void set_valid_region(const Window &window,
+ const ValidRegion &input_valid_region,
+ bool border_undefined = false,
+ const BorderSize &border_size = BorderSize(0));
/** Compute the valid region based on access pattern, valid region of the inputs and border mode.
*
@@ -189,7 +195,10 @@ public:
* @return a valid region.
*
*/
- ValidRegion compute_valid_region(const Window &window, ValidRegion input_valid_region, bool border_undefined, BorderSize border_size) const override;
+ ValidRegion compute_valid_region(const Window &window,
+ ValidRegion input_valid_region,
+ bool border_undefined,
+ BorderSize border_size) const override;
bool update_window_if_needed(Window &window) const override;
bool update_padding_if_needed(const Window &window) override;
diff --git a/arm_compute/core/IArray.h b/arm_compute/core/IArray.h
index 6edbc1d5d5..3471fc9a86 100644
--- a/arm_compute/core/IArray.h
+++ b/arm_compute/core/IArray.h
@@ -25,6 +25,7 @@
#define ARM_COMPUTE_IARRAY_H
#include "arm_compute/core/Error.h"
+
#include <cstddef>
#include <cstdint>
@@ -36,14 +37,12 @@ class IArray
{
public:
/** Default constructor */
- IArray()
- : _num_values(0), _max_size(0) {};
+ IArray() : _num_values(0), _max_size(0){};
    /** Constructor: initializes an array which can contain up to max_num_values values
*
     * @param[in] max_num_values Maximum number of values the array will be able to store
*/
- IArray(size_t max_num_values)
- : _num_values(0), _max_size(max_num_values)
+ IArray(size_t max_num_values) : _num_values(0), _max_size(max_num_values)
{
}
/** Maximum number of values which can be stored in this array
@@ -73,7 +72,7 @@ public:
bool push_back(const T &val)
{
ARM_COMPUTE_ERROR_ON(0 == _max_size);
- if(_num_values >= max_num_values())
+ if (_num_values >= max_num_values())
{
_num_values = max_num_values() + 1;
return false;
@@ -142,5 +141,5 @@ using IInt16Array = IArray<int16_t>;
using IInt32Array = IArray<int32_t>;
/** Interface for Array of floats. */
using IFloatArray = IArray<float>;
-}
+} // namespace arm_compute
#endif /* ARM_COMPUTE_IARRAY_H */
diff --git a/arm_compute/core/IKernel.h b/arm_compute/core/IKernel.h
index 98fd18cc91..403a2c724e 100644
--- a/arm_compute/core/IKernel.h
+++ b/arm_compute/core/IKernel.h
@@ -73,5 +73,5 @@ protected:
private:
Window _window;
};
-}
+} // namespace arm_compute
#endif /*ARM_COMPUTE_IKERNEL_H */
diff --git a/arm_compute/core/ITensor.h b/arm_compute/core/ITensor.h
index 131ee205ea..aad8313261 100644
--- a/arm_compute/core/ITensor.h
+++ b/arm_compute/core/ITensor.h
@@ -90,11 +90,13 @@ public:
bool is_used() const;
/** Marks a tensor as unused */
void mark_as_unused() const;
+ /** Marks a tensor as used */
+ void mark_as_used() const;
private:
- mutable bool _is_used = { true }; /**< Flag that marks if the tensor is used or not */
+ mutable bool _is_used = {true}; /**< Flag that marks if the tensor is used or not */
};
using IImage = ITensor;
-}
+} // namespace arm_compute
#endif /*ARM_COMPUTE_ITENSOR_H */
diff --git a/arm_compute/core/ITensorInfo.h b/arm_compute/core/ITensorInfo.h
index 0171e31086..c42f4b57a1 100644
--- a/arm_compute/core/ITensorInfo.h
+++ b/arm_compute/core/ITensorInfo.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2021 Arm Limited.
+ * Copyright (c) 2016-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -28,19 +28,28 @@
#include "arm_compute/core/Strides.h"
#include "arm_compute/core/TensorShape.h"
#include "arm_compute/core/Types.h"
-#include "arm_compute/core/Utils.h"
#include "arm_compute/core/utils/misc/Utility.h"
+
#include "support/ICloneable.h"
#include <cstddef>
namespace arm_compute
{
+class QuantizationInfo;
+// Note: Any changes to the fields of the class below that have setters should be mirrored
+// (if possible) in the auto_init_if_empty function in AutoConfiguration.h
+
/** Store the tensor's metadata */
class ITensorInfo : public misc::ICloneable<ITensorInfo>
{
public:
- using TensorDimsState = Coordinates;
+ using TensorDimsState = std::vector<int>;
+ /** An id that uniquely identifies an ITensorInfo within some domain (e.g. a workload)
+ */
+ using Id = int32_t;
+ /** An invalid tensor id within a domain */
+ static constexpr Id invalid_tensor_id = 0;
/** Get the value representing dynamic dimension state
*
* @return Value representing dynamic dimension state
@@ -137,6 +146,17 @@ public:
* @return True if the strides or the offset to the first element have changed.
*/
virtual bool auto_padding() = 0;
+ /** Set the lock paddings flag of the tensor.
+     * It should be set to true when the tensor could be mapped to a camera or frame buffer.
+ *
+ * @return Reference to this ITensorInfo object
+ */
+ virtual ITensorInfo &set_lock_paddings(bool flag) = 0;
+ /** Get the lock paddings flag value
+ *
+ * @return lock paddings flag value
+ */
+ virtual bool lock_paddings() const = 0;
/** Update the offset to the first element, the strides and the total size.
*
* @note This function can only increase the offset, strides and total size.
@@ -240,6 +260,11 @@ public:
* @return True if its dynamic else false
*/
virtual bool is_dynamic() const = 0;
+    /** Flag indicating whether the values of the tensor are constant, meaning that they cannot change on kernel/function execution.
+ *
+ * @return True if values are constant else false
+ */
+ virtual bool are_values_constant() const = 0;
/** Set the flag whether the tensor size can be changed.
*
* @param[in] is_resizable Flag that marks the tensor if it can be changed or not.
@@ -247,6 +272,13 @@ public:
* @return Reference to this ITensorInfo object
*/
virtual ITensorInfo &set_is_resizable(bool is_resizable) = 0;
+ /** Set the flag whether the tensor values can change during kernel/function execution.
+ *
+     * @param[in] are_values_constant Flag that marks whether the tensor values are constant and cannot change during kernel/function execution.
+ *
+ * @return Reference to this ITensorInfo object
+ */
+ virtual ITensorInfo &set_are_values_constant(bool are_values_constant) = 0;
/** Valid region of the tensor. All elements in the valid region have defined values, i.e. are not undefined.
*
* @return The valid region.
@@ -268,7 +300,20 @@ public:
* @return A DataLayout containing the layout data information.
*/
virtual DataLayout data_layout() const = 0;
-
+ /** Get the workload tensor id of the tensor.
+ *
+ * @return Workload tensor id of the tensor
+ */
+ virtual Id id() const = 0;
+ /** Set the tensor id
+ */
+ virtual ITensorInfo &set_id(ITensorInfo::Id id) = 0;
+ /** Check if the tensor id is valid
+ */
+ bool has_valid_id() const
+ {
+ return id() != invalid_tensor_id;
+ }
/** If infos are broadcast compatible tensor info's, return the broadcasted shape and the intersection of
* the broadcasted valid regions of the tensors.
*
@@ -284,23 +329,23 @@ public:
* not broadcast compatible.
*/
template <typename... Infos>
- static std::pair<TensorShape, ValidRegion> broadcast_shape_and_valid_region(const Infos &... infos)
+ static std::pair<TensorShape, ValidRegion> broadcast_shape_and_valid_region(const Infos &...infos)
{
TensorShape bc_shape = TensorShape::broadcast_shape(infos.tensor_shape()...);
- ValidRegion bc_valid_region{ Coordinates(), bc_shape };
+ ValidRegion bc_valid_region{Coordinates(), bc_shape};
- auto broadcast_valid_region = [&bc_valid_region](const ITensorInfo & info)
+ auto broadcast_valid_region = [&bc_valid_region](const ITensorInfo &info)
{
- if(info.num_dimensions() != 0)
+ if (info.num_dimensions() != 0)
{
- for(size_t d = 0; d < bc_valid_region.shape.num_dimensions(); ++d)
+ for (size_t d = 0; d < bc_valid_region.shape.num_dimensions(); ++d)
{
const bool is_broadcast = (info.tensor_shape()[d] == 1);
const int anchor_max = std::max(bc_valid_region.anchor[d], info.valid_region().anchor[d]);
const size_t valid_min = std::min(bc_valid_region.shape[d], info.valid_region().shape[d]);
- if(!is_broadcast || (valid_min == 0))
+ if (!is_broadcast || (valid_min == 0))
{
bc_valid_region.anchor.set(d, anchor_max);
bc_valid_region.shape.set(d, valid_min);
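
A sketch of the broadcast helper with two concrete infos (TensorInfo is the stock ITensorInfo implementation); a size-1 dimension broadcasts against the larger one:

    TensorInfo a(TensorShape(1U, 3U), 1, DataType::F32);
    TensorInfo b(TensorShape(5U, 3U), 1, DataType::F32);
    auto result = ITensorInfo::broadcast_shape_and_valid_region(a, b);
    // result.first == TensorShape(5U, 3U); result.second is the intersected valid region
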
diff --git a/arm_compute/core/ITensorPack.h b/arm_compute/core/ITensorPack.h
index 17b7241862..f456c50769 100644
--- a/arm_compute/core/ITensorPack.h
+++ b/arm_compute/core/ITensorPack.h
@@ -42,18 +42,16 @@ public:
struct PackElement
{
PackElement() = default;
- PackElement(int id, ITensor *tensor)
- : id(id), tensor(tensor), ctensor(nullptr)
+ PackElement(int id, ITensor *tensor) : id(id), tensor(tensor), ctensor(nullptr)
{
}
- PackElement(int id, const ITensor *ctensor)
- : id(id), tensor(nullptr), ctensor(ctensor)
+ PackElement(int id, const ITensor *ctensor) : id(id), tensor(nullptr), ctensor(ctensor)
{
}
- int id{ -1 };
- ITensor *tensor{ nullptr };
- const ITensor *ctensor{ nullptr };
+ int id{-1};
+ ITensor *tensor{nullptr};
+ const ITensor *ctensor{nullptr};
};
public:
diff --git a/arm_compute/core/KernelDescriptors.h b/arm_compute/core/KernelDescriptors.h
index 6c1fc74b1e..168a06a55c 100644
--- a/arm_compute/core/KernelDescriptors.h
+++ b/arm_compute/core/KernelDescriptors.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2021 Arm Limited.
+ * Copyright (c) 2019-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,116 +21,139 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef ARM_COMPUTE_CORE_KERNEL_DESCRIPTORS_H
-#define ARM_COMPUTE_CORE_KERNEL_DESCRIPTORS_H
+#ifndef ACL_ARM_COMPUTE_CORE_KERNELDESCRIPTORS_H
+#define ACL_ARM_COMPUTE_CORE_KERNELDESCRIPTORS_H
#include "arm_compute/core/PixelValue.h"
#include "arm_compute/core/Types.h"
+#include "arm_compute/function_info/ActivationLayerInfo.h"
namespace arm_compute
{
/** Descriptor for FFT scale kernels */
struct FFTScaleKernelInfo
{
- float scale{ 0.f }; /**< Axis to perform the kernel on. */
- bool conjugate{ true }; /**< Flag to conjugate the output/ */
+    float scale{0.f};      /**< Scale factor to apply. */
+    bool  conjugate{true}; /**< Flag to conjugate the output. */
};
/** Descriptor for FFT digit reverse kernels */
struct FFTDigitReverseKernelInfo
{
- unsigned int axis{ 0 }; /**< Axis to perform the kernel on. */
- bool conjugate{ false }; /**< Flag to conjugate the output/ */
+ unsigned int axis{0}; /**< Axis to perform the kernel on. */
+    bool         conjugate{false}; /**< Flag to conjugate the output. */
};
/** Descriptor used by the FFT core kernels */
struct FFTRadixStageKernelInfo
{
- unsigned int axis{ 0 }; /**< Axis to run the kernel on. */
- unsigned int radix{ 0 }; /**< Radix to use. */
- unsigned int Nx{ 0 }; /**< Nx coefficient. */
- bool is_first_stage{ false }; /**< Flags if the FFT kernels is the first stage of a decomposed FFT. */
+ unsigned int axis{0}; /**< Axis to run the kernel on. */
+ unsigned int radix{0}; /**< Radix to use. */
+ unsigned int Nx{0}; /**< Nx coefficient. */
+ bool is_first_stage{false}; /**< Flags if the FFT kernels is the first stage of a decomposed FFT. */
};
+class ITensorInfo;
/** Descriptor used by the GEMM kernels */
struct GEMMKernelInfo
{
GEMMKernelInfo() = default;
- GEMMKernelInfo(
- unsigned int im,
- unsigned int in,
- unsigned int ik,
- unsigned int idepth_output_gemm3d,
- bool ireinterpret_input_as_3d,
- bool ibroadcast_bias,
- bool ifp_mixed_precision,
- bool ihas_pad_y,
- ActivationLayerInfo iactivation_info,
- int inmult_transpose1xW_width,
- int imult_interleave4x4_height,
- GEMMLHSMatrixInfo ilhs_info,
- GEMMRHSMatrixInfo irhs_info,
- int32_t ina_offset,
- int32_t inb_offset)
- : m(im), n(in), k(ik), depth_output_gemm3d(idepth_output_gemm3d), reinterpret_input_as_3d(ireinterpret_input_as_3d), broadcast_bias(ibroadcast_bias), fp_mixed_precision(ifp_mixed_precision),
- has_pad_y(ihas_pad_y), activation_info(iactivation_info), mult_transpose1xW_width(inmult_transpose1xW_width), mult_interleave4x4_height(imult_interleave4x4_height), lhs_info(ilhs_info),
- rhs_info(irhs_info), a_offset(ina_offset), b_offset(inb_offset)
+ GEMMKernelInfo(unsigned int im,
+ unsigned int in,
+ unsigned int ik,
+ unsigned int idepth_output_gemm3d,
+ bool ireinterpret_input_as_3d,
+ bool ibroadcast_bias,
+ bool ifp_mixed_precision,
+ bool ihas_pad_y,
+ ActivationLayerInfo iactivation_info,
+ int inmult_transpose1xW_width,
+ int imult_interleave4x4_height,
+ GEMMLHSMatrixInfo ilhs_info,
+ GEMMRHSMatrixInfo irhs_info,
+ int32_t ina_offset,
+ int32_t inb_offset)
+ : m(im),
+ n(in),
+ k(ik),
+ depth_output_gemm3d(idepth_output_gemm3d),
+ reinterpret_input_as_3d(ireinterpret_input_as_3d),
+ broadcast_bias(ibroadcast_bias),
+ fp_mixed_precision(ifp_mixed_precision),
+ has_pad_y(ihas_pad_y),
+ activation_info(iactivation_info),
+ mult_transpose1xW_width(inmult_transpose1xW_width),
+ mult_interleave4x4_height(imult_interleave4x4_height),
+ lhs_info(ilhs_info),
+ rhs_info(irhs_info),
+ a_offset(ina_offset),
+ b_offset(inb_offset)
{
}
- unsigned int m{ 0 }; /**< Number of LHS rows*/
- unsigned int n{ 0 }; /**< Number of RHS columns*/
- unsigned int k{ 0 }; /**< Number of LHS columns or RHS rows */
- unsigned int depth_output_gemm3d{ 0 }; /**< Depth of the output tensor in case is reinterpreted as 3D */
- bool reinterpret_input_as_3d{ false }; /**< Flag used to reinterpret the input as 3D */
- bool broadcast_bias{ false }; /**< Flag used to broadcast the bias addition */
- bool fp_mixed_precision{ false }; /**< Flag used to indicate wider accumulators (32 bit instead of 16 for FP16). */
- bool has_pad_y{ false }; /**< Flag used to indicate if the input/output tensors have internal pad on the y direction */
- ActivationLayerInfo activation_info{}; /**< Activation function to perform after the matrix multiplication */
- int mult_transpose1xW_width{ 1 }; /**< Multiplication factor for the width of the 1xW transposed block */
- int mult_interleave4x4_height{ 1 }; /**< Multiplication factor for the height of the 4x4 interleaved block */
- GEMMLHSMatrixInfo lhs_info{}; /**< LHS matrix information used to retrieve the number of rows processed by each thread */
- GEMMRHSMatrixInfo rhs_info{}; /**< RHS matrix information used for reshaping the RHS matrix */
- int32_t a_offset{ 0 }; /**< Offset to be added to each element of the matrix A */
- int32_t b_offset{ 0 }; /**< Offset to be added to each element of the matrix B */
- GEMMLowpOutputStageInfo output_stage{}; /**< GEMMLowp output stage information */
+ unsigned int m{0}; /**< Number of LHS rows*/
+ unsigned int n{0}; /**< Number of RHS columns*/
+ unsigned int k{0}; /**< Number of LHS columns or RHS rows */
+ unsigned int depth_output_gemm3d{0}; /**< Depth of the output tensor in case is reinterpreted as 3D */
+ bool reinterpret_input_as_3d{false}; /**< Flag used to reinterpret the input as 3D */
+ bool broadcast_bias{false}; /**< Flag used to broadcast the bias addition */
+ bool fp_mixed_precision{false}; /**< Flag used to indicate wider accumulators (32 bit instead of 16 for FP16). */
+ bool has_pad_y{
+ false}; /**< Flag used to indicate if the input/output tensors have internal pad on the y direction */
+ ActivationLayerInfo activation_info{}; /**< Activation function to perform after the matrix multiplication */
+ int mult_transpose1xW_width{1}; /**< Multiplication factor for the width of the 1xW transposed block */
+ int mult_interleave4x4_height{1}; /**< Multiplication factor for the height of the 4x4 interleaved block */
+ GEMMLHSMatrixInfo
+ lhs_info{}; /**< LHS matrix information used to retrieve the number of rows processed by each thread */
+ GEMMRHSMatrixInfo rhs_info{}; /**< RHS matrix information used for reshaping the RHS matrix */
+ int32_t a_offset{0}; /**< Offset to be added to each element of the matrix A */
+ int32_t b_offset{0}; /**< Offset to be added to each element of the matrix B */
+ GEMMLowpOutputStageInfo output_stage{}; /**< GEMMLowp output stage information */
};
-/** Descriptor used by the depthwise convolution kernels */
-struct DWCKernelInfo
+/** Compute descriptor used by the depthwise convolution native kernel */
+struct DWCComputeKernelInfo
{
- ActivationLayerInfo activation_info{}; /**< Activation function to perform after the depthwise convolution */
+ unsigned int n0{1}; /**< Number of columns processed by each thread */
+ unsigned int m0{1}; /**< Number of rows processed by each thread */
+ bool export_input_to_cl_image{false}; /**< Export input to cl_image */
+ bool export_weights_to_cl_image{false}; /**< Export the weights to cl_image */
};
-/** Descriptor used by the depthwise convolution kernels to retrieve the number of output elements processed by each thread */
-struct DWCWeightsKernelInfo
+/** Compute descriptor used by the direct convolution kernel */
+struct DirectConvComputeKernelInfo
{
- unsigned int n0{ 0 }; /**< Number of columns processed by each thread */
+ int32_t m0{1}; /**< Number of rows to be processed by the kernel */
+ int32_t n0{1}; /**< Number of columns to be processed by the kernel */
+ int32_t k0{1}; /**< Number of partial accumulations to be processed in a single iteration by the kernel */
+ bool export_weights_to_cl_image{false}; /**< Flag to export the weights to cl_image */
+ bool export_output_to_cl_image{false}; /**< Flag to export the output to cl_image */
+ bool export_input_to_cl_image{false}; /**< Flag to export the input to cl_image */
};
/** Descriptor used by the softmax kernels */
struct SoftmaxKernelInfo
{
- float beta{ 1.f }; /**< A scaling factor for the exponent with default value 1.0 */
- bool is_log{ false }; /**< Flag used to perform Log Softmax operation */
- DataType input_data_type{ DataType::UNKNOWN }; /**< Input tensor data type */
- int32_t axis{ 0 }; /**< The dimension in which to apply softmax. */
+ float beta{1.f}; /**< A scaling factor for the exponent with default value 1.0 */
+ bool is_log{false}; /**< Flag used to perform Log Softmax operation */
+ DataType input_data_type{DataType::UNKNOWN}; /**< Input tensor data type */
+ int32_t axis{0}; /**< The dimension in which to apply softmax. */
};
/** Descriptor used by the direct convolution layer output stage kernels */
struct DirectConvolutionLayerOutputStageKernelInfo
{
- int32_t result_fixedpoint_multiplier{ 0 }; /**< Result output stage multiplier used for quantizing */
- int32_t result_shift{ 0 }; /**< Result output stage shift used for quantizing */
- int32_t result_offset_after_shift{ 0 }; /**< Result offset used for quantizing */
- DataType output_data_type{ DataType::UNKNOWN }; /**< Output tensor data type to use if the output is not initialized */
+ int32_t result_fixedpoint_multiplier{0}; /**< Result output stage multiplier used for quantizing */
+ int32_t result_shift{0}; /**< Result output stage shift used for quantizing */
+ int32_t result_offset_after_shift{0}; /**< Result offset used for quantizing */
+ DataType output_data_type{
+ DataType::UNKNOWN}; /**< Output tensor data type to use if the output is not initialized */
};
struct InstanceNormalizationLayerKernelInfo
{
/** Default constructor */
- InstanceNormalizationLayerKernelInfo()
- : InstanceNormalizationLayerKernelInfo(1.f, 0.f, 1e-12, true)
+ InstanceNormalizationLayerKernelInfo() : InstanceNormalizationLayerKernelInfo(1.f, 0.f, 1e-12, true)
{
}
/** Constructor
@@ -167,10 +190,10 @@ struct GEMMLowpReductionKernelInfo
{
}
- int32_t k{ 0 }; /**< Number of matrix columns/rows */
- bool is_reshaped{ false }; /**< True if the input tensor has been reshaped */
- int32_t scalar{ 0 }; /**< Scalar value to multiply each reduced column/row by */
- bool mul_by_scalar{ false }; /**< True if each column/row reduction has to be multiplied by a scalar value */
+ int32_t k{0}; /**< Number of matrix columns/rows */
+ bool is_reshaped{false}; /**< True if the input tensor has been reshaped */
+ int32_t scalar{0}; /**< Scalar value to multiply each reduced column/row by */
+ bool mul_by_scalar{false}; /**< True if each column/row reduction has to be multiplied by a scalar value */
};
struct ScaleKernelInfo
@@ -192,13 +215,13 @@ struct ScaleKernelInfo
bool use_padding = true,
bool align_corners = false,
DataLayout data_layout = DataLayout::UNKNOWN) noexcept
- : interpolation_policy{ interpolation_policy },
- border_mode{ border_mode },
- constant_border_value{ constant_border_value },
- sampling_policy{ sampling_policy },
- use_padding{ use_padding },
- align_corners{ align_corners },
- data_layout{ data_layout }
+ : interpolation_policy{interpolation_policy},
+ border_mode{border_mode},
+ constant_border_value{constant_border_value},
+ sampling_policy{sampling_policy},
+ use_padding{use_padding},
+ align_corners{align_corners},
+ data_layout{data_layout}
{
}
@@ -211,16 +234,20 @@ struct ScaleKernelInfo
DataLayout data_layout; /**< Data layout to use */
};
-struct RemapInfo
+struct MatMulKernelInfo
{
- RemapInfo() = default;
- RemapInfo(InterpolationPolicy policy, BorderMode border_mode, PixelValue constant_border_value)
- : policy(policy), border_mode(border_mode), constant_border_value(constant_border_value)
+ MatMulKernelInfo() = default;
+ MatMulKernelInfo(
+ bool adj_lhs, bool adj_rhs, int m0 = 1, int n0 = 1, int k0 = 1, bool export_rhs_to_cl_image = false)
+ : adj_lhs{adj_lhs}, adj_rhs{adj_rhs}, m0{m0}, n0{n0}, k0{k0}, export_rhs_to_cl_image{export_rhs_to_cl_image}
{
}
- InterpolationPolicy policy;
- BorderMode border_mode;
- PixelValue constant_border_value;
+    bool adj_lhs{false}; /**< Adjoint (transpose) LHS flag */
+    bool adj_rhs{false}; /**< Adjoint (transpose) RHS flag */
+ int m0{1}; /**< Number of output rows processed by each work-item*/
+ int n0{1}; /**< Number of output columns processed by each work-item*/
+ int k0{1}; /**< Number of inner accumulations */
+ bool export_rhs_to_cl_image{false}; /**< Flag to know whether the RHS tensor should be exported to cl_image*/
};
} // namespace arm_compute
-#endif /* ARM_COMPUTE_CORE_KERNEL_DESCRIPTORS_H */
+#endif // ACL_ARM_COMPUTE_CORE_KERNELDESCRIPTORS_H
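
A small sketch of the new descriptor: this configuration transposes the RHS and lets each work-item produce a 4x4 output block with 8 accumulations per inner iteration (values illustrative, not tuned):

    MatMulKernelInfo info(/* adj_lhs */ false, /* adj_rhs */ true,
                          /* m0 */ 4, /* n0 */ 4, /* k0 */ 8);
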
diff --git a/arm_compute/core/Log.h b/arm_compute/core/Log.h
index bc0ecb802e..03b861f765 100644
--- a/arm_compute/core/Log.h
+++ b/arm_compute/core/Log.h
@@ -34,11 +34,11 @@
#define ARM_COMPUTE_CREATE_DEFAULT_CORE_LOGGER() \
do \
{ \
- if(arm_compute::logging::LoggerRegistry::get().logger("CORE") == nullptr) \
+ if (arm_compute::logging::LoggerRegistry::get().logger("CORE") == nullptr) \
{ \
arm_compute::logging::LoggerRegistry::get().create_reserved_loggers(); \
} \
- } while(false)
+ } while (false)
#else /* ARM_COMPUTE_LOGGING_ENABLED */
#define ARM_COMPUTE_CREATE_DEFAULT_CORE_LOGGER()
#endif /* ARM_COMPUTE_LOGGING_ENABLED */
@@ -53,7 +53,7 @@
{ \
ARM_COMPUTE_CREATE_DEFAULT_CORE_LOGGER(); \
ARM_COMPUTE_LOG_MSG("CORE", log_level, msg); \
- } while(false)
+ } while (false)
/** Log a message with format to the core system logger
*
@@ -66,7 +66,7 @@
{ \
ARM_COMPUTE_CREATE_DEFAULT_CORE_LOGGER(); \
ARM_COMPUTE_LOG_MSG_WITH_FORMAT("CORE", log_level, fmt, __VA_ARGS__); \
- } while(false)
+ } while (false)
/** Log a stream to the core system logger
*
@@ -78,7 +78,7 @@
{ \
ARM_COMPUTE_CREATE_DEFAULT_CORE_LOGGER(); \
ARM_COMPUTE_LOG_STREAM("CORE", log_level, ss); \
- } while(false)
+ } while (false)
/** Log information level message to the core system logger
*
@@ -89,7 +89,7 @@
{ \
ARM_COMPUTE_CREATE_DEFAULT_CORE_LOGGER(); \
ARM_COMPUTE_LOG_MSG_CORE(arm_compute::logging::LogLevel::INFO, msg); \
- } while(false)
+ } while (false)
/** Log information level formatted message to the core system logger
*
@@ -101,7 +101,7 @@
{ \
ARM_COMPUTE_CREATE_DEFAULT_CORE_LOGGER(); \
ARM_COMPUTE_LOG_MSG_WITH_FORMAT_CORE(arm_compute::logging::LogLevel::INFO, #fmt, __VA_ARGS__); \
- } while(false)
+ } while (false)
/** Log information level stream to the core system logger
*
@@ -112,6 +112,6 @@
{ \
ARM_COMPUTE_CREATE_DEFAULT_CORE_LOGGER(); \
ARM_COMPUTE_LOG_STREAM_CORE(arm_compute::logging::LogLevel::INFO, ss); \
- } while(false)
+ } while (false)
#endif /* ARM_COMPUTE_LOGGING_MACROS_H */
diff --git a/arm_compute/core/PixelValue.h b/arm_compute/core/PixelValue.h
index 0e3d26c515..0b4df4f2e2 100644
--- a/arm_compute/core/PixelValue.h
+++ b/arm_compute/core/PixelValue.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2021 Arm Limited.
+ * Copyright (c) 2016-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,6 +24,7 @@
#ifndef ARM_COMPUTE_PIXELVALUE_H
#define ARM_COMPUTE_PIXELVALUE_H
+#include "arm_compute/core/QuantizationInfo.h"
#include "arm_compute/core/Types.h"
#include <cstdint>
@@ -35,11 +36,7 @@ class PixelValue
{
public:
/** Default constructor: value initialized to 0 */
- PixelValue() noexcept
- : value
- {
- int64_t(0)
- }
+ PixelValue() noexcept : value{int64_t(0)}
{
}
/** Initialize the union with a pixel value of chosen datatype
@@ -48,10 +45,9 @@ public:
* @param[in] datatype DataType that @p v have to be stored
* @param[in] qinfo (Optional) QuantizationInfo to apply in case of quantized data types to @p v
*/
- PixelValue(double v, DataType datatype, QuantizationInfo qinfo = QuantizationInfo())
- : PixelValue()
+ PixelValue(double v, DataType datatype, QuantizationInfo qinfo = QuantizationInfo()) : PixelValue()
{
- switch(datatype)
+ switch (datatype)
{
case DataType::U8:
value.u8 = static_cast<uint8_t>(v);
@@ -111,8 +107,7 @@ public:
*
* @param[in] v S8 value.
*/
- PixelValue(int8_t v)
- : PixelValue()
+ PixelValue(int8_t v) : PixelValue()
{
value.s8 = v;
}
@@ -120,8 +115,7 @@ public:
*
* @param[in] v U8 value.
*/
- PixelValue(uint8_t v)
- : PixelValue()
+ PixelValue(uint8_t v) : PixelValue()
{
value.u8 = v;
}
@@ -129,8 +123,7 @@ public:
*
* @param[in] v U16 value.
*/
- PixelValue(uint16_t v)
- : PixelValue()
+ PixelValue(uint16_t v) : PixelValue()
{
value.u16 = v;
}
@@ -138,8 +131,7 @@ public:
*
* @param[in] v S16 value.
*/
- PixelValue(int16_t v)
- : PixelValue()
+ PixelValue(int16_t v) : PixelValue()
{
value.s16 = v;
}
@@ -147,8 +139,7 @@ public:
*
* @param[in] v U32 value.
*/
- PixelValue(uint32_t v)
- : PixelValue()
+ PixelValue(uint32_t v) : PixelValue()
{
value.u32 = v;
}
@@ -156,8 +147,7 @@ public:
*
* @param[in] v S32 value.
*/
- PixelValue(int32_t v)
- : PixelValue()
+ PixelValue(int32_t v) : PixelValue()
{
value.s32 = v;
}
@@ -166,8 +156,7 @@ public:
*
* @param[in] v U64 value.
*/
- PixelValue(uint64_t v)
- : PixelValue()
+ PixelValue(uint64_t v) : PixelValue()
{
value.u64 = v;
}
@@ -175,8 +164,7 @@ public:
*
* @param[in] v S64 value.
*/
- PixelValue(int64_t v)
- : PixelValue()
+ PixelValue(int64_t v) : PixelValue()
{
value.s64 = v;
}
@@ -184,8 +172,7 @@ public:
*
* @param[in] v F16 value.
*/
- PixelValue(bfloat16 v)
- : PixelValue()
+ PixelValue(bfloat16 v) : PixelValue()
{
value.bf16 = v;
}
@@ -193,8 +180,7 @@ public:
*
* @param[in] v F16 value.
*/
- PixelValue(half v)
- : PixelValue()
+ PixelValue(half v) : PixelValue()
{
value.f16 = v;
}
@@ -202,8 +188,7 @@ public:
*
* @param[in] v F32 value.
*/
- PixelValue(float v)
- : PixelValue()
+ PixelValue(float v) : PixelValue()
{
value.f32 = v;
}
@@ -211,8 +196,7 @@ public:
*
* @param[in] v F64 value.
*/
- PixelValue(double v)
- : PixelValue()
+ PixelValue(double v) : PixelValue()
{
value.f64 = v;
}
@@ -220,23 +204,23 @@ public:
* Use the field corresponding to the image format
*/
union
- {
- uint64_t u64; /**< Single channel U64 */
- int64_t s64; /**< Single channel S64 */
- uint8_t rgb[3]; /**< 3 channels: RGB888 */
- uint8_t yuv[3]; /**< 3 channels: Any YUV format */
- uint8_t rgbx[4]; /**< 4 channels: RGBX8888 */
- double f64; /**< Single channel double */
- float f32; /**< Single channel float 32 */
- half f16; /**< Single channel F16 */
- bfloat16 bf16; /**< Single channel brain floating-point number */
- uint8_t u8; /**< Single channel U8 */
- int8_t s8; /**< Single channel S8 */
- uint16_t u16; /**< Single channel U16 */
- int16_t s16; /**< Single channel S16 */
- uint32_t u32; /**< Single channel U32 */
- int32_t s32; /**< Single channel S32 */
- } value;
+ {
+ uint64_t u64; /**< Single channel U64 */
+ int64_t s64; /**< Single channel S64 */
+ uint8_t rgb[3]; /**< 3 channels: RGB888 */
+ uint8_t yuv[3]; /**< 3 channels: Any YUV format */
+ uint8_t rgbx[4]; /**< 4 channels: RGBX8888 */
+ double f64; /**< Single channel double */
+ float f32; /**< Single channel float 32 */
+ half f16; /**< Single channel F16 */
+ bfloat16 bf16; /**< Single channel brain floating-point number */
+ uint8_t u8; /**< Single channel U8 */
+ int8_t s8; /**< Single channel S8 */
+ uint16_t u16; /**< Single channel U16 */
+ int16_t s16; /**< Single channel S16 */
+ uint32_t u32; /**< Single channel U32 */
+ int32_t s32; /**< Single channel S32 */
+ } value;
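
The constructor overloads above select which union member is written; reading back through the matching member (or the typed get helpers declared below) recovers the value. A minimal sketch:

    PixelValue border(0.5f);         // float overload writes value.f32
    float      f = border.value.f32; // 0.5f
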
/** Interpret the pixel value as a U8
*
* @param[out] v Returned value
diff --git a/arm_compute/core/QuantizationInfo.h b/arm_compute/core/QuantizationInfo.h
index b331f7d923..aecba3712e 100644
--- a/arm_compute/core/QuantizationInfo.h
+++ b/arm_compute/core/QuantizationInfo.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2021 Arm Limited.
+ * Copyright (c) 2019-2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,16 +21,14 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef ARM_COMPUTE_QUANTIZATION_INFO_H
-#define ARM_COMPUTE_QUANTIZATION_INFO_H
+#ifndef ACL_ARM_COMPUTE_CORE_QUANTIZATIONINFO_H
+#define ACL_ARM_COMPUTE_CORE_QUANTIZATIONINFO_H
-#include "arm_compute/core/Error.h"
#include "arm_compute/core/Rounding.h"
+#include "arm_compute/core/utils/misc/Utility.h"
+
#include "support/ToolchainSupport.h"
-#include "utils/misc/Utility.h"
-#include <cstddef>
-#include <type_traits>
#include <vector>
namespace arm_compute
@@ -44,8 +42,7 @@ using qasymm16_t = uint16_t; /**< 16 bit quantized asymmetric scalar value
struct UniformQuantizationInfo
{
/** Default constructor */
- UniformQuantizationInfo()
- : scale(0.f), offset(0)
+ UniformQuantizationInfo() : scale(0.f), offset(0)
{
}
/** Constructor
@@ -53,8 +50,7 @@ struct UniformQuantizationInfo
* @param[in] scale Quantization scale
* @param[in] offset Quantization offset
*/
- UniformQuantizationInfo(float scale, int32_t offset)
- : scale(scale), offset(offset)
+ UniformQuantizationInfo(float scale, int32_t offset) : scale(scale), offset(offset)
{
}
/** Checks if the scale and offset are both zero */
@@ -72,9 +68,7 @@ class QuantizationInfo
{
public:
/** Default constructor */
- QuantizationInfo() noexcept
- : _scale(),
- _offset()
+ QuantizationInfo() noexcept : _scale(), _offset()
{
}
/** Construct quantization info.
@@ -83,19 +77,19 @@ public:
*
* @param[in] scale Scale.
*/
- QuantizationInfo(float scale)
- : _scale(1, scale), _offset()
+ QuantizationInfo(float scale) : _scale(1, scale), _offset()
{
}
/** Construct quantization info.
*
* @note Used for asymmetric quantization
*
- * @param[in] scale Scale.
- * @param[in] offset Offset.
+ * @param[in] scale Scale.
+ * @param[in] offset Offset.
+ * @param[in] is_dynamic Whether this QuantizationInfo is dynamic, i.e. the scale and offset may change.
*/
- QuantizationInfo(float scale, int offset)
- : _scale(1, scale), _offset(1, offset)
+ QuantizationInfo(float scale, int offset, bool is_dynamic = false)
+ : _scale(1, scale), _offset(1, offset), _is_dynamic(is_dynamic)
{
}
/** Construct quantization info.
@@ -104,19 +98,19 @@ public:
*
* @param[in] scale Scale.
*/
- QuantizationInfo(std::vector<float> scale)
- : _scale(scale), _offset()
+ QuantizationInfo(std::vector<float> scale) : _scale(scale), _offset()
{
}
/** Construct quantization info.
*
* @note Used for asymmetric per channel quantization
*
- * @param[in] scale Scale.
- * @param[in] offset Offset.
+ * @param[in] scale Scale.
+ * @param[in] offset Offset.
+ * @param[in] is_dynamic Whether this QuantizationInfo is dynamic, i.e. the scale and offset may change.
*/
- QuantizationInfo(std::vector<float> scale, std::vector<int32_t> offset)
- : _scale(scale), _offset(offset)
+ QuantizationInfo(std::vector<float> scale, std::vector<int32_t> offset, bool is_dynamic = false)
+ : _scale(scale), _offset(offset), _is_dynamic(is_dynamic)
{
}
/** Scale vector accessor
@@ -135,6 +129,14 @@ public:
{
return _offset;
}
+ /** is_dynamic accessor
+ *
+     * @return If true, the scale and offset may change, so operators will need to read them on every run
+ */
+ bool is_dynamic() const
+ {
+ return _is_dynamic;
+ }
/** Indicates whether this QuantizationInfo has valid settings or not
*
 * @return True if this has invalid settings.
@@ -159,6 +161,8 @@ public:
private:
std::vector<float> _scale; /**< Vector containing scaling factors */
std::vector<int32_t> _offset; /**< Vector containing zero offsets */
+ bool _is_dynamic =
+        false; /**< If true, the scale and offset may change, so operators will need to re-read them on every run */
};
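
For context, a minimal usage sketch of the new flag (illustrative only, not part of the patch; values are hypothetical):

    // Mark the quantization parameters as dynamic so that operators re-read
    // scale/offset on every run instead of caching them at configure time.
    arm_compute::QuantizationInfo qinfo(0.5f, 10, /* is_dynamic */ true);
    if (qinfo.is_dynamic())
    {
        // scale() and offset() must be queried again on each run
    }
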
/** Check whether two quantization info are equal.
@@ -211,8 +215,7 @@ inline bool operator!=(const UniformQuantizationInfo &lhs, const UniformQuantiza
template <typename QUANTIZED_TYPE = uint8_t>
struct Qasymm8QuantizationHelper
{
- static_assert(std::is_same<QUANTIZED_TYPE, uint8_t>::value
- || std::is_same<QUANTIZED_TYPE, int8_t>::value,
+ static_assert(std::is_same<QUANTIZED_TYPE, uint8_t>::value || std::is_same<QUANTIZED_TYPE, int8_t>::value,
"quantized type should be either uint8_t or int8_t.");
/** Quantize a value given a 8-bit asymmetric quantization scheme
@@ -237,9 +240,10 @@ struct Qasymm8QuantizationHelper
*
* @return Quantized value
*/
- static inline QUANTIZED_TYPE quantize(float value, const UniformQuantizationInfo &qinfo, RoundingPolicy rounding_policy)
+ static inline QUANTIZED_TYPE
+ quantize(float value, const UniformQuantizationInfo &qinfo, RoundingPolicy rounding_policy)
{
- if(rounding_policy == RoundingPolicy::TO_NEAREST_UP)
+ if (rounding_policy == RoundingPolicy::TO_NEAREST_UP)
{
return quantize(value, qinfo);
}
@@ -257,7 +261,8 @@ struct Qasymm8QuantizationHelper
*
* @return Quantized value
*/
- static inline QUANTIZED_TYPE quantize(float value, const QuantizationInfo &qinfo, RoundingPolicy rounding_policy = RoundingPolicy::TO_NEAREST_UP)
+ static inline QUANTIZED_TYPE
+ quantize(float value, const QuantizationInfo &qinfo, RoundingPolicy rounding_policy = RoundingPolicy::TO_NEAREST_UP)
{
const UniformQuantizationInfo uqinfo = qinfo.uniform();
ARM_COMPUTE_ERROR_ON(uqinfo.scale == 0);
@@ -300,7 +305,8 @@ struct Qasymm8QuantizationHelper
* @return Quantized value
*/
template <typename INFO_TYPE>
-inline uint8_t quantize_qasymm8(float value, const INFO_TYPE &qinfo, RoundingPolicy rounding_policy = RoundingPolicy::TO_NEAREST_UP)
+inline uint8_t
+quantize_qasymm8(float value, const INFO_TYPE &qinfo, RoundingPolicy rounding_policy = RoundingPolicy::TO_NEAREST_UP)
{
return Qasymm8QuantizationHelper<uint8_t>::quantize(value, qinfo, rounding_policy);
}
@@ -314,7 +320,9 @@ inline uint8_t quantize_qasymm8(float value, const INFO_TYPE &qinfo, RoundingPol
* @return Quantized value
*/
template <typename INFO_TYPE>
-inline int8_t quantize_qasymm8_signed(float value, const INFO_TYPE &qinfo, RoundingPolicy rounding_policy = RoundingPolicy::TO_NEAREST_UP)
+inline int8_t quantize_qasymm8_signed(float value,
+ const INFO_TYPE &qinfo,
+ RoundingPolicy rounding_policy = RoundingPolicy::TO_NEAREST_UP)
{
return Qasymm8QuantizationHelper<int8_t>::quantize(value, qinfo, rounding_policy);
}
@@ -436,6 +444,19 @@ inline float dequantize(uint16_t value, float scale, int32_t offset)
return (static_cast<int>(value) - offset) * scale;
}
+/** Dequantize a value given a 32-bit asymmetric quantization scheme
+ *
+ * @param[in] value Value to dequantize
+ * @param[in] scale Scale to use for dequantization
+ * @param[in] offset Zero-offset to use for dequantization
+ *
+ * @return Dequantized value
+ */
+inline float dequantize(int32_t value, float scale, int32_t offset)
+{
+ return (static_cast<int>(value) - offset) * scale;
+}
+
/** Quantize a value given a 16-bit symmetric quantization scheme
*
* @param[in] value Value to quantize
@@ -444,7 +465,9 @@ inline float dequantize(uint16_t value, float scale, int32_t offset)
*
* @return Quantized value
*/
-inline int16_t quantize_qsymm16(float value, const UniformQuantizationInfo &qinfo, RoundingPolicy rounding_policy = RoundingPolicy::TO_NEAREST_UP)
+inline int16_t quantize_qsymm16(float value,
+ const UniformQuantizationInfo &qinfo,
+ RoundingPolicy rounding_policy = RoundingPolicy::TO_NEAREST_UP)
{
int quantized = arm_compute::round(value / qinfo.scale, rounding_policy);
quantized = arm_compute::utility::clamp<int, int16_t>(quantized);
@@ -495,7 +518,9 @@ inline float dequantize_qsymm16(int16_t value, const QuantizationInfo &qinfo)
*
* @return Quantized value
*/
-inline uint16_t quantize_qasymm16(float value, const UniformQuantizationInfo &qinfo, RoundingPolicy rounding_policy = RoundingPolicy::TO_NEAREST_UP)
+inline uint16_t quantize_qasymm16(float value,
+ const UniformQuantizationInfo &qinfo,
+ RoundingPolicy rounding_policy = RoundingPolicy::TO_NEAREST_UP)
{
int quantized = arm_compute::round(value / qinfo.scale, rounding_policy) + qinfo.offset;
quantized = arm_compute::utility::clamp<int, uint16_t>(quantized);
@@ -538,6 +563,31 @@ inline float dequantize_qasymm16(uint16_t value, const QuantizationInfo &qinfo)
return dequantize_qasymm16(value, qinfo.uniform());
}
+/** Dequantize a value given a 32-bit asymmetric quantization scheme
+ *
+ * @param[in] value Value to dequantize
+ * @param[in] qinfo Quantization information to use for dequantizing
+ *
+ * @return Dequantized value
+ */
+inline float dequantize_s32(int32_t value, const UniformQuantizationInfo &qinfo)
+{
+ return (static_cast<int>(value) - qinfo.offset) * qinfo.scale;
+}
+
+/** Dequantize a value given a 32-bit asymmetric quantization scheme
+ *
+ * @param[in] value Value to dequantize
+ * @param[in] qinfo Quantization information to use for dequantizing
+ *
+ * @return Dequantized value
+ */
+inline float dequantize_s32(int32_t value, const QuantizationInfo &qinfo)
+{
+ return dequantize_s32(value, qinfo.uniform());
+}
+
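
A brief sketch of the new 32-bit dequantization helpers (illustrative only; values are hypothetical):

    // Dequantize an int32 value with scale 0.25 and zero-point 2.
    arm_compute::UniformQuantizationInfo uqinfo(0.25f, 2);
    float f0 = arm_compute::dequantize_s32(10, uqinfo);                                  // (10 - 2) * 0.25 = 2.0f
    float f1 = arm_compute::dequantize_s32(10, arm_compute::QuantizationInfo(0.25f, 2)); // same result via uniform()
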
/*
* In case of requantization of a quantized input tensor to an output tensor with another quantization
* instead of applying dequantization and then a quantization functions, we just compute new scale and
@@ -568,7 +618,8 @@ inline float dequantize_qasymm16(uint16_t value, const QuantizationInfo &qinfo)
* z_n = - z_i * s_i / s_o + z_o
*
*/
-inline UniformQuantizationInfo compute_requantization_scale_offset(const UniformQuantizationInfo &uqinfo_in, const UniformQuantizationInfo &uqinfo_out)
+inline UniformQuantizationInfo compute_requantization_scale_offset(const UniformQuantizationInfo &uqinfo_in,
+ const UniformQuantizationInfo &uqinfo_out)
{
float scale_to_apply = uqinfo_out.scale;
int32_t offset_to_apply = uqinfo_out.offset;
@@ -582,4 +633,4 @@ inline UniformQuantizationInfo compute_requantization_scale_offset(const Uniform
}
} // namespace arm_compute
-#endif /* ARM_COMPUTE_QUANTIZATION_INFO_H */
+#endif // ACL_ARM_COMPUTE_CORE_QUANTIZATIONINFO_H
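
The requantization derivation above reduces to s_n = s_i / s_o and z_n = -z_i * s_i / s_o + z_o; a small worked sketch with hypothetical parameters:

    // Requantize from (scale 0.5, offset 4) to (scale 0.25, offset 8).
    arm_compute::UniformQuantizationInfo in_q(0.5f, 4);
    arm_compute::UniformQuantizationInfo out_q(0.25f, 8);
    arm_compute::UniformQuantizationInfo req = arm_compute::compute_requantization_scale_offset(in_q, out_q);
    // Per the formula above: req.scale = 0.5 / 0.25 = 2.0 and req.offset = -4 * 0.5 / 0.25 + 8 = 0
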
diff --git a/arm_compute/core/Rounding.h b/arm_compute/core/Rounding.h
index b6817b5107..30a5a0fe9d 100644
--- a/arm_compute/core/Rounding.h
+++ b/arm_compute/core/Rounding.h
@@ -42,5 +42,5 @@ enum class RoundingPolicy
* @return Rounded value of the argument x.
*/
int round(float x, RoundingPolicy rounding_policy);
-}
+} // namespace arm_compute
#endif /*ARM_COMPUTE_ROUNDING_H */
diff --git a/arm_compute/core/Size2D.h b/arm_compute/core/Size2D.h
index bcd89cb310..672b392050 100644
--- a/arm_compute/core/Size2D.h
+++ b/arm_compute/core/Size2D.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2020 Arm Limited.
+ * Copyright (c) 2016-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -41,8 +41,7 @@ public:
* @param[in] w Width of the image or rectangle
* @param[in] h Height of the image or rectangle
*/
- Size2D(size_t w, size_t h)
- : width(w), height(h)
+ Size2D(size_t w, size_t h) noexcept : width(w), height(h)
{
}
/** The area of the image or rectangle calculated as (width * height)
@@ -89,5 +88,5 @@ public:
size_t width = {}; /**< Width of the image region or rectangle */
size_t height = {}; /**< Height of the image region or rectangle */
};
-}
+} // namespace arm_compute
#endif /*ARM_COMPUTE_SIZE2D_H */
diff --git a/arm_compute/core/Size3D.h b/arm_compute/core/Size3D.h
new file mode 100644
index 0000000000..e2dc6fe012
--- /dev/null
+++ b/arm_compute/core/Size3D.h
@@ -0,0 +1,97 @@
+/*
+ * Copyright (c) 2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_SIZE3D_H
+#define ARM_COMPUTE_SIZE3D_H
+
+#include <string>
+
+namespace arm_compute
+{
+/** Class for specifying the size of a 3D shape or object */
+class Size3D
+{
+public:
+ /** Default constructor */
+ Size3D() = default;
+ /** Constructor. Initializes "width", "height" and "depth" respectively with "w", "h" and "d"
+ *
+ * @param[in] w Width of the 3D shape or object
+ * @param[in] h Height of the 3D shape or object
+ * @param[in] d Depth of the 3D shape or object
+ */
+ Size3D(size_t w, size_t h, size_t d) noexcept : width(w), height(h), depth(d)
+ {
+ }
+
+ /** Convert the values stored to string
+ *
+ * @return string of (width x height x depth).
+ */
+ std::string to_string() const;
+
+ /** Semantic accessor for width as x.
+ *
+ * @return x.
+ */
+ size_t x() const
+ {
+ return width;
+ }
+
+ /** Semantic accessor for height as y.
+ *
+ * @return y.
+ */
+ size_t y() const
+ {
+ return height;
+ }
+
+ /** Semantic accessor for depth as z.
+ *
+ * @return z.
+ */
+ size_t z() const
+ {
+ return depth;
+ }
+
+ bool operator!=(const Size3D &other) const
+ {
+ return !(*this == other);
+ }
+
+ bool operator==(const Size3D &other) const
+ {
+ return (width == other.width) && (height == other.height) && (depth == other.depth);
+ }
+
+public:
+ size_t width = {}; /**< Width of the 3D shape or object */
+ size_t height = {}; /**< Height of the 3D shape or object */
+ size_t depth = {}; /**< Depth of the 3D shape or object */
+};
+
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_SIZE3D_H */
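
A short usage sketch for the new Size3D class (illustrative; the exact to_string() format is defined in the corresponding .cpp and is not assumed here):

    arm_compute::Size3D kernel(3U, 3U, 5U);
    size_t w = kernel.x();                                   // 3, semantic alias for kernel.width
    size_t d = kernel.z();                                   // 5, semantic alias for kernel.depth
    bool same = (kernel == arm_compute::Size3D(3U, 3U, 5U)); // true
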
diff --git a/arm_compute/core/Steps.h b/arm_compute/core/Steps.h
index 208fc4b294..6b261becc0 100644
--- a/arm_compute/core/Steps.h
+++ b/arm_compute/core/Steps.h
@@ -45,8 +45,7 @@ public:
* @param[in] steps Values to initialize the steps.
*/
template <typename... Ts>
- Steps(Ts... steps)
- : Dimensions{ steps... }
+ Steps(Ts... steps) : Dimensions{steps...}
{
// Initialize empty dimensions to 1
std::fill(_id.begin() + _num_dimensions, _id.end(), 1);
@@ -62,5 +61,5 @@ public:
/** Default destructor */
~Steps() = default;
};
-}
+} // namespace arm_compute
#endif /*ARM_COMPUTE_STEPS_H*/
diff --git a/arm_compute/core/Strides.h b/arm_compute/core/Strides.h
index 265799e41e..627b219987 100644
--- a/arm_compute/core/Strides.h
+++ b/arm_compute/core/Strides.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2019 Arm Limited.
+ * Copyright (c) 2017-2019, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -30,6 +30,7 @@
#include <algorithm>
#include <array>
#include <cstddef>
+#include <cstdint>
namespace arm_compute
{
@@ -42,8 +43,7 @@ public:
* @param[in] strides Values to initialize the strides.
*/
template <typename... Ts>
- constexpr Strides(Ts... strides)
- : Dimensions{ strides... }
+ constexpr Strides(Ts... strides) : Dimensions{strides...}
{
}
/** Allow instances of this class to be copy constructed */
diff --git a/arm_compute/core/SubTensorInfo.h b/arm_compute/core/SubTensorInfo.h
index 1b2278d99b..7a3ee2cfd0 100644
--- a/arm_compute/core/SubTensorInfo.h
+++ b/arm_compute/core/SubTensorInfo.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,10 +24,9 @@
#ifndef ARM_COMPUTE_SUBTENSORINFO_H
#define ARM_COMPUTE_SUBTENSORINFO_H
-#include "arm_compute/core/ITensorInfo.h"
-
#include "arm_compute/core/Coordinates.h"
#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/ITensorInfo.h"
#include "arm_compute/core/Strides.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/TensorShape.h"
@@ -73,7 +72,7 @@ public:
// Inherited methods overridden:
std::unique_ptr<ITensorInfo> clone() const override;
- ITensorInfo &set_data_type(DataType data_type) override
+ ITensorInfo &set_data_type(DataType data_type) override
{
ARM_COMPUTE_ERROR_ON(_parent == nullptr);
_parent->set_data_type(data_type);
@@ -116,7 +115,13 @@ public:
ARM_COMPUTE_ERROR_ON(_parent == nullptr);
return _parent->auto_padding();
};
+
+ ITensorInfo &set_lock_paddings(bool flag) override;
+
+ bool lock_paddings() const override;
+
bool extend_padding(const PaddingSize &padding) override;
+
size_t dimension(size_t index) const override
{
return _tensor_shape[index];
@@ -137,7 +142,7 @@ public:
return _parent->offset_element_in_bytes(_coords);
}
int32_t offset_element_in_bytes(const Coordinates &pos) const override;
- size_t element_size() const override
+ size_t element_size() const override
{
ARM_COMPUTE_ERROR_ON(_parent == nullptr);
return _parent->element_size();
@@ -196,12 +201,23 @@ public:
ARM_COMPUTE_ERROR_ON(_parent == nullptr);
return _parent->is_dynamic();
}
+ bool are_values_constant() const override
+ {
+ ARM_COMPUTE_ERROR_ON(_parent == nullptr);
+ return _parent->are_values_constant();
+ }
ITensorInfo &set_is_resizable(bool is_resizable) override
{
ARM_COMPUTE_ERROR_ON(_parent == nullptr);
_parent->set_is_resizable(is_resizable);
return *this;
}
+ ITensorInfo &set_are_values_constant(bool are_values_constant) override
+ {
+ ARM_COMPUTE_ERROR_ON(_parent == nullptr);
+ _parent->set_are_values_constant(are_values_constant);
+ return *this;
+ }
ValidRegion valid_region() const override
{
return _valid_region;
@@ -210,7 +226,7 @@ public:
{
ARM_COMPUTE_ERROR_ON(_parent == nullptr);
// Check if subtensor is valid if parent is configured
- if(_parent->tensor_shape().total_size() != 0)
+ if (_parent->tensor_shape().total_size() != 0)
{
ARM_COMPUTE_ERROR_ON_INVALID_SUBTENSOR_VALID_REGION(_parent->valid_region(), valid_region);
}
@@ -226,6 +242,17 @@ public:
ARM_COMPUTE_ERROR_ON(_parent == nullptr);
return _parent->data_layout();
}
+ ITensorInfo::Id id() const override
+ {
+ ARM_COMPUTE_ERROR_ON(_parent == nullptr);
+ return _parent->id();
+ }
+ ITensorInfo &set_id(ITensorInfo::Id id) override
+ {
+ ARM_COMPUTE_ERROR_ON(_parent == nullptr);
+ _parent->set_id(id);
+ return *this;
+ }
private:
ITensorInfo *_parent;
@@ -234,6 +261,7 @@ private:
Coordinates _coords;
ValidRegion _valid_region;
bool _extend_parent;
+ bool _lock_paddings;
};
} // namespace arm_compute
#endif /*ARM_COMPUTE_SUBTENSORINFO_H */
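
The new overrides all forward to the parent tensor info. A minimal sketch, assuming the existing SubTensorInfo(parent, shape, coords) constructor and that the parent outlives the sub-tensor:

    arm_compute::TensorInfo    parent_info(arm_compute::TensorShape(16U, 16U), 1, arm_compute::DataType::QASYMM8);
    arm_compute::SubTensorInfo sub_info(&parent_info, arm_compute::TensorShape(8U, 8U), arm_compute::Coordinates(0, 0));
    sub_info.set_are_values_constant(false);           // forwarded to the parent
    bool constant = parent_info.are_values_constant(); // false: the parent holds the state
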
diff --git a/arm_compute/core/TensorInfo.h b/arm_compute/core/TensorInfo.h
index a4330849bf..b18f750427 100644
--- a/arm_compute/core/TensorInfo.h
+++ b/arm_compute/core/TensorInfo.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2021 Arm Limited.
+ * Copyright (c) 2016-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,16 +24,14 @@
#ifndef ARM_COMPUTE_TENSORINFO_H
#define ARM_COMPUTE_TENSORINFO_H
-#include "arm_compute/core/ITensorInfo.h"
-
-#include "ITensorInfo.h"
#include "arm_compute/core/Coordinates.h"
#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/ITensorInfo.h"
#include "arm_compute/core/Strides.h"
#include "arm_compute/core/TensorShape.h"
#include "arm_compute/core/Types.h"
-#include "arm_compute/core/Utils.h"
+#include "ITensorInfo.h"
#include <cstddef>
#include <memory>
@@ -50,7 +48,7 @@ public:
/** Allow instances of this class to be copy constructed */
TensorInfo(const ITensorInfo &info);
/** Allow instances of this class to be copy constructed */
- TensorInfo(const TensorInfo &) = default;
+ TensorInfo(const TensorInfo &);
/** Allow instances of this class to be copied */
TensorInfo &operator=(const TensorInfo &) = default;
/** Allow instances of this class to be move constructed */
@@ -113,7 +111,10 @@ public:
* @param[in] data_type Data type to use for each tensor element
* @param[in] quantization_info The quantization settings for the tensor data.
*/
- TensorInfo(const TensorShape &tensor_shape, size_t num_channels, DataType data_type, QuantizationInfo quantization_info);
+ TensorInfo(const TensorShape &tensor_shape,
+ size_t num_channels,
+ DataType data_type,
+ QuantizationInfo quantization_info);
/** Initialize the tensor info with just a format.
*
@@ -137,7 +138,11 @@ public:
* @param[in] offset_first_element_in_bytes Offset in bytes from the beginning of memory allocation to access the first element.
* @param[in] total_size_in_bytes Size in bytes of the memory allocation (including the offset to the first element).
*/
- void init(const TensorShape &tensor_shape, Format format, const Strides &strides_in_bytes, size_t offset_first_element_in_bytes, size_t total_size_in_bytes);
+ void init(const TensorShape &tensor_shape,
+ Format format,
+ const Strides &strides_in_bytes,
+ size_t offset_first_element_in_bytes,
+ size_t total_size_in_bytes);
/** Initialize the tensor info with just a format.
*
@@ -165,8 +170,12 @@ public:
* @param[in] offset_first_element_in_bytes Offset in bytes from the beginning of memory allocation to access the first element.
* @param[in] total_size_in_bytes Size in bytes of the memory allocation (including the offset to the first element).
*/
- void init(const TensorShape &tensor_shape, size_t num_channels, DataType data_type, const Strides &strides_in_bytes, size_t offset_first_element_in_bytes,
- size_t total_size_in_bytes);
+ void init(const TensorShape &tensor_shape,
+ size_t num_channels,
+ DataType data_type,
+ const Strides &strides_in_bytes,
+ size_t offset_first_element_in_bytes,
+ size_t total_size_in_bytes);
/** Initialize the metadata structure for the given tensor shape and single-plane format, (Padding is automatically calculated)
*
* @note The padding used by this method is really conservative so that the tensor can be used for most functions.
@@ -192,17 +201,19 @@ public:
// Inherited methods overridden:
std::unique_ptr<ITensorInfo> clone() const override;
- ITensorInfo &set_data_type(DataType data_type) override;
- ITensorInfo &set_num_channels(int num_channels) override;
- ITensorInfo &set_format(Format format) override;
- ITensorInfo &set_tensor_shape(const TensorShape &shape) override;
- ITensorInfo &set_tensor_dims_state(const TensorDimsState &state) override;
- ITensorInfo &set_quantization_info(const QuantizationInfo &quantization_info) override;
- ITensorInfo &set_data_layout(const DataLayout &data_layout) override;
- ITensorInfo &reset_padding() override;
- bool auto_padding() override;
- bool extend_padding(const PaddingSize &padding) override;
- size_t dimension(size_t index) const override
+ ITensorInfo &set_data_type(DataType data_type) override;
+ ITensorInfo &set_num_channels(int num_channels) override;
+ ITensorInfo &set_format(Format format) override;
+ ITensorInfo &set_tensor_shape(const TensorShape &shape) override;
+ ITensorInfo &set_tensor_dims_state(const TensorDimsState &state) override;
+ ITensorInfo &set_quantization_info(const QuantizationInfo &quantization_info) override;
+ ITensorInfo &set_data_layout(const DataLayout &data_layout) override;
+ ITensorInfo &reset_padding() override;
+ bool auto_padding() override;
+ ITensorInfo &set_lock_paddings(bool flag) override;
+ bool lock_paddings() const override;
+ bool extend_padding(const PaddingSize &padding) override;
+ size_t dimension(size_t index) const override
{
return _tensor_shape[index];
}
@@ -219,7 +230,7 @@ public:
return _offset_first_element_in_bytes;
}
int32_t offset_element_in_bytes(const Coordinates &pos) const override;
- size_t element_size() const override
+ size_t element_size() const override
{
return data_size_from_type(_data_type) * _num_channels;
}
@@ -265,7 +276,12 @@ public:
}
bool is_dynamic() const override
{
- return std::find(std::cbegin(_dims_state), std::cend(_dims_state), get_dynamic_state_value()) != std::cend(_dims_state);
+ return std::find(std::cbegin(_dims_state), std::cend(_dims_state), get_dynamic_state_value()) !=
+ std::cend(_dims_state);
+ }
+ bool are_values_constant() const override
+ {
+ return _are_values_constant;
}
ITensorInfo &set_is_resizable(bool is_resizable) override
{
@@ -288,6 +304,21 @@ public:
{
return _data_layout;
}
+ ITensorInfo &set_are_values_constant(bool are_values_constant) override
+ {
+ _are_values_constant = are_values_constant;
+ return *this;
+ }
+ ITensorInfo::Id id() const override
+ {
+ return _id;
+ }
+ ITensorInfo &set_id(ITensorInfo::Id id) override
+ {
+ _id = id;
+ return *this;
+ }
+ inline friend bool operator==(const TensorInfo &lhs, const TensorInfo &rhs);
private:
/** Calculates strides, offset and total size resulting from the specified padding around the XY plane.
@@ -309,6 +340,29 @@ private:
PaddingSize _padding;
QuantizationInfo _quantization_info;
DataLayout _data_layout;
+ bool _are_values_constant;
+ ITensorInfo::Id _id;
+ bool _lock_paddings;
};
+
+/** Check whether two tensor info are equal.
+ *
+ * @param[in] lhs LHS tensor info.
+ * @param[in] rhs RHS tensor info.
+ *
+ * @return True if the given tensor infos are the same.
+ */
+inline bool operator==(const TensorInfo &lhs, const TensorInfo &rhs)
+{
+ return (lhs._total_size == rhs._total_size) &&
+ (lhs._offset_first_element_in_bytes == rhs._offset_first_element_in_bytes) &&
+ (lhs._strides_in_bytes == rhs._strides_in_bytes) && (lhs._num_channels == rhs._num_channels) &&
+ (lhs._tensor_shape == rhs._tensor_shape) && (lhs._dims_state == rhs._dims_state) &&
+ (lhs._data_type == rhs._data_type) && (lhs._format == rhs._format) &&
+ (lhs._is_resizable == rhs._is_resizable) && (lhs._valid_region == rhs._valid_region) &&
+ (lhs._padding == rhs._padding) && (lhs._quantization_info == rhs._quantization_info) &&
+ (lhs._data_layout == rhs._data_layout) && (lhs._are_values_constant == rhs._are_values_constant) &&
+ (lhs._id == rhs._id);
+}
} // namespace arm_compute
#endif /*ARM_COMPUTE_TENSORINFO_H */
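
A short sketch of the new equality operator, including the effect of the new _id field (illustrative only):

    arm_compute::TensorInfo a(arm_compute::TensorShape(16U, 4U), 1, arm_compute::DataType::F32);
    arm_compute::TensorInfo b(arm_compute::TensorShape(16U, 4U), 1, arm_compute::DataType::F32);
    bool same = (a == b); // true: every compared field matches, including the default ids
    b.set_id(42);         // hypothetical id; diverging ids make otherwise-identical infos compare unequal
    same = (a == b);      // false
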
diff --git a/arm_compute/core/TensorShape.h b/arm_compute/core/TensorShape.h
index b6ab9dc75a..c1707e262f 100644
--- a/arm_compute/core/TensorShape.h
+++ b/arm_compute/core/TensorShape.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2021 Arm Limited.
+ * Copyright (c) 2016-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -44,11 +44,10 @@ public:
* @param[in] dims Values to initialize the dimensions.
*/
template <typename... Ts>
- TensorShape(Ts... dims)
- : Dimensions{ dims... }
+ TensorShape(Ts... dims) : Dimensions{dims...}
{
// Initialize unspecified dimensions to 1
- if(_num_dimensions > 0)
+ if (_num_dimensions > 0)
{
std::fill(_id.begin() + _num_dimensions, _id.end(), 1);
}
@@ -79,7 +78,7 @@ public:
TensorShape &set(size_t dimension, size_t value, bool apply_dim_correction = true, bool increase_dim_unit = true)
{
// Clear entire shape if one dimension is zero
- if(value == 0)
+ if (value == 0)
{
_num_dimensions = 0;
std::fill(_id.begin(), _id.end(), 0);
@@ -94,7 +93,7 @@ public:
Dimensions::set(dimension, value, increase_dim_unit);
// Correct number dimensions to ignore trailing dimensions of size 1
- if(apply_dim_correction)
+ if (apply_dim_correction)
{
apply_dimension_correction();
}
@@ -106,9 +105,10 @@ public:
*
* @note The upper dimensions of the tensor shape will be shifted down by 1
*
- * @param[in] n Dimension to remove
+ * @param[in] n Dimension to remove
+ * @param[in] apply_dim_correction (Optional) Flag to state whether to apply dimension correction (i.e. removing trailing dimensions of size 1) after removing a dimension.
*/
- void remove_dimension(size_t n)
+ void remove_dimension(size_t n, bool apply_dim_correction = true)
{
ARM_COMPUTE_ERROR_ON(_num_dimensions < 1);
ARM_COMPUTE_ERROR_ON(n >= _num_dimensions);
@@ -122,7 +122,10 @@ public:
std::fill(_id.begin() + _num_dimensions, _id.end(), 1);
// Correct number dimensions to ignore trailing dimensions of size 1
- apply_dimension_correction();
+ if (apply_dim_correction)
+ {
+ apply_dimension_correction();
+ }
}
/** Collapse the first n dimensions.
@@ -208,26 +211,26 @@ public:
* @return The broadcasted shape or an empty shape if the shapes are not broadcast compatible.
*/
template <typename... Shapes>
- static TensorShape broadcast_shape(const Shapes &... shapes)
+ static TensorShape broadcast_shape(const Shapes &...shapes)
{
TensorShape bc_shape;
- auto broadcast = [&bc_shape](const TensorShape & other)
+ auto broadcast = [&bc_shape](const TensorShape &other)
{
- if(bc_shape.num_dimensions() == 0)
+ if (bc_shape.num_dimensions() == 0)
{
bc_shape = other;
}
- else if(other.num_dimensions() != 0)
+ else if (other.num_dimensions() != 0)
{
- for(size_t d = 0; d < TensorShape::num_max_dimensions; ++d)
+ for (size_t d = 0; d < TensorShape::num_max_dimensions; ++d)
{
const size_t dim_min = std::min(bc_shape[d], other[d]);
const size_t dim_max = std::max(bc_shape[d], other[d]);
- if((dim_min != 1) && (dim_min != dim_max))
+ if ((dim_min != 1) && (dim_min != dim_max))
{
- bc_shape = TensorShape{ 0U };
+ bc_shape = TensorShape{0U};
break;
}
@@ -245,9 +248,9 @@ private:
/** Remove trailing dimensions of size 1 from the reported number of dimensions. */
void apply_dimension_correction()
{
- for(int i = static_cast<int>(_num_dimensions) - 1; i > 0; --i)
+ for (int i = static_cast<int>(_num_dimensions) - 1; i > 0; --i)
{
- if(_id[i] == 1)
+ if (_id[i] == 1)
{
--_num_dimensions;
}
@@ -258,5 +261,5 @@ private:
}
}
};
-}
+} // namespace arm_compute
#endif /*ARM_COMPUTE_TENSORSHAPE_H*/
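
A sketch of the broadcast rule implemented above: two dimensions are compatible when they are equal or when one of them is 1; any other mismatch yields an empty shape (illustrative values):

    arm_compute::TensorShape a(8U, 1U, 3U);
    arm_compute::TensorShape b(8U, 4U, 3U);
    auto c = arm_compute::TensorShape::broadcast_shape(a, b); // (8, 4, 3)
    auto e = arm_compute::TensorShape::broadcast_shape(arm_compute::TensorShape(2U), arm_compute::TensorShape(3U));
    // e is empty (TensorShape{0U}): 2 vs 3 is not broadcast-compatible
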
diff --git a/arm_compute/core/Types.h b/arm_compute/core/Types.h
index 48c87cd8ac..f2f60c150e 100644
--- a/arm_compute/core/Types.h
+++ b/arm_compute/core/Types.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2021 Arm Limited.
+ * Copyright (c) 2016-2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,17 +21,52 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef ARM_COMPUTE_TYPES_H
-#define ARM_COMPUTE_TYPES_H
-
+#ifndef ACL_ARM_COMPUTE_CORE_TYPES_H
+#define ACL_ARM_COMPUTE_CORE_TYPES_H
+
+/** The following symbols have been moved to:
+ * half
+ * PermutationVector
+ * Format
+ * DataType
+ * DataLayout
+ * DataLayoutDimension
+ * PadStrideInfo
+ * WeightFormat
+ * Channel
+ * DimensionRoundingType
+ */
+#include "arm_compute/core/CoreTypes.h"
+/** The following symbols have been moved to:
+ * ActivationFunction
+ * ActivationLayerInfo
+ */
+#include "arm_compute/function_info/ActivationLayerInfo.h"
+/** The following symbols have been moved to:
+ * ConvolutionInfo
+ */
+#include "arm_compute/function_info/ConvolutionInfo.h"
+/** The following symbols have been moved to:
+ * FullyConnectedLayerInfo
+ */
+#include "arm_compute/function_info/FullyConnectedLayerInfo.h"
+/** The following symbols have been moved to:
+ * GEMMLowpOutputStageType
+ * GEMMLowpOutputStageInfo
+ * GEMMInfo
+ */
+#include "arm_compute/function_info/GEMMInfo.h"
+/** The following symbols have been moved to:
+ * MatMulInfo
+ */
#include "arm_compute/core/Coordinates.h"
-#include "arm_compute/core/QuantizationInfo.h"
#include "arm_compute/core/Size2D.h"
-#include "arm_compute/core/Strides.h"
+#include "arm_compute/core/Size3D.h"
#include "arm_compute/core/TensorShape.h"
#include "arm_compute/core/utils/misc/Macros.h"
+#include "arm_compute/function_info/MatMulInfo.h"
+
#include "support/Bfloat16.h"
-#include "support/Half.h"
#include <cmath>
#include <cstddef>
@@ -42,62 +77,9 @@
namespace arm_compute
{
-/** 16-bit floating point type */
-using half = half_float::half;
-
-/** Permutation vector */
-using PermutationVector = Strides;
/** Bidirectional strides */
using BiStrides = Coordinates;
-/** Image colour formats */
-enum class Format
-{
- UNKNOWN, /**< Unknown image format */
- U8, /**< 1 channel, 1 U8 per channel */
- S16, /**< 1 channel, 1 S16 per channel */
- U16, /**< 1 channel, 1 U16 per channel */
- S32, /**< 1 channel, 1 S32 per channel */
- U32, /**< 1 channel, 1 U32 per channel */
- BFLOAT16, /**< 16-bit brain floating-point number */
- F16, /**< 1 channel, 1 F16 per channel */
- F32, /**< 1 channel, 1 F32 per channel */
- UV88, /**< 2 channel, 1 U8 per channel */
- RGB888, /**< 3 channels, 1 U8 per channel */
- RGBA8888, /**< 4 channels, 1 U8 per channel */
- YUV444, /**< A 3 plane of 8 bit 4:4:4 sampled Y, U, V planes */
- YUYV422, /**< A single plane of 32-bit macro pixel of Y0, U0, Y1, V0 bytes */
- NV12, /**< A 2 plane YUV format of Luma (Y) and interleaved UV data at 4:2:0 sampling */
- NV21, /**< A 2 plane YUV format of Luma (Y) and interleaved VU data at 4:2:0 sampling */
- IYUV, /**< A 3 plane of 8-bit 4:2:0 sampled Y, U, V planes */
- UYVY422 /**< A single plane of 32-bit macro pixel of U0, Y0, V0, Y1 byte */
-};
-
-/** Available data types */
-enum class DataType
-{
- UNKNOWN, /**< Unknown data type */
- U8, /**< unsigned 8-bit number */
- S8, /**< signed 8-bit number */
- QSYMM8, /**< quantized, symmetric fixed-point 8-bit number */
- QASYMM8, /**< quantized, asymmetric fixed-point 8-bit number unsigned */
- QASYMM8_SIGNED, /**< quantized, asymmetric fixed-point 8-bit number signed */
- QSYMM8_PER_CHANNEL, /**< quantized, symmetric per channel fixed-point 8-bit number */
- U16, /**< unsigned 16-bit number */
- S16, /**< signed 16-bit number */
- QSYMM16, /**< quantized, symmetric fixed-point 16-bit number */
- QASYMM16, /**< quantized, asymmetric fixed-point 16-bit number */
- U32, /**< unsigned 32-bit number */
- S32, /**< signed 32-bit number */
- U64, /**< unsigned 64-bit number */
- S64, /**< signed 64-bit number */
- BFLOAT16, /**< 16-bit brain floating-point number */
- F16, /**< 16-bit floating-point number */
- F32, /**< 32-bit floating-point number */
- F64, /**< 64-bit floating-point number */
- SIZET /**< size_t */
-};
-
/** Available Sampling Policies */
enum class SamplingPolicy
{
@@ -105,32 +87,13 @@ enum class SamplingPolicy
TOP_LEFT /**< Samples are taken at pixel top left corner */
};
-/** [DataLayout enum definition] **/
-
-/** Supported tensor data layouts */
-enum class DataLayout
-{
- UNKNOWN, /**< Unknown data layout */
- NCHW, /**< Num samples, channels, height, width */
- NHWC /**< Num samples, height, width, channels */
-};
-/** [DataLayout enum definition] **/
-
-/** Supported tensor data layout dimensions */
-enum class DataLayoutDimension
-{
- CHANNEL, /**< channel */
- HEIGHT, /**< height */
- WIDTH, /**< width */
- BATCHES /**< batches */
-};
-
/** Available ConvolutionMethod*/
enum class ConvolutionMethod
{
GEMM, /**< Convolution using GEMM */
GEMM_CONV2D, /**< Direct 2D GEMM convolution */
DIRECT, /**< Direct convolution */
+ INDIRECT, /**< Indirect convolution */
WINOGRAD, /**< Convolution using Winograd */
FFT /**< Convolution using FFT */
};
@@ -145,8 +108,9 @@ enum class DepthwiseConvolutionFunction
/** Available DeconvolutionMethod*/
enum class DeconvolutionMethod
{
- GEMM, /**< Deconvolution using GEMM */
- DIRECT, /**< Direct deconvolution */
+ GEMM, /**< Deconvolution using GEMM */
+ DIRECT, /**< Direct deconvolution */
+ UPSCALE_CONV2D /**< Deconvolution with Upscaling */
};
/** Available FuseBatchNormalizationType*/
@@ -179,8 +143,7 @@ enum class ComparisonOperation
struct ValidRegion
{
/** Default constructor */
- ValidRegion()
- : anchor{}, shape{}
+ ValidRegion() : anchor{}, shape{}
{
}
@@ -201,8 +164,7 @@ struct ValidRegion
* @param[in] a_shape Shape of the valid region.
*
*/
- ValidRegion(const Coordinates &an_anchor, const TensorShape &a_shape)
- : anchor{ an_anchor }, shape{ a_shape }
+ ValidRegion(const Coordinates &an_anchor, const TensorShape &a_shape) : anchor{an_anchor}, shape{a_shape}
{
anchor.set_num_dimensions(std::max(anchor.num_dimensions(), shape.num_dimensions()));
}
@@ -215,7 +177,7 @@ struct ValidRegion
*
*/
ValidRegion(const Coordinates &an_anchor, const TensorShape &a_shape, size_t num_dimensions)
- : anchor{ an_anchor }, shape{ a_shape }
+ : anchor{an_anchor}, shape{a_shape}
{
ARM_COMPUTE_ERROR_ON(num_dimensions < std::max(anchor.num_dimensions(), shape.num_dimensions()));
anchor.set_num_dimensions(num_dimensions);
@@ -248,9 +210,22 @@ struct ValidRegion
return *this;
}
+ /** Check whether two valid regions are equal.
+ *
+ * @param[in] lhs LHS valid region
+ * @param[in] rhs RHS valid region
+ *
+ * @return True if the valid regions are the same.
+ */
+ inline friend bool operator==(const ValidRegion &lhs, const ValidRegion &rhs);
+
Coordinates anchor; /**< Anchor for the start of the valid region. */
TensorShape shape; /**< Shape of the valid region. */
};
+inline bool operator==(const ValidRegion &lhs, const ValidRegion &rhs)
+{
+ return (lhs.anchor == rhs.anchor) && (lhs.shape == rhs.shape);
+}
/** Methods available to handle borders */
enum class BorderMode
@@ -264,32 +239,24 @@ enum class BorderMode
struct BorderSize
{
/** Empty border, i.e. no border */
- constexpr BorderSize() noexcept
- : top{ 0 },
- right{ 0 },
- bottom{ 0 },
- left{ 0 }
+ constexpr BorderSize() noexcept : top{0}, right{0}, bottom{0}, left{0}
{
}
/** Border with equal size around the 2D plane */
- explicit constexpr BorderSize(unsigned int size) noexcept
- : top{ size },
- right{ size },
- bottom{ size },
- left{ size }
+ explicit constexpr BorderSize(unsigned int size) noexcept : top{size}, right{size}, bottom{size}, left{size}
{
}
/** Border with same size for top/bottom and left/right */
constexpr BorderSize(unsigned int top_bottom, unsigned int left_right)
- : top{ top_bottom }, right{ left_right }, bottom{ top_bottom }, left{ left_right }
+ : top{top_bottom}, right{left_right}, bottom{top_bottom}, left{left_right}
{
}
/** Border with different sizes */
constexpr BorderSize(unsigned int top, unsigned int right, unsigned int bottom, unsigned int left)
- : top{ top }, right{ right }, bottom{ bottom }, left{ left }
+ : top{top}, right{right}, bottom{bottom}, left{left}
{
}
@@ -341,7 +308,7 @@ struct BorderSize
*
* @return true if they are equal
*/
- bool operator==(const BorderSize &rhs)
+ bool operator==(const BorderSize &rhs) const
{
return (top == rhs.top) && (right == rhs.right) && (bottom == rhs.bottom) && (left == rhs.left);
}
@@ -352,7 +319,7 @@ struct BorderSize
*
* @return true if they are different
*/
- bool operator!=(const BorderSize &rhs)
+ bool operator!=(const BorderSize &rhs) const
{
return !(*this == rhs);
}
@@ -378,7 +345,11 @@ struct BorderSize
/** Container for 2D padding size */
using PaddingSize = BorderSize;
-/** Policy to handle overflow */
+/** Policy to handle integer overflow
+ * @note This is ignored by floating point operations, where the overflow behavior adheres to the IEEE-754 standard:
+ * in case of overflow, ±infinity is returned by default for the round-to-nearest modes (and the result follows the
+ * rounding rules for the directed rounding modes).
+ */
enum class ConvertPolicy
{
WRAP, /**< Wrap around */
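
A plain-C++ sketch of the two integer overflow policies described in the note above (illustrative only; this is not the library's kernel code):

    #include <algorithm>
    #include <cstdint>

    int sum = 200 + 100;                                          // 300 does not fit in uint8_t
    uint8_t wrapped   = static_cast<uint8_t>(sum);                // WRAP: 300 mod 256 = 44
    uint8_t saturated = static_cast<uint8_t>(std::min(sum, 255)); // SATURATE: clamped to 255
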
@@ -390,7 +361,7 @@ enum class InterpolationPolicy
{
NEAREST_NEIGHBOR, /**< Output values are defined to match the source pixel whose center is nearest to the sample position */
BILINEAR, /**< Output values are defined by bilinear interpolation between the pixels */
- AREA, /**< Output values are determined by averaging the source pixels whose areas fall under the area of the destination pixel, projected onto the source image */
+ AREA, /**< Output values are determined by averaging the source pixels whose areas fall under the area of the destination pixel, projected onto the source image */
};
/** Bilinear Interpolation method used by LKTracker */
@@ -433,23 +404,6 @@ using PaddingList = std::vector<PaddingInfo>;
/** Information to produce a tiled version of a Tensor */
using Multiples = std::vector<uint32_t>;
-/** Available channels */
-enum class Channel
-{
- UNKNOWN, /** Unknown channel format */
- C0, /**< First channel (used by formats with unknown channel types). */
- C1, /**< Second channel (used by formats with unknown channel types). */
- C2, /**< Third channel (used by formats with unknown channel types). */
- C3, /**< Fourth channel (used by formats with unknown channel types). */
- R, /**< Red channel. */
- G, /**< Green channel. */
- B, /**< Blue channel. */
- A, /**< Alpha channel. */
- Y, /**< Luma channel. */
- U, /**< Cb/U channel. */
- V /**< Cr/V/Value channel. */
-};
-
/** Available reduction operations */
enum class ReductionOperation
{
@@ -514,21 +468,12 @@ enum class NormType
*/
struct DetectionWindow
{
- uint16_t x{ 0 }; /**< Top-left x coordinate */
- uint16_t y{ 0 }; /**< Top-left y coordinate */
- uint16_t width{ 0 }; /**< Width of the detection window */
- uint16_t height{ 0 }; /**< Height of the detection window */
- uint16_t idx_class{ 0 }; /**< Index of the class */
- float score{ 0.f }; /**< Confidence value for the detection window */
-};
-
-/** Dimension rounding type when down-scaling on CNNs
- * @note Used in pooling and convolution layer
- */
-enum class DimensionRoundingType
-{
- FLOOR, /**< Floor rounding */
- CEIL /**< Ceil rounding */
+ uint16_t x{0}; /**< Top-left x coordinate */
+ uint16_t y{0}; /**< Top-left y coordinate */
+ uint16_t width{0}; /**< Width of the detection window */
+ uint16_t height{0}; /**< Height of the detection window */
+ uint16_t idx_class{0}; /**< Index of the class */
+ float score{0.f}; /**< Confidence value for the detection window */
};
/** Available pooling types */
@@ -565,12 +510,28 @@ public:
 * @param[in] im_width (Optional) Boxes whose centers (on the x axis) are beyond im_width will be filtered. Defaults to 1
 * @param[in] im_height (Optional) Boxes whose centers (on the y axis) are beyond im_height will be filtered. Defaults to 1
*/
- BoxNMSLimitInfo(float score_thresh = 0.05f, float nms = 0.3f,
- int detections = 100, bool soft_nms_enabled = false,
- NMSType soft_nms_method = NMSType::LINEAR,
- float soft_nms_sigma = 0.5f, float soft_nms_min_score_thres = 0.001f, bool suppress_size = false, float min_size = 1.0f, float im_width = 1.0f, float im_height = 1.0f)
- : _score_thresh(score_thresh), _nms(nms), _detections_per_im(detections), _soft_nms_enabled(soft_nms_enabled), _soft_nms_method(soft_nms_method), _soft_nms_sigma(soft_nms_sigma),
- _soft_nms_min_score_thres(soft_nms_min_score_thres), _suppress_size(suppress_size), _min_size(min_size), _im_width(im_width), _im_height(im_height)
+ BoxNMSLimitInfo(float score_thresh = 0.05f,
+ float nms = 0.3f,
+ int detections = 100,
+ bool soft_nms_enabled = false,
+ NMSType soft_nms_method = NMSType::LINEAR,
+ float soft_nms_sigma = 0.5f,
+ float soft_nms_min_score_thres = 0.001f,
+ bool suppress_size = false,
+ float min_size = 1.0f,
+ float im_width = 1.0f,
+ float im_height = 1.0f)
+ : _score_thresh(score_thresh),
+ _nms(nms),
+ _detections_per_im(detections),
+ _soft_nms_enabled(soft_nms_enabled),
+ _soft_nms_method(soft_nms_method),
+ _soft_nms_sigma(soft_nms_sigma),
+ _soft_nms_min_score_thres(soft_nms_min_score_thres),
+ _suppress_size(suppress_size),
+ _min_size(min_size),
+ _im_width(im_width),
+ _im_height(im_height)
{
}
/** Get the score threshold */
@@ -644,120 +605,42 @@ private:
};
/** Padding and stride information class */
-class PadStrideInfo
+/** Padding information for 2D operations like Conv2d */
+struct Padding2D
{
-public:
- /** Constructor
- *
- * @param[in] stride_x (Optional) Stride, in elements, across x. Defaults to 1.
- * @param[in] stride_y (Optional) Stride, in elements, across y. Defaults to 1.
- * @param[in] pad_x (Optional) Padding, in elements, across x. Defaults to 0.
- * @param[in] pad_y (Optional) Padding, in elements, across y. Defaults to 0.
- * @param[in] round (Optional) Dimensions rounding. Defaults to @ref FLOOR.
- */
- PadStrideInfo(unsigned int stride_x = 1, unsigned int stride_y = 1,
- unsigned int pad_x = 0, unsigned int pad_y = 0,
- DimensionRoundingType round = DimensionRoundingType::FLOOR)
- : _stride(std::make_pair(stride_x, stride_y)),
- _pad_left(pad_x),
- _pad_top(pad_y),
- _pad_right(pad_x),
- _pad_bottom(pad_y),
- _round_type(round)
- {
- }
- /** Constructor
- *
- * @param[in] stride_x Stride, in elements, across x.
- * @param[in] stride_y Stride, in elements, across y.
- * @param[in] pad_left Padding across x on the left, in elements.
- * @param[in] pad_top Padding across y on the top, in elements.
- * @param[in] pad_right Padding across x on the right, in elements.
- * @param[in] pad_bottom Padding across y on the bottom, in elements.
- * @param[in] round Dimensions rounding.
- */
- PadStrideInfo(unsigned int stride_x, unsigned int stride_y,
- unsigned int pad_left, unsigned int pad_right,
- unsigned int pad_top, unsigned int pad_bottom,
- DimensionRoundingType round)
- : _stride(std::make_pair(stride_x, stride_y)),
- _pad_left(pad_left),
- _pad_top(pad_top),
- _pad_right(pad_right),
- _pad_bottom(pad_bottom),
- _round_type(round)
- {
- }
- /** Get the stride.
- *
- * @return a pair: stride x, stride y.
- */
- std::pair<unsigned int, unsigned int> stride() const
- {
- return _stride;
- }
- /** Check whether the padding is symmetric.
- *
- * @return True if the padding is symmetric.
- */
- bool padding_is_symmetric() const
- {
- return (_pad_left == _pad_right) && (_pad_top == _pad_bottom);
- }
- /** Get the padding.
- *
- * @note This should only be used when the padding is symmetric.
- *
- * @return a pair: padding left/right, padding top/bottom
- */
- std::pair<unsigned int, unsigned int> pad() const
+ Padding2D() = default;
+ Padding2D(size_t left, size_t right, size_t top, size_t bottom) : left(left), right(right), top(top), bottom(bottom)
{
- //this accessor should be used only when padding is symmetric
- ARM_COMPUTE_ERROR_ON(!padding_is_symmetric());
- return std::make_pair(_pad_left, _pad_top);
}
+ size_t left = {0}; /**< Padding across the width dimension on the left, in elements. */
+ size_t right = {0}; /**< Padding across the width dimension on the right, in elements. */
+ size_t top = {0}; /**< Padding across the height dimension on the top, in elements. */
+ size_t bottom = {0}; /**< Padding across the height dimension on the bottom, in elements. */
+};
- /** Get the left padding */
- unsigned int pad_left() const
- {
- return _pad_left;
- }
- /** Get the right padding */
- unsigned int pad_right() const
- {
- return _pad_right;
- }
- /** Get the top padding */
- unsigned int pad_top() const
- {
- return _pad_top;
- }
- /** Get the bottom padding */
- unsigned int pad_bottom() const
+/** Padding information for 3D operations like Conv3d */
+struct Padding3D
+{
+ Padding3D() noexcept
{
- return _pad_bottom;
}
- /** Get the rounding type */
- DimensionRoundingType round() const
+ Padding3D(size_t pad_x, size_t pad_y, size_t pad_z)
+ : left(pad_x), right(pad_x), top(pad_y), bottom(pad_y), front(pad_z), back(pad_z)
{
- return _round_type;
}
- /** Check whether this has any padding */
- bool has_padding() const
+ Padding3D(size_t left, size_t right, size_t top, size_t bottom, size_t front, size_t back)
+ : left(left), right(right), top(top), bottom(bottom), front(front), back(back)
{
- return (_pad_left != 0 || _pad_top != 0 || _pad_right != 0 || _pad_bottom != 0);
}
-private:
- std::pair<unsigned int, unsigned int> _stride;
- unsigned int _pad_left;
- unsigned int _pad_top;
- unsigned int _pad_right;
- unsigned int _pad_bottom;
-
- DimensionRoundingType _round_type;
+    size_t left = {0};   /**< Padding across the width dimension on the left, in elements. */
+    size_t right = {0};  /**< Padding across the width dimension on the right, in elements. */
+    size_t top = {0};    /**< Padding across the height dimension on the top, in elements. */
+    size_t bottom = {0}; /**< Padding across the height dimension on the bottom, in elements. */
+    size_t front = {0};  /**< Padding across the depth dimension on the front, in elements. */
+    size_t back = {0};   /**< Padding across the depth dimension on the back, in elements. */
};
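
A brief sketch of the constructors added above (hypothetical values):

    arm_compute::Padding2D pad2d(1, 1, 2, 2);              // left, right, top, bottom
    arm_compute::Padding3D same_xyz(1, 2, 3);              // left = right = 1, top = bottom = 2, front = back = 3
    arm_compute::Padding3D explicit_pad(1, 0, 2, 0, 3, 0); // fully asymmetric: left, right, top, bottom, front, back
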
/** PriorBox layer info */
@@ -789,9 +672,15 @@ public:
* @param[in] img_size (Optional) Image size.
* @param[in] steps (Optional) Step values.
*/
- PriorBoxLayerInfo(const std::vector<float> &min_sizes, const std::vector<float> &variances, float offset, bool flip = true, bool clip = false,
- const std::vector<float> &max_sizes = {}, const std::vector<float> &aspect_ratios = {},
- const Coordinates2D &img_size = Coordinates2D{ 0, 0 }, const std::array<float, 2> &steps = { { 0.f, 0.f } })
+ PriorBoxLayerInfo(const std::vector<float> &min_sizes,
+ const std::vector<float> &variances,
+ float offset,
+ bool flip = true,
+ bool clip = false,
+ const std::vector<float> &max_sizes = {},
+ const std::vector<float> &aspect_ratios = {},
+ const Coordinates2D &img_size = Coordinates2D{0, 0},
+ const std::array<float, 2> &steps = {{0.f, 0.f}})
: _min_sizes(min_sizes),
_variances(variances),
_offset(offset),
@@ -803,22 +692,22 @@ public:
_steps(steps)
{
_aspect_ratios.push_back(1.);
- for(unsigned int i = 0; i < aspect_ratios.size(); ++i)
+ for (unsigned int i = 0; i < aspect_ratios.size(); ++i)
{
float ar = aspect_ratios[i];
bool already_exist = false;
- for(auto ar_new : _aspect_ratios)
+ for (auto ar_new : _aspect_ratios)
{
- if(fabs(ar - ar_new) < 1e-6)
+ if (fabs(ar - ar_new) < 1e-6)
{
already_exist = true;
break;
}
}
- if(!already_exist)
+ if (!already_exist)
{
_aspect_ratios.push_back(ar);
- if(flip)
+ if (flip)
{
_aspect_ratios.push_back(1.f / ar);
}
@@ -872,14 +761,14 @@ public:
}
private:
- std::vector<float> _min_sizes;
- std::vector<float> _variances;
- float _offset;
- bool _flip;
- bool _clip;
- std::vector<float> _max_sizes;
- std::vector<float> _aspect_ratios;
- Coordinates2D _img_size;
+ std::vector<float> _min_sizes;
+ std::vector<float> _variances;
+ float _offset;
+ bool _flip;
+ bool _clip;
+ std::vector<float> _max_sizes;
+ std::vector<float> _aspect_ratios;
+ Coordinates2D _img_size;
std::array<float, 2> _steps;
};
@@ -930,8 +819,16 @@ public:
 * @param[in] variance_encoded_in_target (Optional) If true, variance is encoded in target. Otherwise we need to adjust the predicted offset accordingly. Defaults to false.
* @param[in] eta (Optional) Eta.
*/
- DetectionOutputLayerInfo(int num_classes, bool share_location, DetectionOutputLayerCodeType code_type, int keep_top_k, float nms_threshold, int top_k = -1, int background_label_id = -1,
- float confidence_threshold = std::numeric_limits<float>::lowest(), bool variance_encoded_in_target = false, float eta = 1)
+ DetectionOutputLayerInfo(int num_classes,
+ bool share_location,
+ DetectionOutputLayerCodeType code_type,
+ int keep_top_k,
+ float nms_threshold,
+ int top_k = -1,
+ int background_label_id = -1,
+ float confidence_threshold = std::numeric_limits<float>::lowest(),
+ bool variance_encoded_in_target = false,
+ float eta = 1)
: _num_classes(num_classes),
_share_location(share_location),
_code_type(code_type),
@@ -1045,8 +942,15 @@ public:
 * @param[in] detection_per_class (Optional) Number of detections per class. Used in the Regular Non-Max-Suppression. Defaults to 100.
* @param[in] dequantize_scores (Optional) If the scores need to be dequantized. Defaults to true.
*/
- DetectionPostProcessLayerInfo(unsigned int max_detections, unsigned int max_classes_per_detection, float nms_score_threshold, float iou_threshold, unsigned int num_classes,
- std::array<float, 4> scales_values, bool use_regular_nms = false, unsigned int detection_per_class = 100, bool dequantize_scores = true)
+ DetectionPostProcessLayerInfo(unsigned int max_detections,
+ unsigned int max_classes_per_detection,
+ float nms_score_threshold,
+ float iou_threshold,
+ unsigned int num_classes,
+ std::array<float, 4> scales_values,
+ bool use_regular_nms = false,
+ unsigned int detection_per_class = 100,
+ bool dequantize_scores = true)
: _max_detections(max_detections),
_max_classes_per_detection(max_classes_per_detection),
_nms_score_threshold(nms_score_threshold),
@@ -1124,15 +1028,15 @@ public:
}
private:
- unsigned int _max_detections;
- unsigned int _max_classes_per_detection;
- float _nms_score_threshold;
- float _iou_threshold;
- unsigned int _num_classes;
+ unsigned int _max_detections;
+ unsigned int _max_classes_per_detection;
+ float _nms_score_threshold;
+ float _iou_threshold;
+ unsigned int _num_classes;
std::array<float, 4> _scales_values;
- bool _use_regular_nms;
- unsigned int _detection_per_class;
- bool _dequantize_scores;
+ bool _use_regular_nms;
+ unsigned int _detection_per_class;
+ bool _dequantize_scores;
};
/** Pooling Layer Information struct*/
@@ -1146,7 +1050,9 @@ struct PoolingLayerInfo
pad_stride_info(PadStrideInfo()),
exclude_padding(false),
is_global_pooling(false),
- fp_mixed_precision(false)
+ fp_mixed_precision(false),
+ use_inf_as_limit(true),
+ use_kernel_indices(false)
{
}
/** Constructor
@@ -1159,20 +1065,26 @@ struct PoolingLayerInfo
* True will exclude padding while false will not (Used in AVG/L2 pooling to determine the pooling area).
* Defaults to false;
* @param[in] fp_mixed_precision (Optional) Use wider accumulators (32 bit instead of 16 for FP16) to improve accuracy.
+ * @param[in] use_inf_as_limit (Optional) Use infinity to represent the limits of the data type's range, instead of using the "lowest" property of the data type.
+ * @param[in] use_kernel_indices (Optional) Use kernel indices instead of source indices while computing the indices tensor.
*/
explicit PoolingLayerInfo(PoolingType pool_type,
unsigned int pool_size,
DataLayout data_layout,
PadStrideInfo pad_stride_info = PadStrideInfo(),
bool exclude_padding = false,
- bool fp_mixed_precision = false)
+ bool fp_mixed_precision = false,
+ bool use_inf_as_limit = true,
+ bool use_kernel_indices = false)
: pool_type(pool_type),
pool_size(Size2D(pool_size, pool_size)),
data_layout(data_layout),
pad_stride_info(pad_stride_info),
exclude_padding(exclude_padding),
is_global_pooling(false),
- fp_mixed_precision(fp_mixed_precision)
+ fp_mixed_precision(fp_mixed_precision),
+ use_inf_as_limit(use_inf_as_limit),
+ use_kernel_indices(use_kernel_indices)
{
}
@@ -1186,20 +1098,26 @@ struct PoolingLayerInfo
* True will exclude padding while false will not (Used in AVG/L2 pooling to determine the pooling area).
* Defaults to false;
* @param[in] fp_mixed_precision (Optional) Use wider accumulators (32 bit instead of 16 for FP16) to improve accuracy.
+ * @param[in] use_inf_as_limit (Optional) Use infinity to represent the limits of the data type's range, instead of using the "lowest" property of the data type.
+ * @param[in] use_kernel_indices (Optional) Use kernel indices instead of source indices while computing the indices tensor.
*/
explicit PoolingLayerInfo(PoolingType pool_type,
Size2D pool_size,
DataLayout data_layout,
PadStrideInfo pad_stride_info = PadStrideInfo(),
bool exclude_padding = false,
- bool fp_mixed_precision = false)
+ bool fp_mixed_precision = false,
+ bool use_inf_as_limit = true,
+ bool use_kernel_indices = false)
: pool_type(pool_type),
pool_size(pool_size),
data_layout(data_layout),
pad_stride_info(pad_stride_info),
exclude_padding(exclude_padding),
is_global_pooling(false),
- fp_mixed_precision(fp_mixed_precision)
+ fp_mixed_precision(fp_mixed_precision),
+ use_inf_as_limit(use_inf_as_limit),
+ use_kernel_indices(use_kernel_indices)
{
}
@@ -1217,7 +1135,9 @@ struct PoolingLayerInfo
pad_stride_info(PadStrideInfo(1, 1, 0, 0)),
exclude_padding(false),
is_global_pooling(true),
- fp_mixed_precision(false)
+ fp_mixed_precision(false),
+ use_inf_as_limit(true),
+ use_kernel_indices(false)
{
}
@@ -1228,6 +1148,111 @@ struct PoolingLayerInfo
bool exclude_padding;
bool is_global_pooling;
bool fp_mixed_precision;
+ bool use_inf_as_limit;
+ bool use_kernel_indices;
+};
+
+/** Pooling 3D Layer Information struct */
+struct Pooling3dLayerInfo
+{
+ /** Default Constructor */
+ Pooling3dLayerInfo() noexcept
+ : pool_type(PoolingType::MAX),
+ pool_size(Size3D()),
+ stride(Size3D()),
+ padding(Padding3D()),
+ exclude_padding(false),
+ is_global_pooling(false),
+ fp_mixed_precision(false),
+ round_type(DimensionRoundingType::FLOOR)
+ {
+ }
+ /** Constructor
+ *
+ * @param[in] pool_type Pooling type @ref PoolingType.
+ * @param[in] pool_size Pooling size, in elements, across x, y and z.
+ * @param[in] stride (Optional) stride information @ref Size3D
+ * @param[in] padding (Optional) padding information @ref Padding3D
+ * @param[in] exclude_padding (Optional) Strategy when accounting padding in calculations.
+ * True will exclude padding while false will not (Used in AVG/L2 pooling to determine the pooling area).
+ * Defaults to false;
+ * @param[in] fp_mixed_precision (Optional) Use wider accumulators (32 bit instead of 16 for FP16) to improve accuracy.
+ * @param[in] round_type (Optional) Dimensions rounding. Defaults to @ref DimensionRoundingType::FLOOR
+ */
+ explicit Pooling3dLayerInfo(PoolingType pool_type,
+ unsigned int pool_size,
+ Size3D stride = Size3D(1U, 1U, 1U),
+ Padding3D padding = Padding3D(),
+ bool exclude_padding = false,
+ bool fp_mixed_precision = false,
+ DimensionRoundingType round_type = DimensionRoundingType::FLOOR)
+ : pool_type(pool_type),
+ pool_size(Size3D(pool_size, pool_size, pool_size)),
+ stride(stride),
+ padding(padding),
+ exclude_padding(exclude_padding),
+ is_global_pooling(false),
+ fp_mixed_precision(fp_mixed_precision),
+ round_type(round_type)
+ {
+ }
+
+ /** Constructor
+ *
+ * @param[in] pool_type Pooling type @ref PoolingType.
+ * @param[in] pool_size Pooling size, in elements, across x, y and z.
+ * @param[in] stride (Optional) stride information @ref Size3D
+ * @param[in] padding (Optional) padding information @ref Padding3D
+ * @param[in] exclude_padding (Optional) Strategy when accounting padding in calculations.
+ * True will exclude padding while false will not (Used in AVG/L2 pooling to determine the pooling area).
+ * Defaults to false;
+ * @param[in] fp_mixed_precision (Optional) Use wider accumulators (32 bit instead of 16 for FP16) to improve accuracy.
+ * @param[in] round_type (Optional) Dimensions rounding. Defaults to @ref DimensionRoundingType::FLOOR
+ */
+ explicit Pooling3dLayerInfo(PoolingType pool_type,
+ Size3D pool_size,
+ Size3D stride = Size3D(1U, 1U, 1U),
+ Padding3D padding = Padding3D(),
+ bool exclude_padding = false,
+ bool fp_mixed_precision = false,
+ DimensionRoundingType round_type = DimensionRoundingType::FLOOR)
+ : pool_type(pool_type),
+ pool_size(pool_size),
+ stride(stride),
+ padding(padding),
+ exclude_padding(exclude_padding),
+ is_global_pooling(false),
+ fp_mixed_precision(fp_mixed_precision),
+ round_type(round_type)
+ {
+ }
+
+ /** Constructor
+ *
+ * @note This constructor is used for global pooling
+ *
+ * @param[in] pool_type Pooling type @ref PoolingType.
+ */
+ explicit Pooling3dLayerInfo(PoolingType pool_type)
+ : pool_type(pool_type),
+ pool_size(Size3D()),
+ stride(Size3D(1U, 1U, 1U)),
+ padding(Padding3D(0, 0, 0)),
+ exclude_padding(false),
+ is_global_pooling(true),
+ fp_mixed_precision(false),
+ round_type(DimensionRoundingType::FLOOR)
+ {
+ }
+
+ PoolingType pool_type;
+ Size3D pool_size;
+ Size3D stride;
+ Padding3D padding;
+ bool exclude_padding;
+ bool is_global_pooling;
+ bool fp_mixed_precision;
+ DimensionRoundingType round_type;
};
/** ROI Pooling Layer Information class */
@@ -1241,8 +1266,14 @@ public:
* @param[in] spatial_scale Spatial scale to be applied to the ROI coordinates and dimensions.
* @param[in] sampling_ratio Number of samples to include in each pooling region (if set to zero, a ceil(roi_dims/pooling_dims))
*/
- ROIPoolingLayerInfo(unsigned int pooled_width, unsigned int pooled_height, float spatial_scale, unsigned int sampling_ratio = 0)
- : _pooled_width(pooled_width), _pooled_height(pooled_height), _spatial_scale(spatial_scale), _sampling_ratio(sampling_ratio)
+ ROIPoolingLayerInfo(unsigned int pooled_width,
+ unsigned int pooled_height,
+ float spatial_scale,
+ unsigned int sampling_ratio = 0)
+ : _pooled_width(pooled_width),
+ _pooled_height(pooled_height),
+ _spatial_scale(spatial_scale),
+ _sampling_ratio(sampling_ratio)
{
}
/** Get the pooled width of the layer */
@@ -1289,10 +1320,24 @@ public:
 * @param[in] min_size (Optional) Size used to validate the anchors produced. Defaults to 16.
 * @param[in] values_per_roi (Optional) Values used to represent a ROI (Region Of Interest). Defaults to 4.
*/
- GenerateProposalsInfo(float im_width, float im_height, float im_scale, float spatial_scale = 1.0, int pre_nms_topN = 6000, int post_nms_topN = 300, float nms_thres = 0.7, float min_size = 16.0,
+ GenerateProposalsInfo(float im_width,
+ float im_height,
+ float im_scale,
+ float spatial_scale = 1.0,
+ int pre_nms_topN = 6000,
+ int post_nms_topN = 300,
+ float nms_thres = 0.7,
+ float min_size = 16.0,
size_t values_per_roi = 4)
- : _im_height(im_height), _im_width(im_width), _im_scale(im_scale), _spatial_scale(spatial_scale), _pre_nms_topN(pre_nms_topN), _post_nms_topN(post_nms_topN), _nms_thres(nms_thres),
- _min_size(min_size), _values_per_roi(values_per_roi)
+ : _im_height(im_height),
+ _im_width(im_width),
+ _im_scale(im_scale),
+ _spatial_scale(spatial_scale),
+ _pre_nms_topN(pre_nms_topN),
+ _post_nms_topN(post_nms_topN),
+ _nms_thres(nms_thres),
+ _min_size(min_size),
+ _values_per_roi(values_per_roi)
{
}
@@ -1418,11 +1463,20 @@ public:
 * @param[in] correct_transform_coords (Optional) Correct bounding box transform coordinates. Defaults to false
 * @param[in] bbox_xform_clip (Optional) Minimum bounding box width and height after bounding box transformation in log-space. Defaults to log(1000/16)
*/
- BoundingBoxTransformInfo(float img_width, float img_height, float scale, bool apply_scale = false, const std::array<float, 4> weights = { { 1.f, 1.f, 1.f, 1.f } }, bool correct_transform_coords =
- false,
- float bbox_xform_clip =
- 4.135166556742356f)
- : _img_width(img_width), _img_height(img_height), _scale(scale), _apply_scale(apply_scale), _correct_transform_coords(correct_transform_coords), _weights(weights), _bbox_xform_clip(bbox_xform_clip)
+ BoundingBoxTransformInfo(float img_width,
+ float img_height,
+ float scale,
+ bool apply_scale = false,
+ const std::array<float, 4> weights = {{1.f, 1.f, 1.f, 1.f}},
+ bool correct_transform_coords = false,
+ float bbox_xform_clip = 4.135166556742356f)
+ : _img_width(img_width),
+ _img_height(img_height),
+ _scale(scale),
+ _apply_scale(apply_scale),
+ _correct_transform_coords(correct_transform_coords),
+ _weights(weights),
+ _bbox_xform_clip(bbox_xform_clip)
{
}
@@ -1462,114 +1516,13 @@ public:
}
private:
- float _img_width;
- float _img_height;
- float _scale;
- bool _apply_scale;
- bool _correct_transform_coords;
+ float _img_width;
+ float _img_height;
+ float _scale;
+ bool _apply_scale;
+ bool _correct_transform_coords;
std::array<float, 4> _weights;
- float _bbox_xform_clip;
-};
-
-/** Activation Layer Information class */
-class ActivationLayerInfo
-{
-public:
- /** Available activation functions */
- enum class ActivationFunction
- {
- LOGISTIC, /**< Logistic ( \f$ f(x) = \frac{1}{1 + e^{-x}} \f$ ) */
- TANH, /**< Hyperbolic tangent ( \f$ f(x) = a \cdot tanh(b \cdot x) \f$ ) */
- RELU, /**< Rectifier ( \f$ f(x) = max(0,x) \f$ ) */
- BOUNDED_RELU, /**< Upper Bounded Rectifier ( \f$ f(x) = min(a, max(0,x)) \f$ ) */
- LU_BOUNDED_RELU, /**< Lower and Upper Bounded Rectifier ( \f$ f(x) = min(a, max(b,x)) \f$ ) */
- LEAKY_RELU, /**< Leaky Rectifier ( \f$ f(x) = \begin{cases} \alpha x & \quad \text{if } x \text{ < 0}\\ x & \quad \text{if } x \geq \text{ 0 } \end{cases} \f$ ) */
- SOFT_RELU, /**< Soft Rectifier ( \f$ f(x)= log(1+e^x) \f$ ) */
- ELU, /**< Exponential Linear Unit ( \f$ f(x) = \begin{cases} \alpha (exp(x) - 1) & \quad \text{if } x \text{ < 0}\\ x & \quad \text{if } x \geq \text{ 0 } \end{cases} \f$ ) */
- ABS, /**< Absolute ( \f$ f(x)= |x| \f$ ) */
- SQUARE, /**< Square ( \f$ f(x)= x^2 \f$ )*/
- SQRT, /**< Square root ( \f$ f(x) = \sqrt{x} \f$ )*/
- LINEAR, /**< Linear ( \f$ f(x)= ax + b \f$ ) */
- IDENTITY, /**< Identity ( \f$ f(x)= x \f$ ) */
- HARD_SWISH /**< Hard-swish ( \f$ f(x) = (x * relu6(x+3))/6 \f$ ) */
- };
-
- ActivationLayerInfo() = default;
- /** Default Constructor
- *
- * @param[in] f The activation function to use.
- * @param[in] a (Optional) The alpha parameter used by some activation functions
- * (@ref ActivationFunction::BOUNDED_RELU, @ref ActivationFunction::LU_BOUNDED_RELU, @ref ActivationFunction::LINEAR, @ref ActivationFunction::TANH).
- * @param[in] b (Optional) The beta parameter used by some activation functions (@ref ActivationFunction::LINEAR, @ref ActivationFunction::LU_BOUNDED_RELU, @ref ActivationFunction::TANH).
- */
- ActivationLayerInfo(ActivationFunction f, float a = 0.0f, float b = 0.0f)
- : _act(f), _a(a), _b(b), _enabled(true)
- {
- }
- /** Get the type of activation function */
- ActivationFunction activation() const
- {
- return _act;
- }
- /** Get the alpha value */
- float a() const
- {
- return _a;
- }
- /** Get the beta value */
- float b() const
- {
- return _b;
- }
- /** Check if initialised */
- bool enabled() const
- {
- return _enabled;
- }
-
-private:
- ActivationFunction _act = { ActivationLayerInfo::ActivationFunction::IDENTITY };
- float _a = {};
- float _b = {};
- bool _enabled = { false };
-};
-
-/** Fully connected layer info */
-struct FullyConnectedLayerInfo
-{
- /* Fused-activation parameters */
- ActivationLayerInfo activation_info{}; /**< Fused activation to apply after the matrix multiplication. */
- /* Information about weights */
- DataLayout weights_trained_layout{ DataLayout::NCHW }; /**< Layout that the weights have been trained with. */
- bool transpose_weights{ true }; /**< Transpose weights if true. */
- bool are_weights_reshaped{ false }; /**< Reshape the weights tensor if false. */
- bool retain_internal_weights{ false }; /**< Retain internal reshaped weights. */
- bool constant_weights{ true }; /**< If false, weights can vary between runs. */
- /* Other parameters */
- bool fp_mixed_precision{ false }; /**< Use wider accumulators (32 bit instead of 16 for FP16) to improve accuracy. */
-
- /** Sets the weights trained data layout
- *
- * @param[in] layout Data layout that the weights were trained with
- *
- * @return Updated object
- */
- FullyConnectedLayerInfo &set_weights_trained_layout(DataLayout layout)
- {
- weights_trained_layout = layout;
- return *this;
- }
- /** Sets the transpose weights flag
- *
- * @param[in] should_transpose_weights Boolean flag indicating if weights should be transposed
- *
- * @return Updated object
- */
- FullyConnectedLayerInfo &set_transpose_weights(bool should_transpose_weights)
- {
- transpose_weights = should_transpose_weights;
- return *this;
- }
+ float _bbox_xform_clip;
};
/** Normalization Layer Information class */
@@ -1586,7 +1539,12 @@ public:
* @param[in] is_scaled (Optional) Boolean that specifies if alpha will be scaled by the normalization size or not.
 * Should be false to follow [Krizhevsky 2012].
*/
- NormalizationLayerInfo(NormType type, uint32_t norm_size = 5, float alpha = 0.0001f, float beta = 0.5f, float kappa = 1.f, bool is_scaled = true)
+ NormalizationLayerInfo(NormType type,
+ uint32_t norm_size = 5,
+ float alpha = 0.0001f,
+ float beta = 0.5f,
+ float kappa = 1.f,
+ bool is_scaled = true)
: _type(type), _norm_size(norm_size), _alpha(alpha), _beta(beta), _kappa(kappa), _is_scaled(is_scaled)
{
}
@@ -1690,13 +1648,36 @@ private:
int32_t _shrink_axis_mask;
};
+// OHWIo<interleave_by>i<block_by>
+inline int interleave_by(const WeightFormat wf)
+{
+ return (static_cast<int>(wf) >> 8) & 0xFFF;
+}
+inline int block_by(const WeightFormat wf)
+{
+ return (static_cast<int>(wf) >> 20) & 0xF;
+}
+inline bool is_fixed_format(const WeightFormat &wf)
+{
+ return wf != WeightFormat::UNSPECIFIED && wf != WeightFormat::ANY;
+}
+inline bool is_fixed_format_fast_math(const WeightFormat &wf)
+{
+ return (static_cast<int>(wf) >> 4) & 0x1;
+}
+
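As the shifts above encode, a fixed-format enum value packs interleave_by into bits 8-19, block_by into bits 20-23, and a fast-math flag into bit 4. A sketch of decoding one such value (illustrative only; assumes a valid fixed-format input):

    #include <iostream>

    void describe_weight_format(arm_compute::WeightFormat wf)
    {
        using namespace arm_compute;
        if (!is_fixed_format(wf))
        {
            std::cout << "unspecified/any\n";
            return;
        }
        // e.g. an OHWIo8i4 layout reports interleave_by == 8 and block_by == 4
        std::cout << "OHWIo" << interleave_by(wf) << "i" << block_by(wf)
                  << (is_fixed_format_fast_math(wf) ? " (fast-math)" : "") << "\n";
    }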
/** Convolution Layer Weights Information class. This class stores the necessary information to compute convolution layer when the weights are already reshaped */
class WeightsInfo
{
public:
/** Default constructor */
WeightsInfo()
- : _are_reshaped(false), _kernel_width(0), _kernel_height(0), _num_kernels(0), _retain_internal_weights(false)
+ : _are_reshaped(false),
+ _kernel_width(0),
+ _kernel_height(0),
+ _num_kernels(0),
+ _retain_internal_weights(false),
+ _weight_format(arm_compute::WeightFormat::UNSPECIFIED)
{
}
/** Constructor
@@ -1706,9 +1687,20 @@ public:
* @param[in] kernel_height Kernel height.
* @param[in] num_kernels Number of convolution kernels.
* @param[in] retain_internal_weights (Optional) True if internal reshaped weights must be retained. Used for reconfiguration purposes. Default is false.
+     * @param[in] weight_format          (Optional) arm_compute::WeightFormat enumeration requested by the user. Default is arm_compute::WeightFormat::UNSPECIFIED.
*/
- WeightsInfo(bool are_reshaped, unsigned int kernel_width, unsigned int kernel_height, unsigned int num_kernels, bool retain_internal_weights = false)
- : _are_reshaped(are_reshaped), _kernel_width(kernel_width), _kernel_height(kernel_height), _num_kernels(num_kernels), _retain_internal_weights(retain_internal_weights)
+ WeightsInfo(bool are_reshaped,
+ unsigned int kernel_width,
+ unsigned int kernel_height,
+ unsigned int num_kernels,
+ bool retain_internal_weights = false,
+ arm_compute::WeightFormat weight_format = arm_compute::WeightFormat::UNSPECIFIED)
+ : _are_reshaped(are_reshaped),
+ _kernel_width(kernel_width),
+ _kernel_height(kernel_height),
+ _num_kernels(num_kernels),
+ _retain_internal_weights(retain_internal_weights),
+ _weight_format(weight_format)
{
}
/** Flag which specifies if the weights tensor has been reshaped.
@@ -1739,21 +1731,39 @@ public:
{
return _retain_internal_weights;
}
+ arm_compute::WeightFormat weight_format() const
+ {
+ return _weight_format;
+ }
+ void set_weight_format(arm_compute::WeightFormat weight_format)
+ {
+ _weight_format = weight_format;
+ }
+
+ unsigned int kernel_width() const
+ {
+ return _kernel_width;
+ }
+ unsigned int kernel_height() const
+ {
+ return _kernel_height;
+ }
private:
- bool _are_reshaped;
- unsigned int _kernel_width;
- unsigned int _kernel_height;
- unsigned int _num_kernels;
- bool _retain_internal_weights;
+ bool _are_reshaped;
+ unsigned int _kernel_width;
+ unsigned int _kernel_height;
+ unsigned int _num_kernels;
+ bool _retain_internal_weights;
+ arm_compute::WeightFormat _weight_format;
};
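A short sketch of the new weight-format plumbing in WeightsInfo (values illustrative):

    // Request that the backend pick any suitable fixed format for a 3x3 convolution with 64 kernels
    WeightsInfo winfo(false, 3U, 3U, 64U, false, arm_compute::WeightFormat::ANY);
    // Later, record the concrete format the backend selected (placeholder value here)
    winfo.set_weight_format(arm_compute::WeightFormat::ANY);
    const unsigned int kw = winfo.kernel_width(); // == 3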
/** GEMM reshape information class. This class stores the necessary information about matrix A and matrix B reshape.
*
- * The matrix A can only be reshaped through @ref opencl::kernels::ClGemmReshapeLhsMatrixKernel or @ref NEGEMMInterleave4x4Kernel
+ * The matrix A can only be reshaped through @ref opencl::kernels::ClGemmReshapeLhsMatrixKernel or @ref cpu::kernels::CpuGemmInterleave4x4Kernel
 * Note: Only @ref opencl::kernels::ClGemmReshapeLhsMatrixKernel optionally allows setting mult_interleave4x4_height, the multiplication factor for the height of the 4x4 interleaved block
*
- * The matrix B can only be reshaped through @ref opencl::kernels::ClGemmReshapeRhsMatrixKernel or @ref NEGEMMTranspose1xWKernel
+ * The matrix B can only be reshaped through @ref opencl::kernels::ClGemmReshapeRhsMatrixKernel or @ref cpu::kernels::CpuGemmTranspose1xWKernel
 * Note: Only @ref opencl::kernels::ClGemmReshapeRhsMatrixKernel optionally allows setting mult_transpose1xW_width, the multiplication factor for the width of the 1xW transposed block
*
*/
@@ -1762,7 +1772,14 @@ class GEMMReshapeInfo final
public:
/** Default constructor */
GEMMReshapeInfo()
- : _m(1), _n(1), _k(1), _mult_transpose1xW_width(1), _mult_interleave4x4_height(1), _depth_output_gemm3d(0), _reinterpret_input_as_3d(false), _broadcast_bias(false)
+ : _m(1),
+ _n(1),
+ _k(1),
+ _mult_transpose1xW_width(1),
+ _mult_interleave4x4_height(1),
+ _depth_output_gemm3d(0),
+ _reinterpret_input_as_3d(false),
+ _broadcast_bias(false)
{
}
/** Constructor
@@ -1778,9 +1795,22 @@ public:
* to perform 1x1 convolutions with the NHWC data layout)
* @param[in] broadcast_bias (Optional) Broadcast the shape of the bias tensor from a vector to a matrix.
*/
- GEMMReshapeInfo(int m, int n, int k, int mult_transpose1xW_width = 1, int mult_interleave4x4_height = 1, int depth_output_gemm3d = 0, bool reinterpret_input_as_3d = false, bool broadcast_bias = false)
- : _m(m), _n(n), _k(k), _mult_transpose1xW_width(mult_transpose1xW_width), _mult_interleave4x4_height(mult_interleave4x4_height), _depth_output_gemm3d(depth_output_gemm3d),
- _reinterpret_input_as_3d(reinterpret_input_as_3d), _broadcast_bias(broadcast_bias)
+ GEMMReshapeInfo(int m,
+ int n,
+ int k,
+ int mult_transpose1xW_width = 1,
+ int mult_interleave4x4_height = 1,
+ int depth_output_gemm3d = 0,
+ bool reinterpret_input_as_3d = false,
+ bool broadcast_bias = false)
+ : _m(m),
+ _n(n),
+ _k(k),
+ _mult_transpose1xW_width(mult_transpose1xW_width),
+ _mult_interleave4x4_height(mult_interleave4x4_height),
+ _depth_output_gemm3d(depth_output_gemm3d),
+ _reinterpret_input_as_3d(reinterpret_input_as_3d),
+ _broadcast_bias(broadcast_bias)
{
}
/** Number of matrix A rows
@@ -1862,44 +1892,6 @@ private:
bool _broadcast_bias;
};
-struct ConvolutionInfo
-{
- ConvolutionInfo() = default;
- ConvolutionInfo(const PadStrideInfo &pad_stride_info, unsigned int depth_multiplier, const ActivationLayerInfo &act_info, const Size2D &dilation)
- : pad_stride_info(pad_stride_info), depth_multiplier(depth_multiplier), act_info(act_info), dilation(dilation)
- {
- }
- PadStrideInfo pad_stride_info{}; /**< Convolution info (Pads, strides,...) */
- unsigned int depth_multiplier{ 1 }; /**< Multiplier to apply to input's depth to retrieve the output depth. Defaults to 1 */
- ActivationLayerInfo act_info{}; /**< Fused activation to apply after convolution. */
- Size2D dilation{ Size2D(1, 1) }; /**< Dilation, in elements, across x and y. Defaults to (1, 1). */
-};
-
-/** GEMMLowp output stage type */
-enum class GEMMLowpOutputStageType
-{
- NONE, /**< No quantization */
- QUANTIZE_DOWN, /**< Quantize using an integer multiplication */
- QUANTIZE_DOWN_FIXEDPOINT, /**< Quantize using a fixed point multiplication */
- QUANTIZE_DOWN_FLOAT /**< Quantize using a floating point multiplication */
-};
-
-/** GEMMLowp output stage info */
-struct GEMMLowpOutputStageInfo
-{
- GEMMLowpOutputStageType type{ GEMMLowpOutputStageType::NONE }; /**< GEMMLowp output stage type */
- int32_t gemmlowp_offset{ 0 }; /**< GEMMLowp output stage offset used for quantizing to QASYMM8 */
- int32_t gemmlowp_multiplier{ 0 }; /**< GEMMLowp output stage multiplier used for quantizing to QASYMM8 */
- int32_t gemmlowp_shift{ 0 }; /**< GEMMLowp output stage shift used for quantizing to uint8 */
- int32_t gemmlowp_min_bound{ std::numeric_limits<int32_t>::lowest() }; /**< GEMMLowp min value used to saturate down the output result before converting back to QASYMM8 */
- int32_t gemmlowp_max_bound{ std::numeric_limits<int32_t>::max() }; /**< GEMMLowp max value used to saturate down the output result before converting back to QASYMM8 */
- std::vector<int32_t> gemmlowp_multipliers{}; /**< GEMMLowp output stage multiplier used for quantizing to QASYMM8 */
- std::vector<int32_t> gemmlowp_shifts{}; /**< GEMMLowp output stage multiplier used for quantizing to QASYMM8 */
- float gemmlowp_real_multiplier{ 0 }; /**< GEMMLowp output stage real multiplier used for quantizing to QASYMM8 */
- bool is_quantized_per_channel{ false }; /**< GEMMLowp quantized per-channel flag */
- DataType output_data_type{ DataType::UNKNOWN }; /**< Output tensor data type to use if the output is not initialized */
-};
-
/** GEMM LHS (Left Hand Side) matrix information */
struct GEMMLHSMatrixInfo
{
@@ -1908,11 +1900,11 @@ struct GEMMLHSMatrixInfo
: m0(m), k0(k), v0(v), transpose(trans), interleave(inter)
{
}
- unsigned int m0{ 1 }; /**< Number of rows processed by the matrix multiplication */
- unsigned int k0{ 1 }; /**< Number of partial accumulations performed by the matrix multiplication */
- unsigned int v0{ 1 }; /**< Number of vertical blocks of size (m0xk0) stored on the same output row */
- bool transpose{ true }; /**< True if the (m0xk0) block has to be transposed before been stored */
- bool interleave{ true }; /**< True if the v0 (m0xk0) blocks have to be interleaved in the output row */
+ unsigned int m0{1}; /**< Number of rows processed by the matrix multiplication */
+ unsigned int k0{1}; /**< Number of partial accumulations performed by the matrix multiplication */
+ unsigned int v0{1}; /**< Number of vertical blocks of size (m0xk0) stored on the same output row */
+    bool transpose{true}; /**< True if the (m0xk0) block has to be transposed before being stored */
+ bool interleave{true}; /**< True if the v0 (m0xk0) blocks have to be interleaved in the output row */
};
/** GEMM RHS (Right Hand Side) matrix information */
@@ -1923,208 +1915,16 @@ struct GEMMRHSMatrixInfo
: n0(n), k0(k), h0(h), transpose(trans), interleave(inter), export_to_cl_image(export_to_cl_img)
{
}
- unsigned int n0{ 1 }; /**< Number of columns processed by the matrix multiplication */
- unsigned int k0{ 1 }; /**< Number of partial accumulations performed by the matrix multiplication */
- unsigned int h0{ 1 }; /**< Number of horizontal blocks of size (k0xn0) stored on the same output row */
- bool transpose{ true }; /**< True if the (k0xn0) block has to be transposed before been stored */
- bool interleave{ true }; /**< True if the h0 (k0xn0) blocks have to be interleaved in the output row */
- bool export_to_cl_image{ false }; /**< True if the reshaped rhs has to be exported to cl_image. n0 must be equal to 4 */
+ unsigned int n0{1}; /**< Number of columns processed by the matrix multiplication */
+ unsigned int k0{1}; /**< Number of partial accumulations performed by the matrix multiplication */
+ unsigned int h0{1}; /**< Number of horizontal blocks of size (k0xn0) stored on the same output row */
+    bool transpose{true}; /**< True if the (k0xn0) block has to be transposed before being stored */
+ bool interleave{true}; /**< True if the h0 (k0xn0) blocks have to be interleaved in the output row */
+ bool export_to_cl_image{
+ false}; /**< True if the reshaped rhs has to be exported to cl_image. n0 must be equal to 4 */
};
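An illustrative (untuned) configuration of the two descriptors above, following the constructor argument orderings shown:

    // LHS: 4x4 blocks, 2 vertical blocks per output row, transposed and interleaved
    GEMMLHSMatrixInfo lhs_info(4, 4, 2, true, true);
    // RHS: n0 == 4 so the reshaped matrix may be exported to a cl_image
    GEMMRHSMatrixInfo rhs_info(4, 4, 2, true, true, true);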
-/** GEMM information class. This class stores the necessary information to compute GEMM functions
- *
- * This object also contains the information about how matrix A and matrix B have been reshaped
- *
- */
-class GEMMInfo
-{
-public:
- /** Default constructor */
- GEMMInfo() noexcept
- : _is_a_reshaped(false),
- _is_b_reshaped(false),
- _reshape_b_only_on_first_run(true),
- _depth_output_gemm3d(0),
- _reinterpret_input_as_3d(false),
- _retain_internal_weights(false),
- _gemmlowp_output_stage(),
- _fp_mixed_precision(false),
- _broadcast_bias(false),
- _pretranpose_B(true),
- _activation_info(),
- _constant_weights(true)
- {
- }
- /** Constructor
- *
- * @param[in] is_a_reshaped True if the matrix A has been reshaped
- * @param[in] is_b_reshaped True if the matrix B has been reshaped
- * @param[in] reshape_b_only_on_first_run Reshape matrix B only for the first run
- * @param[in] depth_output_gemm3d (Optional) Depth (third dimension) of the output tensor to be used with the GEMM3D kernel
- * If 0 the output will not be reinterpreted as 3D. Default 0
- * @param[in] reinterpret_input_as_3d (Optional) Reinterpret the input as 3D tensor. (i.e. this flag should be set to true when GEMM is used
- * to perform 1x1 convolutions with the NHWC data layout)
- * @param[in] retain_internal_weights (Optional) Retain the weights tensor from previous run
- * @param[in] gemmlowp_output_stage (Optional) GEMMLowp Output stage info
- * @param[in] fp_mixed_precision (Optional) Use wider accumulators (32 bit instead of 16 for FP16) to improve accuracy.
- * @param[in] broadcast_bias (Optional) Broadcast the shape of the bias tensor from a vector to a matrix.
- * @param[in] activation_info (Optional) Activation to apply after the matrix multiplication
- * @param[in] constant_weights (Optional) Weights have constant values throughout multiple executions
- */
- GEMMInfo(bool is_a_reshaped, bool is_b_reshaped, bool reshape_b_only_on_first_run, int depth_output_gemm3d = 0, bool reinterpret_input_as_3d = false, bool retain_internal_weights = false,
- GEMMLowpOutputStageInfo gemmlowp_output_stage = GEMMLowpOutputStageInfo(), bool fp_mixed_precision = false, bool broadcast_bias = false,
- const ActivationLayerInfo &activation_info = ActivationLayerInfo(), bool constant_weights = true) noexcept
- : _is_a_reshaped(is_a_reshaped),
- _is_b_reshaped(is_b_reshaped),
- _reshape_b_only_on_first_run(reshape_b_only_on_first_run),
- _depth_output_gemm3d(depth_output_gemm3d),
- _reinterpret_input_as_3d(reinterpret_input_as_3d),
- _retain_internal_weights(retain_internal_weights),
- _gemmlowp_output_stage(gemmlowp_output_stage),
- _fp_mixed_precision(fp_mixed_precision),
- _broadcast_bias(broadcast_bias),
- _pretranpose_B(reshape_b_only_on_first_run),
- _activation_info(activation_info),
- _constant_weights(constant_weights)
- {
- }
- /** Flag which specifies if the matrix A has been reshaped
- *
- * @return True if the matrix A has been reshaped
- */
- bool is_a_reshaped() const
- {
- return _is_a_reshaped;
- };
- /** Flag which specifies if the matrix B has been reshaped
- *
- * @return True if the matrix B has been reshaped
- */
- bool is_b_reshaped() const
- {
- return _is_b_reshaped;
- };
- /** Flag which specifies if the reshape of matrix B should executed only for the first
- *
- * @note This flag could be set to TRUE when GEMM is used to accelerate convolution layer
- *
- * @return True if the reshaped of matrix B happens only for the first run
- */
- bool reshape_b_only_on_first_run() const
- {
- return _reshape_b_only_on_first_run;
- };
- /** Depth of the output when GEMM output is reinterpreted as 3D tensor
- *
- * @return the depth of the output tensor
- */
- int depth_output_gemm3d() const
- {
- return _depth_output_gemm3d;
- };
- /** Flag which specifies if the input tensor has to be reinterpreted as 3D
- *
- * @return True if the input tensor has to be reinterpreted as 3D tensor
- */
- bool reinterpret_input_as_3d() const
- {
- return _reinterpret_input_as_3d;
- };
- /** Flag which specifies if the weights tensor has to be retained from previous run
- *
- * @return True if the weights tensor has to be retained
- */
- bool retain_internal_weights() const
- {
- return _retain_internal_weights;
- };
- /** GEMMLowp output stage
- *
- * @return the GEMMLowp output stage info
- */
- GEMMLowpOutputStageInfo gemmlowp_output_stage() const
- {
- return _gemmlowp_output_stage;
- };
- /** Sets GEMMLowp output stage
- *
- * @param[in] output_stage Output stage to set
- */
- void set_gemmlowp_output_stage(GEMMLowpOutputStageInfo &output_stage)
- {
- _gemmlowp_output_stage = output_stage;
- };
- /** Flag which specifies if a wider accumulator should be used.
- *
- * @return True if a wider accumulator has to be used
- */
- bool fp_mixed_precision() const
- {
- return _fp_mixed_precision;
- };
- /** Flag which specifies whether to broadcast the shape of the bias tensor.
- *
- * @return True if the shape of the bias tensor is to be broadcasted.
- */
- bool broadcast_bias() const
- {
- return _broadcast_bias;
- };
- /** Flag which specifies whether b should be pre-transposed if supported.
- *
- * @return True if b should be pre-transposed else false.
- */
- bool pretranpose_B() const
- {
- return _pretranpose_B;
- };
- /** Set pre-transpose b flag
- *
- * @param[in] flag Flag to set
- */
- void set_pretranpose_B(bool flag)
- {
- _pretranpose_B = flag;
- }
- /** Activation layer to apply after the matrix multiplication
- *
- * @return ActivationLayerInfo object
- */
- ActivationLayerInfo activation_info() const
- {
- return _activation_info;
- }
- /** Set activation layer info
- *
- * @param[in] activation_info ActivationLayerInfo object to set
- */
- void set_activation_info(const ActivationLayerInfo &activation_info)
- {
- _activation_info = activation_info;
- }
- /** Flag which specifies if the values of the weights tensor are constant throughout multiple executions or not
- *
- * @return True if the weights tensor is constant
- */
- bool constant_weights() const
- {
- return _constant_weights;
- };
-
-private:
- bool _is_a_reshaped;
- bool _is_b_reshaped;
- bool _reshape_b_only_on_first_run;
- int _depth_output_gemm3d;
- bool _reinterpret_input_as_3d;
- bool _retain_internal_weights;
- GEMMLowpOutputStageInfo _gemmlowp_output_stage;
- bool _fp_mixed_precision;
- bool _broadcast_bias;
- bool _pretranpose_B;
- ActivationLayerInfo _activation_info;
- bool _constant_weights;
-};
+class ITensorInfo;
/** Winograd information */
struct WinogradInfo
@@ -2137,16 +1937,23 @@ struct WinogradInfo
* @param[in] conv_info Convolution info (Pads, strides)
* @param[in] data_layout Data layout to use for the output tensor once the convolution has been applied
*/
- WinogradInfo(Size2D output_tile_sz, Size2D kernel_sz, Size2D input_dims, PadStrideInfo conv_info, DataLayout data_layout)
- : output_tile_size(output_tile_sz), kernel_size(kernel_sz), input_dimensions(input_dims), convolution_info(conv_info), output_data_layout(data_layout)
- {
- }
-
- Size2D output_tile_size{}; /**< Width and height of the output tile */
- Size2D kernel_size{}; /**< Width and height of the kernel*/
- Size2D input_dimensions{}; /**< Width and height of the input tensor before the convolution is applied */
- PadStrideInfo convolution_info{}; /**< Convolution info (Pads, strides,...) */
- DataLayout output_data_layout{ DataLayout::NCHW }; /**< Data layout to use for the output tensor once the convolution has been applied (NCHW or NHWC) */
+ WinogradInfo(
+ Size2D output_tile_sz, Size2D kernel_sz, Size2D input_dims, PadStrideInfo conv_info, DataLayout data_layout)
+ : output_tile_size(output_tile_sz),
+ kernel_size(kernel_sz),
+ input_dimensions(input_dims),
+ convolution_info(conv_info),
+ output_data_layout(data_layout)
+ {
+ }
+
+ Size2D output_tile_size{}; /**< Width and height of the output tile */
+ Size2D kernel_size{}; /**< Width and height of the kernel*/
+ Size2D input_dimensions{}; /**< Width and height of the input tensor before the convolution is applied */
+ PadStrideInfo convolution_info{}; /**< Convolution info (Pads, strides,...) */
+ DataLayout output_data_layout{
+ DataLayout::
+ NCHW}; /**< Data layout to use for the output tensor once the convolution has been applied (NCHW or NHWC) */
};
/** IO formatting information class*/
@@ -2205,5 +2012,8 @@ struct IOFormatInfo
/** Align columns */
bool align_columns;
};
+
+/** Type for holding information related to cropping */
+using CropInfo = Padding2D;
} // namespace arm_compute
-#endif /* ARM_COMPUTE_TYPES_H */
+#endif // ACL_ARM_COMPUTE_CORE_TYPES_H
diff --git a/arm_compute/core/Utils.h b/arm_compute/core/Utils.h
index af9a777a0c..a2146522f7 100644
--- a/arm_compute/core/Utils.h
+++ b/arm_compute/core/Utils.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2021 Arm Limited.
+ * Copyright (c) 2016-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -26,67 +26,29 @@
#include "arm_compute/core/Error.h"
#include "arm_compute/core/PixelValue.h"
-#include "arm_compute/core/Rounding.h"
#include "arm_compute/core/Types.h"
-#include "arm_compute/core/Version.h"
-#include <algorithm>
-#include <cstdint>
-#include <cstdlib>
-#include <iomanip>
+#include <cmath>
#include <numeric>
#include <sstream>
#include <string>
#include <type_traits>
#include <unordered_map>
#include <utility>
-#include <vector>
+
+/* Convenience / backwards compatibility includes */
+#include "arm_compute/core/utils/ActivationFunctionUtils.h"
+#include "arm_compute/core/utils/DataLayoutUtils.h"
+#include "arm_compute/core/utils/DataTypeUtils.h"
+#include "arm_compute/core/utils/FormatUtils.h"
+#include "arm_compute/core/utils/InterpolationPolicyUtils.h"
+#include "arm_compute/core/utils/StringUtils.h"
namespace arm_compute
{
class ITensor;
class ITensorInfo;
-
-/** Calculate the rounded up quotient of val / m.
- *
- * @param[in] val Value to divide and round up.
- * @param[in] m Value to divide by.
- *
- * @return the result.
- */
-template <typename S, typename T>
-constexpr auto DIV_CEIL(S val, T m) -> decltype((val + m - 1) / m)
-{
- return (val + m - 1) / m;
-}
-
-/** Computes the smallest number larger or equal to value that is a multiple of divisor.
- *
- * @param[in] value Lower bound value
- * @param[in] divisor Value to compute multiple of.
- *
- * @return the result.
- */
-template <typename S, typename T>
-inline auto ceil_to_multiple(S value, T divisor) -> decltype(((value + divisor - 1) / divisor) * divisor)
-{
- ARM_COMPUTE_ERROR_ON(value < 0 || divisor <= 0);
- return DIV_CEIL(value, divisor) * divisor;
-}
-
-/** Computes the largest number smaller or equal to value that is a multiple of divisor.
- *
- * @param[in] value Upper bound value
- * @param[in] divisor Value to compute multiple of.
- *
- * @return the result.
- */
-template <typename S, typename T>
-inline auto floor_to_multiple(S value, T divisor) -> decltype((value / divisor) * divisor)
-{
- ARM_COMPUTE_ERROR_ON(value < 0 || divisor <= 0);
- return (value / divisor) * divisor;
-}
+class ActivationLayerInfo;
/** Load an entire file in memory
*
@@ -97,627 +59,6 @@ inline auto floor_to_multiple(S value, T divisor) -> decltype((value / divisor)
*/
std::string read_file(const std::string &filename, bool binary);
-/** The size in bytes of the data type
- *
- * @param[in] data_type Input data type
- *
- * @return The size in bytes of the data type
- */
-inline size_t data_size_from_type(DataType data_type)
-{
- switch(data_type)
- {
- case DataType::U8:
- case DataType::S8:
- case DataType::QSYMM8:
- case DataType::QASYMM8:
- case DataType::QASYMM8_SIGNED:
- case DataType::QSYMM8_PER_CHANNEL:
- return 1;
- case DataType::U16:
- case DataType::S16:
- case DataType::QSYMM16:
- case DataType::QASYMM16:
- case DataType::BFLOAT16:
- case DataType::F16:
- return 2;
- case DataType::F32:
- case DataType::U32:
- case DataType::S32:
- return 4;
- case DataType::F64:
- case DataType::U64:
- case DataType::S64:
- return 8;
- case DataType::SIZET:
- return sizeof(size_t);
- default:
- ARM_COMPUTE_ERROR("Invalid data type");
- return 0;
- }
-}
-
-/** The size in bytes of the pixel format
- *
- * @param[in] format Input format
- *
- * @return The size in bytes of the pixel format
- */
-inline size_t pixel_size_from_format(Format format)
-{
- switch(format)
- {
- case Format::U8:
- return 1;
- case Format::U16:
- case Format::S16:
- case Format::BFLOAT16:
- case Format::F16:
- case Format::UV88:
- case Format::YUYV422:
- case Format::UYVY422:
- return 2;
- case Format::RGB888:
- return 3;
- case Format::RGBA8888:
- return 4;
- case Format::U32:
- case Format::S32:
- case Format::F32:
- return 4;
- //Doesn't make sense for planar formats:
- case Format::NV12:
- case Format::NV21:
- case Format::IYUV:
- case Format::YUV444:
- default:
- ARM_COMPUTE_ERROR("Undefined pixel size for given format");
- return 0;
- }
-}
-
-/** The size in bytes of the data type
- *
- * @param[in] dt Input data type
- *
- * @return The size in bytes of the data type
- */
-inline size_t element_size_from_data_type(DataType dt)
-{
- switch(dt)
- {
- case DataType::S8:
- case DataType::U8:
- case DataType::QSYMM8:
- case DataType::QASYMM8:
- case DataType::QASYMM8_SIGNED:
- case DataType::QSYMM8_PER_CHANNEL:
- return 1;
- case DataType::U16:
- case DataType::S16:
- case DataType::QSYMM16:
- case DataType::QASYMM16:
- case DataType::BFLOAT16:
- case DataType::F16:
- return 2;
- case DataType::U32:
- case DataType::S32:
- case DataType::F32:
- return 4;
- default:
- ARM_COMPUTE_ERROR("Undefined element size for given data type");
- return 0;
- }
-}
-
-/** Return the data type used by a given single-planar pixel format
- *
- * @param[in] format Input format
- *
- * @return The size in bytes of the pixel format
- */
-inline DataType data_type_from_format(Format format)
-{
- switch(format)
- {
- case Format::U8:
- case Format::UV88:
- case Format::RGB888:
- case Format::RGBA8888:
- case Format::YUYV422:
- case Format::UYVY422:
- return DataType::U8;
- case Format::U16:
- return DataType::U16;
- case Format::S16:
- return DataType::S16;
- case Format::U32:
- return DataType::U32;
- case Format::S32:
- return DataType::S32;
- case Format::BFLOAT16:
- return DataType::BFLOAT16;
- case Format::F16:
- return DataType::F16;
- case Format::F32:
- return DataType::F32;
- //Doesn't make sense for planar formats:
- case Format::NV12:
- case Format::NV21:
- case Format::IYUV:
- case Format::YUV444:
- default:
- ARM_COMPUTE_ERROR("Not supported data_type for given format");
- return DataType::UNKNOWN;
- }
-}
-
-/** Return the plane index of a given channel given an input format.
- *
- * @param[in] format Input format
- * @param[in] channel Input channel
- *
- * @return The plane index of the specific channel of the specific format
- */
-inline int plane_idx_from_channel(Format format, Channel channel)
-{
- switch(format)
- {
- // Single planar formats have a single plane
- case Format::U8:
- case Format::U16:
- case Format::S16:
- case Format::U32:
- case Format::S32:
- case Format::BFLOAT16:
- case Format::F16:
- case Format::F32:
- case Format::UV88:
- case Format::RGB888:
- case Format::RGBA8888:
- case Format::YUYV422:
- case Format::UYVY422:
- return 0;
- // Multi planar formats
- case Format::NV12:
- case Format::NV21:
- {
- // Channel U and V share the same plane of format UV88
- switch(channel)
- {
- case Channel::Y:
- return 0;
- case Channel::U:
- case Channel::V:
- return 1;
- default:
- ARM_COMPUTE_ERROR("Not supported channel");
- return 0;
- }
- }
- case Format::IYUV:
- case Format::YUV444:
- {
- switch(channel)
- {
- case Channel::Y:
- return 0;
- case Channel::U:
- return 1;
- case Channel::V:
- return 2;
- default:
- ARM_COMPUTE_ERROR("Not supported channel");
- return 0;
- }
- }
- default:
- ARM_COMPUTE_ERROR("Not supported format");
- return 0;
- }
-}
-
-/** Return the channel index of a given channel given an input format.
- *
- * @param[in] format Input format
- * @param[in] channel Input channel
- *
- * @return The channel index of the specific channel of the specific format
- */
-inline int channel_idx_from_format(Format format, Channel channel)
-{
- switch(format)
- {
- case Format::RGB888:
- {
- switch(channel)
- {
- case Channel::R:
- return 0;
- case Channel::G:
- return 1;
- case Channel::B:
- return 2;
- default:
- ARM_COMPUTE_ERROR("Not supported channel");
- return 0;
- }
- }
- case Format::RGBA8888:
- {
- switch(channel)
- {
- case Channel::R:
- return 0;
- case Channel::G:
- return 1;
- case Channel::B:
- return 2;
- case Channel::A:
- return 3;
- default:
- ARM_COMPUTE_ERROR("Not supported channel");
- return 0;
- }
- }
- case Format::YUYV422:
- {
- switch(channel)
- {
- case Channel::Y:
- return 0;
- case Channel::U:
- return 1;
- case Channel::V:
- return 3;
- default:
- ARM_COMPUTE_ERROR("Not supported channel");
- return 0;
- }
- }
- case Format::UYVY422:
- {
- switch(channel)
- {
- case Channel::Y:
- return 1;
- case Channel::U:
- return 0;
- case Channel::V:
- return 2;
- default:
- ARM_COMPUTE_ERROR("Not supported channel");
- return 0;
- }
- }
- case Format::NV12:
- {
- switch(channel)
- {
- case Channel::Y:
- return 0;
- case Channel::U:
- return 0;
- case Channel::V:
- return 1;
- default:
- ARM_COMPUTE_ERROR("Not supported channel");
- return 0;
- }
- }
- case Format::NV21:
- {
- switch(channel)
- {
- case Channel::Y:
- return 0;
- case Channel::U:
- return 1;
- case Channel::V:
- return 0;
- default:
- ARM_COMPUTE_ERROR("Not supported channel");
- return 0;
- }
- }
- case Format::YUV444:
- case Format::IYUV:
- {
- switch(channel)
- {
- case Channel::Y:
- return 0;
- case Channel::U:
- return 0;
- case Channel::V:
- return 0;
- default:
- ARM_COMPUTE_ERROR("Not supported channel");
- return 0;
- }
- }
- default:
- ARM_COMPUTE_ERROR("Not supported format");
- return 0;
- }
-}
-
-/** Return the number of planes for a given format
- *
- * @param[in] format Input format
- *
- * @return The number of planes for a given image format.
- */
-inline size_t num_planes_from_format(Format format)
-{
- switch(format)
- {
- case Format::U8:
- case Format::S16:
- case Format::U16:
- case Format::S32:
- case Format::U32:
- case Format::BFLOAT16:
- case Format::F16:
- case Format::F32:
- case Format::RGB888:
- case Format::RGBA8888:
- case Format::YUYV422:
- case Format::UYVY422:
- return 1;
- case Format::NV12:
- case Format::NV21:
- return 2;
- case Format::IYUV:
- case Format::YUV444:
- return 3;
- default:
- ARM_COMPUTE_ERROR("Not supported format");
- return 0;
- }
-}
-
-/** Return the number of channels for a given single-planar pixel format
- *
- * @param[in] format Input format
- *
- * @return The number of channels for a given image format.
- */
-inline size_t num_channels_from_format(Format format)
-{
- switch(format)
- {
- case Format::U8:
- case Format::U16:
- case Format::S16:
- case Format::U32:
- case Format::S32:
- case Format::BFLOAT16:
- case Format::F16:
- case Format::F32:
- return 1;
- // Because the U and V channels are subsampled
- // these formats appear like having only 2 channels:
- case Format::YUYV422:
- case Format::UYVY422:
- return 2;
- case Format::UV88:
- return 2;
- case Format::RGB888:
- return 3;
- case Format::RGBA8888:
- return 4;
- //Doesn't make sense for planar formats:
- case Format::NV12:
- case Format::NV21:
- case Format::IYUV:
- case Format::YUV444:
- default:
- return 0;
- }
-}
-
-/** Return the promoted data type of a given data type.
- *
- * @note If promoted data type is not supported an error will be thrown
- *
- * @param[in] dt Data type to get the promoted type of.
- *
- * @return Promoted data type
- */
-inline DataType get_promoted_data_type(DataType dt)
-{
- switch(dt)
- {
- case DataType::U8:
- return DataType::U16;
- case DataType::S8:
- return DataType::S16;
- case DataType::U16:
- return DataType::U32;
- case DataType::S16:
- return DataType::S32;
- case DataType::QSYMM8:
- case DataType::QASYMM8:
- case DataType::QASYMM8_SIGNED:
- case DataType::QSYMM8_PER_CHANNEL:
- case DataType::QSYMM16:
- case DataType::QASYMM16:
- case DataType::BFLOAT16:
- case DataType::F16:
- case DataType::U32:
- case DataType::S32:
- case DataType::F32:
- ARM_COMPUTE_ERROR("Unsupported data type promotions!");
- default:
- ARM_COMPUTE_ERROR("Undefined data type!");
- }
- return DataType::UNKNOWN;
-}
-
-/** Compute the mininum and maximum values a data type can take
- *
- * @param[in] dt Data type to get the min/max bounds of
- *
- * @return A tuple (min,max) with the minimum and maximum values respectively wrapped in PixelValue.
- */
-inline std::tuple<PixelValue, PixelValue> get_min_max(DataType dt)
-{
- PixelValue min{};
- PixelValue max{};
- switch(dt)
- {
- case DataType::U8:
- case DataType::QASYMM8:
- {
- min = PixelValue(static_cast<int32_t>(std::numeric_limits<uint8_t>::lowest()));
- max = PixelValue(static_cast<int32_t>(std::numeric_limits<uint8_t>::max()));
- break;
- }
- case DataType::S8:
- case DataType::QSYMM8:
- case DataType::QASYMM8_SIGNED:
- case DataType::QSYMM8_PER_CHANNEL:
- {
- min = PixelValue(static_cast<int32_t>(std::numeric_limits<int8_t>::lowest()));
- max = PixelValue(static_cast<int32_t>(std::numeric_limits<int8_t>::max()));
- break;
- }
- case DataType::U16:
- case DataType::QASYMM16:
- {
- min = PixelValue(static_cast<int32_t>(std::numeric_limits<uint16_t>::lowest()));
- max = PixelValue(static_cast<int32_t>(std::numeric_limits<uint16_t>::max()));
- break;
- }
- case DataType::S16:
- case DataType::QSYMM16:
- {
- min = PixelValue(static_cast<int32_t>(std::numeric_limits<int16_t>::lowest()));
- max = PixelValue(static_cast<int32_t>(std::numeric_limits<int16_t>::max()));
- break;
- }
- case DataType::U32:
- {
- min = PixelValue(std::numeric_limits<uint32_t>::lowest());
- max = PixelValue(std::numeric_limits<uint32_t>::max());
- break;
- }
- case DataType::S32:
- {
- min = PixelValue(std::numeric_limits<int32_t>::lowest());
- max = PixelValue(std::numeric_limits<int32_t>::max());
- break;
- }
- case DataType::BFLOAT16:
- {
- min = PixelValue(bfloat16::lowest());
- max = PixelValue(bfloat16::max());
- break;
- }
- case DataType::F16:
- {
- min = PixelValue(std::numeric_limits<half>::lowest());
- max = PixelValue(std::numeric_limits<half>::max());
- break;
- }
- case DataType::F32:
- {
- min = PixelValue(std::numeric_limits<float>::lowest());
- max = PixelValue(std::numeric_limits<float>::max());
- break;
- }
- default:
- ARM_COMPUTE_ERROR("Undefined data type!");
- }
- return std::make_tuple(min, max);
-}
-
-/** Return true if the given format has horizontal subsampling.
- *
- * @param[in] format Format to determine subsampling.
- *
- * @return True if the format can be subsampled horizontaly.
- */
-inline bool has_format_horizontal_subsampling(Format format)
-{
- return (format == Format::YUYV422 || format == Format::UYVY422 || format == Format::NV12 || format == Format::NV21 || format == Format::IYUV || format == Format::UV88) ? true : false;
-}
-
-/** Return true if the given format has vertical subsampling.
- *
- * @param[in] format Format to determine subsampling.
- *
- * @return True if the format can be subsampled verticaly.
- */
-inline bool has_format_vertical_subsampling(Format format)
-{
- return (format == Format::NV12 || format == Format::NV21 || format == Format::IYUV || format == Format::UV88) ? true : false;
-}
-
-/** Adjust tensor shape size if width or height are odd for a given multi-planar format. No modification is done for other formats.
- *
- * @note Adding here a few links discussing the issue of odd size and sharing the same solution:
- * <a href="https://android.googlesource.com/platform/frameworks/base/+/refs/heads/master/graphics/java/android/graphics/YuvImage.java">Android Source</a>
- * <a href="https://groups.google.com/a/webmproject.org/forum/#!topic/webm-discuss/LaCKpqiDTXM">WebM</a>
- * <a href="https://bugs.chromium.org/p/libyuv/issues/detail?id=198&amp;can=1&amp;q=odd%20width">libYUV</a>
- * <a href="https://sourceforge.net/p/raw-yuvplayer/bugs/1/">YUVPlayer</a> *
- *
- * @param[in, out] shape Tensor shape of 2D size
- * @param[in] format Format of the tensor
- *
- * @return The adjusted tensor shape.
- */
-inline TensorShape adjust_odd_shape(const TensorShape &shape, Format format)
-{
- TensorShape output{ shape };
-
- // Force width to be even for formats which require subsampling of the U and V channels
- if(has_format_horizontal_subsampling(format))
- {
- output.set(0, (output.x() + 1) & ~1U);
- }
-
- // Force height to be even for formats which require subsampling of the U and V channels
- if(has_format_vertical_subsampling(format))
- {
- output.set(1, (output.y() + 1) & ~1U);
- }
-
- return output;
-}
-
-/** Calculate subsampled shape for a given format and channel
- *
- * @param[in] shape Shape of the tensor to calculate the extracted channel.
- * @param[in] format Format of the tensor.
- * @param[in] channel Channel to create tensor shape to be extracted.
- *
- * @return The subsampled tensor shape.
- */
-inline TensorShape calculate_subsampled_shape(const TensorShape &shape, Format format, Channel channel = Channel::UNKNOWN)
-{
- TensorShape output{ shape };
-
- // Subsample shape only for U or V channel
- if(Channel::U == channel || Channel::V == channel || Channel::UNKNOWN == channel)
- {
- // Subsample width for the tensor shape when channel is U or V
- if(has_format_horizontal_subsampling(format))
- {
- output.set(0, output.x() / 2U);
- }
-
- // Subsample height for the tensor shape when channel is U or V
- if(has_format_vertical_subsampling(format))
- {
- output.set(1, output.y() / 2U);
- }
- }
-
- return output;
-}
-
/** Permutes the given dimensions according the permutation vector
*
* @param[in,out] dimensions Dimensions to be permuted.
@@ -728,7 +69,7 @@ template <typename T>
inline void permute_strides(Dimensions<T> &dimensions, const PermutationVector &perm)
{
const auto old_dim = utility::make_array<Dimensions<T>::num_max_dimensions>(dimensions.begin(), dimensions.end());
- for(unsigned int i = 0; i < perm.num_dimensions(); ++i)
+ for (unsigned int i = 0; i < perm.num_dimensions(); ++i)
{
T dimension_val = old_dim[i];
dimensions.set(perm[i], dimension_val);
@@ -746,7 +87,11 @@ inline void permute_strides(Dimensions<T> &dimensions, const PermutationVector &
*
* @return PadStrideInfo for SAME padding
*/
-PadStrideInfo calculate_same_pad(TensorShape input_shape, TensorShape weights_shape, PadStrideInfo conv_info, DataLayout data_layout = DataLayout::NCHW, const Size2D &dilation = Size2D(1u, 1u),
+PadStrideInfo calculate_same_pad(TensorShape input_shape,
+ TensorShape weights_shape,
+ PadStrideInfo conv_info,
+ DataLayout data_layout = DataLayout::NCHW,
+ const Size2D &dilation = Size2D(1u, 1u),
const DimensionRoundingType &rounding_type = DimensionRoundingType::FLOOR);
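A sketch of the SAME-padding computation, assuming NCHW shape ordering (width, height, channels[, batches]) and the usual rule that stride 1 splits a total pad of (kernel - 1) across both sides:

    // 224x224x3 input, 3x3 kernel (32 output channels), stride 1
    const PadStrideInfo same = calculate_same_pad(TensorShape(224U, 224U, 3U),
                                                  TensorShape(3U, 3U, 3U, 32U),
                                                  PadStrideInfo(1, 1, 0, 0));
    // same.pad_left() == same.pad_right() == 1 in this symmetric case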
/** Returns expected width and height of the deconvolution's output tensor.
@@ -759,8 +104,10 @@ PadStrideInfo calculate_same_pad(TensorShape input_shape, TensorShape weights_sh
*
* @return A pair with the new width in the first position and the new height in the second.
*/
-std::pair<unsigned int, unsigned int> deconvolution_output_dimensions(unsigned int in_width, unsigned int in_height,
- unsigned int kernel_width, unsigned int kernel_height,
+std::pair<unsigned int, unsigned int> deconvolution_output_dimensions(unsigned int in_width,
+ unsigned int in_height,
+ unsigned int kernel_width,
+ unsigned int kernel_height,
const PadStrideInfo &pad_stride_info);
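A worked example, assuming the usual transposed-convolution relation out = stride * (in - 1) + kernel - (pad_begin + pad_end):

    // 14x14 input, 4x4 kernel, stride 2, pad 1 on each side
    const auto out = deconvolution_output_dimensions(14U, 14U, 4U, 4U, PadStrideInfo(2, 2, 1, 1));
    // 2 * (14 - 1) + 4 - (1 + 1) = 28, so out == {28, 28}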
/** Returns expected width and height of output scaled tensor depending on dimensions rounding mode.
@@ -774,8 +121,10 @@ std::pair<unsigned int, unsigned int> deconvolution_output_dimensions(unsigned i
*
* @return A pair with the new width in the first position and the new height in the second.
*/
-std::pair<unsigned int, unsigned int> scaled_dimensions(int width, int height,
- int kernel_width, int kernel_height,
+std::pair<unsigned int, unsigned int> scaled_dimensions(int width,
+ int height,
+ int kernel_width,
+ int kernel_height,
const PadStrideInfo &pad_stride_info,
const Size2D &dilation = Size2D(1U, 1U));
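A worked example with FLOOR rounding, where out = floor((in + pad_begin + pad_end - kernel) / stride) + 1 for dilation (1, 1):

    // 224x224 input, 3x3 kernel, stride 2, pad 1: floor((224 + 2 - 3) / 2) + 1 = 112
    const auto scaled = scaled_dimensions(224, 224, 3, 3, PadStrideInfo(2, 2, 1, 1, DimensionRoundingType::FLOOR));
    // scaled == {112, 112}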
@@ -789,9 +138,29 @@ std::pair<unsigned int, unsigned int> scaled_dimensions(int width, int height,
*
* @return A pair with the new width in the first position and the new height in the second, returned values can be < 1
*/
-std::pair<int, int> scaled_dimensions_signed(int width, int height,
- int kernel_width, int kernel_height,
- const PadStrideInfo &pad_stride_info);
+std::pair<int, int> scaled_dimensions_signed(
+ int width, int height, int kernel_width, int kernel_height, const PadStrideInfo &pad_stride_info);
+
+/** Returns calculated width, height and depth of output scaled tensor depending on dimensions rounding mode.
+ *
+ * @param[in] width Width of input tensor
+ * @param[in] height Height of input tensor
+ * @param[in] depth Depth of input tensor
+ * @param[in] kernel_width Kernel width.
+ * @param[in] kernel_height Kernel height.
+ * @param[in] kernel_depth Kernel depth.
+ * @param[in] pool3d_info   Padding, stride and rounding information for 3D pooling
+ *
+ * @return A tuple with the new width in the first position, the new height in the second, and the new depth in the third.
+ * Returned values can be < 1
+ */
+std::tuple<int, int, int> scaled_3d_dimensions_signed(int width,
+ int height,
+ int depth,
+ int kernel_width,
+ int kernel_height,
+ int kernel_depth,
+ const Pooling3dLayerInfo &pool3d_info);
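The 3D variant applies the same per-axis formula, reading stride and padding from the @ref Pooling3dLayerInfo (values illustrative):

    // 16x16x8 input, 2x2x2 pool, stride 2, no padding, FLOOR rounding
    Pooling3dLayerInfo p3d(PoolingType::MAX, 2U, Size3D(2U, 2U, 2U));
    const auto whd = scaled_3d_dimensions_signed(16, 16, 8, 2, 2, 2, p3d);
    // (16 - 2)/2 + 1 = 8 for width and height, (8 - 2)/2 + 1 = 4 for depth: whd == {8, 8, 4}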
/** Check if the given reduction operation should be handled in a serial way.
*
@@ -820,15 +189,9 @@ QuantizationInfo get_softmax_output_quantization_info(DataType input_type, bool
*
* @return The pair with minimum and maximum values
*/
-std::pair<int32_t, int32_t> get_quantized_activation_min_max(ActivationLayerInfo act_info, DataType data_type, UniformQuantizationInfo oq_info);
-
-/** Convert a tensor format into a string.
- *
- * @param[in] format @ref Format to be translated to string.
- *
- * @return The string describing the format.
- */
-const std::string &string_from_format(Format format);
+std::pair<int32_t, int32_t> get_quantized_activation_min_max(const ActivationLayerInfo &act_info,
+ DataType data_type,
+ UniformQuantizationInfo oq_info);
/** Convert a channel identity into a string.
*
@@ -837,34 +200,7 @@ const std::string &string_from_format(Format format);
* @return The string describing the channel.
*/
const std::string &string_from_channel(Channel channel);
-/** Convert a data layout identity into a string.
- *
- * @param[in] dl @ref DataLayout to be translated to string.
- *
- * @return The string describing the data layout.
- */
-const std::string &string_from_data_layout(DataLayout dl);
-/** Convert a data type identity into a string.
- *
- * @param[in] dt @ref DataType to be translated to string.
- *
- * @return The string describing the data type.
- */
-const std::string &string_from_data_type(DataType dt);
-/** Translates a given activation function to a string.
- *
- * @param[in] act @ref ActivationLayerInfo::ActivationFunction to be translated to string.
- *
- * @return The string describing the activation function.
- */
-const std::string &string_from_activation_func(ActivationLayerInfo::ActivationFunction act);
-/** Translates a given interpolation policy to a string.
- *
- * @param[in] policy @ref InterpolationPolicy to be translated to string.
- *
- * @return The string describing the interpolation policy.
- */
-const std::string &string_from_interpolation_policy(InterpolationPolicy policy);
+
/** Translates a given border mode policy to a string.
*
* @param[in] border_mode @ref BorderMode to be translated to string.
@@ -886,6 +222,30 @@ const std::string &string_from_norm_type(NormType type);
* @return The string describing the pooling type.
*/
const std::string &string_from_pooling_type(PoolingType type);
+/** Check if the pool region is entirely outside the input tensor
+ *
+ * @param[in] info @ref PoolingLayerInfo to be checked.
+ *
+ * @return True if the pool region is entirely outside the input tensor, False otherwise.
+ */
+bool is_pool_region_entirely_outside_input(const PoolingLayerInfo &info);
+/** Check if the 3d pool region is entirely outside the input tensor
+ *
+ * @param[in] info @ref Pooling3dLayerInfo to be checked.
+ *
+ * @return True if the pool region is entirely outside the input tensor, False otherwise.
+ */
+bool is_pool_3d_region_entirely_outside_input(const Pooling3dLayerInfo &info);
+/** Check if the 3D padding is symmetric, i.e. the paddings on opposite sides are equal (left=right, top=bottom and front=back)
+ *
+ * @param[in] info @ref Padding3D input 3D padding object to check if it is symmetric
+ *
+ * @return True if padding is symmetric
+ */
+inline bool is_symmetric(const Padding3D &info)
+{
+ return ((info.left == info.right) && (info.top == info.bottom) && (info.front == info.back));
+}
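For instance, assuming the Padding3D(pad_x, pad_y, pad_z) constructor mirrors each value onto both sides of its axis, while the six-argument form sets each side independently:

    is_symmetric(Padding3D(1, 1, 0));          // true: left==right, top==bottom, front==back
    is_symmetric(Padding3D(1, 0, 2, 2, 0, 0)); // false: left (1) != right (0)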
/** Translates a given GEMMLowp output stage to a string.
*
* @param[in] output_stage @ref GEMMLowpOutputStageInfo to be translated to string.
@@ -901,13 +261,7 @@ const std::string &string_from_gemmlowp_output_stage(GEMMLowpOutputStageType out
* @return String representation of the PixelValue through the given data type.
*/
std::string string_from_pixel_value(const PixelValue &value, const DataType data_type);
-/** Convert a string to DataType
- *
- * @param[in] name The name of the data type
- *
- * @return DataType
- */
-DataType data_type_from_name(const std::string &name);
+
/** Stores padding information before configuring a kernel
*
* @param[in] infos list of tensor infos to store the padding info for
@@ -930,162 +284,6 @@ std::unordered_map<const ITensorInfo *, PaddingSize> get_padding_info(std::initi
*/
bool has_padding_changed(const std::unordered_map<const ITensorInfo *, PaddingSize> &padding_map);
-/** Input Stream operator for @ref DataType
- *
- * @param[in] stream Stream to parse
- * @param[out] data_type Output data type
- *
- * @return Updated stream
- */
-inline ::std::istream &operator>>(::std::istream &stream, DataType &data_type)
-{
- std::string value;
- stream >> value;
- data_type = data_type_from_name(value);
- return stream;
-}
-/** Lower a given string.
- *
- * @param[in] val Given string to lower.
- *
- * @return The lowered string
- */
-std::string lower_string(const std::string &val);
-
-/** Check if a given data type is of floating point type
- *
- * @param[in] dt Input data type.
- *
- * @return True if data type is of floating point type, else false.
- */
-inline bool is_data_type_float(DataType dt)
-{
- switch(dt)
- {
- case DataType::F16:
- case DataType::F32:
- return true;
- default:
- return false;
- }
-}
-
-/** Check if a given data type is of quantized type
- *
- * @note Quantized is considered a super-set of fixed-point and asymmetric data types.
- *
- * @param[in] dt Input data type.
- *
- * @return True if data type is of quantized type, else false.
- */
-inline bool is_data_type_quantized(DataType dt)
-{
- switch(dt)
- {
- case DataType::QSYMM8:
- case DataType::QASYMM8:
- case DataType::QASYMM8_SIGNED:
- case DataType::QSYMM8_PER_CHANNEL:
- case DataType::QSYMM16:
- case DataType::QASYMM16:
- return true;
- default:
- return false;
- }
-}
-
-/** Check if a given data type is of asymmetric quantized type
- *
- * @param[in] dt Input data type.
- *
- * @return True if data type is of asymmetric quantized type, else false.
- */
-inline bool is_data_type_quantized_asymmetric(DataType dt)
-{
- switch(dt)
- {
- case DataType::QASYMM8:
- case DataType::QASYMM8_SIGNED:
- case DataType::QASYMM16:
- return true;
- default:
- return false;
- }
-}
-
-/** Check if a given data type is of asymmetric quantized signed type
- *
- * @param[in] dt Input data type.
- *
- * @return True if data type is of asymmetric quantized signed type, else false.
- */
-inline bool is_data_type_quantized_asymmetric_signed(DataType dt)
-{
- switch(dt)
- {
- case DataType::QASYMM8_SIGNED:
- return true;
- default:
- return false;
- }
-}
-
-/** Check if a given data type is of symmetric quantized type
- *
- * @param[in] dt Input data type.
- *
- * @return True if data type is of symmetric quantized type, else false.
- */
-inline bool is_data_type_quantized_symmetric(DataType dt)
-{
- switch(dt)
- {
- case DataType::QSYMM8:
- case DataType::QSYMM8_PER_CHANNEL:
- case DataType::QSYMM16:
- return true;
- default:
- return false;
- }
-}
-
-/** Check if a given data type is of per channel type
- *
- * @param[in] dt Input data type.
- *
- * @return True if data type is of per channel type, else false.
- */
-inline bool is_data_type_quantized_per_channel(DataType dt)
-{
- switch(dt)
- {
- case DataType::QSYMM8_PER_CHANNEL:
- return true;
- default:
- return false;
- }
-}
-
-/** Create a string with the float in full precision.
- *
- * @param val Floating point value
- *
- * @return String with the floating point value.
- */
-inline std::string float_to_string_with_full_precision(float val)
-{
- std::stringstream ss;
- ss.precision(std::numeric_limits<float>::max_digits10);
- ss << val;
-
- if(val != static_cast<int>(val))
- {
- ss << "f";
- }
-
- return ss.str();
-}
-
/** Returns the number of elements required to go from start to end with the wanted step
*
* @param[in] start start value
@@ -1100,91 +298,6 @@ inline size_t num_of_elements_in_range(const float start, const float end, const
return size_t(std::ceil((end - start) / step));
}
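For example, covering [0, 10) with step 3 needs ceil((10 - 0) / 3) = 4 elements:

    num_of_elements_in_range(0.f, 10.f, 3.f); // 4: the elements 0, 3, 6, 9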
-/** Returns true if the value can be represented by the given data type
- *
- * @param[in] val value to be checked
- * @param[in] dt data type that is checked
- * @param[in] qinfo (Optional) quantization info if the data type is QASYMM8
- *
- * @return true if the data type can hold the value.
- */
-template <typename T>
-bool check_value_range(T val, DataType dt, QuantizationInfo qinfo = QuantizationInfo())
-{
- switch(dt)
- {
- case DataType::U8:
- {
- const auto val_u8 = static_cast<uint8_t>(val);
- return ((val_u8 == val) && val >= std::numeric_limits<uint8_t>::lowest() && val <= std::numeric_limits<uint8_t>::max());
- }
- case DataType::QASYMM8:
- {
- double min = static_cast<double>(dequantize_qasymm8(0, qinfo));
- double max = static_cast<double>(dequantize_qasymm8(std::numeric_limits<uint8_t>::max(), qinfo));
- return ((double)val >= min && (double)val <= max);
- }
- case DataType::S8:
- {
- const auto val_s8 = static_cast<int8_t>(val);
- return ((val_s8 == val) && val >= std::numeric_limits<int8_t>::lowest() && val <= std::numeric_limits<int8_t>::max());
- }
- case DataType::U16:
- {
- const auto val_u16 = static_cast<uint16_t>(val);
- return ((val_u16 == val) && val >= std::numeric_limits<uint16_t>::lowest() && val <= std::numeric_limits<uint16_t>::max());
- }
- case DataType::S16:
- {
- const auto val_s16 = static_cast<int16_t>(val);
- return ((val_s16 == val) && val >= std::numeric_limits<int16_t>::lowest() && val <= std::numeric_limits<int16_t>::max());
- }
- case DataType::U32:
- {
- const auto val_u32 = static_cast<uint32_t>(val);
- return ((val_u32 == val) && val >= std::numeric_limits<uint32_t>::lowest() && val <= std::numeric_limits<uint32_t>::max());
- }
- case DataType::S32:
- {
- const auto val_s32 = static_cast<int32_t>(val);
- return ((val_s32 == val) && val >= std::numeric_limits<int32_t>::lowest() && val <= std::numeric_limits<int32_t>::max());
- }
- case DataType::BFLOAT16:
- return (val >= bfloat16::lowest() && val <= bfloat16::max());
- case DataType::F16:
- return (val >= std::numeric_limits<half>::lowest() && val <= std::numeric_limits<half>::max());
- case DataType::F32:
- return (val >= std::numeric_limits<float>::lowest() && val <= std::numeric_limits<float>::max());
- default:
- ARM_COMPUTE_ERROR("Data type not supported");
- return false;
- }
-}
-
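For integer types the removed check relies on a round-trip cast: narrow the value, then compare it with the original. A standalone, simplified sketch of that idea (the hypothetical fits_in_int8 below is for illustration only; the real helper also checks numeric limits and handles quantized and floating-point types):

#include <cstdint>
#include <iostream>

template <typename T>
bool fits_in_int8(T val)
{
    // If the value survives narrowing to int8_t unchanged, it is representable.
    const auto val_s8 = static_cast<int8_t>(val);
    return val_s8 == val;
}

int main()
{
    std::cout << fits_in_int8(100) << std::endl; // 1: representable
    std::cout << fits_in_int8(300) << std::endl; // 0: does not survive the narrowing cast
    return 0;
}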
-/** Returns the adjusted vector size in case it is less than the input's first dimension, getting rounded down to its closest valid vector size
- *
- * @param[in] vec_size vector size to be adjusted
- * @param[in] dim0 size of the first dimension
- *
- * @return the number of element processed along the X axis per thread
- */
-inline unsigned int adjust_vec_size(unsigned int vec_size, size_t dim0)
-{
- ARM_COMPUTE_ERROR_ON(vec_size > 16);
-
- if((vec_size >= dim0) && (dim0 == 3))
- {
- return dim0;
- }
-
- while(vec_size > dim0)
- {
- vec_size >>= 1;
- }
-
- return vec_size;
-}
-
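The removed adjust_vec_size (relocated, per the diffstat, to arm_compute/core/utils/helpers/AdjustVecSize.h) halves the requested vector width until it fits dim0, with a special case that keeps a width of 3 when dim0 is exactly 3. A standalone re-implementation for illustration:

#include <cassert>
#include <cstddef>

unsigned int adjust_vec_size_sketch(unsigned int vec_size, std::size_t dim0)
{
    if (vec_size >= dim0 && dim0 == 3) // keep the natural width when dim0 == 3
    {
        return static_cast<unsigned int>(dim0);
    }
    while (vec_size > dim0) // otherwise halve until the vector fits
    {
        vec_size >>= 1;
    }
    return vec_size;
}

int main()
{
    assert(adjust_vec_size_sketch(16, 7) == 4);  // 16 -> 8 -> 4
    assert(adjust_vec_size_sketch(8, 3) == 3);   // dim0 == 3 special case
    assert(adjust_vec_size_sketch(4, 100) == 4); // already fits, unchanged
    return 0;
}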
#ifdef ARM_COMPUTE_ASSERTS_ENABLED
/** Print consecutive elements to an output stream.
*
@@ -1195,26 +308,27 @@ inline unsigned int adjust_vec_size(unsigned int vec_size, size_t dim0)
 * @param[in]  element_delim (Optional) Delimiter among the consecutive elements. Defaults to a space delimiter
*/
template <typename T>
-void print_consecutive_elements_impl(std::ostream &s, const T *ptr, unsigned int n, int stream_width = 0, const std::string &element_delim = " ")
+void print_consecutive_elements_impl(
+ std::ostream &s, const T *ptr, unsigned int n, int stream_width = 0, const std::string &element_delim = " ")
{
using print_type = typename std::conditional<std::is_floating_point<T>::value, T, int>::type;
std::ios stream_status(nullptr);
stream_status.copyfmt(s);
- for(unsigned int i = 0; i < n; ++i)
+ for (unsigned int i = 0; i < n; ++i)
{
// Set stream width as it is not a "sticky" stream manipulator
- if(stream_width != 0)
+ if (stream_width != 0)
{
s.width(stream_width);
}
- if(std::is_same<typename std::decay<T>::type, half>::value)
+ if (std::is_same<typename std::decay<T>::type, half>::value)
{
// We use T here instead of print_type because std::is_floating_point<half> returns false, which would make print_type int.
s << std::right << static_cast<T>(ptr[i]) << element_delim;
}
- else if(std::is_same<typename std::decay<T>::type, bfloat16>::value)
+ else if (std::is_same<typename std::decay<T>::type, bfloat16>::value)
{
// We use T here instead of print_type because std::is_floating_point<bfloat16> returns false, which would make print_type int.
s << std::right << float(ptr[i]) << element_delim;
@@ -1243,17 +357,17 @@ int max_consecutive_elements_display_width_impl(std::ostream &s, const T *ptr, u
using print_type = typename std::conditional<std::is_floating_point<T>::value, T, int>::type;
int max_width = -1;
- for(unsigned int i = 0; i < n; ++i)
+ for (unsigned int i = 0; i < n; ++i)
{
std::stringstream ss;
ss.copyfmt(s);
- if(std::is_same<typename std::decay<T>::type, half>::value)
+ if (std::is_same<typename std::decay<T>::type, half>::value)
{
// We use T here instead of print_type because std::is_floating_point<half> returns false, which would make print_type int.
ss << static_cast<T>(ptr[i]);
}
- else if(std::is_same<typename std::decay<T>::type, bfloat16>::value)
+ else if (std::is_same<typename std::decay<T>::type, bfloat16>::value)
{
// We use T here instead of print_type because std::is_floating_point<bfloat16> returns false, which would make print_type int.
ss << float(ptr[i]);
@@ -1277,7 +391,12 @@ int max_consecutive_elements_display_width_impl(std::ostream &s, const T *ptr, u
* @param[in] stream_width (Optional) Width of the stream. If set to 0 the element's width is used. Defaults to 0.
 * @param[in] element_delim (Optional) Delimiter among the consecutive elements. Defaults to a space delimiter
*/
-void print_consecutive_elements(std::ostream &s, DataType dt, const uint8_t *ptr, unsigned int n, int stream_width, const std::string &element_delim = " ");
+void print_consecutive_elements(std::ostream &s,
+ DataType dt,
+ const uint8_t *ptr,
+ unsigned int n,
+ int stream_width,
+ const std::string &element_delim = " ");
/** Identify the maximum width of n consecutive elements.
*
@@ -1290,5 +409,5 @@ void print_consecutive_elements(std::ostream &s, DataType dt, const uint8_t *ptr
*/
int max_consecutive_elements_display_width(std::ostream &s, DataType dt, const uint8_t *ptr, unsigned int n);
#endif /* ARM_COMPUTE_ASSERTS_ENABLED */
-}
+} // namespace arm_compute
#endif /*ARM_COMPUTE_UTILS_H */
diff --git a/arm_compute/core/Validate.h b/arm_compute/core/Validate.h
index e755cacae6..5550560aff 100644
--- a/arm_compute/core/Validate.h
+++ b/arm_compute/core/Validate.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2021 Arm Limited.
+ * Copyright (c) 2016-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -27,6 +27,10 @@
#include "arm_compute/core/Error.h"
#include "arm_compute/core/IKernel.h"
#include "arm_compute/core/ITensor.h"
+#include "arm_compute/core/QuantizationInfo.h"
+#include "arm_compute/core/utils/DataLayoutUtils.h"
+#include "arm_compute/core/utils/DataTypeUtils.h"
+#include "arm_compute/core/utils/FormatUtils.h"
#include "arm_compute/core/Window.h"
#include <algorithm>
@@ -46,9 +50,9 @@ namespace detail
template <typename T>
inline bool have_different_dimensions(const Dimensions<T> &dim1, const Dimensions<T> &dim2, unsigned int upper_dim)
{
- for(unsigned int i = upper_dim; i < arm_compute::Dimensions<T>::num_max_dimensions; ++i)
+ for (unsigned int i = upper_dim; i < arm_compute::Dimensions<T>::num_max_dimensions; ++i)
{
- if(dim1[i] != dim2[i])
+ if (dim1[i] != dim2[i])
{
return true;
}
@@ -76,7 +80,7 @@ public:
* @param[in] line Source code line. Used for error reporting.
*/
compare_dimension(const Dimensions<T> &dim, const char *function, const char *file, int line)
- : _dim{ dim }, _function{ function }, _file{ file }, _line{ line }
+ : _dim{dim}, _function{function}, _file{file}, _line{line}
{
}
@@ -107,7 +111,7 @@ inline arm_compute::Status for_each_error(F &&)
}
template <typename F, typename T, typename... Ts>
-inline arm_compute::Status for_each_error(F &&func, T &&arg, Ts &&... args)
+inline arm_compute::Status for_each_error(F &&func, T &&arg, Ts &&...args)
{
ARM_COMPUTE_RETURN_ON_ERROR(func(arg));
ARM_COMPUTE_RETURN_ON_ERROR(for_each_error(func, args...));
@@ -144,13 +148,11 @@ struct get_tensor_info_t<ITensorInfo *>
* @return Status
*/
template <typename... Ts>
-inline arm_compute::Status error_on_nullptr(const char *function, const char *file, const int line, Ts &&... pointers)
+inline arm_compute::Status error_on_nullptr(const char *function, const char *file, const int line, Ts &&...pointers)
{
- const std::array<const void *, sizeof...(Ts)> pointers_array{ { std::forward<Ts>(pointers)... } };
- bool has_nullptr = std::any_of(pointers_array.begin(), pointers_array.end(), [&](const void *ptr)
- {
- return (ptr == nullptr);
- });
+ const std::array<const void *, sizeof...(Ts)> pointers_array{{std::forward<Ts>(pointers)...}};
+ bool has_nullptr =
+ std::any_of(pointers_array.begin(), pointers_array.end(), [&](const void *ptr) { return (ptr == nullptr); });
ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG(has_nullptr, function, file, line, "Nullptr object!");
return arm_compute::Status{};
}
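error_on_nullptr folds any number of pointers into a single null check. In practice it is reached through wrapper macros; a minimal validate()-style sketch, assuming the usual ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR wrapper defined alongside the helper:

#include "arm_compute/core/Validate.h"

using namespace arm_compute;

Status validate_pointers(const ITensorInfo *src, const ITensorInfo *dst)
{
    // Returns an error Status carrying __func__/__FILE__/__LINE__ if any argument is null.
    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, dst);
    return Status{};
}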
@@ -174,8 +176,8 @@ inline arm_compute::Status error_on_nullptr(const char *function, const char *fi
*
* @return Status
*/
-arm_compute::Status error_on_mismatching_windows(const char *function, const char *file, const int line,
- const Window &full, const Window &win);
+arm_compute::Status error_on_mismatching_windows(
+ const char *function, const char *file, const int line, const Window &full, const Window &win);
#define ARM_COMPUTE_ERROR_ON_MISMATCHING_WINDOWS(f, w) \
ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_mismatching_windows(__func__, __FILE__, __LINE__, f, w))
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_WINDOWS(f, w) \
@@ -196,8 +198,8 @@ arm_compute::Status error_on_mismatching_windows(const char *function, const cha
*
* @return Status
*/
-arm_compute::Status error_on_invalid_subwindow(const char *function, const char *file, const int line,
- const Window &full, const Window &sub);
+arm_compute::Status error_on_invalid_subwindow(
+ const char *function, const char *file, const int line, const Window &full, const Window &sub);
#define ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(f, s) \
ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_invalid_subwindow(__func__, __FILE__, __LINE__, f, s))
#define ARM_COMPUTE_RETURN_ERROR_ON_INVALID_SUBWINDOW(f, s) \
@@ -216,12 +218,14 @@ arm_compute::Status error_on_invalid_subwindow(const char *function, const char
*
* @return Status
*/
-arm_compute::Status error_on_window_not_collapsable_at_dimension(const char *function, const char *file, const int line,
- const Window &full, const Window &window, const int dim);
+arm_compute::Status error_on_window_not_collapsable_at_dimension(
+ const char *function, const char *file, const int line, const Window &full, const Window &window, const int dim);
#define ARM_COMPUTE_ERROR_ON_WINDOW_NOT_COLLAPSABLE_AT_DIMENSION(f, w, d) \
- ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_window_not_collapsable_at_dimension(__func__, __FILE__, __LINE__, f, w, d))
+ ARM_COMPUTE_ERROR_THROW_ON( \
+ ::arm_compute::error_on_window_not_collapsable_at_dimension(__func__, __FILE__, __LINE__, f, w, d))
#define ARM_COMPUTE_RETURN_ERROR_ON_WINDOW_NOT_COLLAPSABLE_AT_DIMENSION(f, w, d) \
- ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_window_not_collapsable_at_dimension(__func__, __FILE__, __LINE__, f, w, d))
+ ARM_COMPUTE_RETURN_ON_ERROR( \
+ ::arm_compute::error_on_window_not_collapsable_at_dimension(__func__, __FILE__, __LINE__, f, w, d))
/** Return an error if the passed coordinates have too many dimensions.
*
@@ -235,8 +239,8 @@ arm_compute::Status error_on_window_not_collapsable_at_dimension(const char *fun
*
* @return Status
*/
-arm_compute::Status error_on_coordinates_dimensions_gte(const char *function, const char *file, const int line,
- const Coordinates &pos, unsigned int max_dim);
+arm_compute::Status error_on_coordinates_dimensions_gte(
+ const char *function, const char *file, const int line, const Coordinates &pos, unsigned int max_dim);
#define ARM_COMPUTE_ERROR_ON_COORDINATES_DIMENSIONS_GTE(p, md) \
ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_coordinates_dimensions_gte(__func__, __FILE__, __LINE__, p, md))
#define ARM_COMPUTE_RETURN_ERROR_ON_COORDINATES_DIMENSIONS_GTE(p, md) \
@@ -254,8 +258,8 @@ arm_compute::Status error_on_coordinates_dimensions_gte(const char *function, co
*
* @return Status
*/
-arm_compute::Status error_on_window_dimensions_gte(const char *function, const char *file, const int line,
- const Window &win, unsigned int max_dim);
+arm_compute::Status error_on_window_dimensions_gte(
+ const char *function, const char *file, const int line, const Window &win, unsigned int max_dim);
#define ARM_COMPUTE_ERROR_ON_WINDOW_DIMENSIONS_GTE(w, md) \
ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_window_dimensions_gte(__func__, __FILE__, __LINE__, w, md))
#define ARM_COMPUTE_RETURN_ERROR_ON_WINDOW_DIMENSIONS_GTE(w, md) \
@@ -273,16 +277,82 @@ arm_compute::Status error_on_window_dimensions_gte(const char *function, const c
* @return Status
*/
template <typename T, typename... Ts>
-arm_compute::Status error_on_mismatching_dimensions(const char *function, const char *file, int line,
- const Dimensions<T> &dim1, const Dimensions<T> &dim2, Ts &&... dims)
+arm_compute::Status error_on_mismatching_dimensions(const char *function,
+ const char *file,
+ int line,
+ const Dimensions<T> &dim1,
+ const Dimensions<T> &dim2,
+ Ts &&...dims)
{
- ARM_COMPUTE_RETURN_ON_ERROR(detail::for_each_error(detail::compare_dimension<T>(dim1, function, file, line), dim2, std::forward<Ts>(dims)...));
+ ARM_COMPUTE_RETURN_ON_ERROR(detail::for_each_error(detail::compare_dimension<T>(dim1, function, file, line), dim2,
+ std::forward<Ts>(dims)...));
return arm_compute::Status{};
}
#define ARM_COMPUTE_ERROR_ON_MISMATCHING_DIMENSIONS(...) \
- ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_mismatching_dimensions(__func__, __FILE__, __LINE__, __VA_ARGS__))
+ ARM_COMPUTE_ERROR_THROW_ON( \
+ ::arm_compute::error_on_mismatching_dimensions(__func__, __FILE__, __LINE__, __VA_ARGS__))
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(...) \
- ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_mismatching_dimensions(__func__, __FILE__, __LINE__, __VA_ARGS__))
+ ARM_COMPUTE_RETURN_ON_ERROR( \
+ ::arm_compute::error_on_mismatching_dimensions(__func__, __FILE__, __LINE__, __VA_ARGS__))
+
+/** Return true if the given format has horizontal subsampling.
+ *
+ * @param[in] format Format to determine subsampling.
+ *
+ * @return True if the format can be subsampled horizontally.
+ */
+inline bool has_format_horizontal_subsampling(Format format)
+{
+ return (format == Format::YUYV422 || format == Format::UYVY422 || format == Format::NV12 ||
+ format == Format::NV21 || format == Format::IYUV || format == Format::UV88);
+}
+
+/** Return true if the given format has vertical subsampling.
+ *
+ * @param[in] format Format to determine subsampling.
+ *
+ * @return True if the format can be subsampled vertically.
+ */
+inline bool has_format_vertical_subsampling(Format format)
+{
+ return (format == Format::NV12 || format == Format::NV21 || format == Format::IYUV || format == Format::UV88);
+}
+
+/** Adjust the tensor shape if its width or height is odd for a given multi-planar format. No modification is done for other formats.
+ *
+ * @note A few links discussing the odd-size issue and sharing the same solution:
+ * <a href="https://android.googlesource.com/platform/frameworks/base/+/refs/heads/master/graphics/java/android/graphics/YuvImage.java">Android Source</a>
+ * <a href="https://groups.google.com/a/webmproject.org/forum/#!topic/webm-discuss/LaCKpqiDTXM">WebM</a>
+ * <a href="https://bugs.chromium.org/p/libyuv/issues/detail?id=198&can=1&q=odd%20width">libYUV</a>
+ * <a href="https://sourceforge.net/p/raw-yuvplayer/bugs/1/">YUVPlayer</a>
+ *
+ * @param[in, out] shape Tensor shape of 2D size
+ * @param[in] format Format of the tensor
+ *
+ * @return The adjusted tensor shape.
+ */
+inline TensorShape adjust_odd_shape(const TensorShape &shape, Format format)
+{
+ TensorShape output{shape};
+
+ // Force width to be even for formats which require subsampling of the U and V channels
+ if (has_format_horizontal_subsampling(format))
+ {
+ output.set(0, (output.x() + 1) & ~1U);
+ }
+
+ // Force height to be even for formats which require subsampling of the U and V channels
+ if (has_format_vertical_subsampling(format))
+ {
+ output.set(1, (output.y() + 1) & ~1U);
+ }
+
+ return output;
+}
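The expression (x + 1) & ~1U rounds an odd value up to the next even one and leaves even values untouched, so for NV12 a 639x479 shape becomes 640x480. A standalone check of the bit trick:

#include <cassert>

int main()
{
    assert(((639U + 1) & ~1U) == 640U); // odd width is bumped up to even
    assert(((640U + 1) & ~1U) == 640U); // even width is left as-is
    assert(((479U + 1) & ~1U) == 480U); // the same rule applies to the height
    return 0;
}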
/** Return an error if the passed tensor objects are not even.
*
@@ -296,18 +366,20 @@ arm_compute::Status error_on_mismatching_dimensions(const char *function, const
* @return Status
*/
template <typename... Ts>
-arm_compute::Status error_on_tensors_not_even(const char *function, const char *file, int line,
- const Format &format, const ITensor *tensor1, Ts... tensors)
+arm_compute::Status error_on_tensors_not_even(
+ const char *function, const char *file, int line, const Format &format, const ITensor *tensor1, Ts... tensors)
{
ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor1 == nullptr, function, file, line);
ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_nullptr(function, file, line, std::forward<Ts>(tensors)...));
- const std::array < const ITensor *, 1 + sizeof...(Ts) > tensors_info_array{ { tensor1, std::forward<Ts>(tensors)... } };
- ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG(std::any_of(tensors_info_array.cbegin(), tensors_info_array.cend(), [&](const ITensor * tensor)
- {
- const TensorShape correct_shape = adjust_odd_shape(tensor->info()->tensor_shape(), format);
- return detail::have_different_dimensions(tensor->info()->tensor_shape(), correct_shape, 2);
- }),
- function, file, line, "Tensor shape has odd dimensions");
+ const std::array<const ITensor *, 1 + sizeof...(Ts)> tensors_info_array{{tensor1, std::forward<Ts>(tensors)...}};
+ ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG(
+ std::any_of(tensors_info_array.cbegin(), tensors_info_array.cend(),
+ [&](const ITensor *tensor)
+ {
+ const TensorShape correct_shape = adjust_odd_shape(tensor->info()->tensor_shape(), format);
+ return detail::have_different_dimensions(tensor->info()->tensor_shape(), correct_shape, 2);
+ }),
+ function, file, line, "Tensor shape has odd dimensions");
return arm_compute::Status{};
}
@@ -316,6 +388,38 @@ arm_compute::Status error_on_tensors_not_even(const char *function, const char *
#define ARM_COMPUTE_RETURN_ERROR_ON_TENSORS_NOT_EVEN(...) \
ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_tensors_not_even(__func__, __FILE__, __LINE__, __VA_ARGS__))
+/** Calculate subsampled shape for a given format and channel
+ *
+ * @param[in] shape Shape of the tensor from which the channel is extracted.
+ * @param[in] format Format of the tensor.
+ * @param[in] channel Channel for which the extracted tensor shape is calculated.
+ *
+ * @return The subsampled tensor shape.
+ */
+inline TensorShape
+calculate_subsampled_shape(const TensorShape &shape, Format format, Channel channel = Channel::UNKNOWN)
+{
+ TensorShape output{shape};
+
+ // Subsample the shape only for the U/V channels (or for the whole subsampled plane when the channel is unknown)
+ if (Channel::U == channel || Channel::V == channel || Channel::UNKNOWN == channel)
+ {
+ // Subsample width for the tensor shape when channel is U or V
+ if (has_format_horizontal_subsampling(format))
+ {
+ output.set(0, output.x() / 2U);
+ }
+
+ // Subsample height for the tensor shape when channel is U or V
+ if (has_format_vertical_subsampling(format))
+ {
+ output.set(1, output.y() / 2U);
+ }
+ }
+
+ return output;
+}
+
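For a format with both horizontal and vertical subsampling, the U and V channels end up at half resolution in each direction. A small sketch using the helper as defined above (the include path and construction details are assumptions, not taken from this patch):

#include "arm_compute/core/Validate.h"

#include <cassert>

using namespace arm_compute;

int main()
{
    // NV12 subsamples chroma by 2 both horizontally and vertically: 640x480 -> 320x240.
    const TensorShape uv = calculate_subsampled_shape(TensorShape(640U, 480U), Format::NV12, Channel::U);
    assert(uv.x() == 320 && uv.y() == 240);
    return 0;
}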
/** Return an error if the passed tensor objects are not sub-sampled.
*
* @param[in] function Function in which the error occurred.
@@ -329,25 +433,32 @@ arm_compute::Status error_on_tensors_not_even(const char *function, const char *
* @return Status
*/
template <typename... Ts>
-arm_compute::Status error_on_tensors_not_subsampled(const char *function, const char *file, int line,
- const Format &format, const TensorShape &shape, const ITensor *tensor1, Ts... tensors)
+arm_compute::Status error_on_tensors_not_subsampled(const char *function,
+ const char *file,
+ int line,
+ const Format &format,
+ const TensorShape &shape,
+ const ITensor *tensor1,
+ Ts... tensors)
{
ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor1 == nullptr, function, file, line);
ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_nullptr(function, file, line, std::forward<Ts>(tensors)...));
- const TensorShape sub2_shape = calculate_subsampled_shape(shape, format);
- const std::array < const ITensor *, 1 + sizeof...(Ts) > tensors_info_array{ { tensor1, std::forward<Ts>(tensors)... } };
- ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG(std::any_of(tensors_info_array.cbegin(), tensors_info_array.cend(), [&](const ITensor * tensor)
- {
- return detail::have_different_dimensions(tensor->info()->tensor_shape(), sub2_shape, 2);
- }),
- function, file, line, "Tensor shape has mismatch dimensions for sub-sampling");
+ const TensorShape sub2_shape = calculate_subsampled_shape(shape, format);
+ const std::array<const ITensor *, 1 + sizeof...(Ts)> tensors_info_array{{tensor1, std::forward<Ts>(tensors)...}};
+ ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG(
+ std::any_of(tensors_info_array.cbegin(), tensors_info_array.cend(),
+ [&](const ITensor *tensor)
+ { return detail::have_different_dimensions(tensor->info()->tensor_shape(), sub2_shape, 2); }),
+ function, file, line, "Tensor shape has mismatch dimensions for sub-sampling");
return arm_compute::Status{};
}
#define ARM_COMPUTE_ERROR_ON_TENSORS_NOT_SUBSAMPLED(...) \
- ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_tensors_not_subsampled(__func__, __FILE__, __LINE__, __VA_ARGS__))
+ ARM_COMPUTE_ERROR_THROW_ON( \
+ ::arm_compute::error_on_tensors_not_subsampled(__func__, __FILE__, __LINE__, __VA_ARGS__))
#define ARM_COMPUTE_RETURN_ERROR_ON_TENSORS_NOT_SUBSAMPLED(...) \
- ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_tensors_not_subsampled(__func__, __FILE__, __LINE__, __VA_ARGS__))
+ ARM_COMPUTE_RETURN_ON_ERROR( \
+ ::arm_compute::error_on_tensors_not_subsampled(__func__, __FILE__, __LINE__, __VA_ARGS__))
/** Return an error if the passed two tensor infos have different shapes from the given dimension
*
@@ -361,10 +472,15 @@ arm_compute::Status error_on_tensors_not_subsampled(const char *function, const
* @return Status
*/
template <typename... Ts>
-inline arm_compute::Status error_on_mismatching_shapes(const char *function, const char *file, const int line,
- const ITensorInfo *tensor_info_1, const ITensorInfo *tensor_info_2, Ts... tensor_infos)
+inline arm_compute::Status error_on_mismatching_shapes(const char *function,
+ const char *file,
+ const int line,
+ const ITensorInfo *tensor_info_1,
+ const ITensorInfo *tensor_info_2,
+ Ts... tensor_infos)
{
- return error_on_mismatching_shapes(function, file, line, 0U, tensor_info_1, tensor_info_2, std::forward<Ts>(tensor_infos)...);
+ return error_on_mismatching_shapes(function, file, line, 0U, tensor_info_1, tensor_info_2,
+ std::forward<Ts>(tensor_infos)...);
}
/** Return an error if the passed two tensors have different shapes from the given dimension
*
@@ -378,8 +494,12 @@ inline arm_compute::Status error_on_mismatching_shapes(const char *function, con
* @return Status
*/
template <typename... Ts>
-inline arm_compute::Status error_on_mismatching_shapes(const char *function, const char *file, const int line,
- const ITensor *tensor_1, const ITensor *tensor_2, Ts... tensors)
+inline arm_compute::Status error_on_mismatching_shapes(const char *function,
+ const char *file,
+ const int line,
+ const ITensor *tensor_1,
+ const ITensor *tensor_2,
+ Ts... tensors)
{
return error_on_mismatching_shapes(function, file, line, 0U, tensor_1, tensor_2, std::forward<Ts>(tensors)...);
}
@@ -396,19 +516,28 @@ inline arm_compute::Status error_on_mismatching_shapes(const char *function, con
* @return Status
*/
template <typename... Ts>
-inline arm_compute::Status error_on_mismatching_shapes(const char *function, const char *file, const int line,
- unsigned int upper_dim, const ITensorInfo *tensor_info_1, const ITensorInfo *tensor_info_2, Ts... tensor_infos)
+inline arm_compute::Status error_on_mismatching_shapes(const char *function,
+ const char *file,
+ const int line,
+ unsigned int upper_dim,
+ const ITensorInfo *tensor_info_1,
+ const ITensorInfo *tensor_info_2,
+ Ts... tensor_infos)
{
ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor_info_1 == nullptr, function, file, line);
ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor_info_2 == nullptr, function, file, line);
- ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_nullptr(function, file, line, std::forward<Ts>(tensor_infos)...));
+ ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_nullptr(function, file, line, tensor_infos...));
- const std::array < const ITensorInfo *, 2 + sizeof...(Ts) > tensors_info_array{ { tensor_info_1, tensor_info_2, std::forward<Ts>(tensor_infos)... } };
- ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG(std::any_of(std::next(tensors_info_array.cbegin()), tensors_info_array.cend(), [&](const ITensorInfo * tensor_info)
- {
- return detail::have_different_dimensions((*tensors_info_array.cbegin())->tensor_shape(), tensor_info->tensor_shape(), upper_dim);
- }),
- function, file, line, "Tensors have different shapes");
+ const std::array<const ITensorInfo *, 2 + sizeof...(Ts)> tensors_info_array{
+ {tensor_info_1, tensor_info_2, tensor_infos...}};
+ ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG(std::any_of(std::next(tensors_info_array.cbegin()), tensors_info_array.cend(),
+ [&](const ITensorInfo *tensor_info)
+ {
+ return detail::have_different_dimensions(
+ (*tensors_info_array.cbegin())->tensor_shape(),
+ tensor_info->tensor_shape(), upper_dim);
+ }),
+ function, file, line, "Tensors have different shapes");
return arm_compute::Status{};
}
/** Return an error if the passed two tensors have different shapes from the given dimension
@@ -424,14 +553,20 @@ inline arm_compute::Status error_on_mismatching_shapes(const char *function, con
* @return Status
*/
template <typename... Ts>
-inline arm_compute::Status error_on_mismatching_shapes(const char *function, const char *file, const int line,
- unsigned int upper_dim, const ITensor *tensor_1, const ITensor *tensor_2, Ts... tensors)
+inline arm_compute::Status error_on_mismatching_shapes(const char *function,
+ const char *file,
+ const int line,
+ unsigned int upper_dim,
+ const ITensor *tensor_1,
+ const ITensor *tensor_2,
+ Ts... tensors)
{
ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor_1 == nullptr, function, file, line);
ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor_2 == nullptr, function, file, line);
- ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_nullptr(function, file, line, std::forward<Ts>(tensors)...));
- ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_mismatching_shapes(function, file, line, upper_dim, tensor_1->info(), tensor_2->info(),
- detail::get_tensor_info_t<ITensorInfo *>()(tensors)...));
+ ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_nullptr(function, file, line, tensors...));
+ ARM_COMPUTE_RETURN_ON_ERROR(
+ ::arm_compute::error_on_mismatching_shapes(function, file, line, upper_dim, tensor_1->info(), tensor_2->info(),
+ detail::get_tensor_info_t<ITensorInfo *>()(tensors)...));
return arm_compute::Status{};
}
#define ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(...) \
@@ -450,19 +585,18 @@ inline arm_compute::Status error_on_mismatching_shapes(const char *function, con
* @return Status
*/
template <typename... Ts>
-inline arm_compute::Status error_on_mismatching_data_layouts(const char *function, const char *file, const int line,
- const ITensorInfo *tensor_info, Ts... tensor_infos)
+inline arm_compute::Status error_on_mismatching_data_layouts(
+ const char *function, const char *file, const int line, const ITensorInfo *tensor_info, Ts... tensor_infos)
{
ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor_info == nullptr, function, file, line);
- ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_nullptr(function, file, line, std::forward<Ts>(tensor_infos)...));
+ ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_nullptr(function, file, line, tensor_infos...));
- DataLayout &&tensor_data_layout = tensor_info->data_layout();
- const std::array<const ITensorInfo *, sizeof...(Ts)> tensors_infos_array{ { std::forward<Ts>(tensor_infos)... } };
- ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG(std::any_of(tensors_infos_array.begin(), tensors_infos_array.end(), [&](const ITensorInfo * tensor_info_obj)
- {
- return tensor_info_obj->data_layout() != tensor_data_layout;
- }),
- function, file, line, "Tensors have different data layouts");
+ DataLayout &&tensor_data_layout = tensor_info->data_layout();
+ const std::array<const ITensorInfo *, sizeof...(Ts)> tensors_infos_array{{tensor_infos...}};
+ ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG(std::any_of(tensors_infos_array.begin(), tensors_infos_array.end(),
+ [&](const ITensorInfo *tensor_info_obj)
+ { return tensor_info_obj->data_layout() != tensor_data_layout; }),
+ function, file, line, "Tensors have different data layouts");
return arm_compute::Status{};
}
/** Return an error if the passed tensors have different data layouts
@@ -476,19 +610,21 @@ inline arm_compute::Status error_on_mismatching_data_layouts(const char *functio
* @return Status
*/
template <typename... Ts>
-inline arm_compute::Status error_on_mismatching_data_layouts(const char *function, const char *file, const int line,
- const ITensor *tensor, Ts... tensors)
+inline arm_compute::Status error_on_mismatching_data_layouts(
+ const char *function, const char *file, const int line, const ITensor *tensor, Ts... tensors)
{
ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor == nullptr, function, file, line);
ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_nullptr(function, file, line, std::forward<Ts>(tensors)...));
- ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_mismatching_data_layouts(function, file, line, tensor->info(),
- detail::get_tensor_info_t<ITensorInfo *>()(tensors)...));
+ ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_mismatching_data_layouts(
+ function, file, line, tensor->info(), detail::get_tensor_info_t<ITensorInfo *>()(tensors)...));
return arm_compute::Status{};
}
#define ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_LAYOUT(...) \
- ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_mismatching_data_layouts(__func__, __FILE__, __LINE__, __VA_ARGS__))
+ ARM_COMPUTE_ERROR_THROW_ON( \
+ ::arm_compute::error_on_mismatching_data_layouts(__func__, __FILE__, __LINE__, __VA_ARGS__))
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(...) \
- ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_mismatching_data_layouts(__func__, __FILE__, __LINE__, __VA_ARGS__))
+ ARM_COMPUTE_RETURN_ON_ERROR( \
+ ::arm_compute::error_on_mismatching_data_layouts(__func__, __FILE__, __LINE__, __VA_ARGS__))
/** Return an error if the passed two tensor infos have different data types
*
@@ -501,19 +637,18 @@ inline arm_compute::Status error_on_mismatching_data_layouts(const char *functio
* @return Status
*/
template <typename... Ts>
-inline arm_compute::Status error_on_mismatching_data_types(const char *function, const char *file, const int line,
- const ITensorInfo *tensor_info, Ts... tensor_infos)
+inline arm_compute::Status error_on_mismatching_data_types(
+ const char *function, const char *file, const int line, const ITensorInfo *tensor_info, Ts... tensor_infos)
{
ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor_info == nullptr, function, file, line);
- ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_nullptr(function, file, line, std::forward<Ts>(tensor_infos)...));
+ ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_nullptr(function, file, line, tensor_infos...));
- DataType &&tensor_data_type = tensor_info->data_type();
- const std::array<const ITensorInfo *, sizeof...(Ts)> tensors_infos_array{ { std::forward<Ts>(tensor_infos)... } };
- ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG(std::any_of(tensors_infos_array.begin(), tensors_infos_array.end(), [&](const ITensorInfo * tensor_info_obj)
- {
- return tensor_info_obj->data_type() != tensor_data_type;
- }),
- function, file, line, "Tensors have different data types");
+ DataType &&tensor_data_type = tensor_info->data_type();
+ const std::array<const ITensorInfo *, sizeof...(Ts)> tensors_infos_array{{tensor_infos...}};
+ ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG(std::any_of(tensors_infos_array.begin(), tensors_infos_array.end(),
+ [&](const ITensorInfo *tensor_info_obj)
+ { return tensor_info_obj->data_type() != tensor_data_type; }),
+ function, file, line, "Tensors have different data types");
return arm_compute::Status{};
}
/** Return an error if the passed two tensors have different data types
@@ -527,19 +662,21 @@ inline arm_compute::Status error_on_mismatching_data_types(const char *function,
* @return Status
*/
template <typename... Ts>
-inline arm_compute::Status error_on_mismatching_data_types(const char *function, const char *file, const int line,
- const ITensor *tensor, Ts... tensors)
+inline arm_compute::Status error_on_mismatching_data_types(
+ const char *function, const char *file, const int line, const ITensor *tensor, Ts... tensors)
{
ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor == nullptr, function, file, line);
- ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_nullptr(function, file, line, std::forward<Ts>(tensors)...));
- ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_mismatching_data_types(function, file, line, tensor->info(),
- detail::get_tensor_info_t<ITensorInfo *>()(tensors)...));
+ ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_nullptr(function, file, line, tensors...));
+ ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_mismatching_data_types(
+ function, file, line, tensor->info(), detail::get_tensor_info_t<ITensorInfo *>()(tensors)...));
return arm_compute::Status{};
}
#define ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(...) \
- ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_mismatching_data_types(__func__, __FILE__, __LINE__, __VA_ARGS__))
+ ARM_COMPUTE_ERROR_THROW_ON( \
+ ::arm_compute::error_on_mismatching_data_types(__func__, __FILE__, __LINE__, __VA_ARGS__))
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(...) \
- ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_mismatching_data_types(__func__, __FILE__, __LINE__, __VA_ARGS__))
+ ARM_COMPUTE_RETURN_ON_ERROR( \
+ ::arm_compute::error_on_mismatching_data_types(__func__, __FILE__, __LINE__, __VA_ARGS__))
/** Return an error if the passed tensor infos have different asymmetric quantized data types or different quantization info
*
@@ -555,28 +692,32 @@ inline arm_compute::Status error_on_mismatching_data_types(const char *function,
* @return Status
*/
template <typename... Ts>
-inline arm_compute::Status error_on_mismatching_quantization_info(const char *function, const char *file, const int line,
- const ITensorInfo *tensor_info_1, const ITensorInfo *tensor_info_2, Ts... tensor_infos)
+inline arm_compute::Status error_on_mismatching_quantization_info(const char *function,
+ const char *file,
+ const int line,
+ const ITensorInfo *tensor_info_1,
+ const ITensorInfo *tensor_info_2,
+ Ts... tensor_infos)
{
DataType &&first_data_type = tensor_info_1->data_type();
const QuantizationInfo first_quantization_info = tensor_info_1->quantization_info();
- if(!is_data_type_quantized(first_data_type))
+ if (!is_data_type_quantized(first_data_type))
{
return arm_compute::Status{};
}
- const std::array < const ITensorInfo *, 1 + sizeof...(Ts) > tensor_infos_array{ { tensor_info_2, std::forward<Ts>(tensor_infos)... } };
- ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG(std::any_of(tensor_infos_array.begin(), tensor_infos_array.end(), [&](const ITensorInfo * tensor_info)
- {
- return tensor_info->data_type() != first_data_type;
- }),
- function, file, line, "Tensors have different asymmetric quantized data types");
- ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG(std::any_of(tensor_infos_array.begin(), tensor_infos_array.end(), [&](const ITensorInfo * tensor_info)
- {
- return tensor_info->quantization_info() != first_quantization_info;
- }),
- function, file, line, "Tensors have different quantization information");
+ const std::array<const ITensorInfo *, 1 + sizeof...(Ts)> tensor_infos_array{
+ {tensor_info_2, std::forward<Ts>(tensor_infos)...}};
+ ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG(std::any_of(tensor_infos_array.begin(), tensor_infos_array.end(),
+ [&](const ITensorInfo *tensor_info)
+ { return tensor_info->data_type() != first_data_type; }),
+ function, file, line, "Tensors have different asymmetric quantized data types");
+ ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG(
+ std::any_of(tensor_infos_array.begin(), tensor_infos_array.end(),
+ [&](const ITensorInfo *tensor_info)
+ { return tensor_info->quantization_info() != first_quantization_info; }),
+ function, file, line, "Tensors have different quantization information");
return arm_compute::Status{};
}
@@ -594,17 +735,24 @@ inline arm_compute::Status error_on_mismatching_quantization_info(const char *fu
* @return Status
*/
template <typename... Ts>
-inline arm_compute::Status error_on_mismatching_quantization_info(const char *function, const char *file, const int line,
- const ITensor *tensor_1, const ITensor *tensor_2, Ts... tensors)
+inline arm_compute::Status error_on_mismatching_quantization_info(const char *function,
+ const char *file,
+ const int line,
+ const ITensor *tensor_1,
+ const ITensor *tensor_2,
+ Ts... tensors)
{
- ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_mismatching_quantization_info(function, file, line, tensor_1->info(), tensor_2->info(),
- detail::get_tensor_info_t<ITensorInfo *>()(tensors)...));
+ ARM_COMPUTE_RETURN_ON_ERROR(
+ ::arm_compute::error_on_mismatching_quantization_info(function, file, line, tensor_1->info(), tensor_2->info(),
+ detail::get_tensor_info_t<ITensorInfo *>()(tensors)...));
return arm_compute::Status{};
}
#define ARM_COMPUTE_ERROR_ON_MISMATCHING_QUANTIZATION_INFO(...) \
- ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_mismatching_quantization_info(__func__, __FILE__, __LINE__, __VA_ARGS__))
+ ARM_COMPUTE_ERROR_THROW_ON( \
+ ::arm_compute::error_on_mismatching_quantization_info(__func__, __FILE__, __LINE__, __VA_ARGS__))
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_QUANTIZATION_INFO(...) \
- ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_mismatching_quantization_info(__func__, __FILE__, __LINE__, __VA_ARGS__))
+ ARM_COMPUTE_RETURN_ON_ERROR( \
+ ::arm_compute::error_on_mismatching_quantization_info(__func__, __FILE__, __LINE__, __VA_ARGS__))
/** Throw an error if the format of the passed tensor/multi-image does not match any of the formats provided.
*
@@ -616,8 +764,8 @@ inline arm_compute::Status error_on_mismatching_quantization_info(const char *fu
* @param[in] formats (Optional) Further allowed formats.
*/
template <typename T, typename F, typename... Fs>
-void error_on_format_not_in(const char *function, const char *file, const int line,
- const T *object, F &&format, Fs &&... formats)
+void error_on_format_not_in(
+ const char *function, const char *file, const int line, const T *object, F &&format, Fs &&...formats)
{
ARM_COMPUTE_ERROR_ON_LOC(object == nullptr, function, file, line);
@@ -626,17 +774,17 @@ void error_on_format_not_in(const char *function, const char *file, const int li
ARM_COMPUTE_ERROR_ON_LOC(object_format == Format::UNKNOWN, function, file, line);
- const std::array<F, sizeof...(Fs)> formats_array{ { std::forward<Fs>(formats)... } };
+ const std::array<F, sizeof...(Fs)> formats_array{{std::forward<Fs>(formats)...}};
ARM_COMPUTE_UNUSED(formats_array);
- ARM_COMPUTE_ERROR_ON_LOC_MSG(object_format != format && std::none_of(formats_array.begin(), formats_array.end(), [&](const F & f)
- {
- return f == object_format;
- }),
- function, file, line, "Format %s not supported by this kernel", string_from_format(object_format).c_str());
+ ARM_COMPUTE_ERROR_ON_LOC_MSG(
+ object_format != format &&
+ std::none_of(formats_array.begin(), formats_array.end(), [&](const F &f) { return f == object_format; }),
+ function, file, line, "Format %s not supported by this kernel", string_from_format(object_format).c_str());
ARM_COMPUTE_UNUSED(function, format, file, line);
}
-#define ARM_COMPUTE_ERROR_ON_FORMAT_NOT_IN(t, ...) ::arm_compute::error_on_format_not_in(__func__, __FILE__, __LINE__, t, __VA_ARGS__)
+#define ARM_COMPUTE_ERROR_ON_FORMAT_NOT_IN(t, ...) \
+ ::arm_compute::error_on_format_not_in(__func__, __FILE__, __LINE__, t, __VA_ARGS__)
/** Return an error if the data type of the passed tensor info does not match any of the data types provided.
*
@@ -650,20 +798,19 @@ void error_on_format_not_in(const char *function, const char *file, const int li
* @return Status
*/
template <typename T, typename... Ts>
-inline arm_compute::Status error_on_data_type_not_in(const char *function, const char *file, const int line,
- const ITensorInfo *tensor_info, T &&dt, Ts &&... dts)
+inline arm_compute::Status error_on_data_type_not_in(
+ const char *function, const char *file, const int line, const ITensorInfo *tensor_info, T &&dt, Ts &&...dts)
{
ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor_info == nullptr, function, file, line);
const DataType &tensor_dt = tensor_info->data_type(); //NOLINT
ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor_dt == DataType::UNKNOWN, function, file, line);
- const std::array<T, sizeof...(Ts)> dts_array{ { std::forward<Ts>(dts)... } };
- ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG_VAR(tensor_dt != dt && std::none_of(dts_array.begin(), dts_array.end(), [&](const T & d)
- {
- return d == tensor_dt;
- }),
- function, file, line, "ITensor data type %s not supported by this kernel", string_from_data_type(tensor_dt).c_str());
+ const std::array<T, sizeof...(Ts)> dts_array{{std::forward<Ts>(dts)...}};
+ ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG_VAR(
+ tensor_dt != dt && std::none_of(dts_array.begin(), dts_array.end(), [&](const T &d) { return d == tensor_dt; }),
+ function, file, line, "ITensor data type %s not supported by this kernel",
+ string_from_data_type(tensor_dt).c_str());
return arm_compute::Status{};
}
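This helper is the backbone of the per-kernel data-type whitelists; the wrapper macros defined just below are the usual entry point. A minimal sketch restricting a kernel to FP16/FP32, assuming the standard ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_NOT_IN wrapper:

#include "arm_compute/core/Validate.h"

using namespace arm_compute;

Status validate_data_type(const ITensorInfo *src)
{
    // Fails with "ITensor data type ... not supported by this kernel" for anything but F16/F32.
    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_NOT_IN(src, DataType::F16, DataType::F32);
    return Status{};
}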
/** Return an error if the data type of the passed tensor does not match any of the data types provided.
@@ -678,11 +825,12 @@ inline arm_compute::Status error_on_data_type_not_in(const char *function, const
* @return Status
*/
template <typename T, typename... Ts>
-inline arm_compute::Status error_on_data_type_not_in(const char *function, const char *file, const int line,
- const ITensor *tensor, T &&dt, Ts &&... dts)
+inline arm_compute::Status error_on_data_type_not_in(
+ const char *function, const char *file, const int line, const ITensor *tensor, T &&dt, Ts &&...dts)
{
ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor == nullptr, function, file, line);
- ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_data_type_not_in(function, file, line, tensor->info(), std::forward<T>(dt), std::forward<Ts>(dts)...));
+ ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_data_type_not_in(
+ function, file, line, tensor->info(), std::forward<T>(dt), std::forward<Ts>(dts)...));
return arm_compute::Status{};
}
#define ARM_COMPUTE_ERROR_ON_DATA_TYPE_NOT_IN(t, ...) \
@@ -702,20 +850,19 @@ inline arm_compute::Status error_on_data_type_not_in(const char *function, const
* @return Status
*/
template <typename T, typename... Ts>
-inline arm_compute::Status error_on_data_layout_not_in(const char *function, const char *file, const int line,
- const ITensorInfo *tensor_info, T &&dl, Ts &&... dls)
+inline arm_compute::Status error_on_data_layout_not_in(
+ const char *function, const char *file, const int line, const ITensorInfo *tensor_info, T &&dl, Ts &&...dls)
{
ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor_info == nullptr, function, file, line);
const DataLayout &tensor_dl = tensor_info->data_layout(); //NOLINT
ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor_dl == DataLayout::UNKNOWN, function, file, line);
- const std::array<T, sizeof...(Ts)> dls_array{ { std::forward<Ts>(dls)... } };
- ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG_VAR(tensor_dl != dl && std::none_of(dls_array.begin(), dls_array.end(), [&](const T & l)
- {
- return l == tensor_dl;
- }),
- function, file, line, "ITensor data layout %s not supported by this kernel", string_from_data_layout(tensor_dl).c_str());
+ const std::array<T, sizeof...(Ts)> dls_array{{std::forward<Ts>(dls)...}};
+ ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG_VAR(
+ tensor_dl != dl && std::none_of(dls_array.begin(), dls_array.end(), [&](const T &l) { return l == tensor_dl; }),
+ function, file, line, "ITensor data layout %s not supported by this kernel",
+ string_from_data_layout(tensor_dl).c_str());
return arm_compute::Status{};
}
/** Return an error if the data layout of the passed tensor does not match any of the data layout provided.
@@ -730,17 +877,19 @@ inline arm_compute::Status error_on_data_layout_not_in(const char *function, con
* @return Status
*/
template <typename T, typename... Ts>
-inline arm_compute::Status error_on_data_layout_not_in(const char *function, const char *file, const int line,
- const ITensor *tensor, T &&dl, Ts &&... dls)
+inline arm_compute::Status error_on_data_layout_not_in(
+ const char *function, const char *file, const int line, const ITensor *tensor, T &&dl, Ts &&...dls)
{
ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor == nullptr, function, file, line);
- ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_data_layout_not_in(function, file, line, tensor->info(), std::forward<T>(dl), std::forward<Ts>(dls)...));
+ ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_data_layout_not_in(
+ function, file, line, tensor->info(), std::forward<T>(dl), std::forward<Ts>(dls)...));
return arm_compute::Status{};
}
#define ARM_COMPUTE_ERROR_ON_DATA_LAYOUT_NOT_IN(t, ...) \
ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_data_layout_not_in(__func__, __FILE__, __LINE__, t, __VA_ARGS__))
#define ARM_COMPUTE_RETURN_ERROR_ON_DATA_LAYOUT_NOT_IN(t, ...) \
- ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_data_layout_not_in(__func__, __FILE__, __LINE__, t, __VA_ARGS__))
+ ARM_COMPUTE_RETURN_ON_ERROR( \
+ ::arm_compute::error_on_data_layout_not_in(__func__, __FILE__, __LINE__, t, __VA_ARGS__))
/** Return an error if the data type or the number of channels of the passed tensor info does not match any of the data types and number of channels provided.
*
@@ -755,12 +904,20 @@ inline arm_compute::Status error_on_data_layout_not_in(const char *function, con
* @return Status
*/
template <typename T, typename... Ts>
-inline arm_compute::Status error_on_data_type_channel_not_in(const char *function, const char *file, const int line,
- const ITensorInfo *tensor_info, size_t num_channels, T &&dt, Ts &&... dts)
+inline arm_compute::Status error_on_data_type_channel_not_in(const char *function,
+ const char *file,
+ const int line,
+ const ITensorInfo *tensor_info,
+ size_t num_channels,
+ T &&dt,
+ Ts &&...dts)
{
- ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_data_type_not_in(function, file, line, tensor_info, std::forward<T>(dt), std::forward<Ts>(dts)...));
+ ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_data_type_not_in(
+ function, file, line, tensor_info, std::forward<T>(dt), std::forward<Ts>(dts)...));
const size_t tensor_nc = tensor_info->num_channels();
- ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG_VAR(tensor_nc != num_channels, function, file, line, "Number of channels %zu. Required number of channels %zu", tensor_nc, num_channels);
+ ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG_VAR(tensor_nc != num_channels, function, file, line,
+ "Number of channels %zu. Required number of channels %zu", tensor_nc,
+ num_channels);
return arm_compute::Status{};
}
/** Return an error if the data type or the number of channels of the passed tensor does not match any of the data types and number of channels provided.
@@ -776,17 +933,25 @@ inline arm_compute::Status error_on_data_type_channel_not_in(const char *functio
* @return Status
*/
template <typename T, typename... Ts>
-inline arm_compute::Status error_on_data_type_channel_not_in(const char *function, const char *file, const int line,
- const ITensor *tensor, size_t num_channels, T &&dt, Ts &&... dts)
+inline arm_compute::Status error_on_data_type_channel_not_in(const char *function,
+ const char *file,
+ const int line,
+ const ITensor *tensor,
+ size_t num_channels,
+ T &&dt,
+ Ts &&...dts)
{
ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor == nullptr, function, file, line);
- ARM_COMPUTE_RETURN_ON_ERROR(error_on_data_type_channel_not_in(function, file, line, tensor->info(), num_channels, std::forward<T>(dt), std::forward<Ts>(dts)...));
+ ARM_COMPUTE_RETURN_ON_ERROR(error_on_data_type_channel_not_in(function, file, line, tensor->info(), num_channels,
+ std::forward<T>(dt), std::forward<Ts>(dts)...));
return arm_compute::Status{};
}
#define ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(t, c, ...) \
- ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_data_type_channel_not_in(__func__, __FILE__, __LINE__, t, c, __VA_ARGS__))
+ ARM_COMPUTE_ERROR_THROW_ON( \
+ ::arm_compute::error_on_data_type_channel_not_in(__func__, __FILE__, __LINE__, t, c, __VA_ARGS__))
#define ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(t, c, ...) \
- ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_data_type_channel_not_in(__func__, __FILE__, __LINE__, t, c, __VA_ARGS__))
+ ARM_COMPUTE_RETURN_ON_ERROR( \
+ ::arm_compute::error_on_data_type_channel_not_in(__func__, __FILE__, __LINE__, t, c, __VA_ARGS__))
/** Return an error if the data type of the passed tensor info is FP16 and FP16 extension is not supported by the device.
*
@@ -798,12 +963,12 @@ inline arm_compute::Status error_on_data_type_channel_not_in(const char *functio
*
* @return Status
*/
-inline arm_compute::Status error_on_unsupported_fp16(const char *function, const char *file, const int line,
- const ITensorInfo *tensor_info, bool is_fp16_supported)
+inline arm_compute::Status error_on_unsupported_fp16(
+ const char *function, const char *file, const int line, const ITensorInfo *tensor_info, bool is_fp16_supported)
{
ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor_info == nullptr, function, file, line);
- ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG((tensor_info->data_type() == DataType::F16 && !is_fp16_supported),
- function, file, line, "FP16 not supported by the device");
+ ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG((tensor_info->data_type() == DataType::F16 && !is_fp16_supported), function,
+ file, line, "FP16 not supported by the device");
return arm_compute::Status{};
}
@@ -817,11 +982,12 @@ inline arm_compute::Status error_on_unsupported_fp16(const char *function, const
*
* @return Status
*/
-inline arm_compute::Status error_on_unsupported_fp16(const char *function, const char *file, const int line,
- const ITensor *tensor, bool is_fp16_supported)
+inline arm_compute::Status error_on_unsupported_fp16(
+ const char *function, const char *file, const int line, const ITensor *tensor, bool is_fp16_supported)
{
ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor == nullptr, function, file, line);
- ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_unsupported_fp16(function, file, line, tensor->info(), is_fp16_supported));
+ ARM_COMPUTE_RETURN_ON_ERROR(
+ ::arm_compute::error_on_unsupported_fp16(function, file, line, tensor->info(), is_fp16_supported));
return arm_compute::Status{};
}
@@ -834,8 +1000,8 @@ inline arm_compute::Status error_on_unsupported_fp16(const char *function, const
*
* @return Status
*/
-arm_compute::Status error_on_tensor_not_2d(const char *function, const char *file, const int line,
- const ITensor *tensor);
+arm_compute::Status
+error_on_tensor_not_2d(const char *function, const char *file, const int line, const ITensor *tensor);
/** Return an error if the tensor info is not 2D.
*
@@ -846,8 +1012,8 @@ arm_compute::Status error_on_tensor_not_2d(const char *function, const char *fil
*
* @return Status
*/
-arm_compute::Status error_on_tensor_not_2d(const char *function, const char *file, const int line,
- const ITensorInfo *tensor);
+arm_compute::Status
+error_on_tensor_not_2d(const char *function, const char *file, const int line, const ITensorInfo *tensor);
#define ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(t) \
ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_tensor_not_2d(__func__, __FILE__, __LINE__, t))
@@ -866,17 +1032,15 @@ arm_compute::Status error_on_tensor_not_2d(const char *function, const char *fil
* @return Status
*/
template <typename T, typename... Ts>
-inline arm_compute::Status error_on_channel_not_in(const char *function, const char *file, const int line,
- T cn, T &&channel, Ts &&... channels)
+inline arm_compute::Status
+error_on_channel_not_in(const char *function, const char *file, const int line, T cn, T &&channel, Ts &&...channels)
{
ARM_COMPUTE_RETURN_ERROR_ON_LOC(cn == Channel::UNKNOWN, function, file, line);
- const std::array<T, sizeof...(Ts)> channels_array{ { std::forward<Ts>(channels)... } };
- ARM_COMPUTE_RETURN_ERROR_ON_LOC(channel != cn && std::none_of(channels_array.begin(), channels_array.end(), [&](const T & f)
- {
- return f == cn;
- }),
- function, file, line);
+ const std::array<T, sizeof...(Ts)> channels_array{{std::forward<Ts>(channels)...}};
+ ARM_COMPUTE_RETURN_ERROR_ON_LOC(channel != cn && std::none_of(channels_array.begin(), channels_array.end(),
+ [&](const T &f) { return f == cn; }),
+ function, file, line);
return arm_compute::Status{};
}
#define ARM_COMPUTE_ERROR_ON_CHANNEL_NOT_IN(c, ...) \
@@ -894,8 +1058,8 @@ inline arm_compute::Status error_on_channel_not_in(const char *function, const c
*
* @return Status
*/
-arm_compute::Status error_on_channel_not_in_known_format(const char *function, const char *file, const int line,
- Format fmt, Channel cn);
+arm_compute::Status
+error_on_channel_not_in_known_format(const char *function, const char *file, const int line, Format fmt, Channel cn);
#define ARM_COMPUTE_ERROR_ON_CHANNEL_NOT_IN_KNOWN_FORMAT(f, c) \
ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_channel_not_in_known_format(__func__, __FILE__, __LINE__, f, c))
#define ARM_COMPUTE_RETURN_ERROR_ON_CHANNEL_NOT_IN_KNOWN_FORMAT(f, c) \
@@ -910,8 +1074,8 @@ arm_compute::Status error_on_channel_not_in_known_format(const char *function, c
*
* @return Status
*/
-arm_compute::Status error_on_unconfigured_kernel(const char *function, const char *file, const int line,
- const IKernel *kernel);
+arm_compute::Status
+error_on_unconfigured_kernel(const char *function, const char *file, const int line, const IKernel *kernel);
#define ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(k) \
ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_unconfigured_kernel(__func__, __FILE__, __LINE__, k))
#define ARM_COMPUTE_RETURN_ERROR_ON_UNCONFIGURED_KERNEL(k) \
@@ -928,8 +1092,12 @@ arm_compute::Status error_on_unconfigured_kernel(const char *function, const cha
*
* @return Status
*/
-arm_compute::Status error_on_invalid_subtensor(const char *function, const char *file, const int line,
- const TensorShape &parent_shape, const Coordinates &coords, const TensorShape &shape);
+arm_compute::Status error_on_invalid_subtensor(const char *function,
+ const char *file,
+ const int line,
+ const TensorShape &parent_shape,
+ const Coordinates &coords,
+ const TensorShape &shape);
#define ARM_COMPUTE_ERROR_ON_INVALID_SUBTENSOR(p, c, s) \
ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_invalid_subtensor(__func__, __FILE__, __LINE__, p, c, s))
#define ARM_COMPUTE_RETURN_ERROR_ON_INVALID_SUBTENSOR(p, c, s) \
@@ -945,11 +1113,16 @@ arm_compute::Status error_on_invalid_subtensor(const char *function, const char
*
* @return Status
*/
-arm_compute::Status error_on_invalid_subtensor_valid_region(const char *function, const char *file, const int line,
- const ValidRegion &parent_valid_region, const ValidRegion &valid_region);
+arm_compute::Status error_on_invalid_subtensor_valid_region(const char *function,
+ const char *file,
+ const int line,
+ const ValidRegion &parent_valid_region,
+ const ValidRegion &valid_region);
#define ARM_COMPUTE_ERROR_ON_INVALID_SUBTENSOR_VALID_REGION(pv, sv) \
- ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_invalid_subtensor_valid_region(__func__, __FILE__, __LINE__, pv, sv))
+ ARM_COMPUTE_ERROR_THROW_ON( \
+ ::arm_compute::error_on_invalid_subtensor_valid_region(__func__, __FILE__, __LINE__, pv, sv))
#define ARM_COMPUTE_RETURN_ERROR_ON_INVALID_SUBTENSOR_VALID_REGION(pv, sv) \
- ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_invalid_subtensor_valid_region(__func__, __FILE__, __LINE__, pv, sv))
-}
+ ARM_COMPUTE_RETURN_ON_ERROR( \
+ ::arm_compute::error_on_invalid_subtensor_valid_region(__func__, __FILE__, __LINE__, pv, sv))
+} // namespace arm_compute
#endif /* ARM_COMPUTE_VALIDATE_H*/
diff --git a/arm_compute/core/Version.h b/arm_compute/core/Version.h
index a4d307950a..44d400bad8 100644
--- a/arm_compute/core/Version.h
+++ b/arm_compute/core/Version.h
@@ -28,7 +28,7 @@
/* Macro utilities */
#define ARM_COMPUTE_STRINGIFY2(s) #s
-#define ARM_COMPUTE_STRINGIFY(s) ARM_COMPUTE_STRINGIFY2(s)
+#define ARM_COMPUTE_STRINGIFY(s) ARM_COMPUTE_STRINGIFY2(s)
#define ARM_COMPUTE_VERSION_STR \
ARM_COMPUTE_STRINGIFY(ARM_COMPUTE_VERSION_MAJOR) \
diff --git a/arm_compute/core/Window.h b/arm_compute/core/Window.h
index 150320a90e..e93d2863c9 100644
--- a/arm_compute/core/Window.h
+++ b/arm_compute/core/Window.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2020 Arm Limited.
+ * Copyright (c) 2016-2020, 2022-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,17 +21,17 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef ARM_COMPUTE_WINDOW_H
-#define ARM_COMPUTE_WINDOW_H
-
-#include <algorithm>
-#include <array>
-#include <cstddef>
+#ifndef ACL_ARM_COMPUTE_CORE_WINDOW_H
+#define ACL_ARM_COMPUTE_CORE_WINDOW_H
#include "arm_compute/core/Coordinates.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/ITensorInfo.h"
-#include "arm_compute/core/Utils.h"
+#include "arm_compute/core/utils/math/Math.h"
+
+#include <algorithm>
+#include <array>
+#include <cstddef>
namespace arm_compute
{
@@ -47,6 +47,8 @@ public:
static constexpr size_t DimZ = 2;
/** Alias for dimension 3 also known as W dimension */
static constexpr size_t DimW = 3;
+ /** Alias for dimension 4 also known as V dimension */
+ static constexpr size_t DimV = 4;
/** Default constructor: create a window containing a single element. */
constexpr Window()
@@ -84,10 +86,10 @@ public:
* @param[in] step Step between two elements of the dimension when iterating.
*
*/
- constexpr Dimension(int start = 0, int end = 1, int step = 1)
- : _start(start), _end(end), _step(step)
+ constexpr Dimension(int start = 0, int end = 1, int step = 1) : _start(start), _end(end), _step(step)
{
}
+ Dimension(const Dimension &d) = default;
/** Default assignment operator to allow dimensions to be copied */
Dimension &operator=(const Dimension &d) = default;
/** Return the start of the dimension */
@@ -121,6 +123,17 @@ public:
{
_end = end;
}
+ /** Check whether two Dimensions are equal.
+ *
+ * @param[in] lhs LHS Dimensions
+ * @param[in] rhs RHS Dimensions
+ *
+ * @return True if the Dimensions are the same.
+ */
+ friend bool operator==(const Dimension &lhs, const Dimension &rhs)
+ {
+ return (lhs._start == rhs._start) && (lhs._end == rhs._end) && (lhs._step == rhs._step);
+ }
private:
int _start; /**< Start of the dimension */
@@ -200,15 +213,17 @@ public:
*/
void shift(size_t dimension, int shift_value);
- /** Shift down all the dimensions of a window
+ /** Shift down all the dimensions of a window starting from the specified dimension.
*
- * i.e new_dims[n] = old_dims[n+shift_value].
+ * new_dims[i] = old_dims[i] for all i < start_dim.
+ * new_dims[i] = old_dims[i+shift_value] for all i >= start_dim.
*
* @param[in] shift_value Number of dimensions to shift the window by.
+ * @param[in] start_dim The dimension from which the dimensions start to shift.
*
* @return The window with the shifted dimensions.
*/
- Window shift_dimensions(unsigned int shift_value) const;
+ Window shift_dimensions(unsigned int shift_value, unsigned int start_dim = 0) const;
/** Adjust the start or end of a given dimension by the given value
*
@@ -348,7 +363,6 @@ public:
{
return slide_window_slice<4>(slice);
}
-
/** Collapse the dimensions between @p first and @p last if possible.
*
* A dimension is collapsable if it starts from 0 and matches the corresponding dimension in the full_window
@@ -360,7 +374,8 @@ public:
*
* @return Collapsed window.
*/
- Window collapse_if_possible(const Window &full_window, size_t first, size_t last, bool *has_collapsed = nullptr) const;
+ Window
+ collapse_if_possible(const Window &full_window, size_t first, size_t last, bool *has_collapsed = nullptr) const;
/** Collapse the dimensions higher than @p first if possible.
*
@@ -413,6 +428,14 @@ public:
* @param[in] rhs Second window to swap.
*/
friend void swap(Window &lhs, Window &rhs);
+ /** Check whether two Windows are equal.
+ *
+ * @param[in] lhs LHS window
+ * @param[in] rhs RHS window
+ *
+ * @return True if the given windows are the same.
+ */
+ friend bool operator==(const Window &lhs, const Window &rhs);
private:
/** First slice of the window
@@ -420,7 +443,7 @@ private:
* @return The first slice of the window.
*/
template <unsigned int window_dimension>
- Window first_slice_window() const;
+ Window first_slice_window() const;
/** Slide the passed window slice.
*
@@ -439,4 +462,4 @@ private:
};
} // namespace arm_compute
#include "Window.inl"
-#endif /*ARM_COMPUTE_WINDOW_H */
+#endif // ACL_ARM_COMPUTE_CORE_WINDOW_H
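
Editor's note: a minimal sketch of the additions above (the DimV alias and the new Dimension/Window equality operators); the window values are illustrative:

using namespace arm_compute;

void window_equality_sketch()
{
    Window a;
    a.set(Window::DimX, Window::Dimension(0, 128, 4)); // start, end, step
    a.set(Window::DimV, Window::Dimension(0, 2));      // new 5th-dimension alias

    Window b(a); // copy: same dimensions and broadcast flags

    // Both Dimension and Window now provide operator==:
    ARM_COMPUTE_ERROR_ON(!(a[Window::DimX] == b[Window::DimX]));
    ARM_COMPUTE_ERROR_ON(!(a == b));
}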
diff --git a/arm_compute/core/Window.inl b/arm_compute/core/Window.inl
index 6100d09a1c..0f7c4fbdd7 100644
--- a/arm_compute/core/Window.inl
+++ b/arm_compute/core/Window.inl
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2020 Arm Limited.
+ * Copyright (c) 2016-2020, 2022-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,12 +21,16 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+
+#ifndef ACL_ARM_COMPUTE_CORE_WINDOW_INL
+#define ACL_ARM_COMPUTE_CORE_WINDOW_INL
+
namespace arm_compute
{
inline Window::Window(const Window &src)
: _dims(), _is_broadcasted(utility::generate_array<bool, Coordinates::num_max_dimensions, false>::value)
{
- for(size_t i = 0; i < Coordinates::num_max_dimensions; ++i)
+ for (size_t i = 0; i < Coordinates::num_max_dimensions; ++i)
{
set(i, src[i]);
_is_broadcasted[i] = src.is_broadcasted(i);
@@ -65,32 +69,34 @@ inline bool Window::is_broadcasted(size_t dimension) const
return _is_broadcasted[dimension];
}
-inline Window Window::collapse_if_possible(const Window &full_window, const size_t first,
- const size_t last, bool *has_collapsed) const
+inline Window Window::collapse_if_possible(const Window &full_window,
+ const size_t first,
+ const size_t last,
+ bool *has_collapsed) const
{
Window collapsed(*this);
bool is_collapsable = true;
int collapsed_end = _dims[first].end();
- for(size_t d = first + 1; is_collapsable && (d < last); ++d)
+ for (size_t d = first + 1; is_collapsable && (d < last); ++d)
{
// The _dims's dimension must match the full _dims dimension to be collapsable:
- is_collapsable = (_dims[d].start() == 0) && (full_window[d].start() == 0) && (_dims[d].step() <= 1)
- && (full_window[d].end() == _dims[d].end());
+ is_collapsable = (_dims[d].start() == 0) && (full_window[d].start() == 0) && (_dims[d].step() <= 1) &&
+ (full_window[d].end() == _dims[d].end());
collapsed_end *= _dims[d].end();
}
- if(is_collapsable)
+ if (is_collapsable)
{
collapsed._dims.at(first).set_end(collapsed_end);
- for(size_t d = first + 1; is_collapsable && (d < last); ++d)
+ for (size_t d = first + 1; is_collapsable && (d < last); ++d)
{
collapsed.set(d, Dimension());
}
}
- if(has_collapsed != nullptr)
+ if (has_collapsed != nullptr)
{
*has_collapsed = is_collapsable;
}
@@ -98,13 +104,21 @@ inline Window Window::collapse_if_possible(const Window &full_window, const size
return collapsed;
}
-inline Window Window::shift_dimensions(unsigned int shift_value) const
+inline Window Window::shift_dimensions(unsigned int shift_value, unsigned int start_dim) const
{
Window shifted_window;
- for(size_t n = 0; n < (Coordinates::num_max_dimensions - shift_value); n++)
+ size_t n = 0;
+
+ for (; n < start_dim; ++n)
+ {
+ shifted_window.set(n, _dims[n]);
+ }
+
+ for (; n < (Coordinates::num_max_dimensions - shift_value); n++)
{
shifted_window.set(n, _dims[n + shift_value]);
}
+
return shifted_window;
}
@@ -120,9 +134,9 @@ inline Window Window::collapse(const Window &full_window, const size_t first, co
inline Window Window::broadcast_if_dimension_le_one(const TensorShape &shape) const
{
Window broadcastWin(*this);
- for(size_t d = 0; d < TensorShape::num_max_dimensions; ++d)
+ for (size_t d = 0; d < TensorShape::num_max_dimensions; ++d)
{
- if(shape[d] <= 1)
+ if (shape[d] <= 1)
{
broadcastWin.set_broadcasted(d);
}
@@ -142,7 +156,7 @@ inline void Window::adjust(size_t dimension, int adjust_value, bool is_at_start)
ARM_COMPUTE_ERROR_ON(dimension >= Coordinates::num_max_dimensions);
Window::Dimension &d = _dims[dimension];
- if(is_at_start)
+ if (is_at_start)
{
d = Window::Dimension(d.start() + adjust_value, d.end(), d.step());
}
@@ -172,7 +186,7 @@ inline void Window::set_dimension_step(size_t dimension, int step)
inline void Window::validate() const
{
- for(size_t i = 0; i < Coordinates::num_max_dimensions; ++i)
+ for (size_t i = 0; i < Coordinates::num_max_dimensions; ++i)
{
ARM_COMPUTE_ERROR_ON(_dims[i].end() < _dims[i].start());
ARM_COMPUTE_ERROR_ON((_dims[i].step() != 0) && (((_dims[i].end() - _dims[i].start()) % _dims[i].step()) != 0));
@@ -193,9 +207,9 @@ inline Window Window::split_window(size_t dimension, size_t id, size_t total) co
Window out;
- for(size_t d = 0; d < Coordinates::num_max_dimensions; ++d)
+ for (size_t d = 0; d < Coordinates::num_max_dimensions; ++d)
{
- if(d == dimension)
+ if (d == dimension)
{
int start = _dims[d].start();
int end = _dims[d].end();
@@ -207,7 +221,7 @@ inline Window Window::split_window(size_t dimension, size_t id, size_t total) co
int it_start = work * id;
- if(int(id) < rem)
+ if (int(id) < rem)
{
++work;
it_start += id;
@@ -234,18 +248,18 @@ inline Window Window::split_window(size_t dimension, size_t id, size_t total) co
template <unsigned int window_dimension>
inline bool Window::slide_window_slice(Window &slice) const
{
- for(unsigned int n = window_dimension; n < Coordinates::num_max_dimensions; ++n)
+ for (unsigned int n = window_dimension; n < Coordinates::num_max_dimensions; ++n)
{
// Did we reach the end of this dimension?
const int v = slice._dims[n].start() + 1;
- if(v < _dims[n].end())
+ if (v < _dims[n].end())
{
// No: increment
slice._dims[n] = Dimension(v, v + 1, 1);
// Reset lower dimensions:
- for(unsigned int lower = window_dimension; lower < n; ++lower)
+ for (unsigned int lower = window_dimension; lower < n; ++lower)
{
slice._dims[lower] = Dimension(_dims[lower].start(), _dims[lower].start() + 1, 1);
}
@@ -258,14 +272,14 @@ inline bool Window::slide_window_slice(Window &slice) const
}
template <unsigned int window_dimension>
-inline Window Window::first_slice_window() const
+inline Window Window::first_slice_window() const
{
Window slice;
std::copy_n(_dims.begin(), window_dimension, slice._dims.begin());
//Initialise higher dimensions to be the first slice.
- for(unsigned int n = window_dimension; n < Coordinates::num_max_dimensions; ++n)
+ for (unsigned int n = window_dimension; n < Coordinates::num_max_dimensions; ++n)
{
slice._dims[n] = Dimension(_dims[n].start(), _dims[n].start() + 1, 1);
}
@@ -275,7 +289,7 @@ inline Window Window::first_slice_window() const
inline void Window::use_tensor_dimensions(const TensorShape &shape, size_t first_dimension)
{
- for(unsigned int n = first_dimension; n < shape.num_dimensions(); ++n)
+ for (unsigned int n = first_dimension; n < shape.num_dimensions(); ++n)
{
set(n, Window::Dimension(0, std::max(shape[n], static_cast<size_t>(1))));
}
@@ -284,7 +298,7 @@ inline void Window::use_tensor_dimensions(const TensorShape &shape, size_t first
inline TensorShape Window::shape() const
{
TensorShape shape;
- for(size_t d = 0; d < TensorShape::num_max_dimensions; ++d)
+ for (size_t d = 0; d < TensorShape::num_max_dimensions; ++d)
{
shape.set(d, (_dims[d].end() - _dims[d].start()) / _dims[d].step());
}
@@ -294,7 +308,7 @@ inline TensorShape Window::shape() const
inline size_t Window::num_iterations_total() const
{
size_t total = 1;
- for(size_t d = 0; d < Coordinates::num_max_dimensions; ++d)
+ for (size_t d = 0; d < Coordinates::num_max_dimensions; ++d)
{
total *= num_iterations(d);
}
@@ -305,4 +319,11 @@ inline void swap(Window &lhs, Window &rhs)
{
lhs._dims.swap(rhs._dims);
}
+
+inline bool operator==(const Window &lhs, const Window &rhs)
+{
+ return (lhs._dims == rhs._dims) && (lhs._is_broadcasted == rhs._is_broadcasted);
+}
} // namespace arm_compute
+
+#endif // ACL_ARM_COMPUTE_CORE_WINDOW_INL
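
Editor's note: a sketch of the reworked shift_dimensions() semantics. With start_dim = 2 the leading two dimensions are preserved and the shift only applies from dimension 2 upwards; dimension values are illustrative:

using namespace arm_compute;

void shift_dimensions_sketch()
{
    Window win;
    win.set(0, Window::Dimension(0, 8));
    win.set(1, Window::Dimension(0, 16));
    win.set(2, Window::Dimension(0, 2));
    win.set(3, Window::Dimension(0, 4));

    const Window shifted = win.shift_dimensions(1 /* shift_value */, 2 /* start_dim */);

    ARM_COMPUTE_ERROR_ON(!(shifted[0] == win[0])); // kept as-is
    ARM_COMPUTE_ERROR_ON(!(shifted[1] == win[1])); // kept as-is
    ARM_COMPUTE_ERROR_ON(!(shifted[2] == win[3])); // old dimension 3 shifted down
}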
diff --git a/arm_compute/core/WindowIterator.h b/arm_compute/core/WindowIterator.h
index c15a50cf47..29302c410a 100644
--- a/arm_compute/core/WindowIterator.h
+++ b/arm_compute/core/WindowIterator.h
@@ -28,10 +28,6 @@
#include "arm_compute/core/ITensor.h"
#include "arm_compute/core/Window.h"
-//FIXME: Delete the "ARM_COMPUTE_PRINTF" before the release. In the meantime it's probably going to be useful to debug
-//#define ARM_COMPUTE_PRINTF printf
-#define ARM_COMPUTE_PRINTF(...)
-
namespace arm_compute
{
/** Convert an offset in window steps into absolute coordinates.
@@ -44,7 +40,7 @@ namespace arm_compute
inline Coordinates convert_window_coord_to_position(const Window &w, const Coordinates &offset)
{
Coordinates position;
- for(unsigned int i = 0; i < Coordinates::num_max_dimensions; ++i)
+ for (unsigned int i = 0; i < Coordinates::num_max_dimensions; ++i)
{
position.set(i, w[i].start() + offset[i] * w[i].step());
}
@@ -168,16 +164,14 @@ public:
template <typename M>
void iterate_3D(M &&on_new_row_size)
{
- while(_end.z() != _position.z())
+ while (_end.z() != _position.z())
{
- ARM_COMPUTE_PRINTF("New slice %d\n", _position.z());
iterate_2D_internal(on_new_row_size, _w.x().end() - _w.x().step(), _w.y().end() - _w.y().step());
_position[2] += _w.z().step();
_position[1] = _w.y().start();
_position[0] = _w.x().start();
}
// Left over:
- ARM_COMPUTE_PRINTF("Left over slice\n");
iterate_2D(on_new_row_size);
}
@@ -217,29 +211,25 @@ private:
void iterate_2D_internal(M &&on_new_row_size, int end_x, int end_y)
{
//Is there more than one row to process ?
- if(end_y == _position.y())
+ if (end_y == _position.y())
{
- // Single row:
- ARM_COMPUTE_PRINTF("Partial row only\n");
// Both start and end belong to the same row:
iterate_over_dim0(end_x + _w.x().step(), on_new_row_size);
}
else
{
// Do we start from the beginning of the row ?
- if(_w.x().start() != _position.x())
+ if (_w.x().start() != _position.x())
{
//Start in the middle of a row: process left-over X
- ARM_COMPUTE_PRINTF("Partial row first\n");
iterate_over_dim0(_w.x().end(), on_new_row_size);
_position[1] += _w.y().step();
}
//Middle rows
bool no_leftover = end_x + _w.x().step() == _w.x().end();
- if(no_leftover)
+ if (no_leftover)
{
- ARM_COMPUTE_PRINTF("no left over\n");
//Switch to full row size:
on_new_row_size(_w[0].start(), _w.x().end());
// Shouldn't be possible to reach that point and not have at least one entire row to process
@@ -249,17 +239,14 @@ private:
}
else
{
- ARM_COMPUTE_PRINTF("with left over\n");
// Are there full rows to process ?
- if(_position[1] != end_y)
+ if (_position[1] != end_y)
{
- ARM_COMPUTE_PRINTF("full rows\n");
//Switch to full row size:
on_new_row_size(_w[0].start(), _w.x().end());
iterate_over_dim1(end_y);
}
- ARM_COMPUTE_PRINTF("Final leftover\n");
//Leftover end x
_position[0] = _w.x().start();
iterate_over_dim0(end_x + _w.x().step(), on_new_row_size);
@@ -273,7 +260,7 @@ private:
*/
void iterate_over_dim1(int end)
{
- for(; _position[1] != end; _position[1] += _w[1].step())
+ for (; _position[1] != end; _position[1] += _w[1].step())
{
_position[0] = _w[0].start();
iterate_over_dim0(_w[0].end());
@@ -298,10 +285,9 @@ private:
*/
void iterate_over_dim0(int end)
{
- ARM_COMPUTE_PRINTF("X [%d, %d, %d]\n", _position.x(), end, _w[0].step());
// Both start and end belong to the same row:
ARM_COMPUTE_ERROR_ON(_position[0] > end);
- for(; _position.x() < end; _position[0] += _w[0].step())
+ for (; _position.x() < end; _position[0] += _w[0].step())
{
_lambda_function(_position);
}
@@ -323,9 +309,10 @@ private:
* @return A WindowIterator object.
*/
template <typename L>
-WindowIterator<L> create_window_iterator(const Window &w, const Coordinates &start, const Coordinates &end, L &&lambda_function)
+WindowIterator<L>
+create_window_iterator(const Window &w, const Coordinates &start, const Coordinates &end, L &&lambda_function)
{
return WindowIterator<L>(w, start, end, std::move(lambda_function));
}
-}
+} // namespace arm_compute
#endif /*ARM_COMPUTE_WINDOW_ITERATOR_H*/
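
Editor's note: with the printf scaffolding removed, a sketch of driving the iterator via create_window_iterator(); the window, coordinates, and lambdas are illustrative:

using namespace arm_compute;

void window_iterator_sketch()
{
    Window win;
    win.set(Window::DimX, Window::Dimension(0, 4));
    win.set(Window::DimY, Window::Dimension(0, 2));

    // First and last element of the window, converted to absolute coordinates:
    const Coordinates start = convert_window_coord_to_position(win, Coordinates(0, 0));
    const Coordinates end   = convert_window_coord_to_position(win, Coordinates(3, 1));

    auto it = create_window_iterator(win, start, end,
                                     [](const Coordinates &pos) { (void)pos; /* visit one element */ });
    // The callback fires whenever the row length changes (e.g. leftover columns):
    it.iterate_2D([](int row_start, int row_end) { (void)row_start; (void)row_end; });
}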
diff --git a/arm_compute/core/experimental/Types.h b/arm_compute/core/experimental/Types.h
index 92ece460dc..63a3a1a1ec 100644
--- a/arm_compute/core/experimental/Types.h
+++ b/arm_compute/core/experimental/Types.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020-2021 Arm Limited.
+ * Copyright (c) 2020-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,8 +21,8 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef ARM_COMPUTE_EXPERIMENTAL_TYPES_H
-#define ARM_COMPUTE_EXPERIMENTAL_TYPES_H
+#ifndef ACL_ARM_COMPUTE_CORE_EXPERIMENTAL_TYPES_H
+#define ACL_ARM_COMPUTE_CORE_EXPERIMENTAL_TYPES_H
#include "arm_compute/core/ITensorPack.h"
#include "arm_compute/core/TensorShape.h"
@@ -39,15 +39,26 @@ enum TensorType : int32_t
{
ACL_UNKNOWN = -1,
ACL_SRC_DST = 0,
+
+ // Src
ACL_SRC = 0,
ACL_SRC_0 = 0,
ACL_SRC_1 = 1,
ACL_SRC_2 = 2,
+ ACL_SRC_3 = 3,
+ ACL_SRC_4 = 4,
+ ACL_SRC_5 = 5,
+ ACL_SRC_6 = 6,
+ ACL_SRC_END = 6,
+
+ // Dst
ACL_DST = 30,
ACL_DST_0 = 30,
ACL_DST_1 = 31,
ACL_DST_2 = 32,
- ACL_BIAS = ACL_SRC_2,
+ ACL_DST_END = 32,
+
+ // Aux
ACL_INT = 50,
ACL_INT_0 = 50,
ACL_INT_1 = 51,
@@ -56,7 +67,17 @@ enum TensorType : int32_t
ACL_INT_4 = 54,
ACL_SRC_VEC = 256,
ACL_DST_VEC = 512,
- ACL_INT_VEC = 1024
+ ACL_INT_VEC = 1024,
+
+ // Aliasing Types
+ // Conv etc
+ ACL_BIAS = ACL_SRC_2,
+
+ // Gemm
+ ACL_VEC_ROW_SUM = ACL_SRC_3,
+ ACL_VEC_COL_SUM = ACL_SRC_4,
+ ACL_SHIFTS = ACL_SRC_5,
+ ACL_MULTIPLIERS = ACL_SRC_6,
};
namespace experimental
@@ -71,27 +92,35 @@ struct MemoryInfo
{
MemoryInfo() = default;
- MemoryInfo(int slot, size_t size, size_t alignment = 0) noexcept
- : slot(slot),
- size(size),
- alignment(alignment)
+ MemoryInfo(int slot, size_t size, size_t alignment = 0) noexcept : slot(slot), size(size), alignment(alignment)
{
}
MemoryInfo(int slot, MemoryLifetime lifetime, size_t size, size_t alignment = 0) noexcept
- : slot(slot),
- lifetime(lifetime),
- size(size),
- alignment(alignment)
+ : slot(slot), lifetime(lifetime), size(size), alignment(alignment)
{
}
- int slot{ ACL_UNKNOWN };
- MemoryLifetime lifetime{ MemoryLifetime::Temporary };
- size_t size{ 0 };
- size_t alignment{ 64 };
+
+ bool merge(int slot, size_t new_size, size_t new_alignment = 0) noexcept
+ {
+ if (slot != this->slot)
+ {
+ return false;
+ }
+
+ size = std::max(size, new_size);
+ alignment = std::max(alignment, new_alignment);
+
+ return true;
+ }
+
+ int slot{ACL_UNKNOWN};
+ MemoryLifetime lifetime{MemoryLifetime::Temporary};
+ size_t size{0};
+ size_t alignment{64};
};
using MemoryRequirements = std::vector<MemoryInfo>;
} // namespace experimental
} // namespace arm_compute
-#endif /* ARM_COMPUTE_EXPERIMENTAL_TYPES_H */
+#endif // ACL_ARM_COMPUTE_CORE_EXPERIMENTAL_TYPES_H
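
Editor's note: a sketch of the new MemoryInfo::merge() shown above. Requirements targeting the same slot collapse to the maximum size and alignment; a slot mismatch returns false so the caller can append a separate entry instead:

using namespace arm_compute;
using namespace arm_compute::experimental;

void memory_requirements_sketch()
{
    MemoryRequirements reqs;
    reqs.push_back(MemoryInfo(ACL_INT_0, 1024 /* size */, 16 /* alignment */));

    // Same slot: merged in place (size -> 2048, alignment -> 64).
    const bool merged = reqs.back().merge(ACL_INT_0, 2048, 64);

    if (!merged)
    {
        reqs.push_back(MemoryInfo(ACL_INT_1, 2048, 64)); // not reached in this example
    }
}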
diff --git a/arm_compute/core/utils/ActivationFunctionUtils.h b/arm_compute/core/utils/ActivationFunctionUtils.h
new file mode 100644
index 0000000000..c988efa256
--- /dev/null
+++ b/arm_compute/core/utils/ActivationFunctionUtils.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2016-2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CORE_UTILS_ACTIVATIONFUNCTIONUTILS_H
+#define ARM_COMPUTE_CORE_UTILS_ACTIVATIONFUNCTIONUTILS_H
+
+#include "arm_compute/core/Types.h"
+
+#include <string>
+
+namespace arm_compute
+{
+/** Translates a given activation function to a string.
+ *
+ * @param[in] act @ref ActivationLayerInfo::ActivationFunction to be translated to string.
+ *
+ * @return The string describing the activation function.
+ */
+const std::string &string_from_activation_func(const ActivationFunction &act);
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_CORE_UTILS_ACTIVATIONFUNCTIONUTILS_H */
diff --git a/arm_compute/core/utils/DataLayoutUtils.h b/arm_compute/core/utils/DataLayoutUtils.h
new file mode 100644
index 0000000000..61839c9f91
--- /dev/null
+++ b/arm_compute/core/utils/DataLayoutUtils.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2016-2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CORE_UTILS_DATALAYOUTUTILS_H
+#define ARM_COMPUTE_CORE_UTILS_DATALAYOUTUTILS_H
+#include "arm_compute/core/Types.h"
+
+#include <string>
+
+namespace arm_compute
+{
+/** Convert a data layout identifier into a string.
+ *
+ * @param[in] dl @ref DataLayout to be translated to string.
+ *
+ * @return The string describing the data layout.
+ */
+const std::string &string_from_data_layout(DataLayout dl);
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_CORE_UTILS_DATALAYOUTUTILS_H */
diff --git a/arm_compute/core/utils/DataTypeUtils.h b/arm_compute/core/utils/DataTypeUtils.h
new file mode 100644
index 0000000000..6fabb19b64
--- /dev/null
+++ b/arm_compute/core/utils/DataTypeUtils.h
@@ -0,0 +1,549 @@
+/*
+ * Copyright (c) 2016-2024 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ACL_ARM_COMPUTE_CORE_UTILS_DATATYPEUTILS_H
+#define ACL_ARM_COMPUTE_CORE_UTILS_DATATYPEUTILS_H
+
+#include "arm_compute/core/PixelValue.h"
+#include "arm_compute/core/Types.h"
+
+namespace arm_compute
+{
+/** The size in bytes of the data type
+ *
+ * @param[in] data_type Input data type
+ *
+ * @return The size in bytes of the data type
+ */
+inline size_t data_size_from_type(DataType data_type)
+{
+ switch (data_type)
+ {
+ case DataType::U8:
+ case DataType::S8:
+ case DataType::QSYMM8:
+ case DataType::QASYMM8:
+ case DataType::QASYMM8_SIGNED:
+ case DataType::QSYMM8_PER_CHANNEL:
+ return 1;
+ case DataType::U16:
+ case DataType::S16:
+ case DataType::QSYMM16:
+ case DataType::QASYMM16:
+ case DataType::BFLOAT16:
+ case DataType::F16:
+ return 2;
+ case DataType::F32:
+ case DataType::U32:
+ case DataType::S32:
+ return 4;
+ case DataType::F64:
+ case DataType::U64:
+ case DataType::S64:
+ return 8;
+ case DataType::SIZET:
+ return sizeof(size_t);
+ default:
+ ARM_COMPUTE_ERROR("Invalid data type");
+ return 0;
+ }
+}
+
+/** The size in bytes of the data type
+ *
+ * @param[in] dt Input data type
+ *
+ * @return The size in bytes of the data type
+ */
+inline size_t element_size_from_data_type(DataType dt)
+{
+ switch (dt)
+ {
+ case DataType::S8:
+ case DataType::U8:
+ case DataType::QSYMM8:
+ case DataType::QASYMM8:
+ case DataType::QASYMM8_SIGNED:
+ case DataType::QSYMM8_PER_CHANNEL:
+ return 1;
+ case DataType::U16:
+ case DataType::S16:
+ case DataType::QSYMM16:
+ case DataType::QASYMM16:
+ case DataType::BFLOAT16:
+ case DataType::F16:
+ return 2;
+ case DataType::U32:
+ case DataType::S32:
+ case DataType::F32:
+ return 4;
+ case DataType::U64:
+ case DataType::S64:
+ return 8;
+ default:
+ ARM_COMPUTE_ERROR("Undefined element size for given data type");
+ return 0;
+ }
+}
+
+/** Return the data type used by a given single-planar pixel format
+ *
+ * @param[in] format Input format
+ *
+ * @return The data type used by the pixel format
+ */
+inline DataType data_type_from_format(Format format)
+{
+ switch (format)
+ {
+ case Format::U8:
+ case Format::UV88:
+ case Format::RGB888:
+ case Format::RGBA8888:
+ case Format::YUYV422:
+ case Format::UYVY422:
+ return DataType::U8;
+ case Format::U16:
+ return DataType::U16;
+ case Format::S16:
+ return DataType::S16;
+ case Format::U32:
+ return DataType::U32;
+ case Format::S32:
+ return DataType::S32;
+ case Format::BFLOAT16:
+ return DataType::BFLOAT16;
+ case Format::F16:
+ return DataType::F16;
+ case Format::F32:
+ return DataType::F32;
+ //Doesn't make sense for planar formats:
+ case Format::NV12:
+ case Format::NV21:
+ case Format::IYUV:
+ case Format::YUV444:
+ default:
+ ARM_COMPUTE_ERROR("Not supported data_type for given format");
+ return DataType::UNKNOWN;
+ }
+}
+
+/** Return the promoted data type of a given data type.
+ *
+ * @note If the promoted data type is not supported, an error will be thrown
+ *
+ * @param[in] dt Data type to get the promoted type of.
+ *
+ * @return Promoted data type
+ */
+inline DataType get_promoted_data_type(DataType dt)
+{
+ switch (dt)
+ {
+ case DataType::U8:
+ return DataType::U16;
+ case DataType::S8:
+ return DataType::S16;
+ case DataType::U16:
+ return DataType::U32;
+ case DataType::S16:
+ return DataType::S32;
+ case DataType::QSYMM8:
+ case DataType::QASYMM8:
+ case DataType::QASYMM8_SIGNED:
+ case DataType::QSYMM8_PER_CHANNEL:
+ case DataType::QSYMM16:
+ case DataType::QASYMM16:
+ case DataType::BFLOAT16:
+ case DataType::F16:
+ case DataType::U32:
+ case DataType::S32:
+ case DataType::F32:
+ ARM_COMPUTE_ERROR("Unsupported data type promotions!");
+ default:
+ ARM_COMPUTE_ERROR("Undefined data type!");
+ }
+ return DataType::UNKNOWN;
+}
+
+/** Compute the minimum and maximum values a data type can take
+ *
+ * @param[in] dt Data type to get the min/max bounds of
+ *
+ * @return A tuple (min,max) with the minimum and maximum values respectively wrapped in PixelValue.
+ */
+inline std::tuple<PixelValue, PixelValue> get_min_max(DataType dt)
+{
+ PixelValue min{};
+ PixelValue max{};
+ switch (dt)
+ {
+ case DataType::U8:
+ case DataType::QASYMM8:
+ {
+ min = PixelValue(static_cast<int32_t>(std::numeric_limits<uint8_t>::lowest()));
+ max = PixelValue(static_cast<int32_t>(std::numeric_limits<uint8_t>::max()));
+ break;
+ }
+ case DataType::S8:
+ case DataType::QSYMM8:
+ case DataType::QASYMM8_SIGNED:
+ case DataType::QSYMM8_PER_CHANNEL:
+ {
+ min = PixelValue(static_cast<int32_t>(std::numeric_limits<int8_t>::lowest()));
+ max = PixelValue(static_cast<int32_t>(std::numeric_limits<int8_t>::max()));
+ break;
+ }
+ case DataType::U16:
+ case DataType::QASYMM16:
+ {
+ min = PixelValue(static_cast<int32_t>(std::numeric_limits<uint16_t>::lowest()));
+ max = PixelValue(static_cast<int32_t>(std::numeric_limits<uint16_t>::max()));
+ break;
+ }
+ case DataType::S16:
+ case DataType::QSYMM16:
+ {
+ min = PixelValue(static_cast<int32_t>(std::numeric_limits<int16_t>::lowest()));
+ max = PixelValue(static_cast<int32_t>(std::numeric_limits<int16_t>::max()));
+ break;
+ }
+ case DataType::U32:
+ {
+ min = PixelValue(std::numeric_limits<uint32_t>::lowest());
+ max = PixelValue(std::numeric_limits<uint32_t>::max());
+ break;
+ }
+ case DataType::S32:
+ {
+ min = PixelValue(std::numeric_limits<int32_t>::lowest());
+ max = PixelValue(std::numeric_limits<int32_t>::max());
+ break;
+ }
+ case DataType::BFLOAT16:
+ {
+ min = PixelValue(bfloat16::lowest());
+ max = PixelValue(bfloat16::max());
+ break;
+ }
+ case DataType::F16:
+ {
+ min = PixelValue(std::numeric_limits<half>::lowest());
+ max = PixelValue(std::numeric_limits<half>::max());
+ break;
+ }
+ case DataType::F32:
+ {
+ min = PixelValue(std::numeric_limits<float>::lowest());
+ max = PixelValue(std::numeric_limits<float>::max());
+ break;
+ }
+ default:
+ ARM_COMPUTE_ERROR("Undefined data type!");
+ }
+ return std::make_tuple(min, max);
+}
+
+/** Convert a data type identifier into a string.
+ *
+ * @param[in] dt @ref DataType to be translated to string.
+ *
+ * @return The string describing the data type.
+ */
+const std::string &string_from_data_type(DataType dt);
+
+/** Convert a string to DataType
+ *
+ * @param[in] name The name of the data type
+ *
+ * @return DataType
+ */
+DataType data_type_from_name(const std::string &name);
+
+/** Input Stream operator for @ref DataType
+ *
+ * @param[in] stream Stream to parse
+ * @param[out] data_type Output data type
+ *
+ * @return Updated stream
+ */
+inline ::std::istream &operator>>(::std::istream &stream, DataType &data_type)
+{
+ std::string value;
+ stream >> value;
+ data_type = data_type_from_name(value);
+ return stream;
+}
+
+/** Check if a given data type is of floating point type
+ *
+ * @param[in] dt Input data type.
+ *
+ * @return True if data type is of floating point type, else false.
+ */
+inline bool is_data_type_float(DataType dt)
+{
+ switch (dt)
+ {
+ case DataType::F16:
+ case DataType::F32:
+ return true;
+ default:
+ return false;
+ }
+}
+
+/** Check if a given data type is of quantized type
+ *
+ * @note Quantized is considered a super-set of fixed-point and asymmetric data types.
+ *
+ * @param[in] dt Input data type.
+ *
+ * @return True if data type is of quantized type, else false.
+ */
+inline bool is_data_type_quantized(DataType dt)
+{
+ switch (dt)
+ {
+ case DataType::QSYMM8:
+ case DataType::QASYMM8:
+ case DataType::QASYMM8_SIGNED:
+ case DataType::QSYMM8_PER_CHANNEL:
+ case DataType::QSYMM16:
+ case DataType::QASYMM16:
+ return true;
+ default:
+ return false;
+ }
+}
+
+/** Check if a given data type is of asymmetric quantized type
+ *
+ * @param[in] dt Input data type.
+ *
+ * @return True if data type is of asymmetric quantized type, else false.
+ */
+inline bool is_data_type_quantized_asymmetric(DataType dt)
+{
+ switch (dt)
+ {
+ case DataType::QASYMM8:
+ case DataType::QASYMM8_SIGNED:
+ case DataType::QASYMM16:
+ return true;
+ default:
+ return false;
+ }
+}
+
+/** Check if a given data type is of asymmetric quantized signed type
+ *
+ * @param[in] dt Input data type.
+ *
+ * @return True if data type is of asymmetric quantized signed type, else false.
+ */
+inline bool is_data_type_quantized_asymmetric_signed(DataType dt)
+{
+ switch (dt)
+ {
+ case DataType::QASYMM8_SIGNED:
+ return true;
+ default:
+ return false;
+ }
+}
+
+/** Check if a given data type is an 8-bit asymmetric quantized type (signed or unsigned)
+ *
+ * @param[in] dt Input data type.
+ *
+ * @return True if data type is an 8-bit asymmetric quantized type, else false.
+ */
+inline bool is_data_type_quantized_asymmetric_char(DataType dt)
+{
+ switch (dt)
+ {
+ case DataType::QASYMM8_SIGNED:
+ case DataType::QASYMM8:
+ return true;
+ default:
+ return false;
+ }
+}
+
+/** Check if a given data type is of symmetric quantized type
+ *
+ * @param[in] dt Input data type.
+ *
+ * @return True if data type is of symmetric quantized type, else false.
+ */
+inline bool is_data_type_quantized_symmetric(DataType dt)
+{
+ switch (dt)
+ {
+ case DataType::QSYMM8:
+ case DataType::QSYMM8_PER_CHANNEL:
+ case DataType::QSYMM16:
+ return true;
+ default:
+ return false;
+ }
+}
+
+/** Check if a given data type is of per channel type
+ *
+ * @param[in] dt Input data type.
+ *
+ * @return True if data type is of per channel type, else false.
+ */
+inline bool is_data_type_quantized_per_channel(DataType dt)
+{
+ switch (dt)
+ {
+ case DataType::QSYMM8_PER_CHANNEL:
+ return true;
+ default:
+ return false;
+ }
+}
+
+/** Returns true if the value can be represented by the given data type
+ *
+ * @param[in] val value to be checked
+ * @param[in] dt data type that is checked
+ * @param[in] qinfo (Optional) quantization info if the data type is QASYMM8
+ *
+ * @return true if the data type can hold the value.
+ */
+template <typename T>
+bool check_value_range(T val, DataType dt, QuantizationInfo qinfo = QuantizationInfo())
+{
+ switch (dt)
+ {
+ case DataType::U8:
+ {
+ const auto val_u8 = static_cast<uint8_t>(val);
+ return ((val_u8 == val) && val >= std::numeric_limits<uint8_t>::lowest() &&
+ val <= std::numeric_limits<uint8_t>::max());
+ }
+ case DataType::QASYMM8:
+ {
+ double min = static_cast<double>(dequantize_qasymm8(0, qinfo));
+ double max = static_cast<double>(dequantize_qasymm8(std::numeric_limits<uint8_t>::max(), qinfo));
+ return ((double)val >= min && (double)val <= max);
+ }
+ case DataType::S8:
+ {
+ const auto val_s8 = static_cast<int8_t>(val);
+ return ((val_s8 == val) && val >= std::numeric_limits<int8_t>::lowest() &&
+ val <= std::numeric_limits<int8_t>::max());
+ }
+ case DataType::U16:
+ {
+ const auto val_u16 = static_cast<uint16_t>(val);
+ return ((val_u16 == val) && val >= std::numeric_limits<uint16_t>::lowest() &&
+ val <= std::numeric_limits<uint16_t>::max());
+ }
+ case DataType::S16:
+ {
+ const auto val_s16 = static_cast<int16_t>(val);
+ return ((val_s16 == val) && val >= std::numeric_limits<int16_t>::lowest() &&
+ val <= std::numeric_limits<int16_t>::max());
+ }
+ case DataType::U32:
+ {
+ const auto val_d64 = static_cast<double>(val);
+ const auto val_u32 = static_cast<uint32_t>(val);
+ return ((val_u32 == val_d64) && val_d64 >= std::numeric_limits<uint32_t>::lowest() &&
+ val_d64 <= std::numeric_limits<uint32_t>::max());
+ }
+ case DataType::S32:
+ {
+ const auto val_d64 = static_cast<double>(val);
+ const auto val_s32 = static_cast<int32_t>(val);
+ return ((val_s32 == val_d64) && val_d64 >= std::numeric_limits<int32_t>::lowest() &&
+ val_d64 <= std::numeric_limits<int32_t>::max());
+ }
+ case DataType::BFLOAT16:
+ return (val >= bfloat16::lowest() && val <= bfloat16::max());
+ case DataType::F16:
+ return (val >= std::numeric_limits<half>::lowest() && val <= std::numeric_limits<half>::max());
+ case DataType::F32:
+ return (val >= std::numeric_limits<float>::lowest() && val <= std::numeric_limits<float>::max());
+ default:
+ ARM_COMPUTE_ERROR("Data type not supported");
+ return false;
+ }
+}
+
+/** Returns the suffix string of CPU kernel implementation names based on the given data type
+ *
+ * @param[in] data_type The data type the CPU kernel implementation uses
+ *
+ * @return the suffix string of CPU kernel implementations
+ */
+inline std::string cpu_impl_dt(const DataType &data_type)
+{
+ std::string ret = "";
+
+ switch (data_type)
+ {
+ case DataType::F32:
+ ret = "fp32";
+ break;
+ case DataType::F16:
+ ret = "fp16";
+ break;
+ case DataType::U8:
+ ret = "u8";
+ break;
+ case DataType::S16:
+ ret = "s16";
+ break;
+ case DataType::S32:
+ ret = "s32";
+ break;
+ case DataType::QASYMM8:
+ ret = "qu8";
+ break;
+ case DataType::QASYMM8_SIGNED:
+ ret = "qs8";
+ break;
+ case DataType::QSYMM16:
+ ret = "qs16";
+ break;
+ case DataType::QSYMM8_PER_CHANNEL:
+ ret = "qp8";
+ break;
+ case DataType::BFLOAT16:
+ ret = "bf16";
+ break;
+ default:
+ ARM_COMPUTE_ERROR("Unsupported.");
+ }
+
+ return ret;
+}
+
+} // namespace arm_compute
+#endif // ACL_ARM_COMPUTE_CORE_UTILS_DATATYPEUTILS_H
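
Editor's note: a minimal sketch combining several of the helpers declared above, assuming the header is included; the printed output is illustrative:

#include <iostream>
#include <tuple>

using namespace arm_compute;

void data_type_sketch(DataType dt)
{
    const size_t bytes = data_size_from_type(dt);    // e.g. 1 for QASYMM8
    const bool   quant = is_data_type_quantized(dt); // true for all Q* types

    PixelValue min_val{};
    PixelValue max_val{};
    std::tie(min_val, max_val) = get_min_max(dt); // representable range of dt

    std::cout << string_from_data_type(dt) << ": " << bytes << " byte(s), "
              << (quant ? "quantized" : "non-quantized") << std::endl;
}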
diff --git a/arm_compute/core/utils/FormatUtils.h b/arm_compute/core/utils/FormatUtils.h
new file mode 100644
index 0000000000..a8e96bd361
--- /dev/null
+++ b/arm_compute/core/utils/FormatUtils.h
@@ -0,0 +1,344 @@
+/*
+ * Copyright (c) 2016-2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CORE_UTILS_FORMATUTILS_H
+#define ARM_COMPUTE_CORE_UTILS_FORMATUTILS_H
+
+#include "arm_compute/core/CoreTypes.h"
+#include "arm_compute/core/Error.h"
+
+namespace arm_compute
+{
+/** The size in bytes of the pixel format
+ *
+ * @param[in] format Input format
+ *
+ * @return The size in bytes of the pixel format
+ */
+inline size_t pixel_size_from_format(Format format)
+{
+ switch (format)
+ {
+ case Format::U8:
+ return 1;
+ case Format::U16:
+ case Format::S16:
+ case Format::BFLOAT16:
+ case Format::F16:
+ case Format::UV88:
+ case Format::YUYV422:
+ case Format::UYVY422:
+ return 2;
+ case Format::RGB888:
+ return 3;
+ case Format::RGBA8888:
+ return 4;
+ case Format::U32:
+ case Format::S32:
+ case Format::F32:
+ return 4;
+ //Doesn't make sense for planar formats:
+ case Format::NV12:
+ case Format::NV21:
+ case Format::IYUV:
+ case Format::YUV444:
+ default:
+ ARM_COMPUTE_ERROR("Undefined pixel size for given format");
+ return 0;
+ }
+}
+
+/** Return the plane index of a given channel given an input format.
+ *
+ * @param[in] format Input format
+ * @param[in] channel Input channel
+ *
+ * @return The plane index of the specific channel of the specific format
+ */
+inline int plane_idx_from_channel(Format format, Channel channel)
+{
+ switch (format)
+ {
+ // Single planar formats have a single plane
+ case Format::U8:
+ case Format::U16:
+ case Format::S16:
+ case Format::U32:
+ case Format::S32:
+ case Format::BFLOAT16:
+ case Format::F16:
+ case Format::F32:
+ case Format::UV88:
+ case Format::RGB888:
+ case Format::RGBA8888:
+ case Format::YUYV422:
+ case Format::UYVY422:
+ return 0;
+ // Multi planar formats
+ case Format::NV12:
+ case Format::NV21:
+ {
+ // Channel U and V share the same plane of format UV88
+ switch (channel)
+ {
+ case Channel::Y:
+ return 0;
+ case Channel::U:
+ case Channel::V:
+ return 1;
+ default:
+ ARM_COMPUTE_ERROR("Not supported channel");
+ return 0;
+ }
+ }
+ case Format::IYUV:
+ case Format::YUV444:
+ {
+ switch (channel)
+ {
+ case Channel::Y:
+ return 0;
+ case Channel::U:
+ return 1;
+ case Channel::V:
+ return 2;
+ default:
+ ARM_COMPUTE_ERROR("Not supported channel");
+ return 0;
+ }
+ }
+ default:
+ ARM_COMPUTE_ERROR("Not supported format");
+ return 0;
+ }
+}
+
+/** Return the channel index of a given channel given an input format.
+ *
+ * @param[in] format Input format
+ * @param[in] channel Input channel
+ *
+ * @return The channel index of the specific channel of the specific format
+ */
+inline int channel_idx_from_format(Format format, Channel channel)
+{
+ switch (format)
+ {
+ case Format::RGB888:
+ {
+ switch (channel)
+ {
+ case Channel::R:
+ return 0;
+ case Channel::G:
+ return 1;
+ case Channel::B:
+ return 2;
+ default:
+ ARM_COMPUTE_ERROR("Not supported channel");
+ return 0;
+ }
+ }
+ case Format::RGBA8888:
+ {
+ switch (channel)
+ {
+ case Channel::R:
+ return 0;
+ case Channel::G:
+ return 1;
+ case Channel::B:
+ return 2;
+ case Channel::A:
+ return 3;
+ default:
+ ARM_COMPUTE_ERROR("Not supported channel");
+ return 0;
+ }
+ }
+ case Format::YUYV422:
+ {
+ switch (channel)
+ {
+ case Channel::Y:
+ return 0;
+ case Channel::U:
+ return 1;
+ case Channel::V:
+ return 3;
+ default:
+ ARM_COMPUTE_ERROR("Not supported channel");
+ return 0;
+ }
+ }
+ case Format::UYVY422:
+ {
+ switch (channel)
+ {
+ case Channel::Y:
+ return 1;
+ case Channel::U:
+ return 0;
+ case Channel::V:
+ return 2;
+ default:
+ ARM_COMPUTE_ERROR("Not supported channel");
+ return 0;
+ }
+ }
+ case Format::NV12:
+ {
+ switch (channel)
+ {
+ case Channel::Y:
+ return 0;
+ case Channel::U:
+ return 0;
+ case Channel::V:
+ return 1;
+ default:
+ ARM_COMPUTE_ERROR("Not supported channel");
+ return 0;
+ }
+ }
+ case Format::NV21:
+ {
+ switch (channel)
+ {
+ case Channel::Y:
+ return 0;
+ case Channel::U:
+ return 1;
+ case Channel::V:
+ return 0;
+ default:
+ ARM_COMPUTE_ERROR("Not supported channel");
+ return 0;
+ }
+ }
+ case Format::YUV444:
+ case Format::IYUV:
+ {
+ switch (channel)
+ {
+ case Channel::Y:
+ return 0;
+ case Channel::U:
+ return 0;
+ case Channel::V:
+ return 0;
+ default:
+ ARM_COMPUTE_ERROR("Not supported channel");
+ return 0;
+ }
+ }
+ default:
+ ARM_COMPUTE_ERROR("Not supported format");
+ return 0;
+ }
+}
+
+/** Return the number of planes for a given format
+ *
+ * @param[in] format Input format
+ *
+ * @return The number of planes for a given image format.
+ */
+inline size_t num_planes_from_format(Format format)
+{
+ switch (format)
+ {
+ case Format::U8:
+ case Format::S16:
+ case Format::U16:
+ case Format::S32:
+ case Format::U32:
+ case Format::BFLOAT16:
+ case Format::F16:
+ case Format::F32:
+ case Format::RGB888:
+ case Format::RGBA8888:
+ case Format::YUYV422:
+ case Format::UYVY422:
+ return 1;
+ case Format::NV12:
+ case Format::NV21:
+ return 2;
+ case Format::IYUV:
+ case Format::YUV444:
+ return 3;
+ default:
+ ARM_COMPUTE_ERROR("Not supported format");
+ return 0;
+ }
+}
+
+/** Return the number of channels for a given single-planar pixel format
+ *
+ * @param[in] format Input format
+ *
+ * @return The number of channels for a given image format.
+ */
+inline size_t num_channels_from_format(Format format)
+{
+ switch (format)
+ {
+ case Format::U8:
+ case Format::U16:
+ case Format::S16:
+ case Format::U32:
+ case Format::S32:
+ case Format::BFLOAT16:
+ case Format::F16:
+ case Format::F32:
+ return 1;
+ // Because the U and V channels are subsampled,
+ // these formats appear to have only 2 channels:
+ case Format::YUYV422:
+ case Format::UYVY422:
+ return 2;
+ case Format::UV88:
+ return 2;
+ case Format::RGB888:
+ return 3;
+ case Format::RGBA8888:
+ return 4;
+ //Doesn't make sense for planar formats:
+ case Format::NV12:
+ case Format::NV21:
+ case Format::IYUV:
+ case Format::YUV444:
+ default:
+ return 0;
+ }
+}
+
+/** Convert a tensor format into a string.
+ *
+ * @param[in] format @ref Format to be translated to string.
+ *
+ * @return The string describing the format.
+ */
+const std::string &string_from_format(Format format);
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_CORE_UTILS_FORMATUTILS_H */
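
Editor's note: a sketch of the plane/channel queries above on a multi-planar format, using values that follow directly from the switch statements in the header:

using namespace arm_compute;

void nv12_layout_sketch()
{
    // NV12 is two-planar: plane 0 holds Y, plane 1 holds interleaved UV (UV88).
    ARM_COMPUTE_ERROR_ON(num_planes_from_format(Format::NV12) != 2);
    ARM_COMPUTE_ERROR_ON(plane_idx_from_channel(Format::NV12, Channel::V) != 1);
    // Within the interleaved UV plane, V sits at channel index 1:
    ARM_COMPUTE_ERROR_ON(channel_idx_from_format(Format::NV12, Channel::V) != 1);
}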
diff --git a/arm_compute/core/utils/InterpolationPolicyUtils.h b/arm_compute/core/utils/InterpolationPolicyUtils.h
new file mode 100644
index 0000000000..8d4ae4321c
--- /dev/null
+++ b/arm_compute/core/utils/InterpolationPolicyUtils.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2016-2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CORE_UTILS_INTERPOLATIONPOLICYUTILS_H
+#define ARM_COMPUTE_CORE_UTILS_INTERPOLATIONPOLICYUTILS_H
+
+#include "arm_compute/core/Types.h"
+
+#include <string>
+
+namespace arm_compute
+{
+/** Translates a given interpolation policy to a string.
+ *
+ * @param[in] policy @ref InterpolationPolicy to be translated to string.
+ *
+ * @return The string describing the interpolation policy.
+ */
+const std::string &string_from_interpolation_policy(InterpolationPolicy policy);
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_CORE_UTILS_INTERPOLATIONPOLICYUTILS_H */
diff --git a/arm_compute/core/utils/StringUtils.h b/arm_compute/core/utils/StringUtils.h
new file mode 100644
index 0000000000..c13cbaa334
--- /dev/null
+++ b/arm_compute/core/utils/StringUtils.h
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2016-2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CORE_UTILS_STRINGUTILS_H
+#define ARM_COMPUTE_CORE_UTILS_STRINGUTILS_H
+
+#include <string>
+#include <vector>
+
+namespace arm_compute
+{
+/** Lower a given string.
+ *
+ * @param[in] val Given string to lower.
+ *
+ * @return The lowered string
+ */
+std::string lower_string(const std::string &val);
+
+/** Raise a given string to upper case
+ *
+ * @param[in] val Given string to convert to upper case.
+ *
+ * @return The upper case string
+ */
+std::string upper_string(const std::string &val);
+
+/** Create a string with the float in full precision.
+ *
+ * @param val Floating point value
+ *
+ * @return String with the floating point value.
+ */
+std::string float_to_string_with_full_precision(float val);
+
+/** Join a sequence of strings with separator @p sep
+ *
+ * @param[in] strings Strings to join
+ * @param[in] sep Separator to join consecutive strings in the sequence
+ *
+ * @return The joined string
+ */
+std::string join(const std::vector<std::string> strings, const std::string &sep);
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_CORE_UTILS_STRINGUTILS_H */
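
Editor's note: a minimal sketch of the string helpers declared above; the expected results in the comments are illustrative:

using namespace arm_compute;

void string_utils_sketch()
{
    const std::string lo = lower_string("NEON");                // "neon"
    const std::string up = upper_string("fp16");                // "FP16"
    const std::string js = join({"cpu", "gpu", "opencl"}, ","); // "cpu,gpu,opencl"
    (void)lo; (void)up; (void)js;
}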
diff --git a/arm_compute/core/utils/helpers/AdjustVecSize.h b/arm_compute/core/utils/helpers/AdjustVecSize.h
new file mode 100644
index 0000000000..842e3b57d6
--- /dev/null
+++ b/arm_compute/core/utils/helpers/AdjustVecSize.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2016-2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_UTILS_ADJUSTVECSIZE_H
+#define ARM_COMPUTE_UTILS_ADJUSTVECSIZE_H
+
+#include "arm_compute/core/Error.h"
+
+namespace arm_compute
+{
+/** Returns the vector size adjusted to the input's first dimension: if @p vec_size exceeds @p dim0 it is rounded down to the closest valid vector size
+ *
+ * @param[in] vec_size vector size to be adjusted
+ * @param[in] dim0 size of the first dimension
+ *
+ * @return The number of elements processed along the X axis per thread
+ */
+inline unsigned int adjust_vec_size(unsigned int vec_size, size_t dim0)
+{
+ ARM_COMPUTE_ERROR_ON(vec_size > 16);
+
+ if ((vec_size >= dim0) && (dim0 == 3))
+ {
+ return dim0;
+ }
+
+ while (vec_size > dim0)
+ {
+ vec_size >>= 1;
+ }
+
+ return vec_size;
+}
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_UTILS_ADJUSTVECSIZE_H */
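
Editor's note: a sketch of adjust_vec_size() behaviour, which follows directly from the definition above: the requested size is halved until it fits dim0, except that dim0 == 3 is returned as-is (e.g. RGB channels):

using namespace arm_compute;

void adjust_vec_size_sketch()
{
    ARM_COMPUTE_ERROR_ON(adjust_vec_size(16U, 20U) != 16U); // already fits: unchanged
    ARM_COMPUTE_ERROR_ON(adjust_vec_size(16U, 10U) != 8U);  // halved until it fits
    ARM_COMPUTE_ERROR_ON(adjust_vec_size(16U, 3U) != 3U);   // dim0 == 3 is special-cased
}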
diff --git a/arm_compute/core/utils/helpers/tensor_transform.h b/arm_compute/core/utils/helpers/tensor_transform.h
index faa5b4433c..7a61fa192a 100644
--- a/arm_compute/core/utils/helpers/tensor_transform.h
+++ b/arm_compute/core/utils/helpers/tensor_transform.h
@@ -52,7 +52,8 @@ int calculate_stride_on_index(int index, Coordinates strides);
*
* @return Absolute start position of a given index
*/
-int calculate_start_on_index(TensorShape input_shape, int index, Coordinates starts, Coordinates strides, int32_t begin_mask);
+int calculate_start_on_index(
+ TensorShape input_shape, int index, Coordinates starts, Coordinates strides, int32_t begin_mask);
/** Returns the absolute end position of a given index for a strided slice operation
*
@@ -68,8 +69,13 @@ int calculate_start_on_index(TensorShape input_shape, int index, Coordinates sta
*
* @return Absolute end position of a given index
*/
-int calculate_end_on_index(TensorShape input_shape, int index, int start_on_index, Coordinates ends, Coordinates strides,
- int32_t end_mask = 0, int32_t shrink_axis_mask = 0);
+int calculate_end_on_index(TensorShape input_shape,
+ int index,
+ int start_on_index,
+ Coordinates ends,
+ Coordinates strides,
+ int32_t end_mask = 0,
+ int32_t shrink_axis_mask = 0);
/** Calculate start, end and stride coordinates for a strided slice
*
@@ -87,8 +93,12 @@ int calculate_end_on_index(TensorShape input_shape, int index, int start_on_inde
* @return A tuple with <Start,End,Strides>
*/
std::tuple<Coordinates, Coordinates, Coordinates> calculate_strided_slice_coords(TensorShape input_shape,
- Coordinates starts, Coordinates ends, Coordinates strides,
- int32_t begin_mask = 0, int32_t end_mask = 0, int32_t shrink_axis_mask = 0);
+ Coordinates starts,
+ Coordinates ends,
+ Coordinates strides,
+ int32_t begin_mask = 0,
+ int32_t end_mask = 0,
+ int32_t shrink_axis_mask = 0);
/** Computes output shape of strided slice
*
@@ -109,9 +119,14 @@ std::tuple<Coordinates, Coordinates, Coordinates> calculate_strided_slice_coords
*
* @return The output tensor shape
*/
-TensorShape compute_strided_slice_output_shape(TensorShape input_shape, Coordinates starts, Coordinates ends, Coordinates strides,
- int32_t begin_mask = 0, int32_t end_mask = 0, int32_t shrink_axis_mask = 0,
- bool return_unshrinked = false);
+TensorShape compute_strided_slice_output_shape(TensorShape input_shape,
+ Coordinates starts,
+ Coordinates ends,
+ Coordinates strides,
+ int32_t begin_mask = 0,
+ int32_t end_mask = 0,
+ int32_t shrink_axis_mask = 0,
+ bool return_unshrinked = false);
/** Constructs end mask in case we want to perform a slice operation using the strided slice interface
*
@@ -122,7 +137,7 @@ TensorShape compute_strided_slice_output_shape(TensorShape input_shape, Coordina
* @return End mask
*/
int32_t construct_slice_end_mask(Coordinates ends);
-} // namespace tensor_tranform
+} // namespace tensor_transform
} // namespace helpers
} // namespace arm_compute
#endif /* ARM_COMPUTE_UTILS_HELPERS_TENSOR_TRANSFORM_H */
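
Editor's note: a sketch of compute_strided_slice_output_shape() with the reflowed signature above; the expected shape assumes the usual strided-slice semantics (half-open [start, end) ranges with the given stride):

using namespace arm_compute;
using namespace arm_compute::helpers::tensor_transform;

void strided_slice_sketch()
{
    const TensorShape in_shape(10U, 4U);
    const Coordinates starts(1, 0);
    const Coordinates ends(9, 4);
    const Coordinates strides(2, 1);

    // Dim 0 keeps elements 1,3,5,7 -> size 4; dim 1 is untouched -> size 4.
    const TensorShape out = compute_strided_slice_output_shape(in_shape, starts, ends, strides);
    ARM_COMPUTE_ERROR_ON(out[0] != 4 || out[1] != 4);
}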
diff --git a/arm_compute/core/utils/logging/FilePrinter.h b/arm_compute/core/utils/logging/FilePrinter.h
index 0e5b84f084..a865aadddb 100644
--- a/arm_compute/core/utils/logging/FilePrinter.h
+++ b/arm_compute/core/utils/logging/FilePrinter.h
@@ -24,9 +24,8 @@
#ifndef ARM_COMPUTE_LOGGING_FILE_PRINTER_H
#define ARM_COMPUTE_LOGGING_FILE_PRINTER_H
-#include "arm_compute/core/utils/logging/IPrinter.h"
-
#include "arm_compute/core/utils/io/FileHandler.h"
+#include "arm_compute/core/utils/logging/IPrinter.h"
namespace arm_compute
{
diff --git a/arm_compute/core/utils/logging/Helpers.h b/arm_compute/core/utils/logging/Helpers.h
index 5f8b948592..c3c2f0f0b8 100644
--- a/arm_compute/core/utils/logging/Helpers.h
+++ b/arm_compute/core/utils/logging/Helpers.h
@@ -25,6 +25,7 @@
#define ARM_COMPUTE_LOGGING_HELPERS_H
#include "arm_compute/core/utils/logging/Types.h"
+
#include "support/ToolchainSupport.h"
#include <cstddef>
@@ -45,7 +46,7 @@ namespace logging
* @return The formatted string
*/
template <typename... Ts>
-inline std::string string_with_format(const std::string &fmt, Ts &&... args)
+inline std::string string_with_format(const std::string &fmt, Ts &&...args)
{
size_t size = support::cpp11::snprintf(nullptr, 0, fmt.c_str(), args...) + 1;
auto char_str = std::make_unique<char[]>(size);
diff --git a/arm_compute/core/utils/logging/IPrinter.h b/arm_compute/core/utils/logging/IPrinter.h
index 42dca58ea1..7fde4d9302 100644
--- a/arm_compute/core/utils/logging/IPrinter.h
+++ b/arm_compute/core/utils/logging/IPrinter.h
@@ -35,8 +35,7 @@ class Printer
{
public:
/** Default Constructor */
- Printer() noexcept
- : _mtx()
+ Printer() noexcept : _mtx()
{
}
/** Prevent instances of this class from being copied */
diff --git a/arm_compute/core/utils/logging/LogMsgDecorators.h b/arm_compute/core/utils/logging/LogMsgDecorators.h
index 9c9e62740f..66a8180e21 100644
--- a/arm_compute/core/utils/logging/LogMsgDecorators.h
+++ b/arm_compute/core/utils/logging/LogMsgDecorators.h
@@ -63,8 +63,7 @@ public:
*
* @param str Sting to append
*/
- StringDecorator(const std::string &str)
- : _str(str)
+ StringDecorator(const std::string &str) : _str(str)
{
_str = angle_wrap_value(str);
}
@@ -103,7 +102,7 @@ private:
auto time = std::chrono::system_clock::to_time_t(now);
// TODO: use put_time for gcc > 4.9
- char buf[100] = { 0 };
+ char buf[100] = {0};
std::strftime(buf, sizeof(buf), "%d-%m-%Y %I:%M:%S", std::localtime(&time));
return buf;
}
diff --git a/arm_compute/core/utils/logging/Logger.h b/arm_compute/core/utils/logging/Logger.h
index 4fc9bb7dbf..608db39138 100644
--- a/arm_compute/core/utils/logging/Logger.h
+++ b/arm_compute/core/utils/logging/Logger.h
@@ -88,7 +88,7 @@ public:
* @param[in] args Message arguments
*/
template <typename... Ts>
- void log(LogLevel log_level, const std::string &fmt, Ts &&... args);
+ void log(LogLevel log_level, const std::string &fmt, Ts &&...args);
/** Sets log level of the logger
*
* @warning Not thread-safe
@@ -159,11 +159,11 @@ private:
};
template <typename... Ts>
-inline void Logger::log(LogLevel log_level, const std::string &fmt, Ts &&... args)
+inline void Logger::log(LogLevel log_level, const std::string &fmt, Ts &&...args)
{
// Return if message shouldn't be logged
// i.e. if log level does not match the logger's
- if(!is_loggable(log_level))
+ if (!is_loggable(log_level))
{
return;
}
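
Editor's note: a sketch of the printf-style log() overload shown above, fetched through the LoggerRegistry; the logger name "CORE" and the message are assumptions for illustration:

using namespace arm_compute::logging;

void logging_sketch()
{
    auto logger = LoggerRegistry::get().logger("CORE"); // nullptr if not created
    if (logger != nullptr)
    {
        logger->log(LogLevel::INFO, "Configured %d kernels", 3);
    }
}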
diff --git a/arm_compute/core/utils/logging/LoggerRegistry.h b/arm_compute/core/utils/logging/LoggerRegistry.h
index 7c9931a260..4e52a10935 100644
--- a/arm_compute/core/utils/logging/LoggerRegistry.h
+++ b/arm_compute/core/utils/logging/LoggerRegistry.h
@@ -27,6 +27,7 @@
#include "arm_compute/core/utils/logging/Logger.h"
#include "arm_compute/core/utils/logging/Printers.h"
#include "arm_compute/core/utils/logging/Types.h"
+
#include "support/Mutex.h"
#include <memory>
@@ -54,8 +55,9 @@ public:
* @param[in] log_level Logger's log level. Defaults to INFO
* @param[in] printers Printers to attach to the system loggers. Defaults with a @ref StdPrinter.
*/
- void create_logger(const std::string &name, LogLevel log_level = LogLevel::INFO,
- const std::vector<std::shared_ptr<Printer>> &printers = { std::make_shared<StdPrinter>() });
+ void create_logger(const std::string &name,
+ LogLevel log_level = LogLevel::INFO,
+ const std::vector<std::shared_ptr<Printer>> &printers = {std::make_shared<StdPrinter>()});
/** Remove a logger
*
* @param name Logger's name
@@ -74,16 +76,17 @@ public:
* @param[in] printers (Optional) Printers to attach to the system loggers. Defaults with a @ref StdPrinter.
*/
void create_reserved_loggers(LogLevel log_level = LogLevel::INFO,
- const std::vector<std::shared_ptr<Printer>> &printers = { std::make_shared<StdPrinter>() });
+ const std::vector<std::shared_ptr<Printer>> &printers = {
+ std::make_shared<StdPrinter>()});
private:
/** Default constructor */
LoggerRegistry();
private:
- arm_compute::Mutex _mtx;
+ arm_compute::Mutex _mtx;
std::unordered_map<std::string, std::shared_ptr<Logger>> _loggers;
- static std::set<std::string> _reserved_loggers;
+ static std::set<std::string> _reserved_loggers;
};
} // namespace logging
} // namespace arm_compute
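The reflowed create_logger() signature reads naturally at the call site. A minimal usage sketch (hypothetical logger names; assumes the LogLevel::VERBOSE enumerator from Types.h):

#include "arm_compute/core/utils/logging/LoggerRegistry.h"

using namespace arm_compute::logging;

void setup()
{
    // Defaults: LogLevel::INFO and a single StdPrinter.
    LoggerRegistry::get().create_logger("EXAMPLE");
    // Explicit form, matching the reflowed default argument:
    LoggerRegistry::get().create_logger("NOISY", LogLevel::VERBOSE,
                                        {std::make_shared<StdPrinter>()});
    auto logger = LoggerRegistry::get().logger("EXAMPLE");
    if (logger != nullptr)
    {
        logger->log(LogLevel::INFO, "logger ready");
    }
}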
diff --git a/arm_compute/core/utils/logging/Macros.h b/arm_compute/core/utils/logging/Macros.h
index 1108dd3800..4d5aa5fe2c 100644
--- a/arm_compute/core/utils/logging/Macros.h
+++ b/arm_compute/core/utils/logging/Macros.h
@@ -30,52 +30,66 @@
#ifdef ARM_COMPUTE_LOGGING_ENABLED
+#ifdef __GNUC__
+inline std::string signature_name(const std::string &pretty_func)
+{
+ const auto scope_op = pretty_func.find("::");
+ const auto begin = pretty_func.substr(0, scope_op).rfind(" ") + 1;
+ const auto end = pretty_func.rfind("(") - begin;
+
+ return pretty_func.substr(begin, end) + "()";
+}
+#define ARM_COMPUTE_SIGNATURE_NAME signature_name(__PRETTY_FUNCTION__)
+#else /* __GNUC__ */
+#define ARM_COMPUTE_SIGNATURE_NAME (__func__)
+#endif /* __GNUC__ */
+
#define ARM_COMPUTE_LOG_MSG(logger_name, log_level, msg) \
do \
{ \
auto __logger = arm_compute::logging::LoggerRegistry::get().logger(logger_name); \
- if(__logger != nullptr) \
+ if (__logger != nullptr) \
{ \
__logger->log(log_level, msg); \
} \
- } while(false)
+ } while (false)
#define ARM_COMPUTE_LOG_MSG_WITH_FUNCNAME(logger_name, log_level, msg) \
do \
{ \
auto __logger = arm_compute::logging::LoggerRegistry::get().logger(logger_name); \
- if(__logger != nullptr) \
+ if (__logger != nullptr) \
{ \
std::ostringstream s; \
- s << __func__ << ":" << msg; \
+ s << ARM_COMPUTE_SIGNATURE_NAME << " : " << msg; \
__logger->log(log_level, s.str()); \
} \
- } while(false)
+ } while (false)
#define ARM_COMPUTE_LOG_MSG_WITH_FORMAT(logger_name, log_level, fmt, ...) \
do \
{ \
auto __logger = arm_compute::logging::LoggerRegistry::get().logger(logger_name); \
- if(__logger != nullptr) \
+ if (__logger != nullptr) \
{ \
size_t size = ::snprintf(nullptr, 0, fmt, __VA_ARGS__) + 1; \
auto char_str = std::make_unique<char[]>(size); \
- ::snprintf(char_str.get(), size, #fmt, __VA_ARGS__); \
+ ::snprintf(char_str.get(), size, fmt, __VA_ARGS__); \
__logger->log(log_level, std::string(char_str.get(), char_str.get() + size - 1)); \
} \
- } while(false)
+ } while (false)
#define ARM_COMPUTE_LOG_STREAM(logger_name, log_level, stream) \
do \
{ \
auto __logger = arm_compute::logging::LoggerRegistry::get().logger(logger_name); \
- if(__logger != nullptr) \
+ if (__logger != nullptr) \
{ \
std::ostringstream s; \
s << stream; \
__logger->log(log_level, s.str()); \
} \
- } while(false)
+ } while (false)
#else /* ARM_COMPUTE_LOGGING_ENABLED */
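Two behavioural fixes ride along with the brace reflow here: the stray stringize operator in ARM_COMPUTE_LOG_MSG_WITH_FORMAT (#fmt becomes fmt - previously the second snprintf received a quoted copy of the format string while the buffer had been sized from the unquoted one), and function names are now derived from __PRETTY_FUNCTION__ on GCC. A standalone check of the string surgery signature_name() performs, using a hypothetical input string:

#include <cassert>
#include <string>

std::string signature_name(const std::string &pretty_func)
{
    const auto scope_op = pretty_func.find("::");
    const auto begin    = pretty_func.substr(0, scope_op).rfind(" ") + 1;
    const auto end      = pretty_func.rfind("(") - begin;
    return pretty_func.substr(begin, end) + "()";
}

int main()
{
    // For a member function, GCC emits something along these lines:
    assert(signature_name("void arm_compute::CLScale::configure(ICLTensor *)") ==
           "arm_compute::CLScale::configure()");
    return 0;
}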
diff --git a/arm_compute/core/utils/logging/Types.h b/arm_compute/core/utils/logging/Types.h
index f0ddae6c84..64c567b984 100644
--- a/arm_compute/core/utils/logging/Types.h
+++ b/arm_compute/core/utils/logging/Types.h
@@ -44,8 +44,7 @@ enum class LogLevel
struct LogMsg
{
/** Default constructor */
- LogMsg()
- : raw_(), log_level_(LogLevel::OFF)
+ LogMsg() : raw_(), log_level_(LogLevel::OFF)
{
}
/** Construct a log message
@@ -53,8 +52,7 @@ struct LogMsg
* @param[in] msg Message to log.
* @param[in] log_level Logging level. Default: OFF
*/
- LogMsg(std::string msg, LogLevel log_level = LogLevel::OFF)
- : raw_(msg), log_level_(log_level)
+ LogMsg(std::string msg, LogLevel log_level = LogLevel::OFF) : raw_(msg), log_level_(log_level)
{
}
diff --git a/arm_compute/core/utils/math/Math.h b/arm_compute/core/utils/math/Math.h
new file mode 100644
index 0000000000..e70337ba0f
--- /dev/null
+++ b/arm_compute/core/utils/math/Math.h
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2017-2018, 2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_UTILS_MATH_H
+#define ARM_COMPUTE_UTILS_MATH_H
+
+namespace arm_compute
+{
+/** Calculate the rounded up quotient of val / m.
+ *
+ * @param[in] val Value to divide and round up.
+ * @param[in] m Value to divide by.
+ *
+ * @return the result.
+ */
+template <typename S, typename T>
+constexpr auto DIV_CEIL(S val, T m) -> decltype((val + m - 1) / m)
+{
+ return (val + m - 1) / m;
+}
+
+/** Computes the smallest number larger than or equal to value that is a multiple of divisor.
+ *
+ * @param[in] value Lower bound value
+ * @param[in] divisor Value to compute multiple of.
+ *
+ * @return the result.
+ */
+template <typename S, typename T>
+inline auto ceil_to_multiple(S value, T divisor) -> decltype(((value + divisor - 1) / divisor) * divisor)
+{
+ ARM_COMPUTE_ERROR_ON(value < 0 || divisor <= 0);
+ return DIV_CEIL(value, divisor) * divisor;
+}
+
+/** Computes the largest number smaller than or equal to value that is a multiple of divisor.
+ *
+ * @param[in] value Upper bound value
+ * @param[in] divisor Value to compute multiple of.
+ *
+ * @return the result.
+ */
+template <typename S, typename T>
+inline auto floor_to_multiple(S value, T divisor) -> decltype((value / divisor) * divisor)
+{
+ ARM_COMPUTE_ERROR_ON(value < 0 || divisor <= 0);
+ return (value / divisor) * divisor;
+}
+
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_UTILS_MATH_H */
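Quick worked values for the three helpers, with inputs satisfying the value >= 0 and divisor > 0 checks (an editorial sketch):

// DIV_CEIL(7, 3)          == (7 + 3 - 1) / 3    == 3
// ceil_to_multiple(7, 3)  == DIV_CEIL(7, 3) * 3 == 9
// floor_to_multiple(7, 3) == (7 / 3) * 3        == 6
// Only DIV_CEIL is constexpr, so only it is checkable at compile time:
static_assert(arm_compute::DIV_CEIL(7, 3) == 3, "rounded-up quotient");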
diff --git a/arm_compute/core/utils/math/SafeOps.h b/arm_compute/core/utils/math/SafeOps.h
index f0d76a3d02..ef8bcf7e14 100644
--- a/arm_compute/core/utils/math/SafeOps.h
+++ b/arm_compute/core/utils/math/SafeOps.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2021 Arm Limited.
+ * Copyright (c) 2019-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -25,7 +25,8 @@
#define ARM_COMPUTE_UTILS_MATH_SAFE_OPS
#include "arm_compute/core/Error.h"
-#include "support/Requires.h"
+
+#include "support/AclRequires.h"
#include <limits>
@@ -51,11 +52,11 @@ T safe_integer_add(T val_a, T val_b)
{
T result = 0;
- if((val_b > 0) && (val_a > std::numeric_limits<T>::max() - val_b))
+ if ((val_b > 0) && (val_a > std::numeric_limits<T>::max() - val_b))
{
result = std::numeric_limits<T>::max();
}
- else if((val_b < 0) && (val_a < std::numeric_limits<T>::min() - val_b))
+ else if ((val_b < 0) && (val_a < std::numeric_limits<T>::min() - val_b))
{
result = std::numeric_limits<T>::min();
}
@@ -83,11 +84,11 @@ T safe_integer_sub(T val_a, T val_b)
{
T result = 0;
- if((val_b < 0) && (val_a > std::numeric_limits<T>::max() + val_b))
+ if ((val_b < 0) && (val_a > std::numeric_limits<T>::max() + val_b))
{
result = std::numeric_limits<T>::max();
}
- else if((val_b > 0) && (val_a < std::numeric_limits<T>::min() + val_b))
+ else if ((val_b > 0) && (val_a < std::numeric_limits<T>::min() + val_b))
{
result = std::numeric_limits<T>::min();
}
@@ -115,13 +116,13 @@ T safe_integer_mul(T val_a, T val_b)
{
T result = 0;
- if(val_a > 0)
+ if (val_a > 0)
{
- if((val_b > 0) && (val_a > (std::numeric_limits<T>::max() / val_b)))
+ if ((val_b > 0) && (val_a > (std::numeric_limits<T>::max() / val_b)))
{
result = std::numeric_limits<T>::max();
}
- else if(val_b < (std::numeric_limits<T>::min() / val_a))
+ else if (val_b < (std::numeric_limits<T>::min() / val_a))
{
result = std::numeric_limits<T>::min();
}
@@ -132,11 +133,11 @@ T safe_integer_mul(T val_a, T val_b)
}
else
{
- if((val_b > 0) && (val_a < (std::numeric_limits<T>::min() / val_b)))
+ if ((val_b > 0) && (val_a < (std::numeric_limits<T>::min() / val_b)))
{
result = std::numeric_limits<T>::max();
}
- else if((val_a != 0) && (val_b < (std::numeric_limits<T>::max() / val_a)))
+ else if ((val_a != 0) && (val_b < (std::numeric_limits<T>::max() / val_a)))
{
result = std::numeric_limits<T>::min();
}
@@ -165,7 +166,7 @@ T safe_integer_div(T val_a, T val_b)
{
T result = 0;
- if((val_b == 0) || ((val_a == std::numeric_limits<T>::min()) && (val_b == -1)))
+ if ((val_b == 0) || ((val_a == std::numeric_limits<T>::min()) && (val_b == -1)))
{
result = std::numeric_limits<T>::min();
}
@@ -176,7 +177,7 @@ T safe_integer_div(T val_a, T val_b)
return result;
}
-} // namespace cast
+} // namespace math
} // namespace utils
} // namespace arm_compute
#endif /* ARM_COMPUTE_UTILS_MATH_SAFE_OPS */
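The saturating helpers clamp toward the representable limits instead of overflowing. A small sketch with int8_t, values chosen so both clamps trigger (assumes the templates deduce T from the operands):

#include <cstdint>

#include "arm_compute/core/utils/math/SafeOps.h"

void saturate_demo()
{
    using namespace arm_compute::utils::math;
    const int8_t a = 120, b = 20;
    const int8_t sum  = safe_integer_add(a, b);            // 140 > 127   -> clamps to 127
    const int8_t diff = safe_integer_sub(int8_t{-120}, b); // -140 < -128 -> clamps to -128
    (void)sum;
    (void)diff;
}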
diff --git a/arm_compute/core/utils/misc/InfoHelpers.h b/arm_compute/core/utils/misc/InfoHelpers.h
index ced0d24b56..1d1b4ea8d7 100644
--- a/arm_compute/core/utils/misc/InfoHelpers.h
+++ b/arm_compute/core/utils/misc/InfoHelpers.h
@@ -53,10 +53,12 @@ inline bool is_relu(ActivationLayerInfo activation_info)
*/
inline bool is_relu6(ActivationLayerInfo activation_info)
{
- const bool is_lu_bounded_relu = activation_info.activation() == ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU
- && activation_info.a() == 6.f && activation_info.b() == 0.f;
- const bool is_bounded_relu = activation_info.activation() == ActivationLayerInfo::ActivationFunction::BOUNDED_RELU
- && activation_info.a() == 6.f;
+ const bool is_lu_bounded_relu =
+ activation_info.activation() == ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU &&
+ activation_info.a() == 6.f && activation_info.b() == 0.f;
+ const bool is_bounded_relu =
+ activation_info.activation() == ActivationLayerInfo::ActivationFunction::BOUNDED_RELU &&
+ activation_info.a() == 6.f;
return activation_info.enabled() && (is_lu_bounded_relu || is_bounded_relu);
}
@@ -68,34 +70,37 @@ inline bool is_relu6(ActivationLayerInfo activation_info)
*
*/
template <typename T>
-inline void build_lstm_params_tensor_info(const LSTMParams<T> &lstm_params,
- LSTMParams<ITensorInfo> *lstm_params_info)
+inline void build_lstm_params_tensor_info(const LSTMParams<T> &lstm_params, LSTMParams<ITensorInfo> *lstm_params_info)
{
- if(lstm_params.has_peephole_opt())
+ if (lstm_params.has_peephole_opt())
{
ARM_COMPUTE_ERROR_ON_NULLPTR(lstm_params.cell_to_forget_weights(), lstm_params.cell_to_output_weights());
- lstm_params_info->set_peephole_params(lstm_params.cell_to_forget_weights()->info(), lstm_params.cell_to_output_weights()->info());
+ lstm_params_info->set_peephole_params(lstm_params.cell_to_forget_weights()->info(),
+ lstm_params.cell_to_output_weights()->info());
}
- if(lstm_params.has_projection())
+ if (lstm_params.has_projection())
{
ARM_COMPUTE_ERROR_ON_NULLPTR(lstm_params.projection_weights());
- lstm_params_info->set_projection_params(lstm_params.projection_weights()->info(),
- lstm_params.projection_bias() != nullptr ? lstm_params.projection_bias()->info() : nullptr);
+ lstm_params_info->set_projection_params(
+ lstm_params.projection_weights()->info(),
+ lstm_params.projection_bias() != nullptr ? lstm_params.projection_bias()->info() : nullptr);
}
- if(!lstm_params.has_cifg_opt())
+ if (!lstm_params.has_cifg_opt())
{
- ARM_COMPUTE_ERROR_ON_NULLPTR(lstm_params.input_to_input_weights(), lstm_params.recurrent_to_input_weights(), lstm_params.input_gate_bias());
+ ARM_COMPUTE_ERROR_ON_NULLPTR(lstm_params.input_to_input_weights(), lstm_params.recurrent_to_input_weights(),
+ lstm_params.input_gate_bias());
- ITensorInfo *cell_to_input_weights_info = (lstm_params.has_peephole_opt()) ? lstm_params.cell_to_input_weights()->info() : nullptr;
- lstm_params_info->set_cifg_params(lstm_params.input_to_input_weights()->info(), lstm_params.recurrent_to_input_weights()->info(),
- cell_to_input_weights_info, lstm_params.input_gate_bias()->info());
+ ITensorInfo *cell_to_input_weights_info =
+ (lstm_params.has_peephole_opt()) ? lstm_params.cell_to_input_weights()->info() : nullptr;
+ lstm_params_info->set_cifg_params(lstm_params.input_to_input_weights()->info(),
+ lstm_params.recurrent_to_input_weights()->info(), cell_to_input_weights_info,
+ lstm_params.input_gate_bias()->info());
}
- if(lstm_params.use_layer_norm())
+ if (lstm_params.use_layer_norm())
{
- ARM_COMPUTE_ERROR_ON_NULLPTR(lstm_params.forget_layer_norm_weights(),
- lstm_params.output_layer_norm_weights(),
+ ARM_COMPUTE_ERROR_ON_NULLPTR(lstm_params.forget_layer_norm_weights(), lstm_params.output_layer_norm_weights(),
lstm_params.cell_layer_norm_weights());
- if(!lstm_params.has_cifg_opt())
+ if (!lstm_params.has_cifg_opt())
{
ARM_COMPUTE_ERROR_ON_NULLPTR(lstm_params.input_layer_norm_weights());
}
@@ -103,15 +108,14 @@ inline void build_lstm_params_tensor_info(const LSTMParams<T> &lstm_params,
ITensorInfo *forget_info = lstm_params.forget_layer_norm_weights()->info();
ITensorInfo *cell_info = lstm_params.cell_layer_norm_weights()->info();
ITensorInfo *output_info = lstm_params.output_layer_norm_weights()->info();
- ITensorInfo *input_info = lstm_params.has_cifg_opt() ? nullptr : lstm_params.input_layer_norm_weights()->info();
+ ITensorInfo *input_info = lstm_params.has_cifg_opt() ? nullptr : lstm_params.input_layer_norm_weights()->info();
lstm_params_info->set_layer_normalization_params(input_info, forget_info, cell_info, output_info);
}
- lstm_params_info->set_matmul_scale_params(lstm_params.input_intermediate_scale(),
- lstm_params.forget_intermediate_scale(),
- lstm_params.cell_intermediate_scale(),
- lstm_params.output_intermediate_scale());
+ lstm_params_info->set_matmul_scale_params(
+ lstm_params.input_intermediate_scale(), lstm_params.forget_intermediate_scale(),
+ lstm_params.cell_intermediate_scale(), lstm_params.output_intermediate_scale());
lstm_params_info->set_hidden_state_params(lstm_params.hidden_state_zero(), lstm_params.hidden_state_scale());
}
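A small sketch of what is_relu6() accepts after this reflow - both formulations of min(max(x, 0), 6) (hypothetical demo, assuming the ActivationLayerInfo constructor defaults):

#include <cassert>

#include "arm_compute/core/utils/misc/InfoHelpers.h"

void relu6_demo()
{
    using namespace arm_compute;
    // BOUNDED_RELU with a == 6.f
    const ActivationLayerInfo bounded(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.f);
    // LU_BOUNDED_RELU with a == 6.f and b == 0.f
    const ActivationLayerInfo lu_bounded(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.f, 0.f);

    assert(utils::info_helpers::is_relu6(bounded));
    assert(utils::info_helpers::is_relu6(lu_bounded));
}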
diff --git a/arm_compute/core/utils/misc/MMappedFile.h b/arm_compute/core/utils/misc/MMappedFile.h
index b3e0994b5b..3efdbc5bda 100644
--- a/arm_compute/core/utils/misc/MMappedFile.h
+++ b/arm_compute/core/utils/misc/MMappedFile.h
@@ -24,7 +24,7 @@
#ifndef ARM_COMPUTE_MISC_MMAPPED_FILE_H
#define ARM_COMPUTE_MISC_MMAPPED_FILE_H
-#if !defined(BARE_METAL)
+#if !defined(_WIN64) && !defined(BARE_METAL)
#include <string>
#include <utility>
@@ -105,6 +105,6 @@ private:
} // namespace mmap_io
} // namespace utils
} // namespace arm_compute
-#endif // !defined(BARE_METAL)
+#endif // !defined(_WIN64) && !defined(BARE_METAL)
#endif /* ARM_COMPUTE_MISC_MMAPPED_FILE_H */
diff --git a/arm_compute/core/utils/misc/Macros.h b/arm_compute/core/utils/misc/Macros.h
index de66b6a52f..fa861fa442 100644
--- a/arm_compute/core/utils/misc/Macros.h
+++ b/arm_compute/core/utils/misc/Macros.h
@@ -26,15 +26,16 @@
#if defined(__cplusplus) && (__cplusplus >= 201402L)
-#define ARM_COMPUTE_DEPRECATED [[deprecated]]
-#define ARM_COMPUTE_DEPRECATED_REL(rel) [[deprecated("Deprecated in : " #rel)]]
+#define ARM_COMPUTE_DEPRECATED [[deprecated]]
+#define ARM_COMPUTE_DEPRECATED_REL(rel) [[deprecated("Deprecated in : " #rel)]]
#define ARM_COMPUTE_DEPRECATED_REL_REPLACE(rel, replace) [[deprecated("Deprecated in : " #rel " - Use : " #replace)]]
#elif defined(__GNUC__) || defined(__clang__)
-#define ARM_COMPUTE_DEPRECATED __attribute__((deprecated))
+#define ARM_COMPUTE_DEPRECATED __attribute__((deprecated))
#define ARM_COMPUTE_DEPRECATED_REL(rel) __attribute__((deprecated("Deprecated in : " #rel)))
-#define ARM_COMPUTE_DEPRECATED_REL_REPLACE(rel, replace) __attribute__((deprecated("Deprecated in : " #rel " - Use : " #replace)))
+#define ARM_COMPUTE_DEPRECATED_REL_REPLACE(rel, replace) \
+ __attribute__((deprecated("Deprecated in : " #rel " - Use : " #replace)))
#else // defined(__cplusplus) && (__cplusplus >= 201402L)
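The realigned macros are used as attribute prefixes on declarations; a minimal sketch with hypothetical function names:

// Under C++14 this expands to [[deprecated]]:
ARM_COMPUTE_DEPRECATED
void old_entry_point();

// Under C++14 this expands to
// [[deprecated("Deprecated in : 23.05 - Use : new_entry_point")]]:
ARM_COMPUTE_DEPRECATED_REL_REPLACE(23.05, new_entry_point)
void old_entry_point_v2();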
diff --git a/arm_compute/core/utils/misc/ShapeCalculator.h b/arm_compute/core/utils/misc/ShapeCalculator.h
index d0dc202f91..e97d81390e 100644
--- a/arm_compute/core/utils/misc/ShapeCalculator.h
+++ b/arm_compute/core/utils/misc/ShapeCalculator.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,15 +21,16 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef ARM_COMPUTE_MISC_SHAPE_CALCULATOR_H
-#define ARM_COMPUTE_MISC_SHAPE_CALCULATOR_H
+#ifndef ACL_ARM_COMPUTE_CORE_UTILS_MISC_SHAPECALCULATOR_H
+#define ACL_ARM_COMPUTE_CORE_UTILS_MISC_SHAPECALCULATOR_H
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/ITensorInfo.h"
#include "arm_compute/core/KernelDescriptors.h"
#include "arm_compute/core/Utils.h"
-
#include "arm_compute/core/utils/helpers/tensor_transform.h"
+#include "arm_compute/function_info/ConvolutionInfo.h"
+#include "arm_compute/runtime/FunctionDescriptors.h"
#include <cmath>
@@ -55,20 +56,27 @@ inline TensorShape calculate_reduce_mean_shape(ITensorInfo *input, const Coordin
convert_negative_axis(axis_local, input_dims);
TensorShape out_shape = input->tensor_shape();
// Configure reshape layer if we want to drop the dimensions
- if(!keep_dims)
+ if (!keep_dims)
{
// We have to sort the reduction axis vectors in order for remove_dimension
// to work properly
+
+// Suppress warning produced by a compiler bug in GCC
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=104165
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Warray-bounds"
std::sort(axis_local.begin(), axis_local.begin() + reduction_ops);
- for(int i = 0; i < reduction_ops; ++i)
+#pragma GCC diagnostic pop
+
+ for (int i = 0; i < reduction_ops; ++i)
{
- out_shape.remove_dimension(axis_local[i] - i);
+ out_shape.remove_dimension(axis_local[i] - i, false);
}
return out_shape;
}
else
{
- for(int i = 0; i < reduction_ops; ++i)
+ for (int i = 0; i < reduction_ops; ++i)
{
out_shape.set(axis_local[i], 1);
}
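// Editorial aside - worked example of the keep_dims == false branch above,
// hypothetical values: input shape {4, 3, 2}, axis_local = {2, 0}, so
// reduction_ops = 2 and sorting yields {0, 2}.
//   i = 0: remove_dimension(0 - 0, false) -> {3, 2}
//   i = 1: remove_dimension(2 - 1, false) -> {3}
// The new 'false' argument skips TensorShape's dimension correction while
// dimensions are still being removed, so a removal that temporarily leaves
// trailing size-1 dimensions does not collapse the shape early.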
@@ -84,7 +92,10 @@ inline TensorShape calculate_reduce_mean_shape(ITensorInfo *input, const Coordin
*
* @return the calculated shape
*/
-inline TensorShape compute_vector_to_tensor_output_shape(const TensorShape &input, size_t conv_w, size_t conv_h, const DataLayout &data_layout)
+inline TensorShape compute_vector_to_tensor_output_shape(const TensorShape &input,
+ size_t conv_w,
+ size_t conv_h,
+ const DataLayout &data_layout)
{
const size_t idx_w = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
const size_t idx_h = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
@@ -126,10 +137,12 @@ inline TensorShape compute_reorg_output_shape(const ITensorInfo &input, int32_t
const size_t idx_channel = get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::CHANNEL);
ARM_COMPUTE_ERROR_ON(stride <= 0);
- ARM_COMPUTE_ERROR_ON_MSG((input.tensor_shape()[idx_width] % stride != 0), "The width of the input tensor must be a multiple of stride");
- ARM_COMPUTE_ERROR_ON_MSG((input.tensor_shape()[idx_height] % stride != 0), "The height of the input tensor must be a multiple of stride");
+ ARM_COMPUTE_ERROR_ON_MSG((input.tensor_shape()[idx_width] % stride != 0),
+ "The width of the input tensor must be a multiple of stride");
+ ARM_COMPUTE_ERROR_ON_MSG((input.tensor_shape()[idx_height] % stride != 0),
+ "The height of the input tensor must be a multiple of stride");
- TensorShape output_shape{ input.tensor_shape() };
+ TensorShape output_shape{input.tensor_shape()};
output_shape.set(idx_width, output_shape[idx_width] / stride);
output_shape.set(idx_height, output_shape[idx_height] / stride);
@@ -146,7 +159,8 @@ inline TensorShape compute_reorg_output_shape(const ITensorInfo &input, int32_t
*
* @return the calculated shape of the reshaped weights
*/
-inline TensorShape compute_weights_reshaped_shape(const ITensorInfo &weights, bool has_bias = false, unsigned int num_groups = 1)
+inline TensorShape
+compute_weights_reshaped_shape(const ITensorInfo &weights, bool has_bias = false, unsigned int num_groups = 1)
{
// A number of groups greater than one is only supported for NCHW data layout, and the number of weights must be a multiple of it.
ARM_COMPUTE_ERROR_ON(num_groups == 0);
@@ -154,14 +168,14 @@ inline TensorShape compute_weights_reshaped_shape(const ITensorInfo &weights, bo
ARM_COMPUTE_ERROR_ON((weights.dimension(3) % num_groups) != 0);
// Calculate output shape
- TensorShape weights_reshaped{ weights.tensor_shape() };
+ TensorShape weights_reshaped{weights.tensor_shape()};
weights_reshaped.set(3, weights_reshaped[3] / num_groups);
weights_reshaped.collapse(3);
const size_t tmp_dim = weights_reshaped[0];
weights_reshaped.set(0, weights_reshaped[1]);
weights_reshaped.set(1, tmp_dim + (has_bias ? 1 : 0));
- if(weights.num_dimensions() < 5)
+ if (weights.num_dimensions() < 5)
{
weights_reshaped.set(2, num_groups);
}
@@ -177,7 +191,9 @@ inline TensorShape compute_weights_reshaped_shape(const ITensorInfo &weights, bo
*
* @return the calculated shape
*/
-inline TensorShape compute_lhs_reshaped_shape(const ITensorInfo &a, const GEMMLHSMatrixInfo &lhs_info, bool reinterpret_input_as_3d = false)
+inline TensorShape compute_lhs_reshaped_shape(const ITensorInfo &a,
+ const GEMMLHSMatrixInfo &lhs_info,
+ bool reinterpret_input_as_3d = false)
{
ARM_COMPUTE_ERROR_ON(lhs_info.m0 == 0);
ARM_COMPUTE_ERROR_ON(lhs_info.k0 == 0);
@@ -198,11 +214,11 @@ inline TensorShape compute_lhs_reshaped_shape(const ITensorInfo &a, const GEMMLH
const unsigned int output_width = block_size * num_horiz_blocks * lhs_info.v0;
const unsigned int output_height = std::ceil(num_vert_blocks / static_cast<float>(lhs_info.v0));
- TensorShape lhs_shape{ a.tensor_shape() };
+ TensorShape lhs_shape{a.tensor_shape()};
lhs_shape.set(0, output_width);
lhs_shape.set(1, output_height);
- if((reinterpret_input_as_3d) && (lhs_shape.num_dimensions() > 2))
+ if ((reinterpret_input_as_3d) && (lhs_shape.num_dimensions() > 2))
{
// When the data format is NHWC and the shapes are Nx1x1
// the tensor shape num_dimensions is automatically set to 1 instead of 3.
@@ -242,7 +258,7 @@ inline TensorShape compute_rhs_reshaped_shape(const ITensorInfo &a, const GEMMRH
const unsigned int output_width = block_size * num_vert_blocks * rhs_info.h0;
const unsigned int output_height = std::ceil(num_horiz_blocks / static_cast<float>(rhs_info.h0));
- TensorShape rhs_shape{ a.tensor_shape() };
+ TensorShape rhs_shape{a.tensor_shape()};
rhs_shape.set(0, output_width);
rhs_shape.set(1, output_height);
@@ -257,14 +273,15 @@ inline TensorShape compute_rhs_reshaped_shape(const ITensorInfo &a, const GEMMRH
*
* @return the calculated shape
*/
-inline TensorShape compute_interleaved_shape(const ITensorInfo &a, int mult_interleave4x4_height = 1, bool reinterpret_input_as_3d = false)
+inline TensorShape
+compute_interleaved_shape(const ITensorInfo &a, int mult_interleave4x4_height = 1, bool reinterpret_input_as_3d = false)
{
// The interleaved output matrix will have the following shape: [ a_height * W, ceil(a_width / W) ] where W = 4 * mult_interleave4x4_height
ARM_COMPUTE_ERROR_ON(mult_interleave4x4_height < 1);
const int interleave_width = 4 * mult_interleave4x4_height;
- TensorShape shape_interleaved_a{ a.tensor_shape() };
+ TensorShape shape_interleaved_a{a.tensor_shape()};
shape_interleaved_a.set(0, a.dimension(0) * interleave_width);
- if(reinterpret_input_as_3d)
+ if (reinterpret_input_as_3d)
{
const int M = a.dimension(1) * a.dimension(2);
const int height = std::ceil(M / static_cast<float>(interleave_width));
@@ -274,7 +291,7 @@ inline TensorShape compute_interleaved_shape(const ITensorInfo &a, int mult_inte
// the tensor shape num_dimensions is automatically set to 1 instead of 3.
// To avoid failures by removing a dimension that doesn't exist
// check if the number of dimensions is greater than 2.
- if(shape_interleaved_a.num_dimensions() > 2)
+ if (shape_interleaved_a.num_dimensions() > 2)
{
shape_interleaved_a.remove_dimension(2);
}
@@ -296,7 +313,7 @@ inline TensorShape compute_interleaved_shape(const ITensorInfo &a, int mult_inte
inline TensorShape compute_transpose1xW_shape(const ITensorInfo &b)
{
// The transpose1xW output matrix will have the following shape: [ b_height * 16, ceil(b_width / 16.0f) ]
- TensorShape shape_transposed1xW_b{ b.tensor_shape() };
+ TensorShape shape_transposed1xW_b{b.tensor_shape()};
shape_transposed1xW_b.set(0, b.dimension(1) * 16);
shape_transposed1xW_b.set(1, std::ceil(b.dimension(0) / 16.f));
@@ -316,7 +333,7 @@ inline TensorShape compute_transpose1xW_with_element_size_shape(const ITensorInf
// The transpose1xW output matrix will have the following shape:
// [ b_height * W, ceil(b_width / W) ] where W = (16 / element size of the tensor) * mult_transpose1xW_width
ARM_COMPUTE_ERROR_ON(mult_transpose1xW_width < 1);
- TensorShape shape_transposed1xW_b{ b.tensor_shape() };
+ TensorShape shape_transposed1xW_b{b.tensor_shape()};
const size_t transpose_width = (16 / b.element_size()) * mult_transpose1xW_width;
shape_transposed1xW_b.set(0, b.dimension(1) * transpose_width);
shape_transposed1xW_b.set(1, static_cast<size_t>(std::ceil(b.dimension(0) / static_cast<float>(transpose_width))));
@@ -332,8 +349,8 @@ inline TensorShape compute_transpose1xW_with_element_size_shape(const ITensorInf
*/
inline TensorShape compute_reductionA_shape(const ITensorInfo &b)
{
- TensorShape shape_vector_sum_col{ b.tensor_shape() };
- if(shape_vector_sum_col.num_dimensions() > 1)
+ TensorShape shape_vector_sum_col{b.tensor_shape()};
+ if (shape_vector_sum_col.num_dimensions() > 1)
{
shape_vector_sum_col.remove_dimension(1);
}
@@ -349,9 +366,9 @@ inline TensorShape compute_reductionA_shape(const ITensorInfo &b)
*/
inline TensorShape compute_reductionB_shape(const ITensorInfo &a)
{
- TensorShape shape_vector_sum_row{ a.tensor_shape() };
+ TensorShape shape_vector_sum_row{a.tensor_shape()};
shape_vector_sum_row.set(Window::DimX, a.dimension(1));
- if(shape_vector_sum_row.num_dimensions() > 1)
+ if (shape_vector_sum_row.num_dimensions() > 1)
{
shape_vector_sum_row.remove_dimension(1);
}
@@ -368,7 +385,10 @@ inline TensorShape compute_reductionB_shape(const ITensorInfo &a)
*
* @return the calculated shape
*/
-inline TensorShape compute_col2im_shape(const ITensorInfo &input, const Size2D &convolved_dims, bool batch_size_on_z, unsigned int num_groups = 1)
+inline TensorShape compute_col2im_shape(const ITensorInfo &input,
+ const Size2D &convolved_dims,
+ bool batch_size_on_z,
+ unsigned int num_groups = 1)
{
ARM_COMPUTE_ERROR_ON(num_groups == 0);
ARM_COMPUTE_ERROR_ON(input.tensor_shape()[1] != (convolved_dims.area()));
@@ -379,10 +399,10 @@ inline TensorShape compute_col2im_shape(const ITensorInfo &input, const Size2D &
const int height_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
const int channel_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL);
- TensorShape col2im_shape{ input.tensor_shape() };
+ TensorShape col2im_shape{input.tensor_shape()};
// If batches start on 3rd dimension shift dimensions right by 1 to retain upper tensor shape,
// as the first three will be overridden by H,W,C data
- if(batch_size_on_z && num_groups == 1)
+ if (batch_size_on_z && num_groups == 1)
{
col2im_shape.shift_right(1);
}
@@ -401,10 +421,10 @@ inline TensorShape compute_col2im_shape(const ITensorInfo &input, const Size2D &
*/
inline TensorShape compute_transposed_shape(const ITensorInfo &input)
{
- TensorShape shape_transposed{ input.tensor_shape() };
+ TensorShape shape_transposed{input.tensor_shape()};
- shape_transposed.set(0, input.dimension(1));
- shape_transposed.set(1, input.dimension(0));
+ shape_transposed.set(0, input.dimension(1), false);
+ shape_transposed.set(1, input.dimension(0), false);
return shape_transposed;
}
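// Editorial aside - the new 'false' argument is TensorShape::set()'s
// apply_dim_correction flag. Hypothetical example: transposing {1, 5}
// should give {5, 1}; set(0, 5, false) yields {5, 5} and set(1, 1, false)
// yields {5, 1}, whereas with correction enabled the trailing 1 would be
// folded away and the result reported as the 1-D shape {5}.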
@@ -417,10 +437,11 @@ inline TensorShape compute_transposed_shape(const ITensorInfo &input)
*
* @return the calculated shape
*/
-inline TensorShape compute_depthwise_convolution_shape(const ITensorInfo &input, const ITensorInfo &weights, const ConvolutionInfo &info)
+inline TensorShape
+compute_depthwise_convolution_shape(const ITensorInfo &input, const ITensorInfo &weights, const ConvolutionInfo &info)
{
- const TensorShape input_shape{ input.tensor_shape() };
- const TensorShape weights_shape{ weights.tensor_shape() };
+ const TensorShape input_shape{input.tensor_shape()};
+ const TensorShape weights_shape{weights.tensor_shape()};
const DataLayout data_layout = input.data_layout();
const int width_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
@@ -428,16 +449,16 @@ inline TensorShape compute_depthwise_convolution_shape(const ITensorInfo &input,
const int channel_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL);
const DataLayout weights_data_layout = weights.data_layout();
- const int weights_width_idx = get_data_layout_dimension_index(weights_data_layout, DataLayoutDimension::WIDTH);
- const int weights_height_idx = get_data_layout_dimension_index(weights_data_layout, DataLayoutDimension::HEIGHT);
+ const int weights_width_idx = get_data_layout_dimension_index(weights_data_layout, DataLayoutDimension::WIDTH);
+ const int weights_height_idx = get_data_layout_dimension_index(weights_data_layout, DataLayoutDimension::HEIGHT);
unsigned int output_width = 0;
unsigned int output_height = 0;
- std::tie(output_width, output_height) = scaled_dimensions(input_shape[width_idx], input_shape[height_idx],
- weights_shape[weights_width_idx], weights_shape[weights_height_idx],
- info.pad_stride_info, info.dilation);
+ std::tie(output_width, output_height) =
+ scaled_dimensions(input_shape[width_idx], input_shape[height_idx], weights_shape[weights_width_idx],
+ weights_shape[weights_height_idx], info.pad_stride_info, info.dilation);
- TensorShape output_shape{ input_shape };
+ TensorShape output_shape{input_shape};
output_shape.set(width_idx, output_width);
output_shape.set(height_idx, output_height);
output_shape.set(channel_idx, input_shape[channel_idx] * info.depth_multiplier);
@@ -445,6 +466,37 @@ inline TensorShape compute_depthwise_convolution_shape(const ITensorInfo &input,
return output_shape;
}
+/** Calculate padding required for deconvolution
+ *
+ * @param[in] input Input tensor info
+ * @param[in] weights Weights tensor shape
+ * @param[in] sx Stride on x axis
+ * @param[in] sy Stride on y axis
+ * @param[in] out_dims Output shape dimensions
+ *
+ * @return the padding required
+ */
+inline std::pair<int32_t, int32_t> compute_deconvolution_padding(const ITensorInfo &input,
+ const ITensorInfo &weights,
+ int32_t sx,
+ int32_t sy,
+ std::pair<uint32_t, uint32_t> out_dims)
+{
+ const DataLayout data_layout = input.data_layout();
+ const size_t idx_w = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
+ const size_t idx_h = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
+
+ // Find the upsampled dimensions
+ int32_t out_x = (static_cast<int32_t>(input.dimension(idx_w)) - 1) * sx + 1;
+ int32_t out_y = (static_cast<int32_t>(input.dimension(idx_h)) - 1) * sy + 1;
+
+ // Find the padding needed for the convolution with stride 1 in order to match output shape
+ int32_t padx = out_dims.first - (out_x - static_cast<int32_t>(weights.dimension(idx_w)) + 1);
+ int32_t pady = out_dims.second - (out_y - static_cast<int32_t>(weights.dimension(idx_h)) + 1);
+
+ return std::make_pair(padx, pady);
+}
+
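+// Editorial aside - worked numbers for compute_deconvolution_padding(),
+// hypothetical values: input W = H = 4, weights 3x3, sx = sy = 2 and
+// requested out_dims = {8, 8}:
+//   upsampled size: out_x = (4 - 1) * 2 + 1 = 7
+//   stride-1 convolution output: 7 - 3 + 1 = 5
+//   padx = 8 - 5 = 3 (and pady = 3 by symmetry)
+// compute_deconvolution_upsampled_shape() below adds this padding onto the
+// upsampled dimensions.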
/** Calculate the upsampled output shape used for deconvolution
*
* @param[in] input Input tensor info
@@ -457,20 +509,28 @@ inline TensorShape compute_depthwise_convolution_shape(const ITensorInfo &input,
*
* @return the calculated shape
*/
-inline TensorShape compute_deconvolution_upsampled_shape(const ITensorInfo &input, const ITensorInfo &weights, unsigned int sx, unsigned int sy,
- std::pair<unsigned int, unsigned int> &out_dims, uint32_t &padx, uint32_t &pady)
+inline TensorShape compute_deconvolution_upsampled_shape(const ITensorInfo &input,
+ const ITensorInfo &weights,
+ unsigned int sx,
+ unsigned int sy,
+ std::pair<unsigned int, unsigned int> &out_dims,
+ uint32_t &padx,
+ uint32_t &pady)
{
+ // Find the padding needed for the convolution with stride 1 in order to match output shape
+ const auto padxy =
+ compute_deconvolution_padding(input, weights, static_cast<int32_t>(sx), static_cast<int32_t>(sy), out_dims);
+ padx = static_cast<uint32_t>(padxy.first);
+ pady = static_cast<uint32_t>(padxy.second);
+
const DataLayout data_layout = input.data_layout();
const size_t idx_w = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
const size_t idx_h = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
// Find the upsampled dimensions
- unsigned int out_x = (input.dimension(idx_w) - 1) * sx + 1;
- unsigned int out_y = (input.dimension(idx_h) - 1) * sy + 1;
+ uint32_t out_x = (input.dimension(idx_w) - 1) * sx + 1;
+ uint32_t out_y = (input.dimension(idx_h) - 1) * sy + 1;
- // Find the padding needed for the convolution with stride 1 in order to match output shape
- padx = out_dims.first - (out_x - weights.dimension(idx_w) + 1);
- pady = out_dims.second - (out_y - weights.dimension(idx_h) + 1);
out_x += padx;
out_y += pady;
@@ -489,10 +549,12 @@ inline TensorShape compute_deconvolution_upsampled_shape(const ITensorInfo &inpu
*
* @return the calculated shape
*/
-inline TensorShape compute_deconvolution_output_shape(const std::pair<unsigned int, unsigned int> &out_dims, const ITensorInfo &input, const ITensorInfo &weights)
+inline TensorShape compute_deconvolution_output_shape(const std::pair<unsigned int, unsigned int> &out_dims,
+ const ITensorInfo &input,
+ const ITensorInfo &weights)
{
- const TensorShape input_shape{ input.tensor_shape() };
- const TensorShape weights_shape{ weights.tensor_shape() };
+ const TensorShape input_shape{input.tensor_shape()};
+ const TensorShape weights_shape{weights.tensor_shape()};
const DataLayout data_layout = input.data_layout();
const int width_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
@@ -500,7 +562,7 @@ inline TensorShape compute_deconvolution_output_shape(const std::pair<unsigned i
const int channel_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL);
const int batch_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::BATCHES);
- TensorShape out_shape{ input_shape };
+ TensorShape out_shape{input_shape};
out_shape.set(width_idx, out_dims.first);
out_shape.set(height_idx, out_dims.second);
out_shape.set(channel_idx, weights_shape[batch_idx]);
@@ -516,11 +578,18 @@ inline TensorShape compute_deconvolution_output_shape(const std::pair<unsigned i
* @param[in] dilation Dilation, in elements, across x and y
* @param[in] batch_size_on_z True if batch size is on z axis
* @param[in] num_groups (Optional) Number of groups when performing a grouped convolution
+ * @param[in] input_pad_right (Optional) When fast-math is selected, per-element padding for the im2col matrix may be necessary
*
* @return the calculated shape
*/
-inline TensorShape compute_im2col_conv_shape(const ITensorInfo *input, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, const Size2D &dilation, bool batch_size_on_z,
- unsigned int num_groups = 1)
+inline TensorShape compute_im2col_conv_shape(const ITensorInfo *input,
+ const Size2D &kernel_dims,
+ const PadStrideInfo &conv_info,
+ bool has_bias,
+ const Size2D &dilation,
+ bool batch_size_on_z,
+ unsigned int num_groups = 1,
+ unsigned int input_pad_right = 0)
{
// The output shape will be the 3D shape [ out_channels * kernel_area, num_elems_per_out_channel, batches ] if batch_size_on_z == true
// or the 4D shape [ out_channels * kernel_area / num_groups, num_elems_per_out_channel, num_groups, batches ] if batch_size_on_z == false
@@ -529,17 +598,19 @@ inline TensorShape compute_im2col_conv_shape(const ITensorInfo *input, const Siz
ARM_COMPUTE_ERROR_ON(num_groups > 1 && input->data_layout() != DataLayout::NCHW);
ARM_COMPUTE_ERROR_ON(num_groups > 1 && batch_size_on_z);
- TensorShape output_shape{ input->tensor_shape() };
+ TensorShape output_shape{input->tensor_shape()};
const DataLayout data_layout = input->data_layout();
const int width_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
const int height_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
const int channel_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL);
- std::pair<unsigned int, unsigned int> out_dims = scaled_dimensions(output_shape[width_idx], output_shape[height_idx], kernel_dims.width, kernel_dims.height, conv_info, dilation);
- output_shape.set(0, (output_shape[channel_idx] / num_groups * kernel_dims.area() + (has_bias ? 1 : 0))); // NOLINT
+ std::pair<unsigned int, unsigned int> out_dims = scaled_dimensions(
+ output_shape[width_idx], output_shape[height_idx], kernel_dims.width, kernel_dims.height, conv_info, dilation);
+ output_shape.set(0, ((output_shape[channel_idx] + input_pad_right) / num_groups * kernel_dims.area() +
+ (has_bias ? 1 : 0))); // NOLINT
output_shape.set(1, (out_dims.first * out_dims.second));
- if(batch_size_on_z && output_shape.num_dimensions() >= 3)
+ if (batch_size_on_z && output_shape.num_dimensions() >= 3)
{
output_shape.remove_dimension(2);
}
@@ -561,7 +632,7 @@ inline TensorShape compute_flatten_shape(const ITensorInfo *input)
{
// The output shape will be the flatten version of the input (i.e. [ width * height * channels, num_batches, ... ] ). Used for FlattenLayer and FullyConnectedLayer.
- TensorShape output_shape{ input->tensor_shape() };
+ TensorShape output_shape{input->tensor_shape()};
output_shape.collapse(3);
@@ -583,7 +654,7 @@ inline TensorShape compute_softmax_shape(const ITensorInfo *input, size_t axis =
// - [x,y,z,w] and axis 3 will return [x*y*z, w]
TensorShape shape2D = input->tensor_shape();
- if(axis < input->num_dimensions())
+ if (axis < input->num_dimensions())
{
// Collapse from axis onward (this changes the shape)
shape2D.collapse_from(axis);
@@ -597,7 +668,7 @@ inline TensorShape compute_softmax_shape(const ITensorInfo *input, size_t axis =
shape2D.collapse(shape2D.num_dimensions());
}
- if(axis == 0)
+ if (axis == 0)
{
// If axis is zero the first dim should be one. Since
// collapse is an inclusive operation we need to shift
@@ -616,15 +687,17 @@ inline TensorShape compute_softmax_shape(const ITensorInfo *input, size_t axis =
*/
inline TensorShape compute_winograd_filter_transform_shape(const ITensorInfo &input, const WinogradInfo &winograd_info)
{
- TensorShape tensor_shape{ input.tensor_shape() };
+ TensorShape tensor_shape{input.tensor_shape()};
const Size2D kernel_size = winograd_info.kernel_size;
const Size2D output_tile_size = winograd_info.output_tile_size;
- const Size2D input_tile_size = Size2D(output_tile_size.width + kernel_size.width - 1, output_tile_size.height + kernel_size.height - 1);
+ const Size2D input_tile_size =
+ Size2D(output_tile_size.width + kernel_size.width - 1, output_tile_size.height + kernel_size.height - 1);
tensor_shape.remove_dimension(get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::WIDTH));
tensor_shape.set(Window::DimX, input.dimension(3));
- tensor_shape.set(Window::DimY, input.dimension(get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::CHANNEL)));
+ tensor_shape.set(Window::DimY, input.dimension(get_data_layout_dimension_index(input.data_layout(),
+ DataLayoutDimension::CHANNEL)));
tensor_shape.set(Window::DimZ, input_tile_size.area());
return tensor_shape;
@@ -642,23 +715,22 @@ inline TensorShape compute_winograd_input_transform_shape(const ITensorInfo &inp
const PadStrideInfo conv_info = winograd_info.convolution_info;
const Size2D kernel_size = winograd_info.kernel_size;
const Size2D output_tile_size = winograd_info.output_tile_size;
- const Size2D input_tile_size = Size2D(output_tile_size.width + kernel_size.width - 1, output_tile_size.height + kernel_size.height - 1);
+ const Size2D input_tile_size =
+ Size2D(output_tile_size.width + kernel_size.width - 1, output_tile_size.height + kernel_size.height - 1);
const size_t idx_w = get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::WIDTH);
const size_t idx_h = get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::HEIGHT);
const size_t idx_c = get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::CHANNEL);
// Compute the number of output tiles along the x and y direction of size "output_tile_size"
- const Size2D num_tiles = compute_winograd_convolution_tiles(Size2D(input.tensor_shape()[idx_w], input.tensor_shape()[idx_h]),
- kernel_size,
- output_tile_size,
- conv_info);
+ const Size2D num_tiles = compute_winograd_convolution_tiles(
+ Size2D(input.tensor_shape()[idx_w], input.tensor_shape()[idx_h]), kernel_size, output_tile_size, conv_info);
const unsigned int width = input.tensor_shape()[idx_c];
const unsigned int height = num_tiles.area();
const unsigned int depth = input_tile_size.area();
- TensorShape output_shape{ input.tensor_shape() };
+ TensorShape output_shape{input.tensor_shape()};
output_shape.set(0, width);
output_shape.set(1, height);
output_shape.set(2, depth);
@@ -681,12 +753,12 @@ inline TensorShape compute_winograd_output_transform_shape(const ITensorInfo &in
const DataLayout data_layout = winograd_info.output_data_layout;
// Compute output shape
- unsigned int output_width = 0;
- unsigned int output_height = 0;
+ unsigned int output_width = 0;
+ unsigned int output_height = 0;
std::tie(output_width, output_height) = scaled_dimensions(input_dimensions.width, input_dimensions.height,
kernel_size.width, kernel_size.height, conv_info);
- TensorShape tensor_shape{ input.tensor_shape() };
+ TensorShape tensor_shape{input.tensor_shape()};
// Output dimension
const unsigned int out_w = output_width;
@@ -702,20 +774,21 @@ inline TensorShape compute_winograd_output_transform_shape(const ITensorInfo &in
/** Calculate the deep convolution output shape of a tensor
*
- * @param[in] input Input tensor info
- * @param[in] weights Weights tensor info
- * @param[in] conv_info Contains padding and stride information
+ * @param[in] input_shape Input tensor shape
+ * @param[in] input_data_layout Input data layout
+ * @param[in] weights_shape Weights tensor shape
+ * @param[in] conv_info Contains padding and stride information
*
* @return the calculated shape
*/
-inline TensorShape compute_deep_convolution_shape(const ITensorInfo &input, const ITensorInfo &weights, PadStrideInfo conv_info)
+inline TensorShape compute_deep_convolution_shape(const TensorShape &input_shape,
+ DataLayout input_data_layout,
+ const TensorShape &weights_shape,
+ const PadStrideInfo &conv_info)
{
- const TensorShape input_shape{ input.tensor_shape() };
- const TensorShape weights_shape{ weights.tensor_shape() };
-
- const size_t idx_width = get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::WIDTH);
- const size_t idx_height = get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::HEIGHT);
- const size_t idx_channel = get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::CHANNEL);
+ const size_t idx_width = get_data_layout_dimension_index(input_data_layout, DataLayoutDimension::WIDTH);
+ const size_t idx_height = get_data_layout_dimension_index(input_data_layout, DataLayoutDimension::HEIGHT);
+ const size_t idx_channel = get_data_layout_dimension_index(input_data_layout, DataLayoutDimension::CHANNEL);
const unsigned int input_width = input_shape[idx_width];
const unsigned int input_height = input_shape[idx_height];
@@ -724,9 +797,10 @@ inline TensorShape compute_deep_convolution_shape(const ITensorInfo &input, cons
const unsigned int weights_out_channel = weights_shape[3];
unsigned int output_width = 0;
unsigned int output_height = 0;
- std::tie(output_width, output_height) = scaled_dimensions(input_width, input_height, weights_width, weights_height, conv_info);
+ std::tie(output_width, output_height) =
+ scaled_dimensions(input_width, input_height, weights_width, weights_height, conv_info);
- TensorShape output_shape{ input_shape };
+ TensorShape output_shape{input_shape};
output_shape.set(idx_width, output_width);
output_shape.set(idx_height, output_height);
output_shape.set(idx_channel, weights_out_channel);
@@ -734,6 +808,53 @@ inline TensorShape compute_deep_convolution_shape(const ITensorInfo &input, cons
return output_shape;
}
+/** Calculate the deep convolution output shape of a tensor
+ *
+ * @param[in] input Input tensor info
+ * @param[in] weights Weights tensor info
+ * @param[in] conv_info Contains padding and stride information
+ *
+ * @return the calculated shape
+ */
+inline TensorShape
+compute_deep_convolution_shape(const ITensorInfo &input, const ITensorInfo &weights, const PadStrideInfo &conv_info)
+{
+ return compute_deep_convolution_shape(input.tensor_shape(), input.data_layout(), weights.tensor_shape(), conv_info);
+}
+
+/** Calculate the indirect buffer output shape used by the indirect convolution function
+ *
+ * @param[in] input_shape Input tensor shape
+ * @param[in] input_data_layout Input data layout
+ * @param[in] weights_shape Weights tensor shape
+ * @param[in] conv_info Contains padding and stride information
+ * @param[in] desc Contains the direct/indirect convolution compute arguments, such as the tiling dimensions
+ *
+ * @return the calculated shape
+ */
+inline TensorShape compute_indirect_buffer_shape(const TensorShape &input_shape,
+ DataLayout input_data_layout,
+ const TensorShape &weights_shape,
+ const PadStrideInfo &conv_info,
+ const DirectConvComputeKernelInfo &desc)
+{
+ ARM_COMPUTE_ERROR_ON_MSG(input_data_layout != DataLayout::NHWC, "The data layout can only be NHWC");
+ ARM_COMPUTE_ERROR_ON_MSG(desc.m0 <= 0 || desc.m0 > 8, "M0 can only be greater than 0 and less than or equal to 8");
+
+ const unsigned int m0 = desc.m0;
+ const unsigned int kw = weights_shape[1];
+ const unsigned int kh = weights_shape[2];
+
+ TensorShape output_conv2d_shape =
+ compute_deep_convolution_shape(input_shape, input_data_layout, weights_shape, conv_info);
+
+ const unsigned int output_w = m0 * kw * kh;
+ const unsigned int output_h = DIV_CEIL(output_conv2d_shape[1] * output_conv2d_shape[2], m0);
+ const unsigned int output_b = output_conv2d_shape[3];
+
+ return TensorShape(output_w, output_h, output_b);
+}
+
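+// Editorial aside - worked numbers for compute_indirect_buffer_shape(),
+// hypothetical values (NHWC): weights 3x3 so kw = kh = 3, desc.m0 = 4, and
+// a conv2d output of W = H = 16 with N = 1:
+//   output_w = m0 * kw * kh          = 4 * 3 * 3        = 36
+//   output_h = DIV_CEIL(16 * 16, m0) = DIV_CEIL(256, 4) = 64
+//   output_b = 1
+// giving an indirect buffer of shape (36, 64, 1).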
/** Calculate the min/max output shape of a tensor
*
* @param[in] input Input tensor info
@@ -742,7 +863,7 @@ inline TensorShape compute_deep_convolution_shape(const ITensorInfo &input, cons
*/
inline TensorShape compute_min_max_shape(const ITensorInfo *input)
{
- TensorShape output_shape{ input->tensor_shape() };
+ TensorShape output_shape{input->tensor_shape()};
output_shape.set(Window::DimX, 2);
output_shape.remove_dimension(1);
output_shape.remove_dimension(1);
@@ -762,7 +883,7 @@ inline TensorShape compute_pool_shape(const ITensorInfo &input, PoolingLayerInfo
int pooled_w = 0;
int pooled_h = 0;
- TensorShape output_shape{ input.tensor_shape() };
+ TensorShape output_shape{input.tensor_shape()};
const bool is_global_pooling = pool_info.is_global_pooling;
const int idx_width = get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::WIDTH);
@@ -772,9 +893,8 @@ inline TensorShape compute_pool_shape(const ITensorInfo &input, PoolingLayerInfo
const int pool_size_x = is_global_pooling ? output_shape[idx_width] : pool_info.pool_size.width;
const int pool_size_y = is_global_pooling ? output_shape[idx_height] : pool_info.pool_size.height;
- std::tie(pooled_w, pooled_h) = scaled_dimensions_signed(input_width, input_height,
- pool_size_x, pool_size_y,
- pool_info.pad_stride_info);
+ std::tie(pooled_w, pooled_h) =
+ scaled_dimensions_signed(input_width, input_height, pool_size_x, pool_size_y, pool_info.pad_stride_info);
ARM_COMPUTE_ERROR_ON_MSG((pooled_w < 1 || pooled_h < 1), "Calculated output dimension size is invalid");
@@ -807,8 +927,10 @@ inline TensorShape compute_unpool_shape(const ITensorInfo &input, PoolingLayerIn
const int pad_bottom = pad_stride_info.pad_bottom();
TensorShape output_shape = input_shape;
- const unsigned int out_width = (input_shape[idx_width] - 1) * stride_x - pad_left - pad_right + pool_info.pool_size.width;
- const unsigned int out_height = (input_shape[idx_height] - 1) * stride_y - pad_top - pad_bottom + pool_info.pool_size.height;
+ const unsigned int out_width =
+ (input_shape[idx_width] - 1) * stride_x - pad_left - pad_right + pool_info.pool_size.width;
+ const unsigned int out_height =
+ (input_shape[idx_height] - 1) * stride_y - pad_top - pad_bottom + pool_info.pool_size.height;
output_shape.set(idx_width, out_width);
output_shape.set(idx_height, out_height);
@@ -823,9 +945,10 @@ inline TensorShape compute_unpool_shape(const ITensorInfo &input, PoolingLayerIn
*
* @return the calculated shape
*/
-inline TensorShape compute_roi_align_shape(const ITensorInfo &input, const ITensorInfo &rois, ROIPoolingLayerInfo pool_info)
+inline TensorShape
+compute_roi_align_shape(const ITensorInfo &input, const ITensorInfo &rois, ROIPoolingLayerInfo pool_info)
{
- TensorShape output_shape{ input.tensor_shape() };
+ TensorShape output_shape{input.tensor_shape()};
const unsigned int idx_width = get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::WIDTH);
const unsigned int idx_height = get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::HEIGHT);
@@ -846,7 +969,7 @@ inline TensorShape compute_roi_align_shape(const ITensorInfo &input, const ITens
*/
inline TensorShape compute_rnn_shape(const ITensorInfo *input, const unsigned int batch_size)
{
- TensorShape output_shape{ input->tensor_shape() };
+ TensorShape output_shape{input->tensor_shape()};
output_shape.set(1, batch_size);
return output_shape;
@@ -861,15 +984,21 @@ inline TensorShape compute_rnn_shape(const ITensorInfo *input, const unsigned in
*
* @return the calculated shape
*/
-inline TensorShape compute_mm_shape(const ITensorInfo &input0, const ITensorInfo &input1, bool is_interleaved_transposed, const GEMMReshapeInfo &reshape_info)
+inline TensorShape compute_mm_shape(const ITensorInfo &input0,
+ const ITensorInfo &input1,
+ bool is_interleaved_transposed,
+ const GEMMReshapeInfo &reshape_info)
{
ARM_COMPUTE_ERROR_ON_MSG(input0.num_dimensions() > 4, "The number of dimensions for the matrix A must be <= 4");
- ARM_COMPUTE_ERROR_ON_MSG(is_interleaved_transposed && reshape_info.reinterpret_input_as_3d(), "The first input tensor cannot be reinterpreted as 3D if is_interleaved_transposed is true");
+ ARM_COMPUTE_ERROR_ON_MSG(
+ is_interleaved_transposed && reshape_info.reinterpret_input_as_3d(),
+ "The first input tensor cannot be reinterpreted as 3D if is_interleaved_transposed is true");
const bool reinterpret_input_as_3d = reshape_info.reinterpret_input_as_3d();
const bool reinterpret_output_as_3d = reshape_info.depth_output_gemm3d() != 0;
const int depth_output_gemm3d = reinterpret_output_as_3d ? reshape_info.depth_output_gemm3d() : 1;
- const int m = reshape_info.reinterpret_input_as_3d() ? input0.dimension(1) * input0.dimension(2) : input0.dimension(1);
+ const int m =
+ reshape_info.reinterpret_input_as_3d() ? input0.dimension(1) * input0.dimension(2) : input0.dimension(1);
// If the output of GEMM has to be reinterpreted as 3D, the number of input0 rows (M) is obtained collapsing the second and third
// dimension of the output tensor
@@ -878,7 +1007,7 @@ inline TensorShape compute_mm_shape(const ITensorInfo &input0, const ITensorInfo
const int dim2 = reinterpret_input_as_3d ? input0.tensor_shape()[3] : input0.tensor_shape()[2];
const int dim3 = reinterpret_input_as_3d ? 1 : input0.tensor_shape()[3];
- TensorShape output_shape{ input0.tensor_shape() };
+ TensorShape output_shape{input0.tensor_shape()};
output_shape.set(0, dim0);
output_shape.set(1, dim1);
@@ -897,7 +1026,8 @@ inline TensorShape compute_mm_shape(const ITensorInfo &input0, const ITensorInfo
*
* @return the calculated shape
*/
-inline TensorShape compute_mm_shape(const ITensorInfo &input0, const ITensorInfo &input1, const GEMMReshapeInfo &gemm_info)
+inline TensorShape
+compute_mm_shape(const ITensorInfo &input0, const ITensorInfo &input1, const GEMMReshapeInfo &gemm_info)
{
ARM_COMPUTE_UNUSED(input1);
ARM_COMPUTE_ERROR_ON_MSG(input0.num_dimensions() > 4, "The number of dimensions for the matrix A must be <= 4");
@@ -906,9 +1036,9 @@ inline TensorShape compute_mm_shape(const ITensorInfo &input0, const ITensorInfo
const bool reinterpret_output_as_3d = gemm_info.depth_output_gemm3d() != 0;
const int depth_output_gemm3d = reinterpret_output_as_3d ? gemm_info.depth_output_gemm3d() : 1;
- TensorShape output_shape{ input0.tensor_shape() };
+ TensorShape output_shape{input0.tensor_shape()};
- if(!reinterpret_input_as_3d && !reinterpret_output_as_3d)
+ if (!reinterpret_input_as_3d && !reinterpret_output_as_3d)
{
output_shape.set(0, gemm_info.n());
output_shape.set(1, gemm_info.m());
@@ -935,7 +1065,8 @@ inline TensorShape compute_mm_shape(const ITensorInfo &input0, const ITensorInfo
*
* @return the calculated shape
*/
-inline TensorShape compute_mm_shape(const ITensorInfo &input0, const ITensorInfo &input1, const GEMMKernelInfo &gemm_info)
+inline TensorShape
+compute_mm_shape(const ITensorInfo &input0, const ITensorInfo &input1, const GEMMKernelInfo &gemm_info)
{
ARM_COMPUTE_UNUSED(input1);
ARM_COMPUTE_ERROR_ON_MSG(input0.num_dimensions() > 4, "The number of dimensions for the matrix A must be <= 4");
@@ -944,9 +1075,9 @@ inline TensorShape compute_mm_shape(const ITensorInfo &input0, const ITensorInfo
const bool reinterpret_output_as_3d = gemm_info.depth_output_gemm3d != 0;
const unsigned int depth_output_gemm3d = reinterpret_output_as_3d ? gemm_info.depth_output_gemm3d : 1;
- TensorShape output_shape{ input0.tensor_shape() };
+ TensorShape output_shape{input0.tensor_shape()};
- if(!reinterpret_input_as_3d && !reinterpret_output_as_3d)
+ if (!reinterpret_input_as_3d && !reinterpret_output_as_3d)
{
output_shape.set(0, gemm_info.n);
output_shape.set(1, gemm_info.m);
@@ -967,20 +1098,50 @@ inline TensorShape compute_mm_shape(const ITensorInfo &input0, const ITensorInfo
/** Calculate the matrix multiplication output shape of two tensors
*
+ * @param[in] input0 First input tensor info
+ * @param[in] input1 Second input tensor info
+ * @param[in] matmul_info Batched MatMul kernel info indicating which operands are transposed
+ *
+ * @return the calculated shape
+ */
+inline TensorShape
+compute_matmul_shape(const TensorShape &input0, const TensorShape &input1, const MatMulKernelInfo &matmul_info)
+{
+ TensorShape output_shape{input0};
+
+ if (matmul_info.adj_lhs)
+ {
+ output_shape.set(1, input0[0]); // The vertical (M) dimension
+ }
+
+ if (matmul_info.adj_rhs)
+ {
+ output_shape.set(0, input1[1]); // The horizontal (N) dimension
+ }
+ else
+ {
+ output_shape.set(0, input1[0]); // The horizontal (N) dimension
+ }
+
+ return output_shape;
+}
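+// Editorial aside - shape walk-through for compute_matmul_shape(), with
+// hypothetical values: lhs = {K, M} = {8, 4}, rhs = {N, K} = {5, 8} and no
+// adjoints. output_shape starts as {8, 4}; set(0, input1[0]) turns it into
+// {5, 4}, i.e. N x M. With adj_rhs the rhs is stored as {K, N} and N is
+// read from input1[1] instead; with adj_lhs, M is read from input0[0].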
/** Calculate the output stage shape of a tensor
+ *
* @param[in] input Input tensor info
* @param[in] gemm_3d_depth (Optional) GEMM 3d depth
* @param[in] batch_size_on_z (Optional) True if batch size is on z axis
*
* @return the calculated shape
*/
-inline TensorShape compute_output_stage_shape(const ITensorInfo &input, unsigned int gemm_3d_depth = 1, bool batch_size_on_z = false)
+inline TensorShape
+compute_output_stage_shape(const ITensorInfo &input, unsigned int gemm_3d_depth = 1, bool batch_size_on_z = false)
{
ARM_COMPUTE_ERROR_ON(input.data_layout() != DataLayout::NHWC && gemm_3d_depth > 1);
TensorShape output_shape = input.tensor_shape();
- if(gemm_3d_depth > 1)
+ if (gemm_3d_depth > 1)
{
- if(batch_size_on_z)
+ if (batch_size_on_z)
{
output_shape.shift_right(1);
}
@@ -1005,11 +1166,16 @@ inline TensorShape compute_output_stage_shape(const ITensorInfo &input, unsigned
* @return the calculated shape
*/
inline TensorShape compute_strided_slice_shape(const ITensorInfo &input,
- const Coordinates &starts, const Coordinates &ends, const Coordinates &strides,
- int32_t begin_mask, int32_t end_mask, int32_t shrink_axis_mask)
+ const Coordinates &starts,
+ const Coordinates &ends,
+ const Coordinates &strides,
+ int32_t begin_mask,
+ int32_t end_mask,
+ int32_t shrink_axis_mask)
{
using namespace arm_compute::helpers::tensor_transform;
- return compute_strided_slice_output_shape(input.tensor_shape(), starts, ends, strides, begin_mask, end_mask, shrink_axis_mask);
+ return compute_strided_slice_output_shape(input.tensor_shape(), starts, ends, strides, begin_mask, end_mask,
+ shrink_axis_mask);
}
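The helper above only forwards to the tensor_transform utilities. As a hedged reminder, the per-dimension length rule it ultimately relies on (for positive strides, with begin/end already resolved against the masks) is the usual one; `slice_len` below is an illustrative name, not a library function:

#include <cassert>

// len = ceil((end - start) / stride), for positive stride and resolved bounds.
int slice_len(int start, int end, int stride)
{
    return (end - start + stride - 1) / stride;
}

int main()
{
    assert(slice_len(1, 7, 2) == 3); // elements 1, 3, 5
    assert(slice_len(0, 7, 2) == 4); // elements 0, 2, 4, 6
}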
/** Calculate the slice output shape of a tensor
@@ -1020,36 +1186,48 @@ inline TensorShape compute_strided_slice_shape(const ITensorInfo &input,
*
* @return the calculated shape
*/
-inline TensorShape compute_slice_shape(const TensorShape &input_shape, const Coordinates &starts, const Coordinates &ends)
+inline TensorShape
+compute_slice_shape(const TensorShape &input_shape, const Coordinates &starts, const Coordinates &ends)
{
using namespace arm_compute::helpers::tensor_transform;
- return compute_strided_slice_output_shape(input_shape,
- starts, ends, BiStrides(),
- 0, construct_slice_end_mask(ends), 0);
+ return compute_strided_slice_output_shape(input_shape, starts, ends, BiStrides(), 0, construct_slice_end_mask(ends),
+ 0);
}
/** Calculate the batch to space output shape of a tensor
*
- * @param[in] input Input tensor info
- * @param[in] block_x Block shape x value
- * @param[in] block_y Block shape y value
+ * @param[in] data_layout Data layout
+ * @param[in] input Input tensor shape
+ * @param[in] block_x Block shape x value
+ * @param[in] block_y Block shape y value
+ * @param[in] crop_info Cropping applied to the spatial output after the batch-to-space transform
*
* @return the calculated shape
*/
-inline TensorShape compute_batch_to_space_shape(const ITensorInfo *input, const int block_x, const int block_y)
+inline TensorShape compute_batch_to_space_shape(
+ DataLayout data_layout, const TensorShape &input, int block_x, int block_y, const CropInfo &crop_info = CropInfo{})
{
- ARM_COMPUTE_ERROR_ON(block_x <= 0 || block_y <= 0);
+ ARM_COMPUTE_ERROR_ON(block_x < 1 || block_y < 1);
- const DataLayout data_layout = input->data_layout();
- const int idx_width = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
- const int idx_height = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
- const int idx_batch = get_data_layout_dimension_index(data_layout, DataLayoutDimension::BATCHES);
+ const int idx_width = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
+ const int idx_height = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
+ const int idx_batch = get_data_layout_dimension_index(data_layout, DataLayoutDimension::BATCHES);
+
+ TensorShape output_shape{input};
+
+ unsigned int new_width = input[idx_width] * static_cast<unsigned int>(block_x);
+ unsigned int new_height = input[idx_height] * static_cast<unsigned int>(block_y);
+ const unsigned int width_crop = crop_info.left + crop_info.right;
+ const unsigned int height_crop = crop_info.top + crop_info.bottom;
+ ARM_COMPUTE_ERROR_ON(new_width <= width_crop);
+ ARM_COMPUTE_ERROR_ON(new_height <= height_crop);
+ new_width -= width_crop;
+ new_height -= height_crop;
- TensorShape output_shape{ input->tensor_shape() };
- output_shape.set(idx_width, input->tensor_shape()[idx_width] * block_x);
- output_shape.set(idx_height, input->tensor_shape()[idx_height] * block_y);
- output_shape.set(idx_batch, input->tensor_shape()[idx_batch] / (block_x * block_y));
+ output_shape.set(idx_width, new_width);
+ output_shape.set(idx_height, new_height);
+ output_shape.set(idx_batch, input[idx_batch] / (block_x * block_y));
return output_shape;
}
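A worked check of the cropped batch-to-space arithmetic above, as a standalone sketch (`Crop` and `batch_to_space_dims` are illustrative names):

#include <cassert>

struct Crop
{
    unsigned left = 0, right = 0, top = 0, bottom = 0;
};

// Spatial dims grow by the block size, the crop is subtracted,
// and the batch dim shrinks by block_x * block_y.
void batch_to_space_dims(unsigned &n, unsigned &h, unsigned &w, unsigned block_x, unsigned block_y, const Crop &c)
{
    w = w * block_x - (c.left + c.right);
    h = h * block_y - (c.top + c.bottom);
    n = n / (block_x * block_y);
}

int main()
{
    unsigned n = 8, h = 2, w = 3;
    batch_to_space_dims(n, h, w, 2, 2, Crop{1, 0, 0, 0});
    assert(n == 2 && h == 4 && w == 5); // (N=8,H=2,W=3), block 2x2, left crop 1
}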
@@ -1070,7 +1248,7 @@ inline TensorShape compute_depth_to_space_shape(const TensorShape &input_shape,
const int idx_height = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
const int idx_channel = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL);
- TensorShape output_shape{ input_shape };
+ TensorShape output_shape{input_shape};
output_shape.set(idx_width, input_shape[idx_width] * block);
output_shape.set(idx_height, input_shape[idx_height] * block);
output_shape.set(idx_channel, input_shape[idx_channel] / (block * block));
@@ -1091,10 +1269,10 @@ inline TensorShape compute_split_shape(const ITensorInfo *input, unsigned int ax
TensorShape empty_shape;
empty_shape.set(0, 0);
- TensorShape out_shape{ input->tensor_shape() };
+ TensorShape out_shape{input->tensor_shape()};
// Return empty shape if axis is invalid
- if(axis > input->tensor_shape().num_dimensions())
+ if (axis > input->tensor_shape().num_dimensions())
{
return empty_shape;
}
@@ -1102,7 +1280,7 @@ inline TensorShape compute_split_shape(const ITensorInfo *input, unsigned int ax
size_t axis_size = out_shape[axis];
// Return empty shape if num_split is not valid
- if(axis_size % num_splits)
+ if (axis_size % num_splits)
{
return empty_shape;
}
@@ -1121,9 +1299,10 @@ inline TensorShape compute_split_shape(const ITensorInfo *input, unsigned int ax
*
* @return the calculated shape
*/
-inline TensorShape compute_space_to_batch_shape(const ITensorInfo *input, const int block_x, const int block_y, const Size2D &padding_left, const Size2D &padding_right)
+inline TensorShape compute_space_to_batch_shape(
+ const ITensorInfo *input, int block_x, int block_y, const Size2D &padding_left, const Size2D &padding_right)
{
- TensorShape output_shape{ input->tensor_shape() };
+ TensorShape output_shape{input->tensor_shape()};
const DataLayout data_layout = input->data_layout();
const int idx_width = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
@@ -1149,16 +1328,16 @@ inline TensorShape compute_space_to_batch_shape(const ITensorInfo *input, const
*/
inline TensorShape compute_space_to_depth_shape(const ITensorInfo *input, int32_t block_shape)
{
- TensorShape output_shape{ input->tensor_shape() };
+ TensorShape output_shape{input->tensor_shape()};
const DataLayout data_layout = input->data_layout();
const int idx_width = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
const int idx_height = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
const int idx_depth = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL);
- output_shape.set(idx_width, input->tensor_shape()[idx_width] * block_shape);
- output_shape.set(idx_height, input->tensor_shape()[idx_height] * block_shape);
- output_shape.set(idx_depth, input->tensor_shape()[idx_depth] / (block_shape * block_shape));
+ output_shape.set(idx_width, input->tensor_shape()[idx_width] / block_shape);
+ output_shape.set(idx_height, input->tensor_shape()[idx_height] / block_shape);
+ output_shape.set(idx_depth, input->tensor_shape()[idx_depth] * (block_shape * block_shape));
return output_shape;
}
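The hunk above fixes the direction of the transform: space-to-depth shrinks the spatial dimensions and grows the channel dimension by the square of the block size (the previous code did the opposite). A quick numeric check:

#include <cassert>

int main()
{
    int w = 4, h = 4, c = 3;
    const int block = 2;
    w /= block;         // spatial dims shrink by the block size
    h /= block;
    c *= block * block; // channels grow by block^2
    assert(w == 2 && h == 2 && c == 12); // (4,4,3) -> (2,2,12)
}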
@@ -1194,7 +1373,7 @@ inline TensorShape compute_prior_box_shape(const ITensorInfo &input, const Prior
inline TensorShape compute_padded_shape(const TensorShape &input_shape, const PaddingList &padding)
{
TensorShape padded_shape = input_shape;
- for(size_t dim = 0; dim < padding.size(); ++dim)
+ for (size_t dim = 0; dim < padding.size(); ++dim)
{
const auto &padding_pair = padding[dim];
const uint32_t shape_on_index = (padded_shape.num_dimensions() <= dim) ? 1 : input_shape[dim];
@@ -1213,7 +1392,7 @@ inline TensorShape compute_padded_shape(const TensorShape &input_shape, const Pa
inline TensorShape compute_tiled_shape(const TensorShape &input_shape, const Multiples &multiples)
{
TensorShape tiled_shape = input_shape;
- for(size_t dim = 0; dim < multiples.size(); ++dim)
+ for (size_t dim = 0; dim < multiples.size(); ++dim)
{
tiled_shape.set(dim, input_shape[dim] * multiples[dim]);
}
@@ -1230,9 +1409,9 @@ inline TensorShape compute_tiled_shape(const TensorShape &input_shape, const Mul
*/
inline TensorShape compute_reduced_shape(const TensorShape &input, unsigned int axis, bool keep_dims = true)
{
- TensorShape output_shape{ input };
+ TensorShape output_shape{input};
- if(!keep_dims)
+ if (!keep_dims)
{
output_shape.remove_dimension(axis);
}
@@ -1325,14 +1504,14 @@ inline TensorShape calculate_concatenate_shape(const std::vector<T *> &input, si
#if defined(ARM_COMPUTE_ASSERTS_ENABLED)
// All dimensions must match except the axis one
- for(unsigned int i = 0; i < MAX_DIMS; ++i)
+ for (unsigned int i = 0; i < MAX_DIMS; ++i)
{
- if(i == axis)
+ if (i == axis)
{
continue;
}
- for(const auto &tensor : input)
+ for (const auto &tensor : input)
{
ARM_COMPUTE_ERROR_ON(tensor == nullptr);
const TensorShape shape = extract_shape(tensor);
@@ -1343,7 +1522,7 @@ inline TensorShape calculate_concatenate_shape(const std::vector<T *> &input, si
// Calculate output shape
size_t new_size = 0;
- for(const auto &tensor : input)
+ for (const auto &tensor : input)
{
const TensorShape shape = extract_shape(tensor);
new_size += shape[axis];
@@ -1366,14 +1545,14 @@ inline TensorShape compute_stack_shape(const ITensorInfo &a, unsigned int axis,
ARM_COMPUTE_ERROR_ON(axis > a.num_dimensions());
ARM_COMPUTE_ERROR_ON(a.num_dimensions() > 4);
- TensorShape shape_out{ a.tensor_shape() };
+ TensorShape shape_out{a.tensor_shape()};
shape_out.set(axis, num_tensors);
unsigned int i_shift = 0;
- for(unsigned int i = 0; i < a.num_dimensions(); ++i)
+ for (unsigned int i = 0; i < a.num_dimensions(); ++i)
{
- if(i == axis)
+ if (i == axis)
{
i_shift++;
}
@@ -1383,18 +1562,177 @@ inline TensorShape compute_stack_shape(const ITensorInfo &a, unsigned int axis,
return shape_out;
}
-inline TensorShape compute_gather_shape(const TensorShape &input_shape, const TensorShape &indices_shape, uint32_t actual_axis)
+/** Calculate the output shape of a 3d convolution
+ *
+ * @param[in] src Input tensor shape
+ * @param[in] weights Weights tensor shape
+ * @param[in] conv3d_info 3d convolution parameters
+ *
+ * @return the calculated shape
+ */
+inline TensorShape
+compute_conv3d_shape(const TensorShape &src, const TensorShape &weights, const Conv3dInfo &conv3d_info)
{
- ARM_COMPUTE_ERROR_ON(indices_shape.num_dimensions() > 1);
- ARM_COMPUTE_ERROR_ON(input_shape.num_dimensions() > 4);
- ARM_COMPUTE_ERROR_ON(actual_axis >= input_shape.num_dimensions());
+ // Weight tensor shape indices (D H W Cin Cout)
+ constexpr unsigned int weights_depth_dim = 4u;
+ constexpr unsigned int weights_height_dim = 3u;
+ constexpr unsigned int weights_width_dim = 2u;
+ constexpr unsigned int weights_CHout_dim = 0u;
+
+ // Source/Destination Tensor shape indices (N D H W C)
+ constexpr unsigned int batch_dim = 4u;
+ constexpr unsigned int depth_dim = 3u;
+ constexpr unsigned int height_dim = 2u;
+ constexpr unsigned int width_dim = 1u;
+ constexpr unsigned int channel_dim = 0u;
+
+ TensorShape output_shape{src};
+ const size_t pad_left = conv3d_info.padding.left;
+ const size_t pad_right = conv3d_info.padding.right;
+ const size_t pad_top = conv3d_info.padding.top;
+ const size_t pad_bottom = conv3d_info.padding.bottom;
+ const size_t pad_front = conv3d_info.padding.front;
+ const size_t pad_back = conv3d_info.padding.back;
+ const size_t dilation_x = conv3d_info.dilation.width;
+ const size_t dilation_y = conv3d_info.dilation.height;
+ const size_t dilation_z = conv3d_info.dilation.depth;
+ const size_t stride_x = conv3d_info.stride.x();
+ const size_t stride_y = conv3d_info.stride.y();
+ const size_t stride_z = conv3d_info.stride.z();
+
+ int output_width_size = 0;
+ int output_height_size = 0;
+ int output_depth_size = 0;
+
+ switch (conv3d_info.round_type)
+ {
+ case DimensionRoundingType::FLOOR:
+ output_width_size =
+ static_cast<int>(std::floor((static_cast<float>(src[width_dim] + pad_left + pad_right -
+ (dilation_x * (weights[weights_width_dim] - 1) + 1)) /
+ stride_x) +
+ 1));
+ output_height_size =
+ static_cast<int>(std::floor((static_cast<float>(src[height_dim] + pad_top + pad_bottom -
+ (dilation_y * (weights[weights_height_dim] - 1) + 1)) /
+ stride_y) +
+ 1));
+ output_depth_size =
+ static_cast<int>(std::floor((static_cast<float>(src[depth_dim] + pad_front + pad_back -
+ (dilation_z * (weights[weights_depth_dim] - 1) + 1)) /
+ stride_z) +
+ 1));
+ break;
+ case DimensionRoundingType::CEIL:
+ output_width_size =
+ static_cast<int>(std::ceil((static_cast<float>(src[width_dim] + pad_left + pad_right -
+ (dilation_x * (weights[weights_width_dim] - 1) + 1)) /
+ stride_x) +
+ 1));
+ output_height_size =
+ static_cast<int>(std::ceil((static_cast<float>(src[height_dim] + pad_top + pad_bottom -
+ (dilation_y * (weights[weights_height_dim] - 1) + 1)) /
+ stride_y) +
+ 1));
+ output_depth_size =
+ static_cast<int>(std::ceil((static_cast<float>(src[depth_dim] + pad_front + pad_back -
+ (dilation_z * (weights[weights_depth_dim] - 1) + 1)) /
+ stride_z) +
+ 1));
+ break;
+ default:
+ ARM_COMPUTE_ERROR("Unsupported rounding type");
+ }
+
+ output_shape.set(batch_dim, src[batch_dim]);
+ output_shape.set(width_dim, output_width_size);
+ output_shape.set(height_dim, output_height_size);
+ output_shape.set(depth_dim, output_depth_size);
+ output_shape.set(channel_dim, weights[weights_CHout_dim]);
+ return output_shape;
+}
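The switch above evaluates the same expression per W/H/D, differing only in FLOOR versus CEIL rounding. A standalone sketch of one dimension under FLOOR (`conv_out_dim` is an illustrative name):

#include <cassert>
#include <cmath>

// out = floor((in + pad_lo + pad_hi - (dilation * (kernel - 1) + 1)) / stride) + 1
int conv_out_dim(int in, int pad_lo, int pad_hi, int kernel, int dilation, int stride)
{
    const int effective_k = dilation * (kernel - 1) + 1;
    return static_cast<int>(std::floor(static_cast<float>(in + pad_lo + pad_hi - effective_k) / stride)) + 1;
}

int main()
{
    assert(conv_out_dim(16, 0, 0, 3, 1, 2) == 7); // valid padding, stride 2
    assert(conv_out_dim(16, 1, 1, 3, 1, 2) == 8); // padding 1 on each side
}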
+
+/** Calculate the 3d pooling output shape of a tensor
+ *
+ * @param[in] src Input tensor shape
+ * @param[in] pool3d_info 3d pooling layer info
+ *
+ * @return the calculated shape
+ */
+inline TensorShape compute_pool3d_shape(const TensorShape &src, Pooling3dLayerInfo pool3d_info)
+{
+ TensorShape output_shape{src};
+
+ const auto data_layout = DataLayout::NDHWC;
+ const int idx_width = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
+ const int idx_height = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
+ const int idx_depth = get_data_layout_dimension_index(data_layout, DataLayoutDimension::DEPTH);
+ const int pool_size_width = pool3d_info.is_global_pooling ? src[idx_width] : pool3d_info.pool_size.width;
+ const int pool_size_height = pool3d_info.is_global_pooling ? src[idx_height] : pool3d_info.pool_size.height;
+ const int pool_size_depth = pool3d_info.is_global_pooling ? src[idx_depth] : pool3d_info.pool_size.depth;
+ int output_width = 0;
+ int output_height = 0;
+ int output_depth = 0;
+
+ std::tie(output_width, output_height, output_depth) =
+ scaled_3d_dimensions_signed(src[idx_width], src[idx_height], src[idx_depth], pool_size_width, pool_size_height,
+ pool_size_depth, pool3d_info);
+
+ ARM_COMPUTE_ERROR_ON_MSG((output_width < 1 || output_height < 1 || output_depth < 1),
+ "Calculated output dimension size is invalid");
+
+ output_shape.set(idx_width, static_cast<size_t>(output_width));
+ output_shape.set(idx_height, static_cast<size_t>(output_height));
+ output_shape.set(idx_depth, static_cast<size_t>(output_depth));
+
+ return output_shape;
+}
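As a hedged sketch of what scaled_3d_dimensions_signed computes per W/H/D under FLOOR rounding (details such as CEIL rounding and padding-exclusion clamping are omitted; `pooled_dim` is an illustrative name):

#include <cassert>
#include <cmath>

// out = floor((in + pad_lo + pad_hi - pool) / stride) + 1
int pooled_dim(int in, int pool, int stride, int pad_lo, int pad_hi)
{
    return static_cast<int>(std::floor(static_cast<float>(in + pad_lo + pad_hi - pool) / stride)) + 1;
}

int main()
{
    assert(pooled_dim(32, 2, 2, 0, 0) == 16); // classic 2x2/2 halving
    assert(pooled_dim(7, 3, 2, 0, 0) == 3);   // floor((7 - 3) / 2) + 1
}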
+
+/** Calculate the gather output shape of a tensor
+ *
+ * @param[in] input_shape Input tensor shape
+ * @param[in] indices_shape Indices tensor shape. Only 2d and 3d indices are supported
+ * @param[in] actual_axis Axis to be used in the computation
+ *
+ * @note Let input_shape be (X,Y,Z), indices_shape be (W,O,P) and axis be 1:
+ * the new shape is computed by replacing the axis of the input shape with
+ * the indices shape, so the output shape will be (X,W,O,P,Z)
+ *
+ * @return the calculated shape
+ */
+inline TensorShape
+compute_gather_shape(const TensorShape &input_shape, const TensorShape &indices_shape, uint32_t actual_axis)
+{
+ const auto input_num_dims = input_shape.num_dimensions();
+ const auto indices_num_dims = indices_shape.num_dimensions();
+
+ ARM_COMPUTE_ERROR_ON(actual_axis >= input_num_dims);
+ ARM_COMPUTE_ERROR_ON(input_num_dims + indices_num_dims - 1 > Coordinates::num_max_dimensions);
+
+ TensorShape output_shape;
+ size_t dim_no = 0;
+
+ for (; dim_no < actual_axis; ++dim_no)
+ {
+ output_shape.set(dim_no, input_shape[dim_no]);
+ }
+
+ for (; dim_no < actual_axis + indices_num_dims; ++dim_no)
+ {
+ output_shape.set(dim_no, indices_shape[dim_no - actual_axis]);
+ }
+
+ for (; dim_no < input_num_dims + indices_num_dims - 1; ++dim_no)
+ {
+ output_shape.set(dim_no, input_shape[dim_no + 1 - indices_num_dims]);
+ }
- TensorShape output_shape = input_shape;
- output_shape[actual_axis] = indices_shape[0];
+ ARM_COMPUTE_ERROR_ON(input_shape.total_size() * indices_shape.total_size() !=
+ output_shape.total_size() * input_shape[actual_axis]);
return output_shape;
}
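A standalone sketch of the axis-splicing rule implemented above (`gather_shape` is an illustrative name; dimensions are listed innermost-first, as in TensorShape):

#include <cassert>
#include <vector>

// Splice the indices shape into the input shape in place of the gathered axis.
std::vector<std::size_t> gather_shape(const std::vector<std::size_t> &in,
                                      const std::vector<std::size_t> &idx, std::size_t axis)
{
    std::vector<std::size_t> out(in.begin(), in.begin() + axis);
    out.insert(out.end(), idx.begin(), idx.end());
    out.insert(out.end(), in.begin() + axis + 1, in.end());
    return out;
}

int main()
{
    // input (X,Y,Z)=(5,6,7), indices (W,O,P)=(2,3,4), axis 1 -> (5,2,3,4,7).
    const auto out = gather_shape({5, 6, 7}, {2, 3, 4}, 1);
    assert((out == std::vector<std::size_t>{5, 2, 3, 4, 7}));
}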
} // namespace shape_calculator
} // namespace misc
} // namespace arm_compute
-#endif /* ARM_COMPUTE_MISC_SHAPE_CALCULATOR_H */
+#endif // ACL_ARM_COMPUTE_CORE_UTILS_MISC_SHAPECALCULATOR_H
diff --git a/arm_compute/core/utils/misc/Traits.h b/arm_compute/core/utils/misc/Traits.h
index 933922f63c..944fcb95f9 100644
--- a/arm_compute/core/utils/misc/Traits.h
+++ b/arm_compute/core/utils/misc/Traits.h
@@ -25,6 +25,7 @@
#define ARM_COMPUTE_UTILS_TRAITS_TRAITS_H
#include "arm_compute/core/Types.h"
+
#include <type_traits>
namespace arm_compute
diff --git a/arm_compute/core/utils/misc/Utility.h b/arm_compute/core/utils/misc/Utility.h
index 648758ca07..22f10d74cc 100644
--- a/arm_compute/core/utils/misc/Utility.h
+++ b/arm_compute/core/utils/misc/Utility.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -28,6 +28,7 @@
#include <algorithm>
#include <array>
+#include <cstdint>
#include <limits>
#include <numeric>
#include <vector>
@@ -43,7 +44,7 @@ struct index_sequence
};
template <std::size_t N, std::size_t... S>
-struct index_sequence_generator : index_sequence_generator < N - 1, N - 1, S... >
+struct index_sequence_generator : index_sequence_generator<N - 1, N - 1, S...>
{
};
@@ -57,17 +58,17 @@ template <std::size_t N>
using index_sequence_t = typename index_sequence_generator<N>::type;
template <typename T, std::size_t N, T val, T... vals>
-struct generate_array : generate_array < T, N - 1, val, val, vals... >
+struct generate_array : generate_array<T, N - 1, val, val, vals...>
{
};
template <typename T, T val, T... vals>
struct generate_array<T, 0, val, vals...>
{
- static constexpr std::array<T, sizeof...(vals)> value{ vals... };
+ static constexpr std::array<T, sizeof...(vals)> value{vals...};
};
-template <typename T, T val, T... vals>
+template <typename T, T val, T... vals>
constexpr std::array<T, sizeof...(vals)> generate_array<T, 0, val, vals...>::value;
/** @endcond */
@@ -78,7 +79,7 @@ template <std::size_t... S,
typename T = std::array<typename std::iterator_traits<Iterator>::value_type, sizeof...(S)>>
T make_array(Iterator first, index_sequence<S...>)
{
- return T{ { first[S]... } };
+ return T{{first[S]...}};
}
} // namespace detail
@@ -86,7 +87,7 @@ template <std::size_t N, typename Iterator>
std::array<typename std::iterator_traits<Iterator>::value_type, N> make_array(Iterator first, Iterator last)
{
ARM_COMPUTE_UNUSED(last);
- return detail::make_array(first, index_sequence_t<N> {});
+ return detail::make_array(first, index_sequence_t<N>{});
}
/** Performs clamping among a lower and upper value.
@@ -118,7 +119,7 @@ inline void for_each(F &&)
* @param[in] args Remaining arguments
*/
template <typename F, typename T, typename... Ts>
-inline void for_each(F &&func, T &&arg, Ts &&... args)
+inline void for_each(F &&func, T &&arg, Ts &&...args)
{
func(std::forward<T>(arg));
for_each(std::forward<F>(func), std::forward<Ts>(args)...);
@@ -142,9 +143,11 @@ inline T &&foldl(F &&, T &&value)
* @param[in] values Remaining arguments
*/
template <typename F, typename T, typename U, typename... Us>
-inline auto foldl(F &&func, T &&initial, U &&value, Us &&... values) -> decltype(func(std::forward<T>(initial), std::forward<U>(value)))
+inline auto foldl(F &&func, T &&initial, U &&value, Us &&...values)
+ -> decltype(func(std::forward<T>(initial), std::forward<U>(value)))
{
- return foldl(std::forward<F>(func), func(std::forward<T>(initial), std::forward<U>(value)), std::forward<Us>(values)...);
+ return foldl(std::forward<F>(func), func(std::forward<T>(initial), std::forward<U>(value)),
+ std::forward<Us>(values)...);
}
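A compilable usage sketch of the reformatted variadic fold above (re-declared standalone for illustration):

#include <cassert>
#include <utility>

// Base case: a single value folds to itself.
template <typename F, typename T>
T foldl(F &&, T &&value)
{
    return std::forward<T>(value);
}

// Recursive case: fold the first two arguments, then recurse on the rest.
template <typename F, typename T, typename U, typename... Us>
auto foldl(F &&func, T &&initial, U &&value, Us &&...values)
    -> decltype(func(std::forward<T>(initial), std::forward<U>(value)))
{
    return foldl(std::forward<F>(func), func(std::forward<T>(initial), std::forward<U>(value)),
                 std::forward<Us>(values)...);
}

int main()
{
    const auto add = [](int a, int b) { return a + b; };
    assert(foldl(add, 0, 1, 2, 3, 4) == 10); // ((((0 + 1) + 2) + 3) + 4)
}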
/** Perform an index sort of a given vector.
@@ -159,11 +162,7 @@ std::vector<size_t> sort_indices(const std::vector<T> &v)
std::vector<size_t> idx(v.size());
std::iota(idx.begin(), idx.end(), 0);
- std::sort(idx.begin(), idx.end(),
- [&v](size_t i1, size_t i2)
- {
- return v[i1] < v[i2];
- });
+ std::sort(idx.begin(), idx.end(), [&v](size_t i1, size_t i2) { return v[i1] < v[i2]; });
return idx;
}
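Usage sketch of the index sort above: it returns the permutation that would sort the input ascending without moving the input's elements (standalone, non-template copy for illustration):

#include <algorithm>
#include <cassert>
#include <numeric>
#include <vector>

std::vector<std::size_t> sort_indices(const std::vector<int> &v)
{
    std::vector<std::size_t> idx(v.size());
    std::iota(idx.begin(), idx.end(), 0);
    std::sort(idx.begin(), idx.end(), [&v](std::size_t a, std::size_t b) { return v[a] < v[b]; });
    return idx;
}

int main()
{
    const std::vector<int> v{30, 10, 20};
    assert((sort_indices(v) == std::vector<std::size_t>{1, 2, 0})); // v[1] <= v[2] <= v[0]
}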
@@ -177,7 +176,7 @@ std::vector<size_t> sort_indices(const std::vector<T> &v)
*/
inline bool endswith(const std::string &str, const std::string &suffix)
{
- if(str.size() < suffix.size())
+ if (str.size() < suffix.size())
{
return false;
}
@@ -204,10 +203,7 @@ inline bool check_aligned(void *ptr, const size_t alignment)
*/
inline std::string tolower(std::string string)
{
- std::transform(string.begin(), string.end(), string.begin(), [](unsigned char c)
- {
- return std::tolower(c);
- });
+ std::transform(string.begin(), string.end(), string.begin(), [](unsigned char c) { return std::tolower(c); });
return string;
}
@@ -226,7 +222,7 @@ inline std::string getenv(const std::string &env_name)
return std::string{};
#else // BARE_METAL
const auto env_chr = std::getenv(env_name.c_str());
- return env_chr == nullptr ? std::string{} : std::string{ env_chr };
+ return env_chr == nullptr ? std::string{} : std::string{env_chr};
#endif // BARE_METAL
}
} // namespace utility
diff --git a/arm_compute/core/utils/quantization/AsymmHelpers.h b/arm_compute/core/utils/quantization/AsymmHelpers.h
index cbf7559bc9..2324fe1838 100644
--- a/arm_compute/core/utils/quantization/AsymmHelpers.h
+++ b/arm_compute/core/utils/quantization/AsymmHelpers.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -41,7 +41,10 @@ namespace quantization
*
* @return a status
*/
-Status calculate_quantized_multiplier(float multiplier, int32_t *quant_multiplier, int32_t *shift, bool ignore_epsilon = false);
+Status calculate_quantized_multiplier(float multiplier,
+ int32_t *quant_multiplier,
+ int32_t *shift,
+ bool ignore_epsilon = false);
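These helpers decompose a real multiplier into a fixed-point multiplier and a shift for integer-only kernels. A hedged standalone sketch of the usual frexp-based decomposition (sign conventions for the shift differ between the less-than-one and greater-than-one variants below; `quantize_multiplier_sketch` is an illustrative name, not the library's implementation):

#include <cassert>
#include <cmath>
#include <cstdint>

// Approximate a real multiplier M > 0 as quant_multiplier * 2^(shift - 31),
// with quant_multiplier a Q0.31 value in [2^30, 2^31).
void quantize_multiplier_sketch(double m, int32_t &quant_multiplier, int32_t &shift)
{
    assert(m > 0.0);
    int exp = 0;
    const double q = std::frexp(m, &exp);                     // m = q * 2^exp, q in [0.5, 1)
    int64_t q31 = static_cast<int64_t>(std::llround(q * (1ll << 31)));
    if (q31 == (1ll << 31))                                   // rounding pushed q up to 1.0
    {
        q31 /= 2;
        ++exp;
    }
    quant_multiplier = static_cast<int32_t>(q31);
    shift            = exp;
}

int main()
{
    int32_t qm = 0, s = 0;
    quantize_multiplier_sketch(0.75, qm, s);  // 0.75 = 0.75 * 2^0
    assert(s == 0 && qm == (3ll << 29));      // round(0.75 * 2^31)
}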
/** Calculate quantized representation of multiplier with value less than one.
*
* @param[in] multiplier Real multiplier.
@@ -51,7 +54,10 @@ Status calculate_quantized_multiplier(float multiplier, int32_t *quant_multiplie
*
* @return a status
*/
-Status calculate_quantized_multiplier_less_than_one(float multiplier, int32_t *quant_multiplier, int32_t *right_shift, bool ignore_epsilon = false);
+Status calculate_quantized_multiplier_less_than_one(float multiplier,
+ int32_t *quant_multiplier,
+ int32_t *right_shift,
+ bool ignore_epsilon = false);
/** Calculate quantized representation of multiplier having value greater than one.
*
* @param[in] multiplier Real multiplier.
@@ -60,7 +66,8 @@ Status calculate_quantized_multiplier_less_than_one(float multiplier, int32_t *q
*
* @return a status
*/
-Status calculate_quantized_multiplier_greater_than_one(float multiplier, int32_t *quantized_multiplier, int32_t *left_shift);
+Status
+calculate_quantized_multiplier_greater_than_one(float multiplier, int32_t *quantized_multiplier, int32_t *left_shift);
/** Calculate quantized representation of per-channel multipliers
*
@@ -71,9 +78,9 @@ Status calculate_quantized_multiplier_greater_than_one(float multiplier, int32_t
*
* @return a status
*/
-Status calculate_quantized_multipliers(const QuantizationInfo &iq_info,
- const QuantizationInfo &wq_info,
- const QuantizationInfo &oq_info,
+Status calculate_quantized_multipliers(const QuantizationInfo &iq_info,
+ const QuantizationInfo &wq_info,
+ const QuantizationInfo &oq_info,
GEMMLowpOutputStageInfo &stage_info);
/** Get minimum and maximum values for the input quantized data type
@@ -81,6 +88,7 @@ Status calculate_quantized_multipliers(const QuantizationInfo &iq_info,
* @return min and max values for the quantized data type
*/
std::pair<int, int> get_min_max_values_from_quantized_data_type(DataType data_type);
+
/** Compute quantized per-channel multipliers and shifts. As many multipliers
* and shifts as output channels are computed. If weights are not quantized
* per-channel, multipliers and shifts will end up being the same for each
@@ -89,16 +97,12 @@ std::pair<int, int> get_min_max_values_from_quantized_data_type(DataType data_ty
* @param[in] input Input tensor info.
* @param[in] weights Weights tensor info.
* @param[in] output Output tensor info.
- * @param[in] idx_ofms Dimension index to get OFMs from the weights tensor.
* @param[out] output_multipliers_ptr Pointer to the buffer where to store per-channel multipliers.
* @param[out] output_shifts_ptr Pointer to the buffer where to store per-channel shifts.
- *
- * @return min and max values for the quantized data type
*/
void compute_quantized_multipliers_and_shifts(const ITensorInfo *input,
const ITensorInfo *weights,
const ITensorInfo *output,
- unsigned int idx_ofms,
int32_t *output_multipliers_ptr,
int32_t *output_shifts_ptr);
@@ -150,7 +154,10 @@ int32_t saturating_rounding_multiply_by_pow2(int32_t exponent, int32_t v);
* @param[out] output_shift Shift for inverse square root
*
*/
-void get_invsqrt_quantized_multiplier_exp(int32_t input, int32_t reverse_shift, int32_t &output_inv_sqrt, int32_t &output_shift);
+void get_invsqrt_quantized_multiplier_exp(int32_t input,
+ int32_t reverse_shift,
+ int32_t &output_inv_sqrt,
+ int32_t &output_shift);
} // namespace quantization
} // namespace arm_compute