diff options
Diffstat (limited to 'src/core/helpers')
-rw-r--r-- | src/core/helpers/AutoConfiguration.h | 183 | ||||
-rw-r--r-- | src/core/helpers/LUTManager.cpp | 79 | ||||
-rw-r--r-- | src/core/helpers/LUTManager.h | 73 | ||||
-rw-r--r-- | src/core/helpers/MemoryHelpers.h | 139 | ||||
-rw-r--r-- | src/core/helpers/NormalizationHelpers.h | 47 | ||||
-rw-r--r-- | src/core/helpers/PoolingHelpers.h | 219 | ||||
-rw-r--r-- | src/core/helpers/ScaleHelpers.h | 207 | ||||
-rw-r--r-- | src/core/helpers/SoftmaxHelpers.cpp | 45 | ||||
-rw-r--r-- | src/core/helpers/SoftmaxHelpers.h | 50 | ||||
-rw-r--r-- | src/core/helpers/Utils.cpp | 49 | ||||
-rw-r--r-- | src/core/helpers/Utils.h | 120 | ||||
-rw-r--r-- | src/core/helpers/WindowHelpers.cpp | 349 | ||||
-rw-r--r-- | src/core/helpers/WindowHelpers.h | 230 |
13 files changed, 1790 insertions, 0 deletions
diff --git a/src/core/helpers/AutoConfiguration.h b/src/core/helpers/AutoConfiguration.h new file mode 100644 index 0000000000..9df2a76983 --- /dev/null +++ b/src/core/helpers/AutoConfiguration.h @@ -0,0 +1,183 @@ +/* +* Copyright (c) 2020, 2023 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef SRC_CORE_HELPERS_AUTOCONFIGURATION_H +#define SRC_CORE_HELPERS_AUTOCONFIGURATION_H + +#include "arm_compute/core/ITensorInfo.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/core/utils/DataTypeUtils.h" + +namespace arm_compute +{ +/** Auto initialize the tensor info (shape, number of channels and data type) if the current assignment is empty. + * + * @param[in,out] info Tensor info used to check and assign. + * @param[in] shape New shape. + * @param[in] num_channels New number of channels. 
+ * @param[in] data_type New data type + * @param[in] quantization_info (Optional) New quantization info + * + * @return True if the tensor info has been initialized + */ +inline bool auto_init_if_empty(ITensorInfo &info, + const TensorShape &shape, + int num_channels, + DataType data_type, + QuantizationInfo quantization_info = QuantizationInfo()) +{ + if (info.tensor_shape().total_size() == 0) + { + info.set_data_type(data_type); + info.set_num_channels(num_channels); + info.set_tensor_shape(shape); + info.set_quantization_info(quantization_info); + return true; + } + + return false; +} + +/** Auto initialize the tensor info using another tensor info. + * + * (COMPMID-6012) This method should remain in sync with the fields of ITensorInfo that have setters. + * + * + * @param info_sink Tensor info used to check and assign + * @param info_source Tensor info used to assign + * + * + * @return True if the tensor info has been initialized + */ +inline bool auto_init_if_empty(ITensorInfo &info_sink, const ITensorInfo &info_source) +{ + if (info_sink.tensor_shape().total_size() == 0) + { + info_sink.set_data_type(info_source.data_type()); + info_sink.set_num_channels(info_source.num_channels()); + info_sink.set_tensor_shape(info_source.tensor_shape()); + info_sink.set_quantization_info(info_source.quantization_info()); + info_sink.set_data_layout(info_source.data_layout()); + info_sink.set_are_values_constant(info_source.are_values_constant()); + return true; + } + + return false; +} + +/** Set the shape to the specified value if the current assignment is empty. + * + * @param[in,out] info Tensor info used to check and assign. + * @param[in] shape New shape. + * + * @return True if the shape has been changed. 
+ */ +inline bool set_shape_if_empty(ITensorInfo &info, const TensorShape &shape) +{ + if (info.tensor_shape().total_size() == 0) + { + info.set_tensor_shape(shape); + return true; + } + + return false; +} + +/** Set the format, data type and number of channels to the specified value if + * the current data type is unknown. + * + * @param[in,out] info Tensor info used to check and assign. + * @param[in] format New format. + * + * @return True if the format has been changed. + */ +inline bool set_format_if_unknown(ITensorInfo &info, Format format) +{ + if (info.data_type() == DataType::UNKNOWN) + { + info.set_format(format); + return true; + } + + return false; +} + +/** Set the data type and number of channels to the specified value if + * the current data type is unknown. + * + * @param[in,out] info Tensor info used to check and assign. + * @param[in] data_type New data type. + * + * @return True if the data type has been changed. + */ +inline bool set_data_type_if_unknown(ITensorInfo &info, DataType data_type) +{ + if (info.data_type() == DataType::UNKNOWN) + { + info.set_data_type(data_type); + return true; + } + + return false; +} + +/** Set the data layout to the specified value if + * the current data layout is unknown. + * + * @param[in,out] info Tensor info used to check and assign. + * @param[in] data_layout New data layout. + * + * @return True if the data type has been changed. + */ +inline bool set_data_layout_if_unknown(ITensorInfo &info, DataLayout data_layout) +{ + if (info.data_layout() == DataLayout::UNKNOWN) + { + info.set_data_layout(data_layout); + return true; + } + + return false; +} + +/** Set the quantization info to the specified value if + * the current quantization info is empty and the data type of asymmetric quantized type + * + * @param[in,out] info Tensor info used to check and assign. + * @param[in] quantization_info Quantization info + * + * @return True if the quantization info has been changed. 
+ */ +inline bool set_quantization_info_if_empty(ITensorInfo &info, QuantizationInfo quantization_info) +{ + if (info.quantization_info().empty() && (is_data_type_quantized_asymmetric(info.data_type()))) + { + info.set_quantization_info(quantization_info); + return true; + } + + return false; +} +} // namespace arm_compute + +#endif /* SRC_CORE_HELPERS_AUTOCONFIGURATION_H */ diff --git a/src/core/helpers/LUTManager.cpp b/src/core/helpers/LUTManager.cpp new file mode 100644 index 0000000000..06e35eed8c --- /dev/null +++ b/src/core/helpers/LUTManager.cpp @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2024 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "src/core/helpers/LUTManager.h" + +namespace arm_compute +{ +#ifdef __aarch64__ +namespace +{ + +void init_lut_fp16(ActivationLayerInfo::LookupTable65536 *lut) +{ + union Element + { + uint16_t i = 0; + float16_t fp; + } item; + // Fill lut by iterating over all 16 bit values using the union. + while (true) + { + (*lut)[item.i] = 1.f / (1.f + std::exp(-item.fp)); + if (item.i == 65535) + break; + item.i++; + } +} +} // namespace + +std::shared_ptr<ActivationLayerInfo::LookupTable65536> LUTManager::get_lut_table(LUTInfo info) +{ + const auto itr = map_fp16.find(info); + auto s_ptr = (itr != map_fp16.end()) ? itr->second.lock() : nullptr; // nullptr if invalid or not found. + if (s_ptr != nullptr) + { + // Found and valid + return s_ptr; // Return weak ptr as shared ptr + } + else + { + // Not found, or pointer not valid + // We do not use make_shared to prevent the weak_ptr keeping the control block alive + std::shared_ptr<ActivationLayerInfo::LookupTable65536> ptr(new ActivationLayerInfo::LookupTable65536); + init_lut_fp16(ptr.get()); + map_fp16[info] = ptr; + return ptr; + } +} +#endif // __aarch64__ + +// Static function to get LutManager instance +LUTManager &LUTManager::get_instance() +{ + static auto inst_ = std::make_unique<LUTManager>(); // The one, single instance. + return *inst_; +} + +} // namespace arm_compute diff --git a/src/core/helpers/LUTManager.h b/src/core/helpers/LUTManager.h new file mode 100644 index 0000000000..4e13ead7e3 --- /dev/null +++ b/src/core/helpers/LUTManager.h @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2024 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef ACL_SRC_CORE_HELPERS_LUTMANAGER_H +#define ACL_SRC_CORE_HELPERS_LUTMANAGER_H + +#include "arm_compute/core/CoreTypes.h" +#include "arm_compute/core/QuantizationInfo.h" +#include "arm_compute/function_info/ActivationLayerInfo.h" + +#include <map> +#include <memory> + +namespace arm_compute +{ + +struct LUTInfo +{ + ActivationLayerInfo::ActivationFunction act; + DataType dt; + QuantizationInfo qinfo; + // Operators enable use of map with LUTInfo as key + friend bool operator<(const LUTInfo &l, const LUTInfo &r) + { + return (l.act < r.act) || ((l.act == r.act) && (l.dt < r.dt)) || + ((l.act == r.act) && (l.dt == r.dt) && (l.qinfo.scale() < r.qinfo.scale())) || + ((l.act == r.act) && (l.dt == r.dt) && (l.qinfo.scale() == r.qinfo.scale()) && + (l.qinfo.offset() < r.qinfo.offset())); + } + bool operator==(const LUTInfo &l) + { + return this->act == l.act && this->dt == l.dt && this->qinfo == l.qinfo; + } +}; + +/* Class to handle getting look up table */ +class LUTManager +{ +public: + LUTManager() = default; + + static LUTManager &get_instance(); +#ifdef __aarch64__ + std::shared_ptr<ActivationLayerInfo::LookupTable65536> get_lut_table(LUTInfo info); + +private: + std::map<LUTInfo, std::weak_ptr<ActivationLayerInfo::LookupTable65536>> map_fp16{}; +#endif // __aarch64__ +}; + +} // namespace arm_compute +#endif // ACL_SRC_CORE_HELPERS_LUTMANAGER_H diff --git a/src/core/helpers/MemoryHelpers.h b/src/core/helpers/MemoryHelpers.h new file mode 100644 index 0000000000..dd094b414c --- /dev/null +++ b/src/core/helpers/MemoryHelpers.h @@ -0,0 +1,139 @@ +/* + * Copyright (c) 2021 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef SRC_COMMON_MEMORY_HELPERS_H +#define SRC_COMMON_MEMORY_HELPERS_H + +#include "arm_compute/core/experimental/Types.h" +#include "arm_compute/core/ITensorPack.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/runtime/MemoryGroup.h" + +#include <memory> +#include <utility> +#include <vector> + +namespace arm_compute +{ +inline int offset_int_vec(int offset) +{ + return ACL_INT_VEC + offset; +} + +template <typename TensorType> +struct WorkspaceDataElement +{ + int slot{-1}; + experimental::MemoryLifetime lifetime{experimental::MemoryLifetime::Temporary}; + std::unique_ptr<TensorType> tensor{nullptr}; +}; + +template <typename TensorType> +using WorkspaceData = std::vector<WorkspaceDataElement<TensorType>>; + +template <typename TensorType> +WorkspaceData<TensorType> +manage_workspace(const experimental::MemoryRequirements &mem_reqs, MemoryGroup &mgroup, ITensorPack &run_pack) +{ + ITensorPack dummy_pack = ITensorPack(); + return manage_workspace<TensorType>(mem_reqs, mgroup, run_pack, dummy_pack); +} + +template <typename TensorType> +WorkspaceData<TensorType> manage_workspace(const experimental::MemoryRequirements &mem_reqs, + MemoryGroup &mgroup, + ITensorPack &run_pack, + ITensorPack &prep_pack) +{ + WorkspaceData<TensorType> workspace_memory; + for (const auto &req : mem_reqs) + { + if (req.size == 0) + { + continue; + } + + const auto aux_info = TensorInfo{TensorShape(req.size), 1, DataType::U8}; + workspace_memory.emplace_back( + WorkspaceDataElement<TensorType>{req.slot, req.lifetime, std::make_unique<TensorType>()}); + + auto aux_tensor = workspace_memory.back().tensor.get(); + ARM_COMPUTE_ERROR_ON_NULLPTR(aux_tensor); + aux_tensor->allocator()->init(aux_info, req.alignment); + + if (req.lifetime == experimental::MemoryLifetime::Temporary) + { + mgroup.manage(aux_tensor); + } + else + { + prep_pack.add_tensor(req.slot, aux_tensor); + } + run_pack.add_tensor(req.slot, aux_tensor); + } + + for (auto &mem : workspace_memory) + { + 
auto tensor = mem.tensor.get(); + tensor->allocator()->allocate(); + } + + return workspace_memory; +} + +template <typename TensorType> +void release_prepare_tensors(WorkspaceData<TensorType> &workspace, ITensorPack &prep_pack) +{ + workspace.erase(std::remove_if(workspace.begin(), workspace.end(), + [&prep_pack](auto &wk) + { + const bool to_erase = wk.lifetime == experimental::MemoryLifetime::Prepare; + if (to_erase) + { + prep_pack.remove_tensor(wk.slot); + } + return to_erase; + }), + workspace.end()); +} + +/** Utility function to release tensors with lifetime marked as Prepare */ +template <typename TensorType> +void release_temporaries(const experimental::MemoryRequirements &mem_reqs, WorkspaceData<TensorType> &workspace) +{ + for (auto &ws : workspace) + { + const int slot = ws.slot; + for (auto &m : mem_reqs) + { + if (m.slot == slot && m.lifetime == experimental::MemoryLifetime::Prepare) + { + auto tensor = ws.tensor.get(); + tensor->allocator()->free(); + break; + } + } + } +} +} // namespace arm_compute +#endif /* SRC_COMMON_MEMORY_HELPERS_H */ diff --git a/src/core/helpers/NormalizationHelpers.h b/src/core/helpers/NormalizationHelpers.h new file mode 100644 index 0000000000..d94d5e3602 --- /dev/null +++ b/src/core/helpers/NormalizationHelpers.h @@ -0,0 +1,47 @@ +/* +* Copyright (c) 2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef SRC_CORE_HELPERS_NORMALIZATIONHELPERS_H +#define SRC_CORE_HELPERS_NORMALIZATIONHELPERS_H + +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/Types.h" + +namespace arm_compute +{ +/** Calculate the normalization dimension index for a given normalization type + * + * @param[in] layout Data layout of the input and output tensor + * @param[in] info Normalization info + * + * @return Normalization dimension index + */ +inline unsigned int get_normalization_dimension_index(DataLayout layout, const NormalizationLayerInfo &info) +{ + const unsigned int width_idx = get_data_layout_dimension_index(layout, DataLayoutDimension::WIDTH); + const unsigned int channel_idx = get_data_layout_dimension_index(layout, DataLayoutDimension::CHANNEL); + + return info.is_in_map() ? width_idx : channel_idx; +} +} // namespace arm_compute +#endif /* SRC_CORE_HELPERS_NORMALIZATIONHELPERS_H */ diff --git a/src/core/helpers/PoolingHelpers.h b/src/core/helpers/PoolingHelpers.h new file mode 100644 index 0000000000..9ef045f472 --- /dev/null +++ b/src/core/helpers/PoolingHelpers.h @@ -0,0 +1,219 @@ +/* +* Copyright (c) 2022 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef SRC_CORE_HELPERS_POOLINGHELPERS_H +#define SRC_CORE_HELPERS_POOLINGHELPERS_H + +#include "src/core/NEON/NEAsymm.h" + +namespace arm_compute +{ +namespace cpu +{ +namespace +{ + +inline float calculate_avg_scale_pool3d(bool exclude_padding, + const Coordinates &id, + const int pool_size_x, + const int pool_size_y, + const int pool_size_z, + const int upper_bound_w, + const int upper_bound_h, + const int upper_bound_d, + const int pad_x, + const int pad_y, + const int pad_z, + const int stride_x, + const int stride_y, + const int stride_z) +{ + // Based on NDHWC + int start_x = id[1] * stride_x - pad_x; + int start_y = id[2] * stride_y - pad_y; + int start_z = id[3] * stride_z - pad_z; + + const int end_x = std::min(start_x + pool_size_x, upper_bound_w); + const int end_y = std::min(start_y + pool_size_y, upper_bound_h); + const int end_z = std::min(start_z + pool_size_z, upper_bound_d); + if (exclude_padding) + { + start_x = std::max(0, start_x); + start_y = std::max(0, start_y); + start_z = std::max(0, start_z); + } + return 1.f / ((end_y - start_y) * (end_x - start_x) * (end_z - start_z)); +} + +inline float calculate_avg_scale_pool2d(bool exclude_padding, + DataLayout data_layout, + const Coordinates &id, + const int pool_size_x, + const int pool_size_y, + const int upper_bound_w, + const int upper_bound_h, + const int pad_x, + const int pad_y, + const int stride_x, + const int stride_y) +{ + const unsigned int idx_width = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); + const unsigned int idx_height = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); + + int start_x = id[idx_width] * stride_x - pad_x; + int start_y = id[idx_height] * stride_y - pad_y; + + const int end_x = std::min(start_x + pool_size_x, upper_bound_w); + const int end_y = std::min(start_y + pool_size_y, upper_bound_h); + if (exclude_padding) + { + start_x = std::max(0, start_x); + start_y = std::max(0, start_y); + } + return 1.f 
/ ((end_y - start_y) * (end_x - start_x)); +} + +template <typename T> +inline typename std::enable_if<std::is_same<T, int8_t>::value, int8_t>::type +quantize(float val, const UniformQuantizationInfo &info) +{ + return quantize_qasymm8_signed(val, info); +} + +template <typename T> +inline typename std::enable_if<std::is_same<T, uint8_t>::value, uint8_t>::type +quantize(float val, const UniformQuantizationInfo &info) +{ + return quantize_qasymm8(val, info); +} + +template <typename T> +inline T vcvtq_q32_f32(float32x4_t values); + +template <> +inline uint32x4_t vcvtq_q32_f32(float32x4_t values) +{ + return vcvtq_u32_f32(values); +} + +template <> +inline int32x4_t vcvtq_q32_f32(float32x4_t values) +{ + return vcvtq_s32_f32(values); +} + +template <typename T> +inline float32x4_t vcvtq_f32_q32(T values); + +template <> +inline float32x4_t vcvtq_f32_q32(uint32x4_t values) +{ + return vcvtq_f32_u32(values); +} + +template <> +inline float32x4_t vcvtq_f32_q32(int32x4_t values) +{ + return vcvtq_f32_s32(values); +} + +template <typename Tout> +inline Tout vrequantize_pooling_with_scale(const float32x4x4_t &acc, + const float quant_rescale, + const float scale_pooling, + const int32_t new_offset); + +template <> +inline uint8x16_t vrequantize_pooling_with_scale(const float32x4x4_t &acc, + const float quant_rescale, + const float scale_pooling, + const int32_t new_offset) +{ + const float new_scale = quant_rescale / scale_pooling; + return vquantize(acc, UniformQuantizationInfo(new_scale, new_offset)); +} + +template <> +inline int8x16_t vrequantize_pooling_with_scale(const float32x4x4_t &acc, + const float quant_rescale, + const float scale_pooling, + const int32_t new_offset) +{ + const float new_scale = quant_rescale / scale_pooling; + return vquantize_signed(acc, UniformQuantizationInfo(new_scale, new_offset)); +} + +template <typename Tin, typename Tout> +inline Tout vrequantize_pooling(Tin vec1, Tin vec2, const UniformQuantizationInfo &requant_qinfo); + +template 
<> +inline uint8x16_t vrequantize_pooling(uint8x8_t vec1, uint8x8_t vec2, const UniformQuantizationInfo &requant_qinfo) +{ + const float32x4x4_t acc = {{ + vcvtq_f32_u32(vmovl_u16(vget_low_u16(vmovl_u8((vec1))))), + vcvtq_f32_u32(vmovl_u16(vget_high_u16(vmovl_u8((vec1))))), + vcvtq_f32_u32(vmovl_u16(vget_low_u16(vmovl_u8((vec2))))), + vcvtq_f32_u32(vmovl_u16(vget_high_u16(vmovl_u8((vec2))))), + }}; + return vquantize(acc, requant_qinfo); +} + +template <> +inline int8x16_t vrequantize_pooling(int8x8_t vec1, int8x8_t vec2, const UniformQuantizationInfo &requant_qinfo) +{ + const float32x4x4_t acc = {{ + vcvtq_f32_s32(vmovl_s16(vget_low_s16(vmovl_s8((vec1))))), + vcvtq_f32_s32(vmovl_s16(vget_high_s16(vmovl_s8((vec1))))), + vcvtq_f32_s32(vmovl_s16(vget_low_s16(vmovl_s8((vec2))))), + vcvtq_f32_s32(vmovl_s16(vget_high_s16(vmovl_s8((vec2))))), + }}; + return vquantize_signed(acc, requant_qinfo); +} + +template <typename T> +inline T vrequantize_pooling(T &vec, const UniformQuantizationInfo &requant_qinfo); + +template <> +inline uint8x8_t vrequantize_pooling(uint8x8_t &vec, const UniformQuantizationInfo &requant_qinfo) +{ + const float32x4x2_t acc = {{ + vcvtq_f32_u32(vmovl_u16(vget_low_u16(vmovl_u8((vec))))), + vcvtq_f32_u32(vmovl_u16(vget_high_u16(vmovl_u8((vec))))), + }}; + return vquantize(acc, requant_qinfo); +} + +template <> +inline int8x8_t vrequantize_pooling(int8x8_t &vec, const UniformQuantizationInfo &requant_qinfo) +{ + const float32x4x2_t acc = {{ + vcvtq_f32_s32(vmovl_s16(vget_low_s16(vmovl_s8((vec))))), + vcvtq_f32_s32(vmovl_s16(vget_high_s16(vmovl_s8((vec))))), + }}; + return vquantize_signed(acc, requant_qinfo); +} + +} // namespace +} // namespace cpu +} // namespace arm_compute +#endif /* SRC_CORE_HELPERS_POOLINGHELPERS_H */ diff --git a/src/core/helpers/ScaleHelpers.h b/src/core/helpers/ScaleHelpers.h new file mode 100644 index 0000000000..47605e7385 --- /dev/null +++ b/src/core/helpers/ScaleHelpers.h @@ -0,0 +1,207 @@ +/* +* Copyright (c) 2020-2021 
Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef SRC_CORE_HELPERS_SCALEHELPERS_H +#define SRC_CORE_HELPERS_SCALEHELPERS_H + +#include "arm_compute/core/Error.h" +#include "arm_compute/core/QuantizationInfo.h" + +#include <algorithm> +#include <cmath> +#include <cstddef> +#include <cstdint> + +namespace arm_compute +{ +namespace scale_helpers +{ +/** Computes bilinear interpolation for quantized input and output, using the pointer to the top-left pixel and the pixel's distance between + * the real coordinates and the smallest following integer coordinates. Input must be QASYMM8 and in single channel format. + * + * @param[in] pixel_ptr Pointer to the top-left pixel value of a single channel input. 
+ * @param[in] stride Stride to access the bottom-left and bottom-right pixel values + * @param[in] dx Pixel's distance between the X real coordinate and the smallest X following integer + * @param[in] dy Pixel's distance between the Y real coordinate and the smallest Y following integer + * @param[in] iq_info Input QuantizationInfo + * @param[in] oq_info Output QuantizationInfo + * + * @note dx and dy must be in the range [0, 1.0] + * + * @return The bilinear interpolated pixel value + */ +inline uint8_t delta_bilinear_c1_quantized(const uint8_t *pixel_ptr, + size_t stride, + float dx, + float dy, + UniformQuantizationInfo iq_info, + UniformQuantizationInfo oq_info) +{ + ARM_COMPUTE_ERROR_ON(pixel_ptr == nullptr); + + const float dx1 = 1.0f - dx; + const float dy1 = 1.0f - dy; + + const float a00 = dequantize_qasymm8(*pixel_ptr, iq_info); + const float a01 = dequantize_qasymm8(*(pixel_ptr + 1), iq_info); + const float a10 = dequantize_qasymm8(*(pixel_ptr + stride), iq_info); + const float a11 = dequantize_qasymm8(*(pixel_ptr + stride + 1), iq_info); + + const float w1 = dx1 * dy1; + const float w2 = dx * dy1; + const float w3 = dx1 * dy; + const float w4 = dx * dy; + float res = a00 * w1 + a01 * w2 + a10 * w3 + a11 * w4; + return static_cast<uint8_t>(quantize_qasymm8(res, oq_info)); +} + +/** Computes bilinear interpolation for quantized input and output, using the pointer to the top-left pixel and the pixel's distance between + * the real coordinates and the smallest following integer coordinates. Input must be QASYMM8_SIGNED and in single channel format. + * + * @param[in] pixel_ptr Pointer to the top-left pixel value of a single channel input. 
+ * @param[in] stride Stride to access the bottom-left and bottom-right pixel values + * @param[in] dx Pixel's distance between the X real coordinate and the smallest X following integer + * @param[in] dy Pixel's distance between the Y real coordinate and the smallest Y following integer + * @param[in] iq_info Input QuantizationInfo + * @param[in] oq_info Output QuantizationInfo + * + * @note dx and dy must be in the range [0, 1.0] + * + * @return The bilinear interpolated pixel value + */ +inline int8_t delta_bilinear_c1_quantized(const int8_t *pixel_ptr, + size_t stride, + float dx, + float dy, + UniformQuantizationInfo iq_info, + UniformQuantizationInfo oq_info) +{ + ARM_COMPUTE_ERROR_ON(pixel_ptr == nullptr); + + const float dx1 = 1.0f - dx; + const float dy1 = 1.0f - dy; + + const float a00 = dequantize_qasymm8_signed(*pixel_ptr, iq_info); + const float a01 = dequantize_qasymm8_signed(*(pixel_ptr + 1), iq_info); + const float a10 = dequantize_qasymm8_signed(*(pixel_ptr + stride), iq_info); + const float a11 = dequantize_qasymm8_signed(*(pixel_ptr + stride + 1), iq_info); + + const float w1 = dx1 * dy1; + const float w2 = dx * dy1; + const float w3 = dx1 * dy; + const float w4 = dx * dy; + float res = a00 * w1 + a01 * w2 + a10 * w3 + a11 * w4; + return static_cast<int8_t>(quantize_qasymm8_signed(res, oq_info)); +} + +/** Return the pixel at (x,y) using area interpolation by clamping when out of borders. The image must be single channel U8 + * + * @note The interpolation area depends on the width and height ration of the input and output images + * @note Currently average of the contributing pixels is calculated + * + * @param[in] first_pixel_ptr Pointer to the first pixel of a single channel U8 image. + * @param[in] stride Stride in bytes of the image + * @param[in] width Width of the image + * @param[in] height Height of the image + * @param[in] wr Width ratio among the input image width and output image width. 
+ * @param[in] hr Height ratio among the input image height and output image height. + * @param[in] x X position of the wanted pixel + * @param[in] y Y position of the wanted pixel + * + * @return The pixel at (x, y) using area interpolation. + */ +inline uint8_t pixel_area_c1u8_clamp( + const uint8_t *first_pixel_ptr, size_t stride, size_t width, size_t height, float wr, float hr, int x, int y) +{ + ARM_COMPUTE_ERROR_ON(first_pixel_ptr == nullptr); + + // Calculate sampling position + float in_x = (x + 0.5f) * wr - 0.5f; + float in_y = (y + 0.5f) * hr - 0.5f; + + // Get bounding box offsets + int x_from = std::floor(x * wr - 0.5f - in_x); + int y_from = std::floor(y * hr - 0.5f - in_y); + int x_to = std::ceil((x + 1) * wr - 0.5f - in_x); + int y_to = std::ceil((y + 1) * hr - 0.5f - in_y); + + // Clamp position to borders + in_x = std::max(-1.f, std::min(in_x, static_cast<float>(width))); + in_y = std::max(-1.f, std::min(in_y, static_cast<float>(height))); + + // Clamp bounding box offsets to borders + x_from = ((in_x + x_from) < -1) ? -1 : x_from; + y_from = ((in_y + y_from) < -1) ? -1 : y_from; + x_to = ((in_x + x_to) > width) ? (width - in_x) : x_to; + y_to = ((in_y + y_to) > height) ? 
(height - in_y) : y_to; + + // Get pixel index + const int xi = std::floor(in_x); + const int yi = std::floor(in_y); + + // Bounding box elements in each dimension + const int x_elements = (x_to - x_from + 1); + const int y_elements = (y_to - y_from + 1); + ARM_COMPUTE_ERROR_ON(x_elements == 0 || y_elements == 0); + + // Sum pixels in area + int sum = 0; + for (int j = yi + y_from, je = yi + y_to; j <= je; ++j) + { + const uint8_t *ptr = first_pixel_ptr + j * stride + xi + x_from; + sum = std::accumulate(ptr, ptr + x_elements, sum); + } + + // Return average + return sum / (x_elements * y_elements); +} + +/** Computes bilinear interpolation using the top-left, top-right, bottom-left, bottom-right pixels and the pixel's distance between + * the real coordinates and the smallest following integer coordinates. + * + * @param[in] a00 The top-left pixel value. + * @param[in] a01 The top-right pixel value. + * @param[in] a10 The bottom-left pixel value. + * @param[in] a11 The bottom-right pixel value. 
+ * @param[in] dx_val Pixel's distance between the X real coordinate and the smallest X following integer + * @param[in] dy_val Pixel's distance between the Y real coordinate and the smallest Y following integer + * + * @note dx and dy must be in the range [0, 1.0] + * + * @return The bilinear interpolated pixel value + */ +inline float delta_bilinear(float a00, float a01, float a10, float a11, float dx_val, float dy_val) +{ + const float dx1_val = 1.0f - dx_val; + const float dy1_val = 1.0f - dy_val; + + const float w1 = dx1_val * dy1_val; + const float w2 = dx_val * dy1_val; + const float w3 = dx1_val * dy_val; + const float w4 = dx_val * dy_val; + return a00 * w1 + a01 * w2 + a10 * w3 + a11 * w4; +} +} // namespace scale_helpers +} // namespace arm_compute + +#endif /* SRC_CORE_HELPERS_SCALEHELPERS_H */ diff --git a/src/core/helpers/SoftmaxHelpers.cpp b/src/core/helpers/SoftmaxHelpers.cpp new file mode 100644 index 0000000000..8184991ab5 --- /dev/null +++ b/src/core/helpers/SoftmaxHelpers.cpp @@ -0,0 +1,45 @@ +/* +* Copyright (c) 2017-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "src/core/helpers/SoftmaxHelpers.h" + +namespace arm_compute +{ +namespace softmax_helpers +{ +PermutationVector get_permutation_vector_from_softmax_axis(size_t axis) +{ + switch (axis) + { + case 1: + return PermutationVector(1U, 0U, 2U, 3U); + case 2: + return PermutationVector(2U, 1U, 0U, 3U); + case 3: + return PermutationVector(3U, 1U, 2U, 0U); + default: + ARM_COMPUTE_ERROR("Axis not supported"); + } +} +} // namespace softmax_helpers +} // namespace arm_compute diff --git a/src/core/helpers/SoftmaxHelpers.h b/src/core/helpers/SoftmaxHelpers.h new file mode 100644 index 0000000000..de5490a14d --- /dev/null +++ b/src/core/helpers/SoftmaxHelpers.h @@ -0,0 +1,50 @@ +/* +* Copyright (c) 2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
 IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef SRC_CORE_HELPERS_SOFTMAXHELPERS_H
+#define SRC_CORE_HELPERS_SOFTMAXHELPERS_H
+
+#include "arm_compute/core/Types.h"
+
+namespace arm_compute
+{
+namespace softmax_helpers
+{
+/** Given a softmax axis, this function returns the permutation vector required to put the axis to the front
+ *
+ * @note This function assumes a tensor rank <= 4
+ *
+ * Axis selects the dimension on which softmax is performed.
+ * E.g. For input of shape 4x5x6 and axis=1, softmax will be applied to 4x6=24 vectors of size 5.
+ * Internally softmax is always performed on the first dimension (front dimension), therefore permutation is
+ * required to put the dimension specified by @p axis to the first dimension.
+ *
+ * @param[in] axis Axis on which to perform softmax. Supported: 1, 2, 3 (0 implies no permutation needed)
+ *
+ * @return the permutation vector
+ */
+PermutationVector get_permutation_vector_from_softmax_axis(size_t axis);
+} // namespace softmax_helpers
+} // namespace arm_compute
+
+#endif /* SRC_CORE_HELPERS_SOFTMAXHELPERS_H */
diff --git a/src/core/helpers/Utils.cpp b/src/core/helpers/Utils.cpp
new file mode 100644
index 0000000000..f8895d8a3c
--- /dev/null
+++ b/src/core/helpers/Utils.cpp
@@ -0,0 +1,49 @@
+/*
+* Copyright (c) 2023 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "src/core/helpers/Utils.h" + +namespace arm_compute +{ +bool has_holes(const ITensorInfo &info) +{ + return has_holes(info, info.num_dimensions() - 1); +} + +bool has_holes(const ITensorInfo &info, size_t dimension) +{ + const auto &shape = info.tensor_shape(); + const auto &strides = info.strides_in_bytes(); + size_t squashed_bytes = info.element_size(); + + for (size_t dim = 0; dim <= dimension; ++dim) + { + if (strides[dim] != squashed_bytes) + { + return true; + } + squashed_bytes *= shape[dim]; + } + return false; +} +} // namespace arm_compute diff --git a/src/core/helpers/Utils.h b/src/core/helpers/Utils.h new file mode 100644 index 0000000000..a17a78f7ee --- /dev/null +++ b/src/core/helpers/Utils.h @@ -0,0 +1,120 @@ +/* +* Copyright (c) 2020-2021, 2023 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ACL_SRC_CORE_HELPERS_UTILS_H +#define ACL_SRC_CORE_HELPERS_UTILS_H + +#include "arm_compute/core/ITensorInfo.h" + +namespace arm_compute +{ +/** Create a strides object based on the provided strides and the tensor dimensions. + * + * @param[in] info Tensor info object providing the shape of the tensor for unspecified strides. + * @param[in] stride_x Stride to be used in X dimension (in bytes). + * @param[in] fixed_strides Strides to be used in higher dimensions starting at Y (in bytes). + * + * @return Strides object based on the specified strides. Missing strides are + * calculated based on the tensor shape and the strides of lower dimensions. + */ +template <typename T, typename... 
 Ts>
+inline Strides compute_strides(const ITensorInfo &info, T stride_x, Ts &&...fixed_strides)
+{
+    const TensorShape &shape = info.tensor_shape();
+
+    // Create strides object
+    Strides strides(stride_x, fixed_strides...);
+
+    for (size_t i = 1 + sizeof...(Ts); i < info.num_dimensions(); ++i)
+    {
+        strides.set(i, shape[i - 1] * strides[i - 1]);
+    }
+
+    return strides;
+}
+
+/** Create a strides object based on the tensor dimensions.
+ *
+ * @param[in] info Tensor info object used to compute the strides.
+ *
+ * @return Strides object based on element size and tensor shape.
+ */
+template <typename... Ts>
+inline Strides compute_strides(const ITensorInfo &info)
+{
+    return compute_strides(info, info.element_size());
+}
+
+/** Given an integer value, this function returns the next power of two
+ *
+ * @param[in] x Input value
+ *
+ * @return the next power of two
+ */
+inline unsigned int get_next_power_two(unsigned int x)
+{
+    // Decrement by 1
+    x--;
+
+    // Shift right by 1
+    x |= x >> 1u;
+    // Shift right by 2
+    x |= x >> 2u;
+    // Shift right by 4
+    x |= x >> 4u;
+    // Shift right by 8
+    x |= x >> 8u;
+    // Shift right by 16
+    x |= x >> 16u;
+
+    // Increment by 1
+    x++;
+
+    return x;
+}
+
+/** Check if the tensor has any holes.
+ *
+ * A hole is defined as any gap in the tensor between two consecutive values. This can be a result of extending
+ * the paddings or manipulating the strides of the tensor
+ *
+ * @param[in] info Tensor info object defining the shape of the input tensor.
+ *
+ * @note This function checks for holes in all dimensions.
+ *
+ */
+bool has_holes(const ITensorInfo &info);
+
+/** Check if the tensor has any holes.
+ *
+ * @param[in] info Tensor info object defining the shape of the input tensor.
+ * @param[in] dimension Highest dimension to check.
+ *
+ * @note This function checks for holes in all the dimensions up to and including the highest dimension. 
+ * + */ +bool has_holes(const ITensorInfo &info, size_t dimension); + +} // namespace arm_compute + +#endif // ACL_SRC_CORE_HELPERS_UTILS_H diff --git a/src/core/helpers/WindowHelpers.cpp b/src/core/helpers/WindowHelpers.cpp new file mode 100644 index 0000000000..30a55fcbc6 --- /dev/null +++ b/src/core/helpers/WindowHelpers.cpp @@ -0,0 +1,349 @@ +/* +* Copyright (c) 2020-2022 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "src/core/helpers/WindowHelpers.h" + +namespace arm_compute +{ +Window +calculate_max_window(const ValidRegion &valid_region, const Steps &steps, bool skip_border, BorderSize border_size) +{ + if (!skip_border) + { + border_size = BorderSize(0); + } + + const Coordinates &anchor = valid_region.anchor; + const TensorShape &shape = valid_region.shape; + + Window window; + + window.set(0, Window::Dimension( + // Skip the border left of the image + anchor[0] + border_size.left, + // Skip the border right of the image + // Make sure the window width is a multiple of the step size + anchor[0] + border_size.left + + ceil_to_multiple(std::max(0, static_cast<int>(shape[0]) - static_cast<int>(border_size.left) - + static_cast<int>(border_size.right)), + steps[0]), + steps[0])); + + size_t n = 1; + + if (anchor.num_dimensions() > 1) + { + window.set(1, + Window::Dimension( + // Skip the border above the image + anchor[1] + border_size.top, + // Skip the border below the image + anchor[1] + border_size.top + + ceil_to_multiple(std::max(0, static_cast<int>(shape[1]) - static_cast<int>(border_size.top) - + static_cast<int>(border_size.bottom)), + steps[1]), + steps[1])); + + ++n; + } + + if (anchor.num_dimensions() > 2) + { + window.set(2, Window::Dimension(anchor[2], std::max<size_t>(1, shape[2]), steps[2])); + + ++n; + } + + for (; n < anchor.num_dimensions(); ++n) + { + window.set(n, Window::Dimension(anchor[n], std::max<size_t>(1, shape[n]))); + } + + for (; n < Coordinates::num_max_dimensions; ++n) + { + window.set(n, Window::Dimension(0, 1)); + } + + return window; +} + +Window calculate_max_window(const TensorShape &shape, const Steps &steps, bool skip_border, BorderSize border_size) +{ + if (!skip_border) + { + border_size = BorderSize(0); + } + + Window window; + + window.set(0, Window::Dimension( + // Skip the border left of the image + border_size.left, + // Skip the border right of the image + // Make sure the window width is a multiple of the step 
size + border_size.left + + ceil_to_multiple(std::max(0, static_cast<int>(shape[0]) - static_cast<int>(border_size.left) - + static_cast<int>(border_size.right)), + steps[0]), + steps[0])); + + size_t n = 1; + + if (shape.num_dimensions() > 1) + { + window.set(1, Window::Dimension( + // Skip the border above the image + border_size.top, + // Skip the border below the image + border_size.top + ceil_to_multiple(std::max(0, static_cast<int>(shape[1]) - + static_cast<int>(border_size.top) - + static_cast<int>(border_size.bottom)), + steps[1]), + steps[1])); + + ++n; + } + + if (shape.num_dimensions() > 2) + { + window.set(2, Window::Dimension(0, std::max<size_t>(1, shape[2]), steps[2])); + + ++n; + } + + for (; n < shape.num_dimensions(); ++n) + { + window.set(n, Window::Dimension(0, std::max<size_t>(1, shape[n]))); + } + + for (; n < Coordinates::num_max_dimensions; ++n) + { + window.set(n, Window::Dimension(0, 1)); + } + + return window; +} + +Window calculate_max_enlarged_window(const ValidRegion &valid_region, const Steps &steps, BorderSize border_size) +{ + const Coordinates &anchor = valid_region.anchor; + const TensorShape &shape = valid_region.shape; + + Window window; + + window.set(0, Window::Dimension( + // move the anchor to the start from the border + anchor[0] - border_size.left, + // move the anchor to include the right end border + // Make sure the window width is a multiple of the step size + anchor[0] - border_size.left + + ceil_to_multiple(shape[0] + border_size.left + border_size.right, steps[0]), + steps[0])); + + size_t n = 1; + + if (anchor.num_dimensions() > 1) + { + window.set(1, Window::Dimension( + // Include the border above the image + anchor[1] - border_size.top, + // Include the border below the image + anchor[1] - border_size.top + + ceil_to_multiple(shape[1] + border_size.top + border_size.bottom, steps[1]), + steps[1])); + + ++n; + } + + if (anchor.num_dimensions() > 2) + { + window.set(2, Window::Dimension(0, std::max<size_t>(1, 
shape[n]), steps[2])); + + ++n; + } + + for (; n < anchor.num_dimensions(); ++n) + { + window.set(n, Window::Dimension(anchor[n], std::max<size_t>(1, shape[n]))); + } + + for (; n < Coordinates::num_max_dimensions; ++n) + { + window.set(n, Window::Dimension(0, 1)); + } + + return window; +} + +Window calculate_max_window_horizontal(const ValidRegion &valid_region, + const Steps &steps, + bool skip_border, + BorderSize border_size) +{ + if (skip_border) + { + border_size.top = 0; + border_size.bottom = 0; + } + else + { + border_size.left = 0; + border_size.right = 0; + } + + const Coordinates &anchor = valid_region.anchor; + const TensorShape &shape = valid_region.shape; + + Window window; + + window.set(0, Window::Dimension( + // Skip the border left of the image + anchor[0] + border_size.left, + // Skip the border right of the image + // Make sure the window width is a multiple of the step size + anchor[0] + border_size.left + + ceil_to_multiple(std::max(0, static_cast<int>(shape[0]) - static_cast<int>(border_size.left) - + static_cast<int>(border_size.right)), + steps[0]), + steps[0])); + + size_t n = 1; + + if (anchor.num_dimensions() > 1) + { + window.set(1, Window::Dimension( + // Skip the border above the image + anchor[1] - border_size.top, + // Skip the border below the image + anchor[1] + shape[1] + border_size.bottom, 1)); + + ++n; + } + + for (; n < anchor.num_dimensions(); ++n) + { + window.set(n, Window::Dimension(anchor[n], std::max<size_t>(1, shape[n]))); + } + + for (; n < Coordinates::num_max_dimensions; ++n) + { + window.set(n, Window::Dimension(0, 1)); + } + + return window; +} + +std::pair<Window, size_t> calculate_squashed_or_max_window(const ITensorInfo &src0, const ITensorInfo &src1) +{ + const auto &shape0 = src0.tensor_shape(); + const auto &shape1 = src1.tensor_shape(); + const auto &strides0 = src0.strides_in_bytes(); + const auto &strides1 = src1.strides_in_bytes(); + const auto num_dimensions = std::max(src0.num_dimensions(), 
src1.num_dimensions()); + + Window win; + size_t split_dimension = Window::DimY; + size_t dim = 0; + + size_t squashed_bytes = src0.element_size(); + + // Try to squash the low dimensions together. + for (; dim < num_dimensions; ++dim) + { + if (shape0[dim] != shape1[dim] || strides0[dim] != squashed_bytes || strides1[dim] != squashed_bytes) + { + break; + } + + squashed_bytes *= shape0[dim]; + } + + if (dim == num_dimensions) + { + auto squashed_elements = squashed_bytes / src0.element_size(); + + split_dimension = Window::DimX; + + // The input tensors can be interpreted as 1D array. + win.set(0, Window::Dimension(0, squashed_elements, 1)); + + for (dim = 1; dim < Coordinates::num_max_dimensions; ++dim) + { + win.set(dim, Window::Dimension(0, 1, 1)); + } + } + else + { + // Generates the max window. + for (dim = 0; dim < Coordinates::num_max_dimensions; ++dim) + { + win.set(dim, Window::Dimension(0, std::max(shape0[dim], shape1[dim]), 1)); + } + } + + return std::make_pair(win, split_dimension); +} + +std::pair<Window, size_t> calculate_squashed_or_max_window(const ITensorInfo &src) +{ + const auto &shape = src.tensor_shape(); + const auto &strides = src.strides_in_bytes(); + const auto num_dimensions = src.num_dimensions(); + + Window win; + size_t split_dimension = Window::DimY; + size_t dim = 0; + size_t squashed_bytes = src.element_size(); + + // Try to squash the low dimensions together. + for (; dim < num_dimensions; ++dim) + { + if (strides[dim] != squashed_bytes) + { + break; + } + squashed_bytes *= shape[dim]; + } + if (dim == num_dimensions) + { + const auto squashed_elements = squashed_bytes / src.element_size(); + split_dimension = Window::DimX; + // The input tensor can be interpreted as 1D array. + win.set(0, Window::Dimension(0, squashed_elements, 1)); + for (dim = 1; dim < Coordinates::num_max_dimensions; ++dim) + { + win.set(dim, Window::Dimension(0, 1, 1)); + } + } + else + { + // Generate the max window. 
+ for (dim = 0; dim < Coordinates::num_max_dimensions; ++dim) + { + win.set(dim, Window::Dimension(0, shape[dim], 1)); + } + } + return std::make_pair(win, split_dimension); +} + +} // namespace arm_compute diff --git a/src/core/helpers/WindowHelpers.h b/src/core/helpers/WindowHelpers.h new file mode 100644 index 0000000000..e404c18e8a --- /dev/null +++ b/src/core/helpers/WindowHelpers.h @@ -0,0 +1,230 @@ +/* +* Copyright (c) 2020-2022 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef SRC_CORE_HELPERS_WINDOWHELPERS_H +#define SRC_CORE_HELPERS_WINDOWHELPERS_H + +#include "arm_compute/core/IAccessWindow.h" +#include "arm_compute/core/Steps.h" +#include "arm_compute/core/Window.h" + +namespace arm_compute +{ +/** Update window and padding size for each of the access patterns. 
+ * + * First the window size is reduced based on all access patterns that are not + * allowed to modify the padding of the underlying tensor. Then the padding of + * the remaining tensors is increased to match the window. + * + * @param[in] win Window that is used by the kernel. + * @param[in] patterns Access patterns used to calculate the final window and padding. + * + * @return True if the window has been changed. Changes to the padding do not + * influence the returned value. + */ +template <typename... Ts> +bool update_window_and_padding(Window &win, Ts &&...patterns) +{ + bool window_changed = false; + + utility::for_each([&](const IAccessWindow &w) { window_changed |= w.update_window_if_needed(win); }, patterns...); + + utility::for_each([&](IAccessWindow &w) { w.update_padding_if_needed(win); }, patterns...); + + return window_changed; +} + +/** Intersect multiple valid regions. + * + * @param[in] regions Valid regions. + * + * @return Intersection of all regions. + */ +template <typename... Ts> +ValidRegion intersect_valid_regions(const Ts &...regions) +{ + auto intersect = [](const ValidRegion &r1, const ValidRegion &r2) -> ValidRegion + { + ValidRegion region; + + for (size_t d = 0; d < std::min(r1.anchor.num_dimensions(), r2.anchor.num_dimensions()); ++d) + { + region.anchor.set(d, std::max(r1.anchor[d], r2.anchor[d])); + } + + for (size_t d = 0; d < std::min(r1.shape.num_dimensions(), r2.shape.num_dimensions()); ++d) + { + region.shape.set(d, std::min(r1.shape[d], r2.shape[d])); + } + + return region; + }; + + return utility::foldl(intersect, regions...); +} + +#ifndef DOXYGEN_SKIP_THIS +/** Calculate the maximum window for a given tensor shape and border setting + * + * @param[in] valid_region Valid region object defining the shape of the tensor space for which the window is created. + * @param[in] steps (Optional) Number of elements processed for each step. + * @param[in] skip_border (Optional) If true exclude the border region from the window. 
+ * @param[in] border_size (Optional) Border size. + * + * @return The maximum window the kernel can be executed on. + */ +Window calculate_max_window(const ValidRegion &valid_region, + const Steps &steps = Steps(), + bool skip_border = false, + BorderSize border_size = BorderSize()); + +/** Calculate the maximum window for a given tensor shape and border setting + * + * @param[in] shape Shape of the tensor space + * @param[in] steps (Optional) Number of elements processed for each step. + * @param[in] skip_border (Optional) If true exclude the border region from the window. + * @param[in] border_size (Optional) Border size. + * + * @return The maximum window the kernel can be executed on. + */ +Window calculate_max_window(const TensorShape &shape, + const Steps &steps = Steps(), + bool skip_border = false, + BorderSize border_size = BorderSize()); + +/** Calculate the maximum window for a given tensor shape and border setting + * + * @param[in] info Tensor info object defining the shape of the object for which the window is created. + * @param[in] steps (Optional) Number of elements processed for each step. + * @param[in] skip_border (Optional) If true exclude the border region from the window. + * @param[in] border_size (Optional) Border size. + * + * @return The maximum window the kernel can be executed on. + */ +inline Window calculate_max_window(const ITensorInfo &info, + const Steps &steps = Steps(), + bool skip_border = false, + BorderSize border_size = BorderSize()) +{ + return calculate_max_window(info.tensor_shape(), steps, skip_border, border_size); +} + +/** Calculate the maximum window used by a horizontal kernel for a given tensor shape and border setting + * + * @param[in] valid_region Valid region object defining the shape of the tensor space for which the window is created. + * @param[in] steps (Optional) Number of elements processed for each step. + * @param[in] skip_border (Optional) If true exclude the border region from the window. 
+ * @param[in] border_size (Optional) Border size. The border region will be excluded from the window. + * + * @return The maximum window the kernel can be executed on. + */ +Window calculate_max_window_horizontal(const ValidRegion &valid_region, + const Steps &steps = Steps(), + bool skip_border = false, + BorderSize border_size = BorderSize()); + +/** Calculate the maximum window used by a horizontal kernel for a given tensor shape and border setting + * + * @param[in] info Tensor info object defining the shape of the object for which the window is created. + * @param[in] steps (Optional) Number of elements processed for each step. + * @param[in] skip_border (Optional) If true exclude the border region from the window. + * @param[in] border_size (Optional) Border size. + * + * @return The maximum window the kernel can be executed on. + */ +inline Window calculate_max_window_horizontal(const ITensorInfo &info, + const Steps &steps = Steps(), + bool skip_border = false, + BorderSize border_size = BorderSize()) +{ + return calculate_max_window_horizontal(info.valid_region(), steps, skip_border, border_size); +} + +/** Calculate the maximum window for a given tensor shape and border setting. The window will also includes the border. + * + * @param[in] valid_region Valid region object defining the shape of the tensor space for which the window is created. + * @param[in] steps (Optional) Number of elements processed for each step. + * @param[in] border_size (Optional) Border size. The border region will be included in the window. + * + * @return The maximum window the kernel can be executed on. + */ +Window calculate_max_enlarged_window(const ValidRegion &valid_region, + const Steps &steps = Steps(), + BorderSize border_size = BorderSize()); + +/** Calculate the maximum window for a given tensor shape and border setting. The window will also includes the border. + * + * @param[in] info Tensor info object defining the shape of the object for which the window is created. 
+ * @param[in] steps (Optional) Number of elements processed for each step. + * @param[in] border_size (Optional) Border size. The border region will be included in the window. + * + * @return The maximum window the kernel can be executed on. + */ +inline Window calculate_max_enlarged_window(const ITensorInfo &info, + const Steps &steps = Steps(), + BorderSize border_size = BorderSize()) +{ + return calculate_max_enlarged_window(info.valid_region(), steps, border_size); +} + +/** Calculate the squashed or maximum window for the given tensor shape. + * + * If the tensor data resides continuously in the memory, the tensor can be interpreted + * as 1D array and all the dimensions can be squashed together into the x-dimension. + * Otherwise, generate the max window for the given tensor shape. + * + * @param[in] src Tensor info object defining the shape of the input tensor. + * + * @return The maximum window the kernel can be executed on and the preferred split dimension. + */ +std::pair<Window, size_t> calculate_squashed_or_max_window(const ITensorInfo &src); + +/** Calculate the squashed or maximum window for the given tensor shapes. + * + * If the tensor data resides continuously in the memory, the tensor can be interpreted + * as 1D array and all the dimensions can be squashed together into the x-dimension. + * Otherwise, generate the max window for the given tensor shapes. + * + * @param[in] src0 Tensor info object defining the shape of the first input tensor. + * @param[in] src1 Tensor info object defining the shape of the second input tensor. + * + * @return The squashed or maximum window the kernel can be executed on and the preferred split dimension. 
+ */ +std::pair<Window, size_t> calculate_squashed_or_max_window(const ITensorInfo &src0, const ITensorInfo &src1); + +/** Function to compute the shape of output and window for the given inputs + * + * @param[in] infos Input tensor informations + * + * @return A pair of the shape and window + */ +template <typename... Shapes> +std::pair<TensorShape, Window> compute_output_shape_and_window(const Shapes &...shapes) +{ + const TensorShape out_shape = TensorShape::broadcast_shape(shapes...); + return std::make_pair(out_shape, calculate_max_window(out_shape)); +} +#endif /* DOXYGEN_SKIP_THIS */ +} // namespace arm_compute + +#endif /* SRC_CORE_HELPERS_WINDOWHELPERS_H */ |