Diffstat (limited to 'src/core/helpers')
-rw-r--r-- src/core/helpers/AutoConfiguration.h |  39
-rw-r--r-- src/core/helpers/LUTManager.cpp       | 100
-rw-r--r-- src/core/helpers/LUTManager.h         |  77
-rw-r--r-- src/core/helpers/MemoryHelpers.h      | 139
-rw-r--r-- src/core/helpers/PoolingHelpers.h     | 219
-rw-r--r-- src/core/helpers/ScaleHelpers.h       | 208
-rw-r--r-- src/core/helpers/SoftmaxHelpers.cpp   |   2
-rw-r--r-- src/core/helpers/Utils.cpp            |  49
-rw-r--r-- src/core/helpers/Utils.h              |  35
-rw-r--r-- src/core/helpers/WindowHelpers.cpp    | 258
-rw-r--r-- src/core/helpers/WindowHelpers.h      | 108
11 files changed, 974 insertions(+), 260 deletions(-)
diff --git a/src/core/helpers/AutoConfiguration.h b/src/core/helpers/AutoConfiguration.h index 6880a6cb66..9df2a76983 100644 --- a/src/core/helpers/AutoConfiguration.h +++ b/src/core/helpers/AutoConfiguration.h @@ -1,5 +1,5 @@ /* -* Copyright (c) 2020 Arm Limited. +* Copyright (c) 2020, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -26,6 +26,7 @@ #include "arm_compute/core/ITensorInfo.h" #include "arm_compute/core/Types.h" +#include "arm_compute/core/utils/DataTypeUtils.h" namespace arm_compute { @@ -41,10 +42,11 @@ namespace arm_compute */ inline bool auto_init_if_empty(ITensorInfo &info, const TensorShape &shape, - int num_channels, DataType data_type, - QuantizationInfo quantization_info = QuantizationInfo()) + int num_channels, + DataType data_type, + QuantizationInfo quantization_info = QuantizationInfo()) { - if(info.tensor_shape().total_size() == 0) + if (info.tensor_shape().total_size() == 0) { info.set_data_type(data_type); info.set_num_channels(num_channels); @@ -57,21 +59,26 @@ inline bool auto_init_if_empty(ITensorInfo &info, } /** Auto initialize the tensor info using another tensor info. -* -* @param info_sink Tensor info used to check and assign -* @param info_source Tensor info used to assign -* -* @return True if the tensor info has been initialized -*/ + * + * (COMPMID-6012) This method should remain in sync with the fields of ITensorInfo that have setters. + * + * + * @param info_sink Tensor info used to check and assign + * @param info_source Tensor info used to assign + * + * + * @return True if the tensor info has been initialized + */ inline bool auto_init_if_empty(ITensorInfo &info_sink, const ITensorInfo &info_source) { - if(info_sink.tensor_shape().total_size() == 0) + if (info_sink.tensor_shape().total_size() == 0) { info_sink.set_data_type(info_source.data_type()); info_sink.set_num_channels(info_source.num_channels()); info_sink.set_tensor_shape(info_source.tensor_shape()); info_sink.set_quantization_info(info_source.quantization_info()); info_sink.set_data_layout(info_source.data_layout()); + info_sink.set_are_values_constant(info_source.are_values_constant()); return true; } @@ -87,7 +94,7 @@ inline bool auto_init_if_empty(ITensorInfo &info_sink, const ITensorInfo &info_s */ inline bool set_shape_if_empty(ITensorInfo &info, const TensorShape &shape) { - if(info.tensor_shape().total_size() == 0) + if (info.tensor_shape().total_size() == 0) { info.set_tensor_shape(shape); return true; @@ -106,7 +113,7 @@ inline bool set_shape_if_empty(ITensorInfo &info, const TensorShape &shape) */ inline bool set_format_if_unknown(ITensorInfo &info, Format format) { - if(info.data_type() == DataType::UNKNOWN) + if (info.data_type() == DataType::UNKNOWN) { info.set_format(format); return true; @@ -125,7 +132,7 @@ inline bool set_format_if_unknown(ITensorInfo &info, Format format) */ inline bool set_data_type_if_unknown(ITensorInfo &info, DataType data_type) { - if(info.data_type() == DataType::UNKNOWN) + if (info.data_type() == DataType::UNKNOWN) { info.set_data_type(data_type); return true; @@ -144,7 +151,7 @@ inline bool set_data_type_if_unknown(ITensorInfo &info, DataType data_type) */ inline bool set_data_layout_if_unknown(ITensorInfo &info, DataLayout data_layout) { - if(info.data_layout() == DataLayout::UNKNOWN) + if (info.data_layout() == DataLayout::UNKNOWN) { info.set_data_layout(data_layout); return true; @@ -163,7 +170,7 @@ inline bool set_data_layout_if_unknown(ITensorInfo &info, DataLayout data_layout */ inline bool 
set_quantization_info_if_empty(ITensorInfo &info, QuantizationInfo quantization_info) { - if(info.quantization_info().empty() && (is_data_type_quantized_asymmetric(info.data_type()))) + if (info.quantization_info().empty() && (is_data_type_quantized_asymmetric(info.data_type()))) { info.set_quantization_info(quantization_info); return true; diff --git a/src/core/helpers/LUTManager.cpp b/src/core/helpers/LUTManager.cpp new file mode 100644 index 0000000000..2effffbe92 --- /dev/null +++ b/src/core/helpers/LUTManager.cpp @@ -0,0 +1,100 @@ +/* + * Copyright (c) 2024 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "src/core/helpers/LUTManager.h" + +namespace arm_compute +{ +#ifdef __aarch64__ +namespace +{ + +float16_t activation(float16_t x, const LUTInfo &info) +{ + float16_t out = 0.f; + switch (info.act) + { + case ActivationLayerInfo::ActivationFunction::LOGISTIC: + out = 1.f / (1.f + std::exp(-x)); + break; + case ActivationLayerInfo::ActivationFunction::TANH: + { + out = static_cast<float16_t>(info.alpha * std::tanh(info.beta * x)); + break; + } + default: + ARM_COMPUTE_ERROR("Unsupported Activation for 16-bit LUT table"); + break; + } + return out; +} + +void init_lut_fp16(ActivationLayerInfo::LookupTable65536 *lut, const LUTInfo &info) +{ + union Element + { + uint16_t i = 0; + float16_t fp; + } item; + + // Fill lut by iterating over all 16 bit values using the union. + while (true) + { + (*lut)[item.i] = activation(item.fp, info); + if (item.i == 65535) + break; + item.i++; + } +} +} // namespace + +std::shared_ptr<ActivationLayerInfo::LookupTable65536> LUTManager::get_lut_table(LUTInfo info) +{ + const auto itr = map_fp16.find(info); + auto s_ptr = (itr != map_fp16.end()) ? itr->second.lock() : nullptr; // nullptr if invalid or not found. + if (s_ptr != nullptr) + { + // Found and valid + return s_ptr; // Return weak ptr as shared ptr + } + else + { + // Not found, or pointer not valid + // We do not use make_shared to prevent the weak_ptr keeping the control block alive + std::shared_ptr<ActivationLayerInfo::LookupTable65536> ptr(new ActivationLayerInfo::LookupTable65536); + init_lut_fp16(ptr.get(), info); + map_fp16[info] = ptr; + return ptr; + } +} +#endif // __aarch64__ + +// Static function to get LutManager instance +LUTManager &LUTManager::get_instance() +{ + static auto inst_ = std::make_unique<LUTManager>(); // The one, single instance. 
+ return *inst_; +} + +} // namespace arm_compute diff --git a/src/core/helpers/LUTManager.h b/src/core/helpers/LUTManager.h new file mode 100644 index 0000000000..f3f4bf2832 --- /dev/null +++ b/src/core/helpers/LUTManager.h @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2024 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef ACL_SRC_CORE_HELPERS_LUTMANAGER_H +#define ACL_SRC_CORE_HELPERS_LUTMANAGER_H + +#include "arm_compute/core/CoreTypes.h" +#include "arm_compute/core/QuantizationInfo.h" +#include "arm_compute/function_info/ActivationLayerInfo.h" + +#include <map> +#include <memory> + +namespace arm_compute +{ + +struct LUTInfo +{ + ActivationLayerInfo::ActivationFunction act; + float alpha; + float beta; + DataType dt; + UniformQuantizationInfo qinfo; + + // Operators enable use of map with Lutinfo as key + friend bool operator<(const LUTInfo &l, const LUTInfo &r) + { + const auto l_tup = std::make_tuple(l.act, l.alpha, l.beta, l.dt, l.qinfo.scale, l.qinfo.offset); + const auto r_tup = std::make_tuple(r.act, r.alpha, r.beta, r.dt, r.qinfo.scale, r.qinfo.offset); + + return l_tup < r_tup; + } + bool operator==(const LUTInfo &l) const + { + return this->act == l.act && this->alpha == l.alpha && this->beta == l.beta && this->dt == l.dt && + this->qinfo == l.qinfo; + } +}; + +/* Class to handle getting look up table */ +class LUTManager +{ +public: + LUTManager() = default; + + static LUTManager &get_instance(); +#ifdef __aarch64__ + std::shared_ptr<ActivationLayerInfo::LookupTable65536> get_lut_table(LUTInfo info); + +private: + std::map<LUTInfo, std::weak_ptr<ActivationLayerInfo::LookupTable65536>> map_fp16{}; +#endif // __aarch64__ +}; + +} // namespace arm_compute +#endif // ACL_SRC_CORE_HELPERS_LUTMANAGER_H diff --git a/src/core/helpers/MemoryHelpers.h b/src/core/helpers/MemoryHelpers.h new file mode 100644 index 0000000000..dd094b414c --- /dev/null +++ b/src/core/helpers/MemoryHelpers.h @@ -0,0 +1,139 @@ +/* + * Copyright (c) 2021 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef SRC_COMMON_MEMORY_HELPERS_H +#define SRC_COMMON_MEMORY_HELPERS_H + +#include "arm_compute/core/experimental/Types.h" +#include "arm_compute/core/ITensorPack.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/runtime/MemoryGroup.h" + +#include <memory> +#include <utility> +#include <vector> + +namespace arm_compute +{ +inline int offset_int_vec(int offset) +{ + return ACL_INT_VEC + offset; +} + +template <typename TensorType> +struct WorkspaceDataElement +{ + int slot{-1}; + experimental::MemoryLifetime lifetime{experimental::MemoryLifetime::Temporary}; + std::unique_ptr<TensorType> tensor{nullptr}; +}; + +template <typename TensorType> +using WorkspaceData = std::vector<WorkspaceDataElement<TensorType>>; + +template <typename TensorType> +WorkspaceData<TensorType> +manage_workspace(const experimental::MemoryRequirements &mem_reqs, MemoryGroup &mgroup, ITensorPack &run_pack) +{ + ITensorPack dummy_pack = ITensorPack(); + return manage_workspace<TensorType>(mem_reqs, mgroup, run_pack, dummy_pack); +} + +template <typename TensorType> +WorkspaceData<TensorType> manage_workspace(const experimental::MemoryRequirements &mem_reqs, + MemoryGroup &mgroup, + ITensorPack &run_pack, + ITensorPack &prep_pack) +{ + WorkspaceData<TensorType> workspace_memory; + for (const auto &req : mem_reqs) + { + if (req.size == 0) + { + continue; + } + + const auto aux_info = TensorInfo{TensorShape(req.size), 1, DataType::U8}; + workspace_memory.emplace_back( + WorkspaceDataElement<TensorType>{req.slot, req.lifetime, std::make_unique<TensorType>()}); + + auto aux_tensor = workspace_memory.back().tensor.get(); + ARM_COMPUTE_ERROR_ON_NULLPTR(aux_tensor); + aux_tensor->allocator()->init(aux_info, req.alignment); + + if (req.lifetime == experimental::MemoryLifetime::Temporary) + { + mgroup.manage(aux_tensor); + } + else + { + prep_pack.add_tensor(req.slot, aux_tensor); + } + run_pack.add_tensor(req.slot, aux_tensor); + } + + for (auto &mem : workspace_memory) + { + auto tensor = mem.tensor.get(); + tensor->allocator()->allocate(); + } + + return workspace_memory; +} + +template <typename TensorType> +void release_prepare_tensors(WorkspaceData<TensorType> &workspace, ITensorPack &prep_pack) +{ + workspace.erase(std::remove_if(workspace.begin(), workspace.end(), + [&prep_pack](auto &wk) + { + const bool to_erase = wk.lifetime == experimental::MemoryLifetime::Prepare; + if (to_erase) + 
{ + prep_pack.remove_tensor(wk.slot); + } + return to_erase; + }), + workspace.end()); +} + +/** Utility function to release tensors with lifetime marked as Prepare */ +template <typename TensorType> +void release_temporaries(const experimental::MemoryRequirements &mem_reqs, WorkspaceData<TensorType> &workspace) +{ + for (auto &ws : workspace) + { + const int slot = ws.slot; + for (auto &m : mem_reqs) + { + if (m.slot == slot && m.lifetime == experimental::MemoryLifetime::Prepare) + { + auto tensor = ws.tensor.get(); + tensor->allocator()->free(); + break; + } + } + } +} +} // namespace arm_compute +#endif /* SRC_COMMON_MEMORY_HELPERS_H */ diff --git a/src/core/helpers/PoolingHelpers.h b/src/core/helpers/PoolingHelpers.h new file mode 100644 index 0000000000..9ef045f472 --- /dev/null +++ b/src/core/helpers/PoolingHelpers.h @@ -0,0 +1,219 @@ +/* +* Copyright (c) 2022 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef SRC_CORE_HELPERS_POOLINGHELPERS_H +#define SRC_CORE_HELPERS_POOLINGHELPERS_H + +#include "src/core/NEON/NEAsymm.h" + +namespace arm_compute +{ +namespace cpu +{ +namespace +{ + +inline float calculate_avg_scale_pool3d(bool exclude_padding, + const Coordinates &id, + const int pool_size_x, + const int pool_size_y, + const int pool_size_z, + const int upper_bound_w, + const int upper_bound_h, + const int upper_bound_d, + const int pad_x, + const int pad_y, + const int pad_z, + const int stride_x, + const int stride_y, + const int stride_z) +{ + // Based on NDHWC + int start_x = id[1] * stride_x - pad_x; + int start_y = id[2] * stride_y - pad_y; + int start_z = id[3] * stride_z - pad_z; + + const int end_x = std::min(start_x + pool_size_x, upper_bound_w); + const int end_y = std::min(start_y + pool_size_y, upper_bound_h); + const int end_z = std::min(start_z + pool_size_z, upper_bound_d); + if (exclude_padding) + { + start_x = std::max(0, start_x); + start_y = std::max(0, start_y); + start_z = std::max(0, start_z); + } + return 1.f / ((end_y - start_y) * (end_x - start_x) * (end_z - start_z)); +} + +inline float calculate_avg_scale_pool2d(bool exclude_padding, + DataLayout data_layout, + const Coordinates &id, + const int pool_size_x, + const int pool_size_y, + const int upper_bound_w, + const int upper_bound_h, + const int pad_x, + const int pad_y, + const int stride_x, + const int stride_y) +{ + const unsigned int idx_width = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); + const unsigned int idx_height = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); + + int start_x = id[idx_width] * stride_x - pad_x; + int start_y = id[idx_height] * stride_y - pad_y; + + const int end_x = std::min(start_x + pool_size_x, upper_bound_w); + const int end_y = std::min(start_y + pool_size_y, upper_bound_h); + if (exclude_padding) + { + start_x = std::max(0, start_x); + start_y = std::max(0, start_y); + } + return 1.f / ((end_y - start_y) * (end_x - start_x)); +} + +template <typename T> +inline typename std::enable_if<std::is_same<T, int8_t>::value, int8_t>::type +quantize(float val, const UniformQuantizationInfo &info) +{ + return quantize_qasymm8_signed(val, info); +} + +template <typename T> +inline typename std::enable_if<std::is_same<T, uint8_t>::value, uint8_t>::type +quantize(float val, const UniformQuantizationInfo &info) +{ + return quantize_qasymm8(val, info); +} + +template <typename T> +inline T vcvtq_q32_f32(float32x4_t values); + +template <> +inline uint32x4_t vcvtq_q32_f32(float32x4_t values) +{ + return vcvtq_u32_f32(values); +} + +template <> +inline int32x4_t vcvtq_q32_f32(float32x4_t values) +{ + return vcvtq_s32_f32(values); +} + +template <typename T> +inline float32x4_t vcvtq_f32_q32(T values); + +template <> +inline float32x4_t vcvtq_f32_q32(uint32x4_t values) +{ + return vcvtq_f32_u32(values); +} + +template <> +inline float32x4_t vcvtq_f32_q32(int32x4_t values) +{ + return vcvtq_f32_s32(values); +} + +template <typename Tout> +inline Tout vrequantize_pooling_with_scale(const float32x4x4_t &acc, + const float quant_rescale, + const float scale_pooling, + const int32_t new_offset); + +template <> +inline uint8x16_t vrequantize_pooling_with_scale(const float32x4x4_t &acc, + const float quant_rescale, + const float scale_pooling, + const int32_t new_offset) +{ + const float new_scale = quant_rescale / scale_pooling; + return vquantize(acc, UniformQuantizationInfo(new_scale, new_offset)); +} + +template <> +inline 
int8x16_t vrequantize_pooling_with_scale(const float32x4x4_t &acc, + const float quant_rescale, + const float scale_pooling, + const int32_t new_offset) +{ + const float new_scale = quant_rescale / scale_pooling; + return vquantize_signed(acc, UniformQuantizationInfo(new_scale, new_offset)); +} + +template <typename Tin, typename Tout> +inline Tout vrequantize_pooling(Tin vec1, Tin vec2, const UniformQuantizationInfo &requant_qinfo); + +template <> +inline uint8x16_t vrequantize_pooling(uint8x8_t vec1, uint8x8_t vec2, const UniformQuantizationInfo &requant_qinfo) +{ + const float32x4x4_t acc = {{ + vcvtq_f32_u32(vmovl_u16(vget_low_u16(vmovl_u8((vec1))))), + vcvtq_f32_u32(vmovl_u16(vget_high_u16(vmovl_u8((vec1))))), + vcvtq_f32_u32(vmovl_u16(vget_low_u16(vmovl_u8((vec2))))), + vcvtq_f32_u32(vmovl_u16(vget_high_u16(vmovl_u8((vec2))))), + }}; + return vquantize(acc, requant_qinfo); +} + +template <> +inline int8x16_t vrequantize_pooling(int8x8_t vec1, int8x8_t vec2, const UniformQuantizationInfo &requant_qinfo) +{ + const float32x4x4_t acc = {{ + vcvtq_f32_s32(vmovl_s16(vget_low_s16(vmovl_s8((vec1))))), + vcvtq_f32_s32(vmovl_s16(vget_high_s16(vmovl_s8((vec1))))), + vcvtq_f32_s32(vmovl_s16(vget_low_s16(vmovl_s8((vec2))))), + vcvtq_f32_s32(vmovl_s16(vget_high_s16(vmovl_s8((vec2))))), + }}; + return vquantize_signed(acc, requant_qinfo); +} + +template <typename T> +inline T vrequantize_pooling(T &vec, const UniformQuantizationInfo &requant_qinfo); + +template <> +inline uint8x8_t vrequantize_pooling(uint8x8_t &vec, const UniformQuantizationInfo &requant_qinfo) +{ + const float32x4x2_t acc = {{ + vcvtq_f32_u32(vmovl_u16(vget_low_u16(vmovl_u8((vec))))), + vcvtq_f32_u32(vmovl_u16(vget_high_u16(vmovl_u8((vec))))), + }}; + return vquantize(acc, requant_qinfo); +} + +template <> +inline int8x8_t vrequantize_pooling(int8x8_t &vec, const UniformQuantizationInfo &requant_qinfo) +{ + const float32x4x2_t acc = {{ + vcvtq_f32_s32(vmovl_s16(vget_low_s16(vmovl_s8((vec))))), + vcvtq_f32_s32(vmovl_s16(vget_high_s16(vmovl_s8((vec))))), + }}; + return vquantize_signed(acc, requant_qinfo); +} + +} // namespace +} // namespace cpu +} // namespace arm_compute +#endif /* SRC_CORE_HELPERS_POOLINGHELPERS_H */ diff --git a/src/core/helpers/ScaleHelpers.h b/src/core/helpers/ScaleHelpers.h index 827bbef4cd..47605e7385 100644 --- a/src/core/helpers/ScaleHelpers.h +++ b/src/core/helpers/ScaleHelpers.h @@ -1,5 +1,5 @@ /* -* Copyright (c) 2020 Arm Limited. +* Copyright (c) 2020-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -36,39 +36,6 @@ namespace arm_compute { namespace scale_helpers { -/** Computes bilinear interpolation using the pointer to the top-left pixel and the pixel's distance between - * the real coordinates and the smallest following integer coordinates. Input must be in single channel format. - * - * @param[in] pixel_ptr Pointer to the top-left pixel value of a single channel input. 
- * @param[in] stride Stride to access the bottom-left and bottom-right pixel values - * @param[in] dx Pixel's distance between the X real coordinate and the smallest X following integer - * @param[in] dy Pixel's distance between the Y real coordinate and the smallest Y following integer - * - * @note dx and dy must be in the range [0, 1.0] - * - * @return The bilinear interpolated pixel value - */ -template <typename T> -inline T delta_bilinear_c1(const T *pixel_ptr, size_t stride, float dx, float dy) -{ - ARM_COMPUTE_ERROR_ON(pixel_ptr == nullptr); - - const float dx1 = 1.0f - dx; - const float dy1 = 1.0f - dy; - - const T a00 = *pixel_ptr; - const T a01 = *(pixel_ptr + 1); - const T a10 = *(pixel_ptr + stride); - const T a11 = *(pixel_ptr + stride + 1); - - const float w1 = dx1 * dy1; - const float w2 = dx * dy1; - const float w3 = dx1 * dy; - const float w4 = dx * dy; - - return static_cast<T>(a00 * w1 + a01 * w2 + a10 * w3 + a11 * w4); -} - /** Computes bilinear interpolation for quantized input and output, using the pointer to the top-left pixel and the pixel's distance between * the real coordinates and the smallest following integer coordinates. Input must be QASYMM8 and in single channel format. * @@ -83,8 +50,12 @@ inline T delta_bilinear_c1(const T *pixel_ptr, size_t stride, float dx, float dy * * @return The bilinear interpolated pixel value */ -inline uint8_t delta_bilinear_c1_quantized(const uint8_t *pixel_ptr, size_t stride, float dx, float dy, - UniformQuantizationInfo iq_info, UniformQuantizationInfo oq_info) +inline uint8_t delta_bilinear_c1_quantized(const uint8_t *pixel_ptr, + size_t stride, + float dx, + float dy, + UniformQuantizationInfo iq_info, + UniformQuantizationInfo oq_info) { ARM_COMPUTE_ERROR_ON(pixel_ptr == nullptr); @@ -118,8 +89,12 @@ inline uint8_t delta_bilinear_c1_quantized(const uint8_t *pixel_ptr, size_t stri * * @return The bilinear interpolated pixel value */ -inline int8_t delta_bilinear_c1_quantized(const int8_t *pixel_ptr, size_t stride, float dx, float dy, - UniformQuantizationInfo iq_info, UniformQuantizationInfo oq_info) +inline int8_t delta_bilinear_c1_quantized(const int8_t *pixel_ptr, + size_t stride, + float dx, + float dy, + UniformQuantizationInfo iq_info, + UniformQuantizationInfo oq_info) { ARM_COMPUTE_ERROR_ON(pixel_ptr == nullptr); @@ -139,130 +114,6 @@ inline int8_t delta_bilinear_c1_quantized(const int8_t *pixel_ptr, size_t stride return static_cast<int8_t>(quantize_qasymm8_signed(res, oq_info)); } -/** Computes linear interpolation using the pointer to the top pixel and the pixel's distance between - * the real coordinates and the smallest following integer coordinates. Input must be in single channel format. - * - * @param[in] pixel_ptr Pointer to the top pixel value of a single channel input. 
- * @param[in] stride Stride to access the bottom pixel value - * @param[in] dy Pixel's distance between the Y real coordinate and the smallest Y following integer - * - * @note dy must be in the range [0, 1.0] - * - * @return The linear interpolated pixel value - */ -template <typename T> -inline T delta_linear_c1_y(const T *pixel_ptr, size_t stride, float dy) -{ - ARM_COMPUTE_ERROR_ON(pixel_ptr == nullptr); - - const float dy1 = 1.0f - dy; - - const T a00 = *pixel_ptr; - const T a10 = *(pixel_ptr + stride); - - const float w1 = dy1; - const float w3 = dy; - - return static_cast<T>(a00 * w1 + a10 * w3); -} - -/** Computes linear interpolation using the pointer to the left pixel and the pixel's distance between - * the real coordinates and the smallest following integer coordinates. Input must be in single channel format. - * - * @param[in] pixel_ptr Pointer to the left pixel value of a single channel input. - * @param[in] dx Pixel's distance between the X real coordinate and the smallest X following integer - * - * @note dx must be in the range [0, 1.0] - * - * @return The linear interpolated pixel value - */ -template <typename T> -inline T delta_linear_c1_x(const T *pixel_ptr, float dx) -{ - ARM_COMPUTE_ERROR_ON(pixel_ptr == nullptr); - - const T a00 = *pixel_ptr; - const T a01 = *(pixel_ptr + 1); - - const float dx1 = 1.0f - dx; - - const float w1 = dx1; - const float w2 = dx; - - return static_cast<T>(a00 * w1 + a01 * w2); -} - -/** Return the pixel at (x,y) using bilinear interpolation. - * - * @warning Only works if the iterator was created with an IImage - * - * @param[in] first_pixel_ptr Pointer to the first pixel of a single channel input. - * @param[in] stride Stride in bytes of the image; - * @param[in] x X position of the wanted pixel - * @param[in] y Y position of the wanted pixel - * - * @return The pixel at (x, y) using bilinear interpolation. - */ -template <typename T> -inline T pixel_bilinear_c1(const T *first_pixel_ptr, size_t stride, float x, float y) -{ - ARM_COMPUTE_ERROR_ON(first_pixel_ptr == nullptr); - - const int32_t xi = std::floor(x); - const int32_t yi = std::floor(y); - - const float dx = x - xi; - const float dy = y - yi; - - return delta_bilinear_c1(first_pixel_ptr + xi + yi * stride, stride, dx, dy); -} - -/** Return the pixel at (x,y) using bilinear interpolation by clamping when out of borders. The image must be single channel input - * - * @warning Only works if the iterator was created with an IImage - * - * @param[in] first_pixel_ptr Pointer to the first pixel of a single channel image. - * @param[in] stride Stride in bytes of the image - * @param[in] width Width of the image - * @param[in] height Height of the image - * @param[in] x X position of the wanted pixel - * @param[in] y Y position of the wanted pixel - * - * @return The pixel at (x, y) using bilinear interpolation. 
- */ -template <typename T> -inline uint8_t -pixel_bilinear_c1_clamp(const T *first_pixel_ptr, size_t stride, size_t width, size_t height, float x, float y) -{ - ARM_COMPUTE_ERROR_ON(first_pixel_ptr == nullptr); - - x = std::max(-1.f, std::min(x, static_cast<float>(width))); - y = std::max(-1.f, std::min(y, static_cast<float>(height))); - - const float xi = std::floor(x); - const float yi = std::floor(y); - - const float dx = x - xi; - const float dy = y - yi; - - if(dx == 0.0f) - { - if(dy == 0.0f) - { - return static_cast<T>(first_pixel_ptr[static_cast<int32_t>(xi) + static_cast<int32_t>(yi) * stride]); - } - return delta_linear_c1_y(first_pixel_ptr + static_cast<int32_t>(xi) + static_cast<int32_t>(yi) * stride, - stride, dy); - } - if(dy == 0.0f) - { - return delta_linear_c1_x(first_pixel_ptr + static_cast<int32_t>(xi) + static_cast<int32_t>(yi) * stride, - dx); - } - return delta_bilinear_c1(first_pixel_ptr + static_cast<int32_t>(xi) + static_cast<int32_t>(yi) * stride, stride, - dx, dy); -} - /** Return the pixel at (x,y) using area interpolation by clamping when out of borders. The image must be single channel U8 * * @note The interpolation area depends on the width and height ration of the input and output images @@ -279,9 +130,8 @@ pixel_bilinear_c1_clamp(const T *first_pixel_ptr, size_t stride, size_t width, s * * @return The pixel at (x, y) using area interpolation. */ -inline uint8_t -pixel_area_c1u8_clamp(const uint8_t *first_pixel_ptr, size_t stride, size_t width, size_t height, float wr, - float hr, int x, int y) +inline uint8_t pixel_area_c1u8_clamp( + const uint8_t *first_pixel_ptr, size_t stride, size_t width, size_t height, float wr, float hr, int x, int y) { ARM_COMPUTE_ERROR_ON(first_pixel_ptr == nullptr); @@ -316,7 +166,7 @@ pixel_area_c1u8_clamp(const uint8_t *first_pixel_ptr, size_t stride, size_t widt // Sum pixels in area int sum = 0; - for(int j = yi + y_from, je = yi + y_to; j <= je; ++j) + for (int j = yi + y_from, je = yi + y_to; j <= je; ++j) { const uint8_t *ptr = first_pixel_ptr + j * stride + xi + x_from; sum = std::accumulate(ptr, ptr + x_elements, sum); @@ -325,6 +175,32 @@ pixel_area_c1u8_clamp(const uint8_t *first_pixel_ptr, size_t stride, size_t widt // Return average return sum / (x_elements * y_elements); } + +/** Computes bilinear interpolation using the top-left, top-right, bottom-left, bottom-right pixels and the pixel's distance between + * the real coordinates and the smallest following integer coordinates. + * + * @param[in] a00 The top-left pixel value. + * @param[in] a01 The top-right pixel value. + * @param[in] a10 The bottom-left pixel value. + * @param[in] a11 The bottom-right pixel value. 
+ * @param[in] dx_val Pixel's distance between the X real coordinate and the smallest X following integer + * @param[in] dy_val Pixel's distance between the Y real coordinate and the smallest Y following integer + * + * @note dx and dy must be in the range [0, 1.0] + * + * @return The bilinear interpolated pixel value + */ +inline float delta_bilinear(float a00, float a01, float a10, float a11, float dx_val, float dy_val) +{ + const float dx1_val = 1.0f - dx_val; + const float dy1_val = 1.0f - dy_val; + + const float w1 = dx1_val * dy1_val; + const float w2 = dx_val * dy1_val; + const float w3 = dx1_val * dy_val; + const float w4 = dx_val * dy_val; + return a00 * w1 + a01 * w2 + a10 * w3 + a11 * w4; +} } // namespace scale_helpers } // namespace arm_compute diff --git a/src/core/helpers/SoftmaxHelpers.cpp b/src/core/helpers/SoftmaxHelpers.cpp index 71b971af31..8184991ab5 100644 --- a/src/core/helpers/SoftmaxHelpers.cpp +++ b/src/core/helpers/SoftmaxHelpers.cpp @@ -29,7 +29,7 @@ namespace softmax_helpers { PermutationVector get_permutation_vector_from_softmax_axis(size_t axis) { - switch(axis) + switch (axis) { case 1: return PermutationVector(1U, 0U, 2U, 3U); diff --git a/src/core/helpers/Utils.cpp b/src/core/helpers/Utils.cpp new file mode 100644 index 0000000000..f8895d8a3c --- /dev/null +++ b/src/core/helpers/Utils.cpp @@ -0,0 +1,49 @@ +/* +* Copyright (c) 2023 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "src/core/helpers/Utils.h" + +namespace arm_compute +{ +bool has_holes(const ITensorInfo &info) +{ + return has_holes(info, info.num_dimensions() - 1); +} + +bool has_holes(const ITensorInfo &info, size_t dimension) +{ + const auto &shape = info.tensor_shape(); + const auto &strides = info.strides_in_bytes(); + size_t squashed_bytes = info.element_size(); + + for (size_t dim = 0; dim <= dimension; ++dim) + { + if (strides[dim] != squashed_bytes) + { + return true; + } + squashed_bytes *= shape[dim]; + } + return false; +} +} // namespace arm_compute diff --git a/src/core/helpers/Utils.h b/src/core/helpers/Utils.h index 3c3b2b93f9..a17a78f7ee 100644 --- a/src/core/helpers/Utils.h +++ b/src/core/helpers/Utils.h @@ -1,5 +1,5 @@ /* -* Copyright (c) 2020 Arm Limited. +* Copyright (c) 2020-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#ifndef SRC_CORE_HELPERS_UTILS_H -#define SRC_CORE_HELPERS_UTILS_H +#ifndef ACL_SRC_CORE_HELPERS_UTILS_H +#define ACL_SRC_CORE_HELPERS_UTILS_H #include "arm_compute/core/ITensorInfo.h" @@ -38,14 +38,14 @@ namespace arm_compute * calculated based on the tensor shape and the strides of lower dimensions. */ template <typename T, typename... Ts> -inline Strides compute_strides(const ITensorInfo &info, T stride_x, Ts &&... fixed_strides) +inline Strides compute_strides(const ITensorInfo &info, T stride_x, Ts &&...fixed_strides) { const TensorShape &shape = info.tensor_shape(); // Create strides object Strides strides(stride_x, fixed_strides...); - for(size_t i = 1 + sizeof...(Ts); i < info.num_dimensions(); ++i) + for (size_t i = 1 + sizeof...(Ts); i < info.num_dimensions(); ++i) { strides.set(i, shape[i - 1] * strides[i - 1]); } @@ -92,6 +92,29 @@ inline unsigned int get_next_power_two(unsigned int x) return x; } + +/** Check if the tensor has any holes. + * + * A hole is defined as any gap in the tensor between two consecutive values. This can be a result of extending + * the paddings or manipulating the strides of the tensor + * + * @param[in] info Tensor info object defining the shape of the input tensor. + * + * @note This function checks for holes in all dimensions. + * + */ +bool has_holes(const ITensorInfo &info); + +/** Check if the tensor has any holes. + * + * @param[in] info Tensor info object defining the shape of the input tensor. + * @param[in] dimension Highest dimension to check. + * + * @note This function checks for holes in all the dimensions upto and including the highest dimension. + * + */ +bool has_holes(const ITensorInfo &info, size_t dimension); + } // namespace arm_compute -#endif /* SRC_CORE_HELPERS_UTILS_H */ +#endif // ACL_SRC_CORE_HELPERS_UTILS_H diff --git a/src/core/helpers/WindowHelpers.cpp b/src/core/helpers/WindowHelpers.cpp index ba10eb9775..30a55fcbc6 100644 --- a/src/core/helpers/WindowHelpers.cpp +++ b/src/core/helpers/WindowHelpers.cpp @@ -1,5 +1,5 @@ /* -* Copyright (c) 2020 Arm Limited. +* Copyright (c) 2020-2022 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -25,9 +25,10 @@ namespace arm_compute { -Window calculate_max_window(const ValidRegion &valid_region, const Steps &steps, bool skip_border, BorderSize border_size) +Window +calculate_max_window(const ValidRegion &valid_region, const Steps &steps, bool skip_border, BorderSize border_size) { - if(!skip_border) + if (!skip_border) { border_size = BorderSize(0); } @@ -38,40 +39,104 @@ Window calculate_max_window(const ValidRegion &valid_region, const Steps &steps, Window window; window.set(0, Window::Dimension( - // Skip the border left of the image - anchor[0] + border_size.left, - // Skip the border right of the image - // Make sure the window width is a multiple of the step size - anchor[0] + border_size.left + ceil_to_multiple(std::max(0, static_cast<int>(shape[0]) - static_cast<int>(border_size.left) - static_cast<int>(border_size.right)), steps[0]), - steps[0])); + // Skip the border left of the image + anchor[0] + border_size.left, + // Skip the border right of the image + // Make sure the window width is a multiple of the step size + anchor[0] + border_size.left + + ceil_to_multiple(std::max(0, static_cast<int>(shape[0]) - static_cast<int>(border_size.left) - + static_cast<int>(border_size.right)), + steps[0]), + steps[0])); size_t n = 1; - if(anchor.num_dimensions() > 1) + if (anchor.num_dimensions() > 1) { - window.set(1, Window::Dimension( + window.set(1, + Window::Dimension( // Skip the border above the image anchor[1] + border_size.top, // Skip the border below the image - anchor[1] + border_size.top + ceil_to_multiple(std::max(0, static_cast<int>(shape[1]) - static_cast<int>(border_size.top) - static_cast<int>(border_size.bottom)), steps[1]), + anchor[1] + border_size.top + + ceil_to_multiple(std::max(0, static_cast<int>(shape[1]) - static_cast<int>(border_size.top) - + static_cast<int>(border_size.bottom)), + steps[1]), steps[1])); ++n; } - if(anchor.num_dimensions() > 2) + if (anchor.num_dimensions() > 2) { window.set(2, Window::Dimension(anchor[2], std::max<size_t>(1, shape[2]), steps[2])); ++n; } - for(; n < anchor.num_dimensions(); ++n) + for (; n < anchor.num_dimensions(); ++n) { window.set(n, Window::Dimension(anchor[n], std::max<size_t>(1, shape[n]))); } - for(; n < Coordinates::num_max_dimensions; ++n) + for (; n < Coordinates::num_max_dimensions; ++n) + { + window.set(n, Window::Dimension(0, 1)); + } + + return window; +} + +Window calculate_max_window(const TensorShape &shape, const Steps &steps, bool skip_border, BorderSize border_size) +{ + if (!skip_border) + { + border_size = BorderSize(0); + } + + Window window; + + window.set(0, Window::Dimension( + // Skip the border left of the image + border_size.left, + // Skip the border right of the image + // Make sure the window width is a multiple of the step size + border_size.left + + ceil_to_multiple(std::max(0, static_cast<int>(shape[0]) - static_cast<int>(border_size.left) - + static_cast<int>(border_size.right)), + steps[0]), + steps[0])); + + size_t n = 1; + + if (shape.num_dimensions() > 1) + { + window.set(1, Window::Dimension( + // Skip the border above the image + border_size.top, + // Skip the border below the image + border_size.top + ceil_to_multiple(std::max(0, static_cast<int>(shape[1]) - + static_cast<int>(border_size.top) - + static_cast<int>(border_size.bottom)), + steps[1]), + steps[1])); + + ++n; + } + + if (shape.num_dimensions() > 2) + { + window.set(2, Window::Dimension(0, std::max<size_t>(1, shape[2]), steps[2])); + + ++n; + } + + for (; n < 
shape.num_dimensions(); ++n) + { + window.set(n, Window::Dimension(0, std::max<size_t>(1, shape[n]))); + } + + for (; n < Coordinates::num_max_dimensions; ++n) { window.set(n, Window::Dimension(0, 1)); } @@ -87,40 +152,42 @@ Window calculate_max_enlarged_window(const ValidRegion &valid_region, const Step Window window; window.set(0, Window::Dimension( - // move the anchor to the start from the border - anchor[0] - border_size.left, - // move the anchor to include the right end border - // Make sure the window width is a multiple of the step size - anchor[0] - border_size.left + ceil_to_multiple(shape[0] + border_size.left + border_size.right, steps[0]), - steps[0])); + // move the anchor to the start from the border + anchor[0] - border_size.left, + // move the anchor to include the right end border + // Make sure the window width is a multiple of the step size + anchor[0] - border_size.left + + ceil_to_multiple(shape[0] + border_size.left + border_size.right, steps[0]), + steps[0])); size_t n = 1; - if(anchor.num_dimensions() > 1) + if (anchor.num_dimensions() > 1) { window.set(1, Window::Dimension( - // Include the border above the image - anchor[1] - border_size.top, - // Include the border below the image - anchor[1] - border_size.top + ceil_to_multiple(shape[1] + border_size.top + border_size.bottom, steps[1]), - steps[1])); + // Include the border above the image + anchor[1] - border_size.top, + // Include the border below the image + anchor[1] - border_size.top + + ceil_to_multiple(shape[1] + border_size.top + border_size.bottom, steps[1]), + steps[1])); ++n; } - if(anchor.num_dimensions() > 2) + if (anchor.num_dimensions() > 2) { window.set(2, Window::Dimension(0, std::max<size_t>(1, shape[n]), steps[2])); ++n; } - for(; n < anchor.num_dimensions(); ++n) + for (; n < anchor.num_dimensions(); ++n) { window.set(n, Window::Dimension(anchor[n], std::max<size_t>(1, shape[n]))); } - for(; n < Coordinates::num_max_dimensions; ++n) + for (; n < Coordinates::num_max_dimensions; ++n) { window.set(n, Window::Dimension(0, 1)); } @@ -128,9 +195,12 @@ Window calculate_max_enlarged_window(const ValidRegion &valid_region, const Step return window; } -Window calculate_max_window_horizontal(const ValidRegion &valid_region, const Steps &steps, bool skip_border, BorderSize border_size) +Window calculate_max_window_horizontal(const ValidRegion &valid_region, + const Steps &steps, + bool skip_border, + BorderSize border_size) { - if(skip_border) + if (skip_border) { border_size.top = 0; border_size.bottom = 0; @@ -147,37 +217,133 @@ Window calculate_max_window_horizontal(const ValidRegion &valid_region, const St Window window; window.set(0, Window::Dimension( - // Skip the border left of the image - anchor[0] + border_size.left, - // Skip the border right of the image - // Make sure the window width is a multiple of the step size - anchor[0] + border_size.left + ceil_to_multiple(std::max(0, static_cast<int>(shape[0]) - static_cast<int>(border_size.left) - static_cast<int>(border_size.right)), steps[0]), - steps[0])); + // Skip the border left of the image + anchor[0] + border_size.left, + // Skip the border right of the image + // Make sure the window width is a multiple of the step size + anchor[0] + border_size.left + + ceil_to_multiple(std::max(0, static_cast<int>(shape[0]) - static_cast<int>(border_size.left) - + static_cast<int>(border_size.right)), + steps[0]), + steps[0])); size_t n = 1; - if(anchor.num_dimensions() > 1) + if (anchor.num_dimensions() > 1) { window.set(1, Window::Dimension( - // 
Skip the border above the image - anchor[1] - border_size.top, - // Skip the border below the image - anchor[1] + shape[1] + border_size.bottom, - 1)); + // Skip the border above the image + anchor[1] - border_size.top, + // Skip the border below the image + anchor[1] + shape[1] + border_size.bottom, 1)); ++n; } - for(; n < anchor.num_dimensions(); ++n) + for (; n < anchor.num_dimensions(); ++n) { window.set(n, Window::Dimension(anchor[n], std::max<size_t>(1, shape[n]))); } - for(; n < Coordinates::num_max_dimensions; ++n) + for (; n < Coordinates::num_max_dimensions; ++n) { window.set(n, Window::Dimension(0, 1)); } return window; } + +std::pair<Window, size_t> calculate_squashed_or_max_window(const ITensorInfo &src0, const ITensorInfo &src1) +{ + const auto &shape0 = src0.tensor_shape(); + const auto &shape1 = src1.tensor_shape(); + const auto &strides0 = src0.strides_in_bytes(); + const auto &strides1 = src1.strides_in_bytes(); + const auto num_dimensions = std::max(src0.num_dimensions(), src1.num_dimensions()); + + Window win; + size_t split_dimension = Window::DimY; + size_t dim = 0; + + size_t squashed_bytes = src0.element_size(); + + // Try to squash the low dimensions together. + for (; dim < num_dimensions; ++dim) + { + if (shape0[dim] != shape1[dim] || strides0[dim] != squashed_bytes || strides1[dim] != squashed_bytes) + { + break; + } + + squashed_bytes *= shape0[dim]; + } + + if (dim == num_dimensions) + { + auto squashed_elements = squashed_bytes / src0.element_size(); + + split_dimension = Window::DimX; + + // The input tensors can be interpreted as 1D array. + win.set(0, Window::Dimension(0, squashed_elements, 1)); + + for (dim = 1; dim < Coordinates::num_max_dimensions; ++dim) + { + win.set(dim, Window::Dimension(0, 1, 1)); + } + } + else + { + // Generates the max window. + for (dim = 0; dim < Coordinates::num_max_dimensions; ++dim) + { + win.set(dim, Window::Dimension(0, std::max(shape0[dim], shape1[dim]), 1)); + } + } + + return std::make_pair(win, split_dimension); +} + +std::pair<Window, size_t> calculate_squashed_or_max_window(const ITensorInfo &src) +{ + const auto &shape = src.tensor_shape(); + const auto &strides = src.strides_in_bytes(); + const auto num_dimensions = src.num_dimensions(); + + Window win; + size_t split_dimension = Window::DimY; + size_t dim = 0; + size_t squashed_bytes = src.element_size(); + + // Try to squash the low dimensions together. + for (; dim < num_dimensions; ++dim) + { + if (strides[dim] != squashed_bytes) + { + break; + } + squashed_bytes *= shape[dim]; + } + if (dim == num_dimensions) + { + const auto squashed_elements = squashed_bytes / src.element_size(); + split_dimension = Window::DimX; + // The input tensor can be interpreted as 1D array. + win.set(0, Window::Dimension(0, squashed_elements, 1)); + for (dim = 1; dim < Coordinates::num_max_dimensions; ++dim) + { + win.set(dim, Window::Dimension(0, 1, 1)); + } + } + else + { + // Generate the max window. + for (dim = 0; dim < Coordinates::num_max_dimensions; ++dim) + { + win.set(dim, Window::Dimension(0, shape[dim], 1)); + } + } + return std::make_pair(win, split_dimension); +} + } // namespace arm_compute diff --git a/src/core/helpers/WindowHelpers.h b/src/core/helpers/WindowHelpers.h index 9bc2135b6d..e404c18e8a 100644 --- a/src/core/helpers/WindowHelpers.h +++ b/src/core/helpers/WindowHelpers.h @@ -1,5 +1,5 @@ /* -* Copyright (c) 2020 Arm Limited. +* Copyright (c) 2020-2022 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -43,23 +43,13 @@ namespace arm_compute * influence the returned value. */ template <typename... Ts> -bool update_window_and_padding(Window &win, Ts &&... patterns) +bool update_window_and_padding(Window &win, Ts &&...patterns) { bool window_changed = false; - utility::for_each([&](const IAccessWindow & w) - { - window_changed |= w.update_window_if_needed(win); - }, - patterns...); - - bool padding_changed = false; + utility::for_each([&](const IAccessWindow &w) { window_changed |= w.update_window_if_needed(win); }, patterns...); - utility::for_each([&](IAccessWindow & w) - { - padding_changed |= w.update_padding_if_needed(win); - }, - patterns...); + utility::for_each([&](IAccessWindow &w) { w.update_padding_if_needed(win); }, patterns...); return window_changed; } @@ -71,18 +61,18 @@ bool update_window_and_padding(Window &win, Ts &&... patterns) * @return Intersection of all regions. */ template <typename... Ts> -ValidRegion intersect_valid_regions(const Ts &... regions) +ValidRegion intersect_valid_regions(const Ts &...regions) { - auto intersect = [](const ValidRegion & r1, const ValidRegion & r2) -> ValidRegion + auto intersect = [](const ValidRegion &r1, const ValidRegion &r2) -> ValidRegion { ValidRegion region; - for(size_t d = 0; d < std::min(r1.anchor.num_dimensions(), r2.anchor.num_dimensions()); ++d) + for (size_t d = 0; d < std::min(r1.anchor.num_dimensions(), r2.anchor.num_dimensions()); ++d) { region.anchor.set(d, std::max(r1.anchor[d], r2.anchor[d])); } - for(size_t d = 0; d < std::min(r1.shape.num_dimensions(), r2.shape.num_dimensions()); ++d) + for (size_t d = 0; d < std::min(r1.shape.num_dimensions(), r2.shape.num_dimensions()); ++d) { region.shape.set(d, std::min(r1.shape[d], r2.shape[d])); } @@ -103,7 +93,24 @@ ValidRegion intersect_valid_regions(const Ts &... regions) * * @return The maximum window the kernel can be executed on. */ -Window calculate_max_window(const ValidRegion &valid_region, const Steps &steps = Steps(), bool skip_border = false, BorderSize border_size = BorderSize()); +Window calculate_max_window(const ValidRegion &valid_region, + const Steps &steps = Steps(), + bool skip_border = false, + BorderSize border_size = BorderSize()); + +/** Calculate the maximum window for a given tensor shape and border setting + * + * @param[in] shape Shape of the tensor space + * @param[in] steps (Optional) Number of elements processed for each step. + * @param[in] skip_border (Optional) If true exclude the border region from the window. + * @param[in] border_size (Optional) Border size. + * + * @return The maximum window the kernel can be executed on. + */ +Window calculate_max_window(const TensorShape &shape, + const Steps &steps = Steps(), + bool skip_border = false, + BorderSize border_size = BorderSize()); /** Calculate the maximum window for a given tensor shape and border setting * @@ -114,9 +121,12 @@ Window calculate_max_window(const ValidRegion &valid_region, const Steps &steps * * @return The maximum window the kernel can be executed on. 
*/ -inline Window calculate_max_window(const ITensorInfo &info, const Steps &steps = Steps(), bool skip_border = false, BorderSize border_size = BorderSize()) +inline Window calculate_max_window(const ITensorInfo &info, + const Steps &steps = Steps(), + bool skip_border = false, + BorderSize border_size = BorderSize()) { - return calculate_max_window(info.valid_region(), steps, skip_border, border_size); + return calculate_max_window(info.tensor_shape(), steps, skip_border, border_size); } /** Calculate the maximum window used by a horizontal kernel for a given tensor shape and border setting @@ -128,7 +138,10 @@ inline Window calculate_max_window(const ITensorInfo &info, const Steps &steps = * * @return The maximum window the kernel can be executed on. */ -Window calculate_max_window_horizontal(const ValidRegion &valid_region, const Steps &steps = Steps(), bool skip_border = false, BorderSize border_size = BorderSize()); +Window calculate_max_window_horizontal(const ValidRegion &valid_region, + const Steps &steps = Steps(), + bool skip_border = false, + BorderSize border_size = BorderSize()); /** Calculate the maximum window used by a horizontal kernel for a given tensor shape and border setting * @@ -139,7 +152,10 @@ Window calculate_max_window_horizontal(const ValidRegion &valid_region, const St * * @return The maximum window the kernel can be executed on. */ -inline Window calculate_max_window_horizontal(const ITensorInfo &info, const Steps &steps = Steps(), bool skip_border = false, BorderSize border_size = BorderSize()) +inline Window calculate_max_window_horizontal(const ITensorInfo &info, + const Steps &steps = Steps(), + bool skip_border = false, + BorderSize border_size = BorderSize()) { return calculate_max_window_horizontal(info.valid_region(), steps, skip_border, border_size); } @@ -152,7 +168,9 @@ inline Window calculate_max_window_horizontal(const ITensorInfo &info, const Ste * * @return The maximum window the kernel can be executed on. */ -Window calculate_max_enlarged_window(const ValidRegion &valid_region, const Steps &steps = Steps(), BorderSize border_size = BorderSize()); +Window calculate_max_enlarged_window(const ValidRegion &valid_region, + const Steps &steps = Steps(), + BorderSize border_size = BorderSize()); /** Calculate the maximum window for a given tensor shape and border setting. The window will also includes the border. * @@ -162,10 +180,50 @@ Window calculate_max_enlarged_window(const ValidRegion &valid_region, const Step * * @return The maximum window the kernel can be executed on. */ -inline Window calculate_max_enlarged_window(const ITensorInfo &info, const Steps &steps = Steps(), BorderSize border_size = BorderSize()) +inline Window calculate_max_enlarged_window(const ITensorInfo &info, + const Steps &steps = Steps(), + BorderSize border_size = BorderSize()) { return calculate_max_enlarged_window(info.valid_region(), steps, border_size); } + +/** Calculate the squashed or maximum window for the given tensor shape. + * + * If the tensor data resides continuously in the memory, the tensor can be interpreted + * as 1D array and all the dimensions can be squashed together into the x-dimension. + * Otherwise, generate the max window for the given tensor shape. + * + * @param[in] src Tensor info object defining the shape of the input tensor. + * + * @return The maximum window the kernel can be executed on and the preferred split dimension. 
+ */ +std::pair<Window, size_t> calculate_squashed_or_max_window(const ITensorInfo &src); + +/** Calculate the squashed or maximum window for the given tensor shapes. + * + * If the tensor data resides continuously in the memory, the tensor can be interpreted + * as 1D array and all the dimensions can be squashed together into the x-dimension. + * Otherwise, generate the max window for the given tensor shapes. + * + * @param[in] src0 Tensor info object defining the shape of the first input tensor. + * @param[in] src1 Tensor info object defining the shape of the second input tensor. + * + * @return The squashed or maximum window the kernel can be executed on and the preferred split dimension. + */ +std::pair<Window, size_t> calculate_squashed_or_max_window(const ITensorInfo &src0, const ITensorInfo &src1); + +/** Function to compute the shape of output and window for the given inputs + * + * @param[in] infos Input tensor informations + * + * @return A pair of the shape and window + */ +template <typename... Shapes> +std::pair<TensorShape, Window> compute_output_shape_and_window(const Shapes &...shapes) +{ + const TensorShape out_shape = TensorShape::broadcast_shape(shapes...); + return std::make_pair(out_shape, calculate_max_window(out_shape)); +} #endif /* DOXYGEN_SKIP_THIS */ } // namespace arm_compute |