diff options
Diffstat (limited to 'src/core/helpers')
-rw-r--r-- | src/core/helpers/AutoConfiguration.h | 39 | ||||
-rw-r--r-- | src/core/helpers/LUTManager.cpp | 79 | ||||
-rw-r--r-- | src/core/helpers/LUTManager.h | 73 | ||||
-rw-r--r-- | src/core/helpers/MemoryHelpers.h | 74 | ||||
-rw-r--r-- | src/core/helpers/PoolingHelpers.h | 219 | ||||
-rw-r--r-- | src/core/helpers/ScaleHelpers.h | 23 | ||||
-rw-r--r-- | src/core/helpers/SoftmaxHelpers.cpp | 2 | ||||
-rw-r--r-- | src/core/helpers/Utils.cpp | 49 | ||||
-rw-r--r-- | src/core/helpers/Utils.h | 35 | ||||
-rw-r--r-- | src/core/helpers/WindowHelpers.cpp | 239 | ||||
-rw-r--r-- | src/core/helpers/WindowHelpers.h | 86 |
11 files changed, 784 insertions, 134 deletions
diff --git a/src/core/helpers/AutoConfiguration.h b/src/core/helpers/AutoConfiguration.h index 6880a6cb66..9df2a76983 100644 --- a/src/core/helpers/AutoConfiguration.h +++ b/src/core/helpers/AutoConfiguration.h @@ -1,5 +1,5 @@ /* -* Copyright (c) 2020 Arm Limited. +* Copyright (c) 2020, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -26,6 +26,7 @@ #include "arm_compute/core/ITensorInfo.h" #include "arm_compute/core/Types.h" +#include "arm_compute/core/utils/DataTypeUtils.h" namespace arm_compute { @@ -41,10 +42,11 @@ namespace arm_compute */ inline bool auto_init_if_empty(ITensorInfo &info, const TensorShape &shape, - int num_channels, DataType data_type, - QuantizationInfo quantization_info = QuantizationInfo()) + int num_channels, + DataType data_type, + QuantizationInfo quantization_info = QuantizationInfo()) { - if(info.tensor_shape().total_size() == 0) + if (info.tensor_shape().total_size() == 0) { info.set_data_type(data_type); info.set_num_channels(num_channels); @@ -57,21 +59,26 @@ inline bool auto_init_if_empty(ITensorInfo &info, } /** Auto initialize the tensor info using another tensor info. -* -* @param info_sink Tensor info used to check and assign -* @param info_source Tensor info used to assign -* -* @return True if the tensor info has been initialized -*/ + * + * (COMPMID-6012) This method should remain in sync with the fields of ITensorInfo that have setters. 
+ * + * + * @param info_sink Tensor info used to check and assign + * @param info_source Tensor info used to assign + * + * + * @return True if the tensor info has been initialized + */ inline bool auto_init_if_empty(ITensorInfo &info_sink, const ITensorInfo &info_source) { - if(info_sink.tensor_shape().total_size() == 0) + if (info_sink.tensor_shape().total_size() == 0) { info_sink.set_data_type(info_source.data_type()); info_sink.set_num_channels(info_source.num_channels()); info_sink.set_tensor_shape(info_source.tensor_shape()); info_sink.set_quantization_info(info_source.quantization_info()); info_sink.set_data_layout(info_source.data_layout()); + info_sink.set_are_values_constant(info_source.are_values_constant()); return true; } @@ -87,7 +94,7 @@ inline bool auto_init_if_empty(ITensorInfo &info_sink, const ITensorInfo &info_s */ inline bool set_shape_if_empty(ITensorInfo &info, const TensorShape &shape) { - if(info.tensor_shape().total_size() == 0) + if (info.tensor_shape().total_size() == 0) { info.set_tensor_shape(shape); return true; @@ -106,7 +113,7 @@ inline bool set_shape_if_empty(ITensorInfo &info, const TensorShape &shape) */ inline bool set_format_if_unknown(ITensorInfo &info, Format format) { - if(info.data_type() == DataType::UNKNOWN) + if (info.data_type() == DataType::UNKNOWN) { info.set_format(format); return true; @@ -125,7 +132,7 @@ inline bool set_format_if_unknown(ITensorInfo &info, Format format) */ inline bool set_data_type_if_unknown(ITensorInfo &info, DataType data_type) { - if(info.data_type() == DataType::UNKNOWN) + if (info.data_type() == DataType::UNKNOWN) { info.set_data_type(data_type); return true; @@ -144,7 +151,7 @@ inline bool set_data_type_if_unknown(ITensorInfo &info, DataType data_type) */ inline bool set_data_layout_if_unknown(ITensorInfo &info, DataLayout data_layout) { - if(info.data_layout() == DataLayout::UNKNOWN) + if (info.data_layout() == DataLayout::UNKNOWN) { info.set_data_layout(data_layout); return true; @@ 
-163,7 +170,7 @@ inline bool set_data_layout_if_unknown(ITensorInfo &info, DataLayout data_layout */ inline bool set_quantization_info_if_empty(ITensorInfo &info, QuantizationInfo quantization_info) { - if(info.quantization_info().empty() && (is_data_type_quantized_asymmetric(info.data_type()))) + if (info.quantization_info().empty() && (is_data_type_quantized_asymmetric(info.data_type()))) { info.set_quantization_info(quantization_info); return true; diff --git a/src/core/helpers/LUTManager.cpp b/src/core/helpers/LUTManager.cpp new file mode 100644 index 0000000000..06e35eed8c --- /dev/null +++ b/src/core/helpers/LUTManager.cpp @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2024 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "src/core/helpers/LUTManager.h" + +namespace arm_compute +{ +#ifdef __aarch64__ +namespace +{ + +void init_lut_fp16(ActivationLayerInfo::LookupTable65536 *lut) +{ + union Element + { + uint16_t i = 0; + float16_t fp; + } item; + // Fill lut by iterating over all 16 bit values using the union. + while (true) + { + (*lut)[item.i] = 1.f / (1.f + std::exp(-item.fp)); + if (item.i == 65535) + break; + item.i++; + } +} +} // namespace + +std::shared_ptr<ActivationLayerInfo::LookupTable65536> LUTManager::get_lut_table(LUTInfo info) +{ + const auto itr = map_fp16.find(info); + auto s_ptr = (itr != map_fp16.end()) ? itr->second.lock() : nullptr; // nullptr if invalid or not found. + if (s_ptr != nullptr) + { + // Found and valid + return s_ptr; // Return weak ptr as shared ptr + } + else + { + // Not found, or pointer not valid + // We do not use make_shared to prevent the weak_ptr keeping the control block alive + std::shared_ptr<ActivationLayerInfo::LookupTable65536> ptr(new ActivationLayerInfo::LookupTable65536); + init_lut_fp16(ptr.get()); + map_fp16[info] = ptr; + return ptr; + } +} +#endif // __aarch64__ + +// Static function to get LutManager instance +LUTManager &LUTManager::get_instance() +{ + static auto inst_ = std::make_unique<LUTManager>(); // The one, single instance. + return *inst_; +} + +} // namespace arm_compute diff --git a/src/core/helpers/LUTManager.h b/src/core/helpers/LUTManager.h new file mode 100644 index 0000000000..4e13ead7e3 --- /dev/null +++ b/src/core/helpers/LUTManager.h @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2024 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */
+
+#ifndef ACL_SRC_CORE_HELPERS_LUTMANAGER_H
+#define ACL_SRC_CORE_HELPERS_LUTMANAGER_H
+
+#include "arm_compute/core/CoreTypes.h"
+#include "arm_compute/core/QuantizationInfo.h"
+#include "arm_compute/function_info/ActivationLayerInfo.h"
+
+#include <map>
+#include <memory>
+
+namespace arm_compute
+{
+
+struct LUTInfo
+{
+    ActivationLayerInfo::ActivationFunction act;
+    DataType dt;
+    QuantizationInfo qinfo;
+    // Operators enable use of map with LUTInfo as key
+    friend bool operator<(const LUTInfo &l, const LUTInfo &r)
+    {
+        return (l.act < r.act) || ((l.act == r.act) && (l.dt < r.dt)) ||
+               ((l.act == r.act) && (l.dt == r.dt) && (l.qinfo.scale() < r.qinfo.scale())) ||
+               ((l.act == r.act) && (l.dt == r.dt) && (l.qinfo.scale() == r.qinfo.scale()) &&
+                (l.qinfo.offset() < r.qinfo.offset()));
+    }
+    bool operator==(const LUTInfo &l)
+    {
+        return this->act == l.act && this->dt == l.dt && this->qinfo == l.qinfo;
+    }
+};
+
+/* Class to handle getting look up table */
+class LUTManager
+{
+public:
+    LUTManager() = default;
+
+    static LUTManager &get_instance();
+#ifdef __aarch64__
+    std::shared_ptr<ActivationLayerInfo::LookupTable65536> get_lut_table(LUTInfo info);
+
+private:
+    std::map<LUTInfo, std::weak_ptr<ActivationLayerInfo::LookupTable65536>> map_fp16{};
+#endif // __aarch64__
+};
+
+} // namespace arm_compute
+#endif // ACL_SRC_CORE_HELPERS_LUTMANAGER_H diff --git a/src/core/helpers/MemoryHelpers.h b/src/core/helpers/MemoryHelpers.h index e751e6025d..dd094b414c 100644 --- a/src/core/helpers/MemoryHelpers.h +++ b/src/core/helpers/MemoryHelpers.h @@ -24,9 +24,9 @@ #ifndef SRC_COMMON_MEMORY_HELPERS_H #define SRC_COMMON_MEMORY_HELPERS_H +#include "arm_compute/core/experimental/Types.h" #include "arm_compute/core/ITensorPack.h" #include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/experimental/Types.h" #include "arm_compute/runtime/MemoryGroup.h" #include <memory> @@ -41,12 +41,19 @@ inline int offset_int_vec(int offset) } template <typename 
TensorType> -using WorkspaceData = std::vector<std::pair<int, std::unique_ptr<TensorType>>>; +struct WorkspaceDataElement +{ + int slot{-1}; + experimental::MemoryLifetime lifetime{experimental::MemoryLifetime::Temporary}; + std::unique_ptr<TensorType> tensor{nullptr}; +}; template <typename TensorType> -WorkspaceData<TensorType> manage_workspace(const experimental::MemoryRequirements &mem_reqs, - MemoryGroup &mgroup, - ITensorPack &run_pack) +using WorkspaceData = std::vector<WorkspaceDataElement<TensorType>>; + +template <typename TensorType> +WorkspaceData<TensorType> +manage_workspace(const experimental::MemoryRequirements &mem_reqs, MemoryGroup &mgroup, ITensorPack &run_pack) { ITensorPack dummy_pack = ITensorPack(); return manage_workspace<TensorType>(mem_reqs, mgroup, run_pack, dummy_pack); @@ -55,24 +62,26 @@ WorkspaceData<TensorType> manage_workspace(const experimental::MemoryRequirement template <typename TensorType> WorkspaceData<TensorType> manage_workspace(const experimental::MemoryRequirements &mem_reqs, MemoryGroup &mgroup, - ITensorPack &run_pack, ITensorPack &prep_pack) + ITensorPack &run_pack, + ITensorPack &prep_pack) { WorkspaceData<TensorType> workspace_memory; - for(const auto &req : mem_reqs) + for (const auto &req : mem_reqs) { - if(req.size == 0) + if (req.size == 0) { continue; } - const auto aux_info = TensorInfo{ TensorShape(req.size), 1, DataType::U8 }; - workspace_memory.emplace_back(req.slot, std::make_unique<TensorType>()); + const auto aux_info = TensorInfo{TensorShape(req.size), 1, DataType::U8}; + workspace_memory.emplace_back( + WorkspaceDataElement<TensorType>{req.slot, req.lifetime, std::make_unique<TensorType>()}); - auto aux_tensor = workspace_memory.back().second.get(); + auto aux_tensor = workspace_memory.back().tensor.get(); ARM_COMPUTE_ERROR_ON_NULLPTR(aux_tensor); - aux_tensor->allocator()->init(aux_info); + aux_tensor->allocator()->init(aux_info, req.alignment); - if(req.lifetime == 
experimental::MemoryLifetime::Temporary) + if (req.lifetime == experimental::MemoryLifetime::Temporary) { mgroup.manage(aux_tensor); } @@ -83,13 +92,48 @@ WorkspaceData<TensorType> manage_workspace(const experimental::MemoryRequirement run_pack.add_tensor(req.slot, aux_tensor); } - for(auto &mem : workspace_memory) + for (auto &mem : workspace_memory) { - auto tensor = mem.second.get(); + auto tensor = mem.tensor.get(); tensor->allocator()->allocate(); } return workspace_memory; } + +template <typename TensorType> +void release_prepare_tensors(WorkspaceData<TensorType> &workspace, ITensorPack &prep_pack) +{ + workspace.erase(std::remove_if(workspace.begin(), workspace.end(), + [&prep_pack](auto &wk) + { + const bool to_erase = wk.lifetime == experimental::MemoryLifetime::Prepare; + if (to_erase) + { + prep_pack.remove_tensor(wk.slot); + } + return to_erase; + }), + workspace.end()); +} + +/** Utility function to release tensors with lifetime marked as Prepare */ +template <typename TensorType> +void release_temporaries(const experimental::MemoryRequirements &mem_reqs, WorkspaceData<TensorType> &workspace) +{ + for (auto &ws : workspace) + { + const int slot = ws.slot; + for (auto &m : mem_reqs) + { + if (m.slot == slot && m.lifetime == experimental::MemoryLifetime::Prepare) + { + auto tensor = ws.tensor.get(); + tensor->allocator()->free(); + break; + } + } + } +} } // namespace arm_compute #endif /* SRC_COMMON_MEMORY_HELPERS_H */ diff --git a/src/core/helpers/PoolingHelpers.h b/src/core/helpers/PoolingHelpers.h new file mode 100644 index 0000000000..9ef045f472 --- /dev/null +++ b/src/core/helpers/PoolingHelpers.h @@ -0,0 +1,219 @@ +/* +* Copyright (c) 2022 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef SRC_CORE_HELPERS_POOLINGHELPERS_H +#define SRC_CORE_HELPERS_POOLINGHELPERS_H + +#include "src/core/NEON/NEAsymm.h" + +namespace arm_compute +{ +namespace cpu +{ +namespace +{ + +inline float calculate_avg_scale_pool3d(bool exclude_padding, + const Coordinates &id, + const int pool_size_x, + const int pool_size_y, + const int pool_size_z, + const int upper_bound_w, + const int upper_bound_h, + const int upper_bound_d, + const int pad_x, + const int pad_y, + const int pad_z, + const int stride_x, + const int stride_y, + const int stride_z) +{ + // Based on NDHWC + int start_x = id[1] * stride_x - pad_x; + int start_y = id[2] * stride_y - pad_y; + int start_z = id[3] * stride_z - pad_z; + + const int end_x = std::min(start_x + pool_size_x, upper_bound_w); + const int end_y = std::min(start_y + pool_size_y, upper_bound_h); + const int end_z = std::min(start_z + pool_size_z, upper_bound_d); + if (exclude_padding) + { + start_x = std::max(0, start_x); + start_y = std::max(0, start_y); + start_z = std::max(0, start_z); + } + return 1.f / ((end_y - start_y) * (end_x - start_x) * (end_z - start_z)); +} + +inline float calculate_avg_scale_pool2d(bool exclude_padding, + DataLayout data_layout, + const Coordinates &id, + const int pool_size_x, + const int pool_size_y, + const int upper_bound_w, + const int upper_bound_h, + const int pad_x, + const int pad_y, + const int stride_x, + const int stride_y) +{ + const unsigned int idx_width = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); + const unsigned int idx_height = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); + + int start_x = id[idx_width] * stride_x - pad_x; + int start_y = id[idx_height] * stride_y - pad_y; + + const int end_x = std::min(start_x + pool_size_x, upper_bound_w); + const int end_y = std::min(start_y + pool_size_y, upper_bound_h); + if (exclude_padding) + { + start_x = std::max(0, start_x); + start_y = std::max(0, start_y); + } + return 1.f 
/ ((end_y - start_y) * (end_x - start_x)); +} + +template <typename T> +inline typename std::enable_if<std::is_same<T, int8_t>::value, int8_t>::type +quantize(float val, const UniformQuantizationInfo &info) +{ + return quantize_qasymm8_signed(val, info); +} + +template <typename T> +inline typename std::enable_if<std::is_same<T, uint8_t>::value, uint8_t>::type +quantize(float val, const UniformQuantizationInfo &info) +{ + return quantize_qasymm8(val, info); +} + +template <typename T> +inline T vcvtq_q32_f32(float32x4_t values); + +template <> +inline uint32x4_t vcvtq_q32_f32(float32x4_t values) +{ + return vcvtq_u32_f32(values); +} + +template <> +inline int32x4_t vcvtq_q32_f32(float32x4_t values) +{ + return vcvtq_s32_f32(values); +} + +template <typename T> +inline float32x4_t vcvtq_f32_q32(T values); + +template <> +inline float32x4_t vcvtq_f32_q32(uint32x4_t values) +{ + return vcvtq_f32_u32(values); +} + +template <> +inline float32x4_t vcvtq_f32_q32(int32x4_t values) +{ + return vcvtq_f32_s32(values); +} + +template <typename Tout> +inline Tout vrequantize_pooling_with_scale(const float32x4x4_t &acc, + const float quant_rescale, + const float scale_pooling, + const int32_t new_offset); + +template <> +inline uint8x16_t vrequantize_pooling_with_scale(const float32x4x4_t &acc, + const float quant_rescale, + const float scale_pooling, + const int32_t new_offset) +{ + const float new_scale = quant_rescale / scale_pooling; + return vquantize(acc, UniformQuantizationInfo(new_scale, new_offset)); +} + +template <> +inline int8x16_t vrequantize_pooling_with_scale(const float32x4x4_t &acc, + const float quant_rescale, + const float scale_pooling, + const int32_t new_offset) +{ + const float new_scale = quant_rescale / scale_pooling; + return vquantize_signed(acc, UniformQuantizationInfo(new_scale, new_offset)); +} + +template <typename Tin, typename Tout> +inline Tout vrequantize_pooling(Tin vec1, Tin vec2, const UniformQuantizationInfo &requant_qinfo); + +template 
<> +inline uint8x16_t vrequantize_pooling(uint8x8_t vec1, uint8x8_t vec2, const UniformQuantizationInfo &requant_qinfo) +{ + const float32x4x4_t acc = {{ + vcvtq_f32_u32(vmovl_u16(vget_low_u16(vmovl_u8((vec1))))), + vcvtq_f32_u32(vmovl_u16(vget_high_u16(vmovl_u8((vec1))))), + vcvtq_f32_u32(vmovl_u16(vget_low_u16(vmovl_u8((vec2))))), + vcvtq_f32_u32(vmovl_u16(vget_high_u16(vmovl_u8((vec2))))), + }}; + return vquantize(acc, requant_qinfo); +} + +template <> +inline int8x16_t vrequantize_pooling(int8x8_t vec1, int8x8_t vec2, const UniformQuantizationInfo &requant_qinfo) +{ + const float32x4x4_t acc = {{ + vcvtq_f32_s32(vmovl_s16(vget_low_s16(vmovl_s8((vec1))))), + vcvtq_f32_s32(vmovl_s16(vget_high_s16(vmovl_s8((vec1))))), + vcvtq_f32_s32(vmovl_s16(vget_low_s16(vmovl_s8((vec2))))), + vcvtq_f32_s32(vmovl_s16(vget_high_s16(vmovl_s8((vec2))))), + }}; + return vquantize_signed(acc, requant_qinfo); +} + +template <typename T> +inline T vrequantize_pooling(T &vec, const UniformQuantizationInfo &requant_qinfo); + +template <> +inline uint8x8_t vrequantize_pooling(uint8x8_t &vec, const UniformQuantizationInfo &requant_qinfo) +{ + const float32x4x2_t acc = {{ + vcvtq_f32_u32(vmovl_u16(vget_low_u16(vmovl_u8((vec))))), + vcvtq_f32_u32(vmovl_u16(vget_high_u16(vmovl_u8((vec))))), + }}; + return vquantize(acc, requant_qinfo); +} + +template <> +inline int8x8_t vrequantize_pooling(int8x8_t &vec, const UniformQuantizationInfo &requant_qinfo) +{ + const float32x4x2_t acc = {{ + vcvtq_f32_s32(vmovl_s16(vget_low_s16(vmovl_s8((vec))))), + vcvtq_f32_s32(vmovl_s16(vget_high_s16(vmovl_s8((vec))))), + }}; + return vquantize_signed(acc, requant_qinfo); +} + +} // namespace +} // namespace cpu +} // namespace arm_compute +#endif /* SRC_CORE_HELPERS_POOLINGHELPERS_H */ diff --git a/src/core/helpers/ScaleHelpers.h b/src/core/helpers/ScaleHelpers.h index e769bba782..47605e7385 100644 --- a/src/core/helpers/ScaleHelpers.h +++ b/src/core/helpers/ScaleHelpers.h @@ -50,8 +50,12 @@ namespace 
scale_helpers * * @return The bilinear interpolated pixel value */ -inline uint8_t delta_bilinear_c1_quantized(const uint8_t *pixel_ptr, size_t stride, float dx, float dy, - UniformQuantizationInfo iq_info, UniformQuantizationInfo oq_info) +inline uint8_t delta_bilinear_c1_quantized(const uint8_t *pixel_ptr, + size_t stride, + float dx, + float dy, + UniformQuantizationInfo iq_info, + UniformQuantizationInfo oq_info) { ARM_COMPUTE_ERROR_ON(pixel_ptr == nullptr); @@ -85,8 +89,12 @@ inline uint8_t delta_bilinear_c1_quantized(const uint8_t *pixel_ptr, size_t stri * * @return The bilinear interpolated pixel value */ -inline int8_t delta_bilinear_c1_quantized(const int8_t *pixel_ptr, size_t stride, float dx, float dy, - UniformQuantizationInfo iq_info, UniformQuantizationInfo oq_info) +inline int8_t delta_bilinear_c1_quantized(const int8_t *pixel_ptr, + size_t stride, + float dx, + float dy, + UniformQuantizationInfo iq_info, + UniformQuantizationInfo oq_info) { ARM_COMPUTE_ERROR_ON(pixel_ptr == nullptr); @@ -122,9 +130,8 @@ inline int8_t delta_bilinear_c1_quantized(const int8_t *pixel_ptr, size_t stride * * @return The pixel at (x, y) using area interpolation. 
*/ -inline uint8_t -pixel_area_c1u8_clamp(const uint8_t *first_pixel_ptr, size_t stride, size_t width, size_t height, float wr, - float hr, int x, int y) +inline uint8_t pixel_area_c1u8_clamp( + const uint8_t *first_pixel_ptr, size_t stride, size_t width, size_t height, float wr, float hr, int x, int y) { ARM_COMPUTE_ERROR_ON(first_pixel_ptr == nullptr); @@ -159,7 +166,7 @@ pixel_area_c1u8_clamp(const uint8_t *first_pixel_ptr, size_t stride, size_t widt // Sum pixels in area int sum = 0; - for(int j = yi + y_from, je = yi + y_to; j <= je; ++j) + for (int j = yi + y_from, je = yi + y_to; j <= je; ++j) { const uint8_t *ptr = first_pixel_ptr + j * stride + xi + x_from; sum = std::accumulate(ptr, ptr + x_elements, sum); diff --git a/src/core/helpers/SoftmaxHelpers.cpp b/src/core/helpers/SoftmaxHelpers.cpp index 71b971af31..8184991ab5 100644 --- a/src/core/helpers/SoftmaxHelpers.cpp +++ b/src/core/helpers/SoftmaxHelpers.cpp @@ -29,7 +29,7 @@ namespace softmax_helpers { PermutationVector get_permutation_vector_from_softmax_axis(size_t axis) { - switch(axis) + switch (axis) { case 1: return PermutationVector(1U, 0U, 2U, 3U); diff --git a/src/core/helpers/Utils.cpp b/src/core/helpers/Utils.cpp new file mode 100644 index 0000000000..f8895d8a3c --- /dev/null +++ b/src/core/helpers/Utils.cpp @@ -0,0 +1,49 @@ +/* +* Copyright (c) 2023 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "src/core/helpers/Utils.h" + +namespace arm_compute +{ +bool has_holes(const ITensorInfo &info) +{ + return has_holes(info, info.num_dimensions() - 1); +} + +bool has_holes(const ITensorInfo &info, size_t dimension) +{ + const auto &shape = info.tensor_shape(); + const auto &strides = info.strides_in_bytes(); + size_t squashed_bytes = info.element_size(); + + for (size_t dim = 0; dim <= dimension; ++dim) + { + if (strides[dim] != squashed_bytes) + { + return true; + } + squashed_bytes *= shape[dim]; + } + return false; +} +} // namespace arm_compute diff --git a/src/core/helpers/Utils.h b/src/core/helpers/Utils.h index 326dc962c7..a17a78f7ee 100644 --- a/src/core/helpers/Utils.h +++ b/src/core/helpers/Utils.h @@ -1,5 +1,5 @@ /* -* Copyright (c) 2020-2021 Arm Limited. +* Copyright (c) 2020-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef SRC_CORE_HELPERS_UTILS_H -#define SRC_CORE_HELPERS_UTILS_H +#ifndef ACL_SRC_CORE_HELPERS_UTILS_H +#define ACL_SRC_CORE_HELPERS_UTILS_H #include "arm_compute/core/ITensorInfo.h" @@ -38,14 +38,14 @@ namespace arm_compute * calculated based on the tensor shape and the strides of lower dimensions. */ template <typename T, typename... Ts> -inline Strides compute_strides(const ITensorInfo &info, T stride_x, Ts &&... 
fixed_strides) +inline Strides compute_strides(const ITensorInfo &info, T stride_x, Ts &&...fixed_strides) { const TensorShape &shape = info.tensor_shape(); // Create strides object Strides strides(stride_x, fixed_strides...); - for(size_t i = 1 + sizeof...(Ts); i < info.num_dimensions(); ++i) + for (size_t i = 1 + sizeof...(Ts); i < info.num_dimensions(); ++i) { strides.set(i, shape[i - 1] * strides[i - 1]); } @@ -92,6 +92,29 @@ inline unsigned int get_next_power_two(unsigned int x) return x; } + +/** Check if the tensor has any holes. + * + * A hole is defined as any gap in the tensor between two consecutive values. This can be a result of extending + * the paddings or manipulating the strides of the tensor + * + * @param[in] info Tensor info object defining the shape of the input tensor. + * + * @note This function checks for holes in all dimensions. + * + */ +bool has_holes(const ITensorInfo &info); + +/** Check if the tensor has any holes. + * + * @param[in] info Tensor info object defining the shape of the input tensor. + * @param[in] dimension Highest dimension to check. + * + * @note This function checks for holes in all the dimensions upto and including the highest dimension. + * + */ +bool has_holes(const ITensorInfo &info, size_t dimension); + } // namespace arm_compute -#endif /* SRC_CORE_HELPERS_UTILS_H */ +#endif // ACL_SRC_CORE_HELPERS_UTILS_H diff --git a/src/core/helpers/WindowHelpers.cpp b/src/core/helpers/WindowHelpers.cpp index 75ffb71b4b..30a55fcbc6 100644 --- a/src/core/helpers/WindowHelpers.cpp +++ b/src/core/helpers/WindowHelpers.cpp @@ -1,5 +1,5 @@ /* -* Copyright (c) 2020-2021 Arm Limited. +* Copyright (c) 2020-2022 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -25,9 +25,10 @@ namespace arm_compute { -Window calculate_max_window(const ValidRegion &valid_region, const Steps &steps, bool skip_border, BorderSize border_size) +Window +calculate_max_window(const ValidRegion &valid_region, const Steps &steps, bool skip_border, BorderSize border_size) { - if(!skip_border) + if (!skip_border) { border_size = BorderSize(0); } @@ -38,40 +39,47 @@ Window calculate_max_window(const ValidRegion &valid_region, const Steps &steps, Window window; window.set(0, Window::Dimension( - // Skip the border left of the image - anchor[0] + border_size.left, - // Skip the border right of the image - // Make sure the window width is a multiple of the step size - anchor[0] + border_size.left + ceil_to_multiple(std::max(0, static_cast<int>(shape[0]) - static_cast<int>(border_size.left) - static_cast<int>(border_size.right)), steps[0]), - steps[0])); + // Skip the border left of the image + anchor[0] + border_size.left, + // Skip the border right of the image + // Make sure the window width is a multiple of the step size + anchor[0] + border_size.left + + ceil_to_multiple(std::max(0, static_cast<int>(shape[0]) - static_cast<int>(border_size.left) - + static_cast<int>(border_size.right)), + steps[0]), + steps[0])); size_t n = 1; - if(anchor.num_dimensions() > 1) + if (anchor.num_dimensions() > 1) { - window.set(1, Window::Dimension( + window.set(1, + Window::Dimension( // Skip the border above the image anchor[1] + border_size.top, // Skip the border below the image - anchor[1] + border_size.top + ceil_to_multiple(std::max(0, static_cast<int>(shape[1]) - static_cast<int>(border_size.top) - static_cast<int>(border_size.bottom)), steps[1]), + anchor[1] + border_size.top + + ceil_to_multiple(std::max(0, static_cast<int>(shape[1]) - static_cast<int>(border_size.top) - + static_cast<int>(border_size.bottom)), + steps[1]), steps[1])); ++n; } - if(anchor.num_dimensions() > 2) + if (anchor.num_dimensions() > 2) { 
window.set(2, Window::Dimension(anchor[2], std::max<size_t>(1, shape[2]), steps[2])); ++n; } - for(; n < anchor.num_dimensions(); ++n) + for (; n < anchor.num_dimensions(); ++n) { window.set(n, Window::Dimension(anchor[n], std::max<size_t>(1, shape[n]))); } - for(; n < Coordinates::num_max_dimensions; ++n) + for (; n < Coordinates::num_max_dimensions; ++n) { window.set(n, Window::Dimension(0, 1)); } @@ -81,7 +89,7 @@ Window calculate_max_window(const ValidRegion &valid_region, const Steps &steps, Window calculate_max_window(const TensorShape &shape, const Steps &steps, bool skip_border, BorderSize border_size) { - if(!skip_border) + if (!skip_border) { border_size = BorderSize(0); } @@ -89,40 +97,46 @@ Window calculate_max_window(const TensorShape &shape, const Steps &steps, bool s Window window; window.set(0, Window::Dimension( - // Skip the border left of the image - border_size.left, - // Skip the border right of the image - // Make sure the window width is a multiple of the step size - border_size.left + ceil_to_multiple(std::max(0, static_cast<int>(shape[0]) - static_cast<int>(border_size.left) - static_cast<int>(border_size.right)), steps[0]), - steps[0])); + // Skip the border left of the image + border_size.left, + // Skip the border right of the image + // Make sure the window width is a multiple of the step size + border_size.left + + ceil_to_multiple(std::max(0, static_cast<int>(shape[0]) - static_cast<int>(border_size.left) - + static_cast<int>(border_size.right)), + steps[0]), + steps[0])); size_t n = 1; - if(shape.num_dimensions() > 1) + if (shape.num_dimensions() > 1) { window.set(1, Window::Dimension( - // Skip the border above the image - border_size.top, - // Skip the border below the image - border_size.top + ceil_to_multiple(std::max(0, static_cast<int>(shape[1]) - static_cast<int>(border_size.top) - static_cast<int>(border_size.bottom)), steps[1]), - steps[1])); + // Skip the border above the image + border_size.top, + // Skip the border below 
the image + border_size.top + ceil_to_multiple(std::max(0, static_cast<int>(shape[1]) - + static_cast<int>(border_size.top) - + static_cast<int>(border_size.bottom)), + steps[1]), + steps[1])); ++n; } - if(shape.num_dimensions() > 2) + if (shape.num_dimensions() > 2) { window.set(2, Window::Dimension(0, std::max<size_t>(1, shape[2]), steps[2])); ++n; } - for(; n < shape.num_dimensions(); ++n) + for (; n < shape.num_dimensions(); ++n) { window.set(n, Window::Dimension(0, std::max<size_t>(1, shape[n]))); } - for(; n < Coordinates::num_max_dimensions; ++n) + for (; n < Coordinates::num_max_dimensions; ++n) { window.set(n, Window::Dimension(0, 1)); } @@ -138,40 +152,42 @@ Window calculate_max_enlarged_window(const ValidRegion &valid_region, const Step Window window; window.set(0, Window::Dimension( - // move the anchor to the start from the border - anchor[0] - border_size.left, - // move the anchor to include the right end border - // Make sure the window width is a multiple of the step size - anchor[0] - border_size.left + ceil_to_multiple(shape[0] + border_size.left + border_size.right, steps[0]), - steps[0])); + // move the anchor to the start from the border + anchor[0] - border_size.left, + // move the anchor to include the right end border + // Make sure the window width is a multiple of the step size + anchor[0] - border_size.left + + ceil_to_multiple(shape[0] + border_size.left + border_size.right, steps[0]), + steps[0])); size_t n = 1; - if(anchor.num_dimensions() > 1) + if (anchor.num_dimensions() > 1) { window.set(1, Window::Dimension( - // Include the border above the image - anchor[1] - border_size.top, - // Include the border below the image - anchor[1] - border_size.top + ceil_to_multiple(shape[1] + border_size.top + border_size.bottom, steps[1]), - steps[1])); + // Include the border above the image + anchor[1] - border_size.top, + // Include the border below the image + anchor[1] - border_size.top + + ceil_to_multiple(shape[1] + border_size.top + 
border_size.bottom, steps[1]), + steps[1])); ++n; } - if(anchor.num_dimensions() > 2) + if (anchor.num_dimensions() > 2) { window.set(2, Window::Dimension(0, std::max<size_t>(1, shape[n]), steps[2])); ++n; } - for(; n < anchor.num_dimensions(); ++n) + for (; n < anchor.num_dimensions(); ++n) { window.set(n, Window::Dimension(anchor[n], std::max<size_t>(1, shape[n]))); } - for(; n < Coordinates::num_max_dimensions; ++n) + for (; n < Coordinates::num_max_dimensions; ++n) { window.set(n, Window::Dimension(0, 1)); } @@ -179,9 +195,12 @@ Window calculate_max_enlarged_window(const ValidRegion &valid_region, const Step return window; } -Window calculate_max_window_horizontal(const ValidRegion &valid_region, const Steps &steps, bool skip_border, BorderSize border_size) +Window calculate_max_window_horizontal(const ValidRegion &valid_region, + const Steps &steps, + bool skip_border, + BorderSize border_size) { - if(skip_border) + if (skip_border) { border_size.top = 0; border_size.bottom = 0; @@ -198,37 +217,133 @@ Window calculate_max_window_horizontal(const ValidRegion &valid_region, const St Window window; window.set(0, Window::Dimension( - // Skip the border left of the image - anchor[0] + border_size.left, - // Skip the border right of the image - // Make sure the window width is a multiple of the step size - anchor[0] + border_size.left + ceil_to_multiple(std::max(0, static_cast<int>(shape[0]) - static_cast<int>(border_size.left) - static_cast<int>(border_size.right)), steps[0]), - steps[0])); + // Skip the border left of the image + anchor[0] + border_size.left, + // Skip the border right of the image + // Make sure the window width is a multiple of the step size + anchor[0] + border_size.left + + ceil_to_multiple(std::max(0, static_cast<int>(shape[0]) - static_cast<int>(border_size.left) - + static_cast<int>(border_size.right)), + steps[0]), + steps[0])); size_t n = 1; - if(anchor.num_dimensions() > 1) + if (anchor.num_dimensions() > 1) { window.set(1, 
Window::Dimension( - // Skip the border above the image - anchor[1] - border_size.top, - // Skip the border below the image - anchor[1] + shape[1] + border_size.bottom, - 1)); + // Skip the border above the image + anchor[1] - border_size.top, + // Skip the border below the image + anchor[1] + shape[1] + border_size.bottom, 1)); ++n; } - for(; n < anchor.num_dimensions(); ++n) + for (; n < anchor.num_dimensions(); ++n) { window.set(n, Window::Dimension(anchor[n], std::max<size_t>(1, shape[n]))); } - for(; n < Coordinates::num_max_dimensions; ++n) + for (; n < Coordinates::num_max_dimensions; ++n) { window.set(n, Window::Dimension(0, 1)); } return window; } + +std::pair<Window, size_t> calculate_squashed_or_max_window(const ITensorInfo &src0, const ITensorInfo &src1) +{ + const auto &shape0 = src0.tensor_shape(); + const auto &shape1 = src1.tensor_shape(); + const auto &strides0 = src0.strides_in_bytes(); + const auto &strides1 = src1.strides_in_bytes(); + const auto num_dimensions = std::max(src0.num_dimensions(), src1.num_dimensions()); + + Window win; + size_t split_dimension = Window::DimY; + size_t dim = 0; + + size_t squashed_bytes = src0.element_size(); + + // Try to squash the low dimensions together. + for (; dim < num_dimensions; ++dim) + { + if (shape0[dim] != shape1[dim] || strides0[dim] != squashed_bytes || strides1[dim] != squashed_bytes) + { + break; + } + + squashed_bytes *= shape0[dim]; + } + + if (dim == num_dimensions) + { + auto squashed_elements = squashed_bytes / src0.element_size(); + + split_dimension = Window::DimX; + + // The input tensors can be interpreted as 1D array. + win.set(0, Window::Dimension(0, squashed_elements, 1)); + + for (dim = 1; dim < Coordinates::num_max_dimensions; ++dim) + { + win.set(dim, Window::Dimension(0, 1, 1)); + } + } + else + { + // Generates the max window. 
+ for (dim = 0; dim < Coordinates::num_max_dimensions; ++dim) + { + win.set(dim, Window::Dimension(0, std::max(shape0[dim], shape1[dim]), 1)); + } + } + + return std::make_pair(win, split_dimension); +} + +std::pair<Window, size_t> calculate_squashed_or_max_window(const ITensorInfo &src) +{ + const auto &shape = src.tensor_shape(); + const auto &strides = src.strides_in_bytes(); + const auto num_dimensions = src.num_dimensions(); + + Window win; + size_t split_dimension = Window::DimY; + size_t dim = 0; + size_t squashed_bytes = src.element_size(); + + // Try to squash the low dimensions together. + for (; dim < num_dimensions; ++dim) + { + if (strides[dim] != squashed_bytes) + { + break; + } + squashed_bytes *= shape[dim]; + } + if (dim == num_dimensions) + { + const auto squashed_elements = squashed_bytes / src.element_size(); + split_dimension = Window::DimX; + // The input tensor can be interpreted as 1D array. + win.set(0, Window::Dimension(0, squashed_elements, 1)); + for (dim = 1; dim < Coordinates::num_max_dimensions; ++dim) + { + win.set(dim, Window::Dimension(0, 1, 1)); + } + } + else + { + // Generate the max window. + for (dim = 0; dim < Coordinates::num_max_dimensions; ++dim) + { + win.set(dim, Window::Dimension(0, shape[dim], 1)); + } + } + return std::make_pair(win, split_dimension); +} + } // namespace arm_compute diff --git a/src/core/helpers/WindowHelpers.h b/src/core/helpers/WindowHelpers.h index dad5da62d3..e404c18e8a 100644 --- a/src/core/helpers/WindowHelpers.h +++ b/src/core/helpers/WindowHelpers.h @@ -1,5 +1,5 @@ /* -* Copyright (c) 2020-2021 Arm Limited. +* Copyright (c) 2020-2022 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -43,23 +43,13 @@ namespace arm_compute * influence the returned value. */ template <typename... Ts> -bool update_window_and_padding(Window &win, Ts &&... 
patterns) +bool update_window_and_padding(Window &win, Ts &&...patterns) { bool window_changed = false; - utility::for_each([&](const IAccessWindow & w) - { - window_changed |= w.update_window_if_needed(win); - }, - patterns...); - - bool padding_changed = false; + utility::for_each([&](const IAccessWindow &w) { window_changed |= w.update_window_if_needed(win); }, patterns...); - utility::for_each([&](IAccessWindow & w) - { - padding_changed |= w.update_padding_if_needed(win); - }, - patterns...); + utility::for_each([&](IAccessWindow &w) { w.update_padding_if_needed(win); }, patterns...); return window_changed; } @@ -71,18 +61,18 @@ bool update_window_and_padding(Window &win, Ts &&... patterns) * @return Intersection of all regions. */ template <typename... Ts> -ValidRegion intersect_valid_regions(const Ts &... regions) +ValidRegion intersect_valid_regions(const Ts &...regions) { - auto intersect = [](const ValidRegion & r1, const ValidRegion & r2) -> ValidRegion + auto intersect = [](const ValidRegion &r1, const ValidRegion &r2) -> ValidRegion { ValidRegion region; - for(size_t d = 0; d < std::min(r1.anchor.num_dimensions(), r2.anchor.num_dimensions()); ++d) + for (size_t d = 0; d < std::min(r1.anchor.num_dimensions(), r2.anchor.num_dimensions()); ++d) { region.anchor.set(d, std::max(r1.anchor[d], r2.anchor[d])); } - for(size_t d = 0; d < std::min(r1.shape.num_dimensions(), r2.shape.num_dimensions()); ++d) + for (size_t d = 0; d < std::min(r1.shape.num_dimensions(), r2.shape.num_dimensions()); ++d) { region.shape.set(d, std::min(r1.shape[d], r2.shape[d])); } @@ -103,7 +93,10 @@ ValidRegion intersect_valid_regions(const Ts &... regions) * * @return The maximum window the kernel can be executed on. 
*/ -Window calculate_max_window(const ValidRegion &valid_region, const Steps &steps = Steps(), bool skip_border = false, BorderSize border_size = BorderSize()); +Window calculate_max_window(const ValidRegion &valid_region, + const Steps &steps = Steps(), + bool skip_border = false, + BorderSize border_size = BorderSize()); /** Calculate the maximum window for a given tensor shape and border setting * @@ -114,7 +107,10 @@ Window calculate_max_window(const ValidRegion &valid_region, const Steps &steps * * @return The maximum window the kernel can be executed on. */ -Window calculate_max_window(const TensorShape &shape, const Steps &steps = Steps(), bool skip_border = false, BorderSize border_size = BorderSize()); +Window calculate_max_window(const TensorShape &shape, + const Steps &steps = Steps(), + bool skip_border = false, + BorderSize border_size = BorderSize()); /** Calculate the maximum window for a given tensor shape and border setting * @@ -125,7 +121,10 @@ Window calculate_max_window(const TensorShape &shape, const Steps &steps = Steps * * @return The maximum window the kernel can be executed on. */ -inline Window calculate_max_window(const ITensorInfo &info, const Steps &steps = Steps(), bool skip_border = false, BorderSize border_size = BorderSize()) +inline Window calculate_max_window(const ITensorInfo &info, + const Steps &steps = Steps(), + bool skip_border = false, + BorderSize border_size = BorderSize()) { return calculate_max_window(info.tensor_shape(), steps, skip_border, border_size); } @@ -139,7 +138,10 @@ inline Window calculate_max_window(const ITensorInfo &info, const Steps &steps = * * @return The maximum window the kernel can be executed on. 
*/ -Window calculate_max_window_horizontal(const ValidRegion &valid_region, const Steps &steps = Steps(), bool skip_border = false, BorderSize border_size = BorderSize()); +Window calculate_max_window_horizontal(const ValidRegion &valid_region, + const Steps &steps = Steps(), + bool skip_border = false, + BorderSize border_size = BorderSize()); /** Calculate the maximum window used by a horizontal kernel for a given tensor shape and border setting * @@ -150,7 +152,10 @@ Window calculate_max_window_horizontal(const ValidRegion &valid_region, const St * * @return The maximum window the kernel can be executed on. */ -inline Window calculate_max_window_horizontal(const ITensorInfo &info, const Steps &steps = Steps(), bool skip_border = false, BorderSize border_size = BorderSize()) +inline Window calculate_max_window_horizontal(const ITensorInfo &info, + const Steps &steps = Steps(), + bool skip_border = false, + BorderSize border_size = BorderSize()) { return calculate_max_window_horizontal(info.valid_region(), steps, skip_border, border_size); } @@ -163,7 +168,9 @@ inline Window calculate_max_window_horizontal(const ITensorInfo &info, const Ste * * @return The maximum window the kernel can be executed on. */ -Window calculate_max_enlarged_window(const ValidRegion &valid_region, const Steps &steps = Steps(), BorderSize border_size = BorderSize()); +Window calculate_max_enlarged_window(const ValidRegion &valid_region, + const Steps &steps = Steps(), + BorderSize border_size = BorderSize()); /** Calculate the maximum window for a given tensor shape and border setting. The window will also includes the border. * @@ -173,11 +180,38 @@ Window calculate_max_enlarged_window(const ValidRegion &valid_region, const Step * * @return The maximum window the kernel can be executed on. 
*/ -inline Window calculate_max_enlarged_window(const ITensorInfo &info, const Steps &steps = Steps(), BorderSize border_size = BorderSize()) +inline Window calculate_max_enlarged_window(const ITensorInfo &info, + const Steps &steps = Steps(), + BorderSize border_size = BorderSize()) { return calculate_max_enlarged_window(info.valid_region(), steps, border_size); } +/** Calculate the squashed or maximum window for the given tensor shape. + * + * If the tensor data resides continuously in the memory, the tensor can be interpreted + * as 1D array and all the dimensions can be squashed together into the x-dimension. + * Otherwise, generate the max window for the given tensor shape. + * + * @param[in] src Tensor info object defining the shape of the input tensor. + * + * @return The maximum window the kernel can be executed on and the preferred split dimension. + */ +std::pair<Window, size_t> calculate_squashed_or_max_window(const ITensorInfo &src); + +/** Calculate the squashed or maximum window for the given tensor shapes. + * + * If the tensor data resides continuously in the memory, the tensor can be interpreted + * as 1D array and all the dimensions can be squashed together into the x-dimension. + * Otherwise, generate the max window for the given tensor shapes. + * + * @param[in] src0 Tensor info object defining the shape of the first input tensor. + * @param[in] src1 Tensor info object defining the shape of the second input tensor. + * + * @return The squashed or maximum window the kernel can be executed on and the preferred split dimension. + */ +std::pair<Window, size_t> calculate_squashed_or_max_window(const ITensorInfo &src0, const ITensorInfo &src1); + /** Function to compute the shape of output and window for the given inputs * * @param[in] infos Input tensor informations @@ -185,7 +219,7 @@ inline Window calculate_max_enlarged_window(const ITensorInfo &info, const Steps * @return A pair of the shape and window */ template <typename... 
Shapes> -std::pair<TensorShape, Window> compute_output_shape_and_window(const Shapes &... shapes) +std::pair<TensorShape, Window> compute_output_shape_and_window(const Shapes &...shapes) { const TensorShape out_shape = TensorShape::broadcast_shape(shapes...); return std::make_pair(out_shape, calculate_max_window(out_shape)); |