Diffstat (limited to 'src/core/helpers')
-rw-r--r--  src/core/helpers/AutoConfiguration.h  |  39
-rw-r--r--  src/core/helpers/LUTManager.cpp       | 100
-rw-r--r--  src/core/helpers/LUTManager.h         |  77
-rw-r--r--  src/core/helpers/MemoryHelpers.h      | 139
-rw-r--r--  src/core/helpers/PoolingHelpers.h     | 219
-rw-r--r--  src/core/helpers/ScaleHelpers.h       | 208
-rw-r--r--  src/core/helpers/SoftmaxHelpers.cpp   |   2
-rw-r--r--  src/core/helpers/Utils.cpp            |  49
-rw-r--r--  src/core/helpers/Utils.h              |  35
-rw-r--r--  src/core/helpers/WindowHelpers.cpp    | 258
-rw-r--r--  src/core/helpers/WindowHelpers.h      | 108
11 files changed, 974 insertions(+), 260 deletions(-)
diff --git a/src/core/helpers/AutoConfiguration.h b/src/core/helpers/AutoConfiguration.h
index 6880a6cb66..9df2a76983 100644
--- a/src/core/helpers/AutoConfiguration.h
+++ b/src/core/helpers/AutoConfiguration.h
@@ -1,5 +1,5 @@
/*
-* Copyright (c) 2020 Arm Limited.
+* Copyright (c) 2020, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -26,6 +26,7 @@
#include "arm_compute/core/ITensorInfo.h"
#include "arm_compute/core/Types.h"
+#include "arm_compute/core/utils/DataTypeUtils.h"
namespace arm_compute
{
@@ -41,10 +42,11 @@ namespace arm_compute
*/
inline bool auto_init_if_empty(ITensorInfo &info,
const TensorShape &shape,
- int num_channels, DataType data_type,
- QuantizationInfo quantization_info = QuantizationInfo())
+ int num_channels,
+ DataType data_type,
+ QuantizationInfo quantization_info = QuantizationInfo())
{
- if(info.tensor_shape().total_size() == 0)
+ if (info.tensor_shape().total_size() == 0)
{
info.set_data_type(data_type);
info.set_num_channels(num_channels);
@@ -57,21 +59,26 @@ inline bool auto_init_if_empty(ITensorInfo &info,
}
/** Auto initialize the tensor info using another tensor info.
-*
-* @param info_sink Tensor info used to check and assign
-* @param info_source Tensor info used to assign
-*
-* @return True if the tensor info has been initialized
-*/
+ *
+ * (COMPMID-6012) This method should remain in sync with the fields of ITensorInfo that have setters.
+ *
+ *
+ * @param info_sink Tensor info used to check and assign
+ * @param info_source Tensor info used to assign
+ *
+ *
+ * @return True if the tensor info has been initialized
+ */
inline bool auto_init_if_empty(ITensorInfo &info_sink, const ITensorInfo &info_source)
{
- if(info_sink.tensor_shape().total_size() == 0)
+ if (info_sink.tensor_shape().total_size() == 0)
{
info_sink.set_data_type(info_source.data_type());
info_sink.set_num_channels(info_source.num_channels());
info_sink.set_tensor_shape(info_source.tensor_shape());
info_sink.set_quantization_info(info_source.quantization_info());
info_sink.set_data_layout(info_source.data_layout());
+ info_sink.set_are_values_constant(info_source.are_values_constant());
return true;
}
@@ -87,7 +94,7 @@ inline bool auto_init_if_empty(ITensorInfo &info_sink, const ITensorInfo &info_s
*/
inline bool set_shape_if_empty(ITensorInfo &info, const TensorShape &shape)
{
- if(info.tensor_shape().total_size() == 0)
+ if (info.tensor_shape().total_size() == 0)
{
info.set_tensor_shape(shape);
return true;
@@ -106,7 +113,7 @@ inline bool set_shape_if_empty(ITensorInfo &info, const TensorShape &shape)
*/
inline bool set_format_if_unknown(ITensorInfo &info, Format format)
{
- if(info.data_type() == DataType::UNKNOWN)
+ if (info.data_type() == DataType::UNKNOWN)
{
info.set_format(format);
return true;
@@ -125,7 +132,7 @@ inline bool set_format_if_unknown(ITensorInfo &info, Format format)
*/
inline bool set_data_type_if_unknown(ITensorInfo &info, DataType data_type)
{
- if(info.data_type() == DataType::UNKNOWN)
+ if (info.data_type() == DataType::UNKNOWN)
{
info.set_data_type(data_type);
return true;
@@ -144,7 +151,7 @@ inline bool set_data_type_if_unknown(ITensorInfo &info, DataType data_type)
*/
inline bool set_data_layout_if_unknown(ITensorInfo &info, DataLayout data_layout)
{
- if(info.data_layout() == DataLayout::UNKNOWN)
+ if (info.data_layout() == DataLayout::UNKNOWN)
{
info.set_data_layout(data_layout);
return true;
@@ -163,7 +170,7 @@ inline bool set_data_layout_if_unknown(ITensorInfo &info, DataLayout data_layout
*/
inline bool set_quantization_info_if_empty(ITensorInfo &info, QuantizationInfo quantization_info)
{
- if(info.quantization_info().empty() && (is_data_type_quantized_asymmetric(info.data_type())))
+ if (info.quantization_info().empty() && (is_data_type_quantized_asymmetric(info.data_type())))
{
info.set_quantization_info(quantization_info);
return true;
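
Note: the auto_init_if_empty overloads above only write to the sink when its shape is still empty, so repeated calls are harmless. A minimal usage sketch (tensor names are illustrative, not part of this patch):

    // Sketch under the usual configure()/validate() flow of the library.
    TensorInfo src_info(TensorShape(32U, 16U), 1, DataType::F16);
    TensorInfo dst_info; // empty: tensor_shape().total_size() == 0

    bool first  = auto_init_if_empty(dst_info, src_info); // true, fields copied from src_info
    bool second = auto_init_if_empty(dst_info, src_info); // false, dst_info already initialised
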
diff --git a/src/core/helpers/LUTManager.cpp b/src/core/helpers/LUTManager.cpp
new file mode 100644
index 0000000000..2effffbe92
--- /dev/null
+++ b/src/core/helpers/LUTManager.cpp
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2024 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "src/core/helpers/LUTManager.h"
+
+namespace arm_compute
+{
+#ifdef __aarch64__
+namespace
+{
+
+float16_t activation(float16_t x, const LUTInfo &info)
+{
+ float16_t out = 0.f;
+ switch (info.act)
+ {
+ case ActivationLayerInfo::ActivationFunction::LOGISTIC:
+ out = 1.f / (1.f + std::exp(-x));
+ break;
+ case ActivationLayerInfo::ActivationFunction::TANH:
+ {
+ out = static_cast<float16_t>(info.alpha * std::tanh(info.beta * x));
+ break;
+ }
+ default:
+ ARM_COMPUTE_ERROR("Unsupported Activation for 16-bit LUT table");
+ break;
+ }
+ return out;
+}
+
+void init_lut_fp16(ActivationLayerInfo::LookupTable65536 *lut, const LUTInfo &info)
+{
+ union Element
+ {
+ uint16_t i = 0;
+ float16_t fp;
+ } item;
+
+ // Fill lut by iterating over all 16 bit values using the union.
+ while (true)
+ {
+ (*lut)[item.i] = activation(item.fp, info);
+ if (item.i == 65535)
+ break;
+ item.i++;
+ }
+}
+} // namespace
+
+std::shared_ptr<ActivationLayerInfo::LookupTable65536> LUTManager::get_lut_table(LUTInfo info)
+{
+ const auto itr = map_fp16.find(info);
+ auto s_ptr = (itr != map_fp16.end()) ? itr->second.lock() : nullptr; // nullptr if invalid or not found.
+ if (s_ptr != nullptr)
+ {
+ // Found and valid
+ return s_ptr; // Return weak ptr as shared ptr
+ }
+ else
+ {
+ // Not found, or pointer not valid
+ // We do not use make_shared to prevent the weak_ptr keeping the control block alive
+ std::shared_ptr<ActivationLayerInfo::LookupTable65536> ptr(new ActivationLayerInfo::LookupTable65536);
+ init_lut_fp16(ptr.get(), info);
+ map_fp16[info] = ptr;
+ return ptr;
+ }
+}
+#endif // __aarch64__
+
+// Static function to get LutManager instance
+LUTManager &LUTManager::get_instance()
+{
+ static auto inst_ = std::make_unique<LUTManager>(); // The one, single instance.
+ return *inst_;
+}
+
+} // namespace arm_compute
diff --git a/src/core/helpers/LUTManager.h b/src/core/helpers/LUTManager.h
new file mode 100644
index 0000000000..f3f4bf2832
--- /dev/null
+++ b/src/core/helpers/LUTManager.h
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2024 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef ACL_SRC_CORE_HELPERS_LUTMANAGER_H
+#define ACL_SRC_CORE_HELPERS_LUTMANAGER_H
+
+#include "arm_compute/core/CoreTypes.h"
+#include "arm_compute/core/QuantizationInfo.h"
+#include "arm_compute/function_info/ActivationLayerInfo.h"
+
+#include <map>
+#include <memory>
+
+namespace arm_compute
+{
+
+struct LUTInfo
+{
+ ActivationLayerInfo::ActivationFunction act;
+ float alpha;
+ float beta;
+ DataType dt;
+ UniformQuantizationInfo qinfo;
+
+ // Operators enable use of map with LUTInfo as key
+ friend bool operator<(const LUTInfo &l, const LUTInfo &r)
+ {
+ const auto l_tup = std::make_tuple(l.act, l.alpha, l.beta, l.dt, l.qinfo.scale, l.qinfo.offset);
+ const auto r_tup = std::make_tuple(r.act, r.alpha, r.beta, r.dt, r.qinfo.scale, r.qinfo.offset);
+
+ return l_tup < r_tup;
+ }
+ bool operator==(const LUTInfo &l) const
+ {
+ return this->act == l.act && this->alpha == l.alpha && this->beta == l.beta && this->dt == l.dt &&
+ this->qinfo == l.qinfo;
+ }
+};
+
+/* Class to handle getting look up table */
+class LUTManager
+{
+public:
+ LUTManager() = default;
+
+ static LUTManager &get_instance();
+#ifdef __aarch64__
+ std::shared_ptr<ActivationLayerInfo::LookupTable65536> get_lut_table(LUTInfo info);
+
+private:
+ std::map<LUTInfo, std::weak_ptr<ActivationLayerInfo::LookupTable65536>> map_fp16{};
+#endif // __aarch64__
+};
+
+} // namespace arm_compute
+#endif // ACL_SRC_CORE_HELPERS_LUTMANAGER_H
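
Note: a rough sketch of how the manager is meant to be used on aarch64 (the values are hypothetical): callers holding the returned shared_ptr keep the table alive, while the internal weak_ptr map lets identical LUTInfo requests share the same 65536-entry fp16 table instead of recomputing it.

    #ifdef __aarch64__
    const LUTInfo info{ActivationLayerInfo::ActivationFunction::LOGISTIC, 1.0f, 1.0f,
                       DataType::F16, UniformQuantizationInfo()};

    auto lut   = LUTManager::get_instance().get_lut_table(info);
    auto again = LUTManager::get_instance().get_lut_table(info);
    // Same LUTInfo -> same underlying table while at least one shared_ptr is alive.
    ARM_COMPUTE_ERROR_ON(lut.get() != again.get());
    #endif // __aarch64__
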
diff --git a/src/core/helpers/MemoryHelpers.h b/src/core/helpers/MemoryHelpers.h
new file mode 100644
index 0000000000..dd094b414c
--- /dev/null
+++ b/src/core/helpers/MemoryHelpers.h
@@ -0,0 +1,139 @@
+/*
+ * Copyright (c) 2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef SRC_COMMON_MEMORY_HELPERS_H
+#define SRC_COMMON_MEMORY_HELPERS_H
+
+#include "arm_compute/core/experimental/Types.h"
+#include "arm_compute/core/ITensorPack.h"
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/runtime/MemoryGroup.h"
+
+#include <memory>
+#include <utility>
+#include <vector>
+
+namespace arm_compute
+{
+inline int offset_int_vec(int offset)
+{
+ return ACL_INT_VEC + offset;
+}
+
+template <typename TensorType>
+struct WorkspaceDataElement
+{
+ int slot{-1};
+ experimental::MemoryLifetime lifetime{experimental::MemoryLifetime::Temporary};
+ std::unique_ptr<TensorType> tensor{nullptr};
+};
+
+template <typename TensorType>
+using WorkspaceData = std::vector<WorkspaceDataElement<TensorType>>;
+
+template <typename TensorType>
+WorkspaceData<TensorType>
+manage_workspace(const experimental::MemoryRequirements &mem_reqs, MemoryGroup &mgroup, ITensorPack &run_pack)
+{
+ ITensorPack dummy_pack = ITensorPack();
+ return manage_workspace<TensorType>(mem_reqs, mgroup, run_pack, dummy_pack);
+}
+
+template <typename TensorType>
+WorkspaceData<TensorType> manage_workspace(const experimental::MemoryRequirements &mem_reqs,
+ MemoryGroup &mgroup,
+ ITensorPack &run_pack,
+ ITensorPack &prep_pack)
+{
+ WorkspaceData<TensorType> workspace_memory;
+ for (const auto &req : mem_reqs)
+ {
+ if (req.size == 0)
+ {
+ continue;
+ }
+
+ const auto aux_info = TensorInfo{TensorShape(req.size), 1, DataType::U8};
+ workspace_memory.emplace_back(
+ WorkspaceDataElement<TensorType>{req.slot, req.lifetime, std::make_unique<TensorType>()});
+
+ auto aux_tensor = workspace_memory.back().tensor.get();
+ ARM_COMPUTE_ERROR_ON_NULLPTR(aux_tensor);
+ aux_tensor->allocator()->init(aux_info, req.alignment);
+
+ if (req.lifetime == experimental::MemoryLifetime::Temporary)
+ {
+ mgroup.manage(aux_tensor);
+ }
+ else
+ {
+ prep_pack.add_tensor(req.slot, aux_tensor);
+ }
+ run_pack.add_tensor(req.slot, aux_tensor);
+ }
+
+ for (auto &mem : workspace_memory)
+ {
+ auto tensor = mem.tensor.get();
+ tensor->allocator()->allocate();
+ }
+
+ return workspace_memory;
+}
+
+template <typename TensorType>
+void release_prepare_tensors(WorkspaceData<TensorType> &workspace, ITensorPack &prep_pack)
+{
+ workspace.erase(std::remove_if(workspace.begin(), workspace.end(),
+ [&prep_pack](auto &wk)
+ {
+ const bool to_erase = wk.lifetime == experimental::MemoryLifetime::Prepare;
+ if (to_erase)
+ {
+ prep_pack.remove_tensor(wk.slot);
+ }
+ return to_erase;
+ }),
+ workspace.end());
+}
+
+/** Utility function to release tensors with lifetime marked as Prepare */
+template <typename TensorType>
+void release_temporaries(const experimental::MemoryRequirements &mem_reqs, WorkspaceData<TensorType> &workspace)
+{
+ for (auto &ws : workspace)
+ {
+ const int slot = ws.slot;
+ for (auto &m : mem_reqs)
+ {
+ if (m.slot == slot && m.lifetime == experimental::MemoryLifetime::Prepare)
+ {
+ auto tensor = ws.tensor.get();
+ tensor->allocator()->free();
+ break;
+ }
+ }
+ }
+}
+} // namespace arm_compute
+#endif /* SRC_COMMON_MEMORY_HELPERS_H */
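
Note: a sketch of the intended call pattern around an operator that advertises auxiliary memory (the operator and tensors here are placeholders, not part of this patch): temporaries go to the memory group, prepare-lifetime tensors go to the prepare pack, and every auxiliary tensor is registered in the run pack.

    MemoryGroup memory_group;
    ITensorPack run_pack  = {{ACL_SRC_0, &src}, {ACL_DST, &dst}};
    ITensorPack prep_pack = {{ACL_SRC_1, &weights}};

    // op->workspace() is assumed to return experimental::MemoryRequirements.
    WorkspaceData<Tensor> workspace =
        manage_workspace<Tensor>(op->workspace(), memory_group, run_pack, prep_pack);

    op->prepare(prep_pack);
    release_temporaries<Tensor>(op->workspace(), workspace); // frees Prepare-lifetime buffers
    op->run(run_pack);
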
diff --git a/src/core/helpers/PoolingHelpers.h b/src/core/helpers/PoolingHelpers.h
new file mode 100644
index 0000000000..9ef045f472
--- /dev/null
+++ b/src/core/helpers/PoolingHelpers.h
@@ -0,0 +1,219 @@
+/*
+* Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef SRC_CORE_HELPERS_POOLINGHELPERS_H
+#define SRC_CORE_HELPERS_POOLINGHELPERS_H
+
+#include "src/core/NEON/NEAsymm.h"
+
+namespace arm_compute
+{
+namespace cpu
+{
+namespace
+{
+
+inline float calculate_avg_scale_pool3d(bool exclude_padding,
+ const Coordinates &id,
+ const int pool_size_x,
+ const int pool_size_y,
+ const int pool_size_z,
+ const int upper_bound_w,
+ const int upper_bound_h,
+ const int upper_bound_d,
+ const int pad_x,
+ const int pad_y,
+ const int pad_z,
+ const int stride_x,
+ const int stride_y,
+ const int stride_z)
+{
+ // Based on NDHWC
+ int start_x = id[1] * stride_x - pad_x;
+ int start_y = id[2] * stride_y - pad_y;
+ int start_z = id[3] * stride_z - pad_z;
+
+ const int end_x = std::min(start_x + pool_size_x, upper_bound_w);
+ const int end_y = std::min(start_y + pool_size_y, upper_bound_h);
+ const int end_z = std::min(start_z + pool_size_z, upper_bound_d);
+ if (exclude_padding)
+ {
+ start_x = std::max(0, start_x);
+ start_y = std::max(0, start_y);
+ start_z = std::max(0, start_z);
+ }
+ return 1.f / ((end_y - start_y) * (end_x - start_x) * (end_z - start_z));
+}
+
+inline float calculate_avg_scale_pool2d(bool exclude_padding,
+ DataLayout data_layout,
+ const Coordinates &id,
+ const int pool_size_x,
+ const int pool_size_y,
+ const int upper_bound_w,
+ const int upper_bound_h,
+ const int pad_x,
+ const int pad_y,
+ const int stride_x,
+ const int stride_y)
+{
+ const unsigned int idx_width = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
+ const unsigned int idx_height = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
+
+ int start_x = id[idx_width] * stride_x - pad_x;
+ int start_y = id[idx_height] * stride_y - pad_y;
+
+ const int end_x = std::min(start_x + pool_size_x, upper_bound_w);
+ const int end_y = std::min(start_y + pool_size_y, upper_bound_h);
+ if (exclude_padding)
+ {
+ start_x = std::max(0, start_x);
+ start_y = std::max(0, start_y);
+ }
+ return 1.f / ((end_y - start_y) * (end_x - start_x));
+}
+
+template <typename T>
+inline typename std::enable_if<std::is_same<T, int8_t>::value, int8_t>::type
+quantize(float val, const UniformQuantizationInfo &info)
+{
+ return quantize_qasymm8_signed(val, info);
+}
+
+template <typename T>
+inline typename std::enable_if<std::is_same<T, uint8_t>::value, uint8_t>::type
+quantize(float val, const UniformQuantizationInfo &info)
+{
+ return quantize_qasymm8(val, info);
+}
+
+template <typename T>
+inline T vcvtq_q32_f32(float32x4_t values);
+
+template <>
+inline uint32x4_t vcvtq_q32_f32(float32x4_t values)
+{
+ return vcvtq_u32_f32(values);
+}
+
+template <>
+inline int32x4_t vcvtq_q32_f32(float32x4_t values)
+{
+ return vcvtq_s32_f32(values);
+}
+
+template <typename T>
+inline float32x4_t vcvtq_f32_q32(T values);
+
+template <>
+inline float32x4_t vcvtq_f32_q32(uint32x4_t values)
+{
+ return vcvtq_f32_u32(values);
+}
+
+template <>
+inline float32x4_t vcvtq_f32_q32(int32x4_t values)
+{
+ return vcvtq_f32_s32(values);
+}
+
+template <typename Tout>
+inline Tout vrequantize_pooling_with_scale(const float32x4x4_t &acc,
+ const float quant_rescale,
+ const float scale_pooling,
+ const int32_t new_offset);
+
+template <>
+inline uint8x16_t vrequantize_pooling_with_scale(const float32x4x4_t &acc,
+ const float quant_rescale,
+ const float scale_pooling,
+ const int32_t new_offset)
+{
+ const float new_scale = quant_rescale / scale_pooling;
+ return vquantize(acc, UniformQuantizationInfo(new_scale, new_offset));
+}
+
+template <>
+inline int8x16_t vrequantize_pooling_with_scale(const float32x4x4_t &acc,
+ const float quant_rescale,
+ const float scale_pooling,
+ const int32_t new_offset)
+{
+ const float new_scale = quant_rescale / scale_pooling;
+ return vquantize_signed(acc, UniformQuantizationInfo(new_scale, new_offset));
+}
+
+template <typename Tin, typename Tout>
+inline Tout vrequantize_pooling(Tin vec1, Tin vec2, const UniformQuantizationInfo &requant_qinfo);
+
+template <>
+inline uint8x16_t vrequantize_pooling(uint8x8_t vec1, uint8x8_t vec2, const UniformQuantizationInfo &requant_qinfo)
+{
+ const float32x4x4_t acc = {{
+ vcvtq_f32_u32(vmovl_u16(vget_low_u16(vmovl_u8((vec1))))),
+ vcvtq_f32_u32(vmovl_u16(vget_high_u16(vmovl_u8((vec1))))),
+ vcvtq_f32_u32(vmovl_u16(vget_low_u16(vmovl_u8((vec2))))),
+ vcvtq_f32_u32(vmovl_u16(vget_high_u16(vmovl_u8((vec2))))),
+ }};
+ return vquantize(acc, requant_qinfo);
+}
+
+template <>
+inline int8x16_t vrequantize_pooling(int8x8_t vec1, int8x8_t vec2, const UniformQuantizationInfo &requant_qinfo)
+{
+ const float32x4x4_t acc = {{
+ vcvtq_f32_s32(vmovl_s16(vget_low_s16(vmovl_s8((vec1))))),
+ vcvtq_f32_s32(vmovl_s16(vget_high_s16(vmovl_s8((vec1))))),
+ vcvtq_f32_s32(vmovl_s16(vget_low_s16(vmovl_s8((vec2))))),
+ vcvtq_f32_s32(vmovl_s16(vget_high_s16(vmovl_s8((vec2))))),
+ }};
+ return vquantize_signed(acc, requant_qinfo);
+}
+
+template <typename T>
+inline T vrequantize_pooling(T &vec, const UniformQuantizationInfo &requant_qinfo);
+
+template <>
+inline uint8x8_t vrequantize_pooling(uint8x8_t &vec, const UniformQuantizationInfo &requant_qinfo)
+{
+ const float32x4x2_t acc = {{
+ vcvtq_f32_u32(vmovl_u16(vget_low_u16(vmovl_u8((vec))))),
+ vcvtq_f32_u32(vmovl_u16(vget_high_u16(vmovl_u8((vec))))),
+ }};
+ return vquantize(acc, requant_qinfo);
+}
+
+template <>
+inline int8x8_t vrequantize_pooling(int8x8_t &vec, const UniformQuantizationInfo &requant_qinfo)
+{
+ const float32x4x2_t acc = {{
+ vcvtq_f32_s32(vmovl_s16(vget_low_s16(vmovl_s8((vec))))),
+ vcvtq_f32_s32(vmovl_s16(vget_high_s16(vmovl_s8((vec))))),
+ }};
+ return vquantize_signed(acc, requant_qinfo);
+}
+
+} // namespace
+} // namespace cpu
+} // namespace arm_compute
+#endif /* SRC_CORE_HELPERS_POOLINGHELPERS_H */
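
Note: a quick numeric check of calculate_avg_scale_pool2d with hypothetical values, assuming the caller passes the unpadded extent as the upper bound when exclude_padding is true: a 3x3 window hanging one pixel over the top-left corner of a 4x4 plane averages over 2x2 = 4 elements, not 9.

    // NHWC, id = (c, x, y, n) = (0, 0, 0, 0), pool 3x3, stride 1, pad 1,
    // exclude_padding = true, upper_bound_w = upper_bound_h = 4:
    //   start_x = 0*1 - 1 = -1 -> clamped to 0,   end_x = min(-1 + 3, 4) = 2
    //   start_y = 0*1 - 1 = -1 -> clamped to 0,   end_y = min(-1 + 3, 4) = 2
    //   scale   = 1 / ((end_y - start_y) * (end_x - start_x)) = 1 / (2 * 2) = 0.25
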
diff --git a/src/core/helpers/ScaleHelpers.h b/src/core/helpers/ScaleHelpers.h
index 827bbef4cd..47605e7385 100644
--- a/src/core/helpers/ScaleHelpers.h
+++ b/src/core/helpers/ScaleHelpers.h
@@ -1,5 +1,5 @@
/*
-* Copyright (c) 2020 Arm Limited.
+* Copyright (c) 2020-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -36,39 +36,6 @@ namespace arm_compute
{
namespace scale_helpers
{
-/** Computes bilinear interpolation using the pointer to the top-left pixel and the pixel's distance between
- * the real coordinates and the smallest following integer coordinates. Input must be in single channel format.
- *
- * @param[in] pixel_ptr Pointer to the top-left pixel value of a single channel input.
- * @param[in] stride Stride to access the bottom-left and bottom-right pixel values
- * @param[in] dx Pixel's distance between the X real coordinate and the smallest X following integer
- * @param[in] dy Pixel's distance between the Y real coordinate and the smallest Y following integer
- *
- * @note dx and dy must be in the range [0, 1.0]
- *
- * @return The bilinear interpolated pixel value
- */
-template <typename T>
-inline T delta_bilinear_c1(const T *pixel_ptr, size_t stride, float dx, float dy)
-{
- ARM_COMPUTE_ERROR_ON(pixel_ptr == nullptr);
-
- const float dx1 = 1.0f - dx;
- const float dy1 = 1.0f - dy;
-
- const T a00 = *pixel_ptr;
- const T a01 = *(pixel_ptr + 1);
- const T a10 = *(pixel_ptr + stride);
- const T a11 = *(pixel_ptr + stride + 1);
-
- const float w1 = dx1 * dy1;
- const float w2 = dx * dy1;
- const float w3 = dx1 * dy;
- const float w4 = dx * dy;
-
- return static_cast<T>(a00 * w1 + a01 * w2 + a10 * w3 + a11 * w4);
-}
-
/** Computes bilinear interpolation for quantized input and output, using the pointer to the top-left pixel and the pixel's distance between
* the real coordinates and the smallest following integer coordinates. Input must be QASYMM8 and in single channel format.
*
@@ -83,8 +50,12 @@ inline T delta_bilinear_c1(const T *pixel_ptr, size_t stride, float dx, float dy
*
* @return The bilinear interpolated pixel value
*/
-inline uint8_t delta_bilinear_c1_quantized(const uint8_t *pixel_ptr, size_t stride, float dx, float dy,
- UniformQuantizationInfo iq_info, UniformQuantizationInfo oq_info)
+inline uint8_t delta_bilinear_c1_quantized(const uint8_t *pixel_ptr,
+ size_t stride,
+ float dx,
+ float dy,
+ UniformQuantizationInfo iq_info,
+ UniformQuantizationInfo oq_info)
{
ARM_COMPUTE_ERROR_ON(pixel_ptr == nullptr);
@@ -118,8 +89,12 @@ inline uint8_t delta_bilinear_c1_quantized(const uint8_t *pixel_ptr, size_t stri
*
* @return The bilinear interpolated pixel value
*/
-inline int8_t delta_bilinear_c1_quantized(const int8_t *pixel_ptr, size_t stride, float dx, float dy,
- UniformQuantizationInfo iq_info, UniformQuantizationInfo oq_info)
+inline int8_t delta_bilinear_c1_quantized(const int8_t *pixel_ptr,
+ size_t stride,
+ float dx,
+ float dy,
+ UniformQuantizationInfo iq_info,
+ UniformQuantizationInfo oq_info)
{
ARM_COMPUTE_ERROR_ON(pixel_ptr == nullptr);
@@ -139,130 +114,6 @@ inline int8_t delta_bilinear_c1_quantized(const int8_t *pixel_ptr, size_t stride
return static_cast<int8_t>(quantize_qasymm8_signed(res, oq_info));
}
-/** Computes linear interpolation using the pointer to the top pixel and the pixel's distance between
- * the real coordinates and the smallest following integer coordinates. Input must be in single channel format.
- *
- * @param[in] pixel_ptr Pointer to the top pixel value of a single channel input.
- * @param[in] stride Stride to access the bottom pixel value
- * @param[in] dy Pixel's distance between the Y real coordinate and the smallest Y following integer
- *
- * @note dy must be in the range [0, 1.0]
- *
- * @return The linear interpolated pixel value
- */
-template <typename T>
-inline T delta_linear_c1_y(const T *pixel_ptr, size_t stride, float dy)
-{
- ARM_COMPUTE_ERROR_ON(pixel_ptr == nullptr);
-
- const float dy1 = 1.0f - dy;
-
- const T a00 = *pixel_ptr;
- const T a10 = *(pixel_ptr + stride);
-
- const float w1 = dy1;
- const float w3 = dy;
-
- return static_cast<T>(a00 * w1 + a10 * w3);
-}
-
-/** Computes linear interpolation using the pointer to the left pixel and the pixel's distance between
- * the real coordinates and the smallest following integer coordinates. Input must be in single channel format.
- *
- * @param[in] pixel_ptr Pointer to the left pixel value of a single channel input.
- * @param[in] dx Pixel's distance between the X real coordinate and the smallest X following integer
- *
- * @note dx must be in the range [0, 1.0]
- *
- * @return The linear interpolated pixel value
- */
-template <typename T>
-inline T delta_linear_c1_x(const T *pixel_ptr, float dx)
-{
- ARM_COMPUTE_ERROR_ON(pixel_ptr == nullptr);
-
- const T a00 = *pixel_ptr;
- const T a01 = *(pixel_ptr + 1);
-
- const float dx1 = 1.0f - dx;
-
- const float w1 = dx1;
- const float w2 = dx;
-
- return static_cast<T>(a00 * w1 + a01 * w2);
-}
-
-/** Return the pixel at (x,y) using bilinear interpolation.
- *
- * @warning Only works if the iterator was created with an IImage
- *
- * @param[in] first_pixel_ptr Pointer to the first pixel of a single channel input.
- * @param[in] stride Stride in bytes of the image;
- * @param[in] x X position of the wanted pixel
- * @param[in] y Y position of the wanted pixel
- *
- * @return The pixel at (x, y) using bilinear interpolation.
- */
-template <typename T>
-inline T pixel_bilinear_c1(const T *first_pixel_ptr, size_t stride, float x, float y)
-{
- ARM_COMPUTE_ERROR_ON(first_pixel_ptr == nullptr);
-
- const int32_t xi = std::floor(x);
- const int32_t yi = std::floor(y);
-
- const float dx = x - xi;
- const float dy = y - yi;
-
- return delta_bilinear_c1(first_pixel_ptr + xi + yi * stride, stride, dx, dy);
-}
-
-/** Return the pixel at (x,y) using bilinear interpolation by clamping when out of borders. The image must be single channel input
- *
- * @warning Only works if the iterator was created with an IImage
- *
- * @param[in] first_pixel_ptr Pointer to the first pixel of a single channel image.
- * @param[in] stride Stride in bytes of the image
- * @param[in] width Width of the image
- * @param[in] height Height of the image
- * @param[in] x X position of the wanted pixel
- * @param[in] y Y position of the wanted pixel
- *
- * @return The pixel at (x, y) using bilinear interpolation.
- */
-template <typename T>
-inline uint8_t
-pixel_bilinear_c1_clamp(const T *first_pixel_ptr, size_t stride, size_t width, size_t height, float x, float y)
-{
- ARM_COMPUTE_ERROR_ON(first_pixel_ptr == nullptr);
-
- x = std::max(-1.f, std::min(x, static_cast<float>(width)));
- y = std::max(-1.f, std::min(y, static_cast<float>(height)));
-
- const float xi = std::floor(x);
- const float yi = std::floor(y);
-
- const float dx = x - xi;
- const float dy = y - yi;
-
- if(dx == 0.0f)
- {
- if(dy == 0.0f)
- {
- return static_cast<T>(first_pixel_ptr[static_cast<int32_t>(xi) + static_cast<int32_t>(yi) * stride]);
- }
- return delta_linear_c1_y(first_pixel_ptr + static_cast<int32_t>(xi) + static_cast<int32_t>(yi) * stride,
- stride, dy);
- }
- if(dy == 0.0f)
- {
- return delta_linear_c1_x(first_pixel_ptr + static_cast<int32_t>(xi) + static_cast<int32_t>(yi) * stride,
- dx);
- }
- return delta_bilinear_c1(first_pixel_ptr + static_cast<int32_t>(xi) + static_cast<int32_t>(yi) * stride, stride,
- dx, dy);
-}
-
/** Return the pixel at (x,y) using area interpolation by clamping when out of borders. The image must be single channel U8
*
* @note The interpolation area depends on the width and height ration of the input and output images
@@ -279,9 +130,8 @@ pixel_bilinear_c1_clamp(const T *first_pixel_ptr, size_t stride, size_t width, s
*
* @return The pixel at (x, y) using area interpolation.
*/
-inline uint8_t
-pixel_area_c1u8_clamp(const uint8_t *first_pixel_ptr, size_t stride, size_t width, size_t height, float wr,
- float hr, int x, int y)
+inline uint8_t pixel_area_c1u8_clamp(
+ const uint8_t *first_pixel_ptr, size_t stride, size_t width, size_t height, float wr, float hr, int x, int y)
{
ARM_COMPUTE_ERROR_ON(first_pixel_ptr == nullptr);
@@ -316,7 +166,7 @@ pixel_area_c1u8_clamp(const uint8_t *first_pixel_ptr, size_t stride, size_t widt
// Sum pixels in area
int sum = 0;
- for(int j = yi + y_from, je = yi + y_to; j <= je; ++j)
+ for (int j = yi + y_from, je = yi + y_to; j <= je; ++j)
{
const uint8_t *ptr = first_pixel_ptr + j * stride + xi + x_from;
sum = std::accumulate(ptr, ptr + x_elements, sum);
@@ -325,6 +175,32 @@ pixel_area_c1u8_clamp(const uint8_t *first_pixel_ptr, size_t stride, size_t widt
// Return average
return sum / (x_elements * y_elements);
}
+
+/** Computes bilinear interpolation using the top-left, top-right, bottom-left, bottom-right pixels and the pixel's distance between
+ * the real coordinates and the smallest following integer coordinates.
+ *
+ * @param[in] a00 The top-left pixel value.
+ * @param[in] a01 The top-right pixel value.
+ * @param[in] a10 The bottom-left pixel value.
+ * @param[in] a11 The bottom-right pixel value.
+ * @param[in] dx_val Pixel's distance between the X real coordinate and the smallest X following integer
+ * @param[in] dy_val Pixel's distance between the Y real coordinate and the smallest Y following integer
+ *
+ * @note dx and dy must be in the range [0, 1.0]
+ *
+ * @return The bilinear interpolated pixel value
+ */
+inline float delta_bilinear(float a00, float a01, float a10, float a11, float dx_val, float dy_val)
+{
+ const float dx1_val = 1.0f - dx_val;
+ const float dy1_val = 1.0f - dy_val;
+
+ const float w1 = dx1_val * dy1_val;
+ const float w2 = dx_val * dy1_val;
+ const float w3 = dx1_val * dy_val;
+ const float w4 = dx_val * dy_val;
+ return a00 * w1 + a01 * w2 + a10 * w3 + a11 * w4;
+}
} // namespace scale_helpers
} // namespace arm_compute
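
Note: the new delta_bilinear takes the four neighbouring pixel values directly instead of a pointer and stride, which is what the removed delta_bilinear_c1 template used to do. A quick numeric check with made-up values:

    // Half-way between the four neighbours: all four weights are 0.25.
    const float v = scale_helpers::delta_bilinear(0.f, 4.f, 8.f, 12.f, 0.5f, 0.5f);
    // v == 0.25f * (0.f + 4.f + 8.f + 12.f) == 6.f
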
diff --git a/src/core/helpers/SoftmaxHelpers.cpp b/src/core/helpers/SoftmaxHelpers.cpp
index 71b971af31..8184991ab5 100644
--- a/src/core/helpers/SoftmaxHelpers.cpp
+++ b/src/core/helpers/SoftmaxHelpers.cpp
@@ -29,7 +29,7 @@ namespace softmax_helpers
{
PermutationVector get_permutation_vector_from_softmax_axis(size_t axis)
{
- switch(axis)
+ switch (axis)
{
case 1:
return PermutationVector(1U, 0U, 2U, 3U);
diff --git a/src/core/helpers/Utils.cpp b/src/core/helpers/Utils.cpp
new file mode 100644
index 0000000000..f8895d8a3c
--- /dev/null
+++ b/src/core/helpers/Utils.cpp
@@ -0,0 +1,49 @@
+/*
+* Copyright (c) 2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "src/core/helpers/Utils.h"
+
+namespace arm_compute
+{
+bool has_holes(const ITensorInfo &info)
+{
+ return has_holes(info, info.num_dimensions() - 1);
+}
+
+bool has_holes(const ITensorInfo &info, size_t dimension)
+{
+ const auto &shape = info.tensor_shape();
+ const auto &strides = info.strides_in_bytes();
+ size_t squashed_bytes = info.element_size();
+
+ for (size_t dim = 0; dim <= dimension; ++dim)
+ {
+ if (strides[dim] != squashed_bytes)
+ {
+ return true;
+ }
+ squashed_bytes *= shape[dim];
+ }
+ return false;
+}
+} // namespace arm_compute
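
Note: a small sketch of what the new check reports (shape and padding values are illustrative): padding inflates the stride of the next dimension beyond element_size() times the product of the lower dimensions, which is exactly the condition has_holes tests for.

    TensorInfo dense(TensorShape(8U, 8U), 1, DataType::F32);
    // dense strides: (4, 32) bytes -> no holes
    ARM_COMPUTE_ERROR_ON(has_holes(dense));

    TensorInfo padded = dense;
    padded.extend_padding(PaddingSize(0, 1, 0, 0)); // one element of right padding on X
    // padded Y stride becomes 9 * 4 = 36 bytes != 8 * 4 -> hole detected
    ARM_COMPUTE_ERROR_ON(!has_holes(padded));
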
diff --git a/src/core/helpers/Utils.h b/src/core/helpers/Utils.h
index 3c3b2b93f9..a17a78f7ee 100644
--- a/src/core/helpers/Utils.h
+++ b/src/core/helpers/Utils.h
@@ -1,5 +1,5 @@
/*
-* Copyright (c) 2020 Arm Limited.
+* Copyright (c) 2020-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,8 +21,8 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef SRC_CORE_HELPERS_UTILS_H
-#define SRC_CORE_HELPERS_UTILS_H
+#ifndef ACL_SRC_CORE_HELPERS_UTILS_H
+#define ACL_SRC_CORE_HELPERS_UTILS_H
#include "arm_compute/core/ITensorInfo.h"
@@ -38,14 +38,14 @@ namespace arm_compute
* calculated based on the tensor shape and the strides of lower dimensions.
*/
template <typename T, typename... Ts>
-inline Strides compute_strides(const ITensorInfo &info, T stride_x, Ts &&... fixed_strides)
+inline Strides compute_strides(const ITensorInfo &info, T stride_x, Ts &&...fixed_strides)
{
const TensorShape &shape = info.tensor_shape();
// Create strides object
Strides strides(stride_x, fixed_strides...);
- for(size_t i = 1 + sizeof...(Ts); i < info.num_dimensions(); ++i)
+ for (size_t i = 1 + sizeof...(Ts); i < info.num_dimensions(); ++i)
{
strides.set(i, shape[i - 1] * strides[i - 1]);
}
@@ -92,6 +92,29 @@ inline unsigned int get_next_power_two(unsigned int x)
return x;
}
+
+/** Check if the tensor has any holes.
+ *
+ * A hole is defined as any gap in the tensor between two consecutive values. This can be a result of extending
+ * the paddings or manipulating the strides of the tensor
+ *
+ * @param[in] info Tensor info object defining the shape of the input tensor.
+ *
+ * @note This function checks for holes in all dimensions.
+ *
+ */
+bool has_holes(const ITensorInfo &info);
+
+/** Check if the tensor has any holes.
+ *
+ * @param[in] info Tensor info object defining the shape of the input tensor.
+ * @param[in] dimension Highest dimension to check.
+ *
+ * @note This function checks for holes in all the dimensions up to and including the highest dimension.
+ *
+ */
+bool has_holes(const ITensorInfo &info, size_t dimension);
+
} // namespace arm_compute
-#endif /* SRC_CORE_HELPERS_UTILS_H */
+#endif // ACL_SRC_CORE_HELPERS_UTILS_H
diff --git a/src/core/helpers/WindowHelpers.cpp b/src/core/helpers/WindowHelpers.cpp
index ba10eb9775..30a55fcbc6 100644
--- a/src/core/helpers/WindowHelpers.cpp
+++ b/src/core/helpers/WindowHelpers.cpp
@@ -1,5 +1,5 @@
/*
-* Copyright (c) 2020 Arm Limited.
+* Copyright (c) 2020-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -25,9 +25,10 @@
namespace arm_compute
{
-Window calculate_max_window(const ValidRegion &valid_region, const Steps &steps, bool skip_border, BorderSize border_size)
+Window
+calculate_max_window(const ValidRegion &valid_region, const Steps &steps, bool skip_border, BorderSize border_size)
{
- if(!skip_border)
+ if (!skip_border)
{
border_size = BorderSize(0);
}
@@ -38,40 +39,104 @@ Window calculate_max_window(const ValidRegion &valid_region, const Steps &steps,
Window window;
window.set(0, Window::Dimension(
- // Skip the border left of the image
- anchor[0] + border_size.left,
- // Skip the border right of the image
- // Make sure the window width is a multiple of the step size
- anchor[0] + border_size.left + ceil_to_multiple(std::max(0, static_cast<int>(shape[0]) - static_cast<int>(border_size.left) - static_cast<int>(border_size.right)), steps[0]),
- steps[0]));
+ // Skip the border left of the image
+ anchor[0] + border_size.left,
+ // Skip the border right of the image
+ // Make sure the window width is a multiple of the step size
+ anchor[0] + border_size.left +
+ ceil_to_multiple(std::max(0, static_cast<int>(shape[0]) - static_cast<int>(border_size.left) -
+ static_cast<int>(border_size.right)),
+ steps[0]),
+ steps[0]));
size_t n = 1;
- if(anchor.num_dimensions() > 1)
+ if (anchor.num_dimensions() > 1)
{
- window.set(1, Window::Dimension(
+ window.set(1,
+ Window::Dimension(
// Skip the border above the image
anchor[1] + border_size.top,
// Skip the border below the image
- anchor[1] + border_size.top + ceil_to_multiple(std::max(0, static_cast<int>(shape[1]) - static_cast<int>(border_size.top) - static_cast<int>(border_size.bottom)), steps[1]),
+ anchor[1] + border_size.top +
+ ceil_to_multiple(std::max(0, static_cast<int>(shape[1]) - static_cast<int>(border_size.top) -
+ static_cast<int>(border_size.bottom)),
+ steps[1]),
steps[1]));
++n;
}
- if(anchor.num_dimensions() > 2)
+ if (anchor.num_dimensions() > 2)
{
window.set(2, Window::Dimension(anchor[2], std::max<size_t>(1, shape[2]), steps[2]));
++n;
}
- for(; n < anchor.num_dimensions(); ++n)
+ for (; n < anchor.num_dimensions(); ++n)
{
window.set(n, Window::Dimension(anchor[n], std::max<size_t>(1, shape[n])));
}
- for(; n < Coordinates::num_max_dimensions; ++n)
+ for (; n < Coordinates::num_max_dimensions; ++n)
+ {
+ window.set(n, Window::Dimension(0, 1));
+ }
+
+ return window;
+}
+
+Window calculate_max_window(const TensorShape &shape, const Steps &steps, bool skip_border, BorderSize border_size)
+{
+ if (!skip_border)
+ {
+ border_size = BorderSize(0);
+ }
+
+ Window window;
+
+ window.set(0, Window::Dimension(
+ // Skip the border left of the image
+ border_size.left,
+ // Skip the border right of the image
+ // Make sure the window width is a multiple of the step size
+ border_size.left +
+ ceil_to_multiple(std::max(0, static_cast<int>(shape[0]) - static_cast<int>(border_size.left) -
+ static_cast<int>(border_size.right)),
+ steps[0]),
+ steps[0]));
+
+ size_t n = 1;
+
+ if (shape.num_dimensions() > 1)
+ {
+ window.set(1, Window::Dimension(
+ // Skip the border above the image
+ border_size.top,
+ // Skip the border below the image
+ border_size.top + ceil_to_multiple(std::max(0, static_cast<int>(shape[1]) -
+ static_cast<int>(border_size.top) -
+ static_cast<int>(border_size.bottom)),
+ steps[1]),
+ steps[1]));
+
+ ++n;
+ }
+
+ if (shape.num_dimensions() > 2)
+ {
+ window.set(2, Window::Dimension(0, std::max<size_t>(1, shape[2]), steps[2]));
+
+ ++n;
+ }
+
+ for (; n < shape.num_dimensions(); ++n)
+ {
+ window.set(n, Window::Dimension(0, std::max<size_t>(1, shape[n])));
+ }
+
+ for (; n < Coordinates::num_max_dimensions; ++n)
{
window.set(n, Window::Dimension(0, 1));
}
@@ -87,40 +152,42 @@ Window calculate_max_enlarged_window(const ValidRegion &valid_region, const Step
Window window;
window.set(0, Window::Dimension(
- // move the anchor to the start from the border
- anchor[0] - border_size.left,
- // move the anchor to include the right end border
- // Make sure the window width is a multiple of the step size
- anchor[0] - border_size.left + ceil_to_multiple(shape[0] + border_size.left + border_size.right, steps[0]),
- steps[0]));
+ // move the anchor to the start from the border
+ anchor[0] - border_size.left,
+ // move the anchor to include the right end border
+ // Make sure the window width is a multiple of the step size
+ anchor[0] - border_size.left +
+ ceil_to_multiple(shape[0] + border_size.left + border_size.right, steps[0]),
+ steps[0]));
size_t n = 1;
- if(anchor.num_dimensions() > 1)
+ if (anchor.num_dimensions() > 1)
{
window.set(1, Window::Dimension(
- // Include the border above the image
- anchor[1] - border_size.top,
- // Include the border below the image
- anchor[1] - border_size.top + ceil_to_multiple(shape[1] + border_size.top + border_size.bottom, steps[1]),
- steps[1]));
+ // Include the border above the image
+ anchor[1] - border_size.top,
+ // Include the border below the image
+ anchor[1] - border_size.top +
+ ceil_to_multiple(shape[1] + border_size.top + border_size.bottom, steps[1]),
+ steps[1]));
++n;
}
- if(anchor.num_dimensions() > 2)
+ if (anchor.num_dimensions() > 2)
{
window.set(2, Window::Dimension(0, std::max<size_t>(1, shape[n]), steps[2]));
++n;
}
- for(; n < anchor.num_dimensions(); ++n)
+ for (; n < anchor.num_dimensions(); ++n)
{
window.set(n, Window::Dimension(anchor[n], std::max<size_t>(1, shape[n])));
}
- for(; n < Coordinates::num_max_dimensions; ++n)
+ for (; n < Coordinates::num_max_dimensions; ++n)
{
window.set(n, Window::Dimension(0, 1));
}
@@ -128,9 +195,12 @@ Window calculate_max_enlarged_window(const ValidRegion &valid_region, const Step
return window;
}
-Window calculate_max_window_horizontal(const ValidRegion &valid_region, const Steps &steps, bool skip_border, BorderSize border_size)
+Window calculate_max_window_horizontal(const ValidRegion &valid_region,
+ const Steps &steps,
+ bool skip_border,
+ BorderSize border_size)
{
- if(skip_border)
+ if (skip_border)
{
border_size.top = 0;
border_size.bottom = 0;
@@ -147,37 +217,133 @@ Window calculate_max_window_horizontal(const ValidRegion &valid_region, const St
Window window;
window.set(0, Window::Dimension(
- // Skip the border left of the image
- anchor[0] + border_size.left,
- // Skip the border right of the image
- // Make sure the window width is a multiple of the step size
- anchor[0] + border_size.left + ceil_to_multiple(std::max(0, static_cast<int>(shape[0]) - static_cast<int>(border_size.left) - static_cast<int>(border_size.right)), steps[0]),
- steps[0]));
+ // Skip the border left of the image
+ anchor[0] + border_size.left,
+ // Skip the border right of the image
+ // Make sure the window width is a multiple of the step size
+ anchor[0] + border_size.left +
+ ceil_to_multiple(std::max(0, static_cast<int>(shape[0]) - static_cast<int>(border_size.left) -
+ static_cast<int>(border_size.right)),
+ steps[0]),
+ steps[0]));
size_t n = 1;
- if(anchor.num_dimensions() > 1)
+ if (anchor.num_dimensions() > 1)
{
window.set(1, Window::Dimension(
- // Skip the border above the image
- anchor[1] - border_size.top,
- // Skip the border below the image
- anchor[1] + shape[1] + border_size.bottom,
- 1));
+ // Skip the border above the image
+ anchor[1] - border_size.top,
+ // Skip the border below the image
+ anchor[1] + shape[1] + border_size.bottom, 1));
++n;
}
- for(; n < anchor.num_dimensions(); ++n)
+ for (; n < anchor.num_dimensions(); ++n)
{
window.set(n, Window::Dimension(anchor[n], std::max<size_t>(1, shape[n])));
}
- for(; n < Coordinates::num_max_dimensions; ++n)
+ for (; n < Coordinates::num_max_dimensions; ++n)
{
window.set(n, Window::Dimension(0, 1));
}
return window;
}
+
+std::pair<Window, size_t> calculate_squashed_or_max_window(const ITensorInfo &src0, const ITensorInfo &src1)
+{
+ const auto &shape0 = src0.tensor_shape();
+ const auto &shape1 = src1.tensor_shape();
+ const auto &strides0 = src0.strides_in_bytes();
+ const auto &strides1 = src1.strides_in_bytes();
+ const auto num_dimensions = std::max(src0.num_dimensions(), src1.num_dimensions());
+
+ Window win;
+ size_t split_dimension = Window::DimY;
+ size_t dim = 0;
+
+ size_t squashed_bytes = src0.element_size();
+
+ // Try to squash the low dimensions together.
+ for (; dim < num_dimensions; ++dim)
+ {
+ if (shape0[dim] != shape1[dim] || strides0[dim] != squashed_bytes || strides1[dim] != squashed_bytes)
+ {
+ break;
+ }
+
+ squashed_bytes *= shape0[dim];
+ }
+
+ if (dim == num_dimensions)
+ {
+ auto squashed_elements = squashed_bytes / src0.element_size();
+
+ split_dimension = Window::DimX;
+
+ // The input tensors can be interpreted as 1D array.
+ win.set(0, Window::Dimension(0, squashed_elements, 1));
+
+ for (dim = 1; dim < Coordinates::num_max_dimensions; ++dim)
+ {
+ win.set(dim, Window::Dimension(0, 1, 1));
+ }
+ }
+ else
+ {
+ // Generates the max window.
+ for (dim = 0; dim < Coordinates::num_max_dimensions; ++dim)
+ {
+ win.set(dim, Window::Dimension(0, std::max(shape0[dim], shape1[dim]), 1));
+ }
+ }
+
+ return std::make_pair(win, split_dimension);
+}
+
+std::pair<Window, size_t> calculate_squashed_or_max_window(const ITensorInfo &src)
+{
+ const auto &shape = src.tensor_shape();
+ const auto &strides = src.strides_in_bytes();
+ const auto num_dimensions = src.num_dimensions();
+
+ Window win;
+ size_t split_dimension = Window::DimY;
+ size_t dim = 0;
+ size_t squashed_bytes = src.element_size();
+
+ // Try to squash the low dimensions together.
+ for (; dim < num_dimensions; ++dim)
+ {
+ if (strides[dim] != squashed_bytes)
+ {
+ break;
+ }
+ squashed_bytes *= shape[dim];
+ }
+ if (dim == num_dimensions)
+ {
+ const auto squashed_elements = squashed_bytes / src.element_size();
+ split_dimension = Window::DimX;
+ // The input tensor can be interpreted as 1D array.
+ win.set(0, Window::Dimension(0, squashed_elements, 1));
+ for (dim = 1; dim < Coordinates::num_max_dimensions; ++dim)
+ {
+ win.set(dim, Window::Dimension(0, 1, 1));
+ }
+ }
+ else
+ {
+ // Generate the max window.
+ for (dim = 0; dim < Coordinates::num_max_dimensions; ++dim)
+ {
+ win.set(dim, Window::Dimension(0, shape[dim], 1));
+ }
+ }
+ return std::make_pair(win, split_dimension);
+}
+
} // namespace arm_compute
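
Note: a sketch of the two possible outcomes of the single-tensor overload (shapes are illustrative): a dense tensor collapses into a one-dimensional window with DimX as the preferred split, while any hole in the stride chain falls back to the plain maximum window split along DimY.

    TensorInfo dense(TensorShape(16U, 8U, 2U), 1, DataType::F32);
    const auto squashed = calculate_squashed_or_max_window(dense);
    // squashed.first spans [0, 16 * 8 * 2) on X, squashed.second == Window::DimX

    TensorInfo padded = dense;
    padded.extend_padding(PaddingSize(0, 1, 0, 0)); // breaks the contiguous stride chain
    const auto fallback = calculate_squashed_or_max_window(padded);
    // fallback.first keeps the per-dimension ranges, fallback.second == Window::DimY
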
diff --git a/src/core/helpers/WindowHelpers.h b/src/core/helpers/WindowHelpers.h
index 9bc2135b6d..e404c18e8a 100644
--- a/src/core/helpers/WindowHelpers.h
+++ b/src/core/helpers/WindowHelpers.h
@@ -1,5 +1,5 @@
/*
-* Copyright (c) 2020 Arm Limited.
+* Copyright (c) 2020-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -43,23 +43,13 @@ namespace arm_compute
* influence the returned value.
*/
template <typename... Ts>
-bool update_window_and_padding(Window &win, Ts &&... patterns)
+bool update_window_and_padding(Window &win, Ts &&...patterns)
{
bool window_changed = false;
- utility::for_each([&](const IAccessWindow & w)
- {
- window_changed |= w.update_window_if_needed(win);
- },
- patterns...);
-
- bool padding_changed = false;
+ utility::for_each([&](const IAccessWindow &w) { window_changed |= w.update_window_if_needed(win); }, patterns...);
- utility::for_each([&](IAccessWindow & w)
- {
- padding_changed |= w.update_padding_if_needed(win);
- },
- patterns...);
+ utility::for_each([&](IAccessWindow &w) { w.update_padding_if_needed(win); }, patterns...);
return window_changed;
}
@@ -71,18 +61,18 @@ bool update_window_and_padding(Window &win, Ts &&... patterns)
* @return Intersection of all regions.
*/
template <typename... Ts>
-ValidRegion intersect_valid_regions(const Ts &... regions)
+ValidRegion intersect_valid_regions(const Ts &...regions)
{
- auto intersect = [](const ValidRegion & r1, const ValidRegion & r2) -> ValidRegion
+ auto intersect = [](const ValidRegion &r1, const ValidRegion &r2) -> ValidRegion
{
ValidRegion region;
- for(size_t d = 0; d < std::min(r1.anchor.num_dimensions(), r2.anchor.num_dimensions()); ++d)
+ for (size_t d = 0; d < std::min(r1.anchor.num_dimensions(), r2.anchor.num_dimensions()); ++d)
{
region.anchor.set(d, std::max(r1.anchor[d], r2.anchor[d]));
}
- for(size_t d = 0; d < std::min(r1.shape.num_dimensions(), r2.shape.num_dimensions()); ++d)
+ for (size_t d = 0; d < std::min(r1.shape.num_dimensions(), r2.shape.num_dimensions()); ++d)
{
region.shape.set(d, std::min(r1.shape[d], r2.shape[d]));
}
@@ -103,7 +93,24 @@ ValidRegion intersect_valid_regions(const Ts &... regions)
*
* @return The maximum window the kernel can be executed on.
*/
-Window calculate_max_window(const ValidRegion &valid_region, const Steps &steps = Steps(), bool skip_border = false, BorderSize border_size = BorderSize());
+Window calculate_max_window(const ValidRegion &valid_region,
+ const Steps &steps = Steps(),
+ bool skip_border = false,
+ BorderSize border_size = BorderSize());
+
+/** Calculate the maximum window for a given tensor shape and border setting
+ *
+ * @param[in] shape Shape of the tensor space
+ * @param[in] steps (Optional) Number of elements processed for each step.
+ * @param[in] skip_border (Optional) If true exclude the border region from the window.
+ * @param[in] border_size (Optional) Border size.
+ *
+ * @return The maximum window the kernel can be executed on.
+ */
+Window calculate_max_window(const TensorShape &shape,
+ const Steps &steps = Steps(),
+ bool skip_border = false,
+ BorderSize border_size = BorderSize());
/** Calculate the maximum window for a given tensor shape and border setting
*
@@ -114,9 +121,12 @@ Window calculate_max_window(const ValidRegion &valid_region, const Steps &steps
*
* @return The maximum window the kernel can be executed on.
*/
-inline Window calculate_max_window(const ITensorInfo &info, const Steps &steps = Steps(), bool skip_border = false, BorderSize border_size = BorderSize())
+inline Window calculate_max_window(const ITensorInfo &info,
+ const Steps &steps = Steps(),
+ bool skip_border = false,
+ BorderSize border_size = BorderSize())
{
- return calculate_max_window(info.valid_region(), steps, skip_border, border_size);
+ return calculate_max_window(info.tensor_shape(), steps, skip_border, border_size);
}
/** Calculate the maximum window used by a horizontal kernel for a given tensor shape and border setting
@@ -128,7 +138,10 @@ inline Window calculate_max_window(const ITensorInfo &info, const Steps &steps =
*
* @return The maximum window the kernel can be executed on.
*/
-Window calculate_max_window_horizontal(const ValidRegion &valid_region, const Steps &steps = Steps(), bool skip_border = false, BorderSize border_size = BorderSize());
+Window calculate_max_window_horizontal(const ValidRegion &valid_region,
+ const Steps &steps = Steps(),
+ bool skip_border = false,
+ BorderSize border_size = BorderSize());
/** Calculate the maximum window used by a horizontal kernel for a given tensor shape and border setting
*
@@ -139,7 +152,10 @@ Window calculate_max_window_horizontal(const ValidRegion &valid_region, const St
*
* @return The maximum window the kernel can be executed on.
*/
-inline Window calculate_max_window_horizontal(const ITensorInfo &info, const Steps &steps = Steps(), bool skip_border = false, BorderSize border_size = BorderSize())
+inline Window calculate_max_window_horizontal(const ITensorInfo &info,
+ const Steps &steps = Steps(),
+ bool skip_border = false,
+ BorderSize border_size = BorderSize())
{
return calculate_max_window_horizontal(info.valid_region(), steps, skip_border, border_size);
}
@@ -152,7 +168,9 @@ inline Window calculate_max_window_horizontal(const ITensorInfo &info, const Ste
*
* @return The maximum window the kernel can be executed on.
*/
-Window calculate_max_enlarged_window(const ValidRegion &valid_region, const Steps &steps = Steps(), BorderSize border_size = BorderSize());
+Window calculate_max_enlarged_window(const ValidRegion &valid_region,
+ const Steps &steps = Steps(),
+ BorderSize border_size = BorderSize());
/** Calculate the maximum window for a given tensor shape and border setting. The window will also includes the border.
*
@@ -162,10 +180,50 @@ Window calculate_max_enlarged_window(const ValidRegion &valid_region, const Step
*
* @return The maximum window the kernel can be executed on.
*/
-inline Window calculate_max_enlarged_window(const ITensorInfo &info, const Steps &steps = Steps(), BorderSize border_size = BorderSize())
+inline Window calculate_max_enlarged_window(const ITensorInfo &info,
+ const Steps &steps = Steps(),
+ BorderSize border_size = BorderSize())
{
return calculate_max_enlarged_window(info.valid_region(), steps, border_size);
}
+
+/** Calculate the squashed or maximum window for the given tensor shape.
+ *
+ * If the tensor data resides continuously in the memory, the tensor can be interpreted
+ * as 1D array and all the dimensions can be squashed together into the x-dimension.
+ * Otherwise, generate the max window for the given tensor shape.
+ *
+ * @param[in] src Tensor info object defining the shape of the input tensor.
+ *
+ * @return The maximum window the kernel can be executed on and the preferred split dimension.
+ */
+std::pair<Window, size_t> calculate_squashed_or_max_window(const ITensorInfo &src);
+
+/** Calculate the squashed or maximum window for the given tensor shapes.
+ *
+ * If the tensor data resides continuously in the memory, the tensor can be interpreted
+ * as 1D array and all the dimensions can be squashed together into the x-dimension.
+ * Otherwise, generate the max window for the given tensor shapes.
+ *
+ * @param[in] src0 Tensor info object defining the shape of the first input tensor.
+ * @param[in] src1 Tensor info object defining the shape of the second input tensor.
+ *
+ * @return The squashed or maximum window the kernel can be executed on and the preferred split dimension.
+ */
+std::pair<Window, size_t> calculate_squashed_or_max_window(const ITensorInfo &src0, const ITensorInfo &src1);
+
+/** Function to compute the shape of output and window for the given inputs
+ *
+ * @param[in] infos Input tensor informations
+ *
+ * @return A pair of the shape and window
+ */
+template <typename... Shapes>
+std::pair<TensorShape, Window> compute_output_shape_and_window(const Shapes &...shapes)
+{
+ const TensorShape out_shape = TensorShape::broadcast_shape(shapes...);
+ return std::make_pair(out_shape, calculate_max_window(out_shape));
+}
#endif /* DOXYGEN_SKIP_THIS */
} // namespace arm_compute
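
Note: compute_output_shape_and_window simply broadcasts its input shapes and feeds the result to the new shape-based calculate_max_window overload; a minimal sketch with hypothetical shapes:

    const TensorShape a(16U, 8U);
    const TensorShape b(16U, 1U);
    const auto out = compute_output_shape_and_window(a, b);
    // out.first  == TensorShape(16U, 8U)    (broadcast of a and b)
    // out.second == calculate_max_window(out.first)
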