Diffstat (limited to 'arm_compute/core/utils/misc')
-rw-r--r-- | arm_compute/core/utils/misc/CRTP.h            |  55
-rw-r--r-- | arm_compute/core/utils/misc/Cast.h            | 119
-rw-r--r-- | arm_compute/core/utils/misc/ICloneable.h      |  48
-rw-r--r-- | arm_compute/core/utils/misc/InfoHelpers.h     |  62
-rw-r--r-- | arm_compute/core/utils/misc/Iterable.h        | 108
-rw-r--r-- | arm_compute/core/utils/misc/MMappedFile.h     |   6
-rw-r--r-- | arm_compute/core/utils/misc/Macros.h          |  11
-rw-r--r-- | arm_compute/core/utils/misc/Random.h          |  98
-rw-r--r-- | arm_compute/core/utils/misc/Requires.h        |  51
-rw-r--r-- | arm_compute/core/utils/misc/Rounding.h        | 205
-rw-r--r-- | arm_compute/core/utils/misc/SaturateCast.h    | 218
-rw-r--r-- | arm_compute/core/utils/misc/ShapeCalculator.h | 779
-rw-r--r-- | arm_compute/core/utils/misc/Traits.h          |   4
-rw-r--r-- | arm_compute/core/utils/misc/Utility.h         |  57
14 files changed, 644 insertions, 1177 deletions
diff --git a/arm_compute/core/utils/misc/CRTP.h b/arm_compute/core/utils/misc/CRTP.h deleted file mode 100644 index 037c69ab1d..0000000000 --- a/arm_compute/core/utils/misc/CRTP.h +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Copyright (c) 2018-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_MISC_CRTP_H -#define ARM_COMPUTE_MISC_CRTP_H - -namespace arm_compute -{ -namespace misc -{ -/** Curiously recurring template pattern Interface */ -template <typename T, template <typename> class Type> -struct CRTP -{ -public: - /** Exact type */ - using ExactType = T; - -protected: - const T &impl() const - { - return static_cast<const T &>(*this); - } - T &impl() - { - return static_cast<T &>(*this); - } - -private: - CRTP() = default; - friend Type<T>; -}; -} // namespace misc -} // namespace arm_compute -#endif /* ARM_COMPUTE_MISC_CRTP_H */ diff --git a/arm_compute/core/utils/misc/Cast.h b/arm_compute/core/utils/misc/Cast.h deleted file mode 100644 index fc6246aace..0000000000 --- a/arm_compute/core/utils/misc/Cast.h +++ /dev/null @@ -1,119 +0,0 @@ -/* - * Copyright (c) 2018-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_MISC_CAST_H -#define ARM_COMPUTE_MISC_CAST_H - -#include "arm_compute/core/Error.h" - -namespace arm_compute -{ -namespace utils -{ -namespace cast -{ -/** Polymorphic cast between two types - * - * @warning Will throw an exception if cast cannot take place - * - * @tparam Target Target to cast type - * @tparam Source Source from cast type - * - * @param[in] v Value to cast - * - * @return The casted value - */ -template <typename Target, typename Source> -inline Target polymorphic_cast(Source *v) -{ - if(dynamic_cast<Target>(v) == nullptr) - { - ARM_COMPUTE_THROW(std::bad_cast()); - } - return static_cast<Target>(v); -} - -/** Polymorphic down cast between two types - * - * @warning Will assert if cannot take place - * - * @tparam Target Target to cast type - * @tparam Source Source from cast type - * - * @param[in] v Value to cast - * - * @return The casted value - */ -template <typename Target, typename Source> -inline Target polymorphic_downcast(Source *v) -{ - ARM_COMPUTE_ERROR_ON(dynamic_cast<Target>(v) != static_cast<Target>(v)); - return static_cast<Target>(v); -} - -/** Polymorphic cast between two unique pointer types - * - * @warning Will throw an exception if cast cannot take place - * - * @tparam Target Target to cast type - * @tparam Source Source from cast type - * @tparam Deleter Deleter function type - * - * @param[in] v Value to cast - * - * @return The casted value - */ -template <typename Target, typename Source, typename Deleter> -std::unique_ptr<Target, Deleter> polymorphic_cast_unique_ptr(std::unique_ptr<Source, Deleter> &&v) -{ - if(dynamic_cast<Target *>(v.get()) == nullptr) - { - ARM_COMPUTE_THROW(std::bad_cast()); - } - auto r = static_cast<Target *>(v.release()); - return std::unique_ptr<Target, Deleter>(r, std::move(v.get_deleter())); -} - -/** Polymorphic down cast between two unique pointer types - * - * @warning Will assert if cannot take place - * - * @tparam Target Target to cast type - * @tparam Source Source from cast type - * @tparam Deleter Deleter function type - * - * @param[in] v Value to cast - * - * @return The casted value - */ -template <typename Target, typename Source, typename Deleter> -std::unique_ptr<Target, Deleter> polymorphic_downcast_unique_ptr(std::unique_ptr<Source, Deleter> &&v) -{ - ARM_COMPUTE_ERROR_ON(dynamic_cast<Target *>(v.get()) != static_cast<Target *>(v.get())); - auto r = static_cast<Target *>(v.release()); - return std::unique_ptr<Target, Deleter>(r, std::move(v.get_deleter())); -} -} // namespace cast -} // namespace utils -} // namespace arm_compute -#endif /* ARM_COMPUTE_MISC_CAST_H */ diff --git a/arm_compute/core/utils/misc/ICloneable.h b/arm_compute/core/utils/misc/ICloneable.h deleted file mode 100644 index 064f408201..0000000000 --- a/arm_compute/core/utils/misc/ICloneable.h +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Copyright (c) 2017-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_MISC_ICLONEABLE_H -#define ARM_COMPUTE_MISC_ICLONEABLE_H - -#include <memory> - -namespace arm_compute -{ -namespace misc -{ -/** Clonable Interface */ -template <class T> -class ICloneable -{ -public: - /** Default virtual desctructor */ - virtual ~ICloneable() = default; - /** Provide a clone of the current object of class T - * - * @return Clone object of class T - */ - virtual std::unique_ptr<T> clone() const = 0; -}; -} // namespace misc -} // namespace arm_compute -#endif /* ARM_COMPUTE_MISC_ICLONEABLE_H */ diff --git a/arm_compute/core/utils/misc/InfoHelpers.h b/arm_compute/core/utils/misc/InfoHelpers.h index c6ee7c9031..1d1b4ea8d7 100644 --- a/arm_compute/core/utils/misc/InfoHelpers.h +++ b/arm_compute/core/utils/misc/InfoHelpers.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020 ARM Limited. + * Copyright (c) 2019-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -53,10 +53,12 @@ inline bool is_relu(ActivationLayerInfo activation_info) */ inline bool is_relu6(ActivationLayerInfo activation_info) { - const bool is_lu_bounded_relu = activation_info.activation() == ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU - && activation_info.a() == 6.f && activation_info.b() == 0.f; - const bool is_bounded_relu = activation_info.activation() == ActivationLayerInfo::ActivationFunction::BOUNDED_RELU - && activation_info.a() == 6.f; + const bool is_lu_bounded_relu = + activation_info.activation() == ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU && + activation_info.a() == 6.f && activation_info.b() == 0.f; + const bool is_bounded_relu = + activation_info.activation() == ActivationLayerInfo::ActivationFunction::BOUNDED_RELU && + activation_info.a() == 6.f; return activation_info.enabled() && (is_lu_bounded_relu || is_bounded_relu); } @@ -68,50 +70,52 @@ inline bool is_relu6(ActivationLayerInfo activation_info) * */ template <typename T> -inline void build_lstm_params_tensor_info(const LSTMParams<T> &lstm_params, - LSTMParams<ITensorInfo> *lstm_params_info) +inline void build_lstm_params_tensor_info(const LSTMParams<T> &lstm_params, LSTMParams<ITensorInfo> *lstm_params_info) { - if(lstm_params.has_peephole_opt()) + if (lstm_params.has_peephole_opt()) { ARM_COMPUTE_ERROR_ON_NULLPTR(lstm_params.cell_to_forget_weights(), lstm_params.cell_to_output_weights()); - lstm_params_info->set_peephole_params(lstm_params.cell_to_forget_weights()->info(), lstm_params.cell_to_output_weights()->info()); + lstm_params_info->set_peephole_params(lstm_params.cell_to_forget_weights()->info(), + lstm_params.cell_to_output_weights()->info()); } - if(lstm_params.has_projection()) + if (lstm_params.has_projection()) { ARM_COMPUTE_ERROR_ON_NULLPTR(lstm_params.projection_weights()); - lstm_params_info->set_projection_params(lstm_params.projection_weights()->info(), - lstm_params.projection_bias() != nullptr ? lstm_params.projection_bias()->info() : nullptr); + lstm_params_info->set_projection_params( + lstm_params.projection_weights()->info(), + lstm_params.projection_bias() != nullptr ? 
lstm_params.projection_bias()->info() : nullptr); } - if(!lstm_params.has_cifg_opt()) + if (!lstm_params.has_cifg_opt()) { - ARM_COMPUTE_ERROR_ON_NULLPTR(lstm_params.input_to_input_weights(), lstm_params.recurrent_to_input_weights(), lstm_params.input_gate_bias()); + ARM_COMPUTE_ERROR_ON_NULLPTR(lstm_params.input_to_input_weights(), lstm_params.recurrent_to_input_weights(), + lstm_params.input_gate_bias()); - const ITensorInfo *cell_to_input_weights_info = (lstm_params.has_peephole_opt()) ? lstm_params.cell_to_input_weights()->info() : nullptr; - lstm_params_info->set_cifg_params(lstm_params.input_to_input_weights()->info(), lstm_params.recurrent_to_input_weights()->info(), - cell_to_input_weights_info, lstm_params.input_gate_bias()->info()); + ITensorInfo *cell_to_input_weights_info = + (lstm_params.has_peephole_opt()) ? lstm_params.cell_to_input_weights()->info() : nullptr; + lstm_params_info->set_cifg_params(lstm_params.input_to_input_weights()->info(), + lstm_params.recurrent_to_input_weights()->info(), cell_to_input_weights_info, + lstm_params.input_gate_bias()->info()); } - if(lstm_params.use_layer_norm()) + if (lstm_params.use_layer_norm()) { - ARM_COMPUTE_ERROR_ON_NULLPTR(lstm_params.forget_layer_norm_weights(), - lstm_params.output_layer_norm_weights(), + ARM_COMPUTE_ERROR_ON_NULLPTR(lstm_params.forget_layer_norm_weights(), lstm_params.output_layer_norm_weights(), lstm_params.cell_layer_norm_weights()); - if(!lstm_params.has_cifg_opt()) + if (!lstm_params.has_cifg_opt()) { ARM_COMPUTE_ERROR_ON_NULLPTR(lstm_params.input_layer_norm_weights()); } - const ITensorInfo *forget_info = lstm_params.forget_layer_norm_weights()->info(); - const ITensorInfo *cell_info = lstm_params.cell_layer_norm_weights()->info(); - const ITensorInfo *output_info = lstm_params.output_layer_norm_weights()->info(); - const ITensorInfo *input_info = lstm_params.has_cifg_opt() ? nullptr : lstm_params.input_layer_norm_weights()->info(); + ITensorInfo *forget_info = lstm_params.forget_layer_norm_weights()->info(); + ITensorInfo *cell_info = lstm_params.cell_layer_norm_weights()->info(); + ITensorInfo *output_info = lstm_params.output_layer_norm_weights()->info(); + ITensorInfo *input_info = lstm_params.has_cifg_opt() ? nullptr : lstm_params.input_layer_norm_weights()->info(); lstm_params_info->set_layer_normalization_params(input_info, forget_info, cell_info, output_info); } - lstm_params_info->set_matmul_scale_params(lstm_params.input_intermediate_scale(), - lstm_params.forget_intermediate_scale(), - lstm_params.cell_intermediate_scale(), - lstm_params.output_intermediate_scale()); + lstm_params_info->set_matmul_scale_params( + lstm_params.input_intermediate_scale(), lstm_params.forget_intermediate_scale(), + lstm_params.cell_intermediate_scale(), lstm_params.output_intermediate_scale()); lstm_params_info->set_hidden_state_params(lstm_params.hidden_state_zero(), lstm_params.hidden_state_scale()); } diff --git a/arm_compute/core/utils/misc/Iterable.h b/arm_compute/core/utils/misc/Iterable.h deleted file mode 100644 index 829c4b44a8..0000000000 --- a/arm_compute/core/utils/misc/Iterable.h +++ /dev/null @@ -1,108 +0,0 @@ -/* - * Copyright (c) 2018-2019 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_MISC_ITERABLE_H -#define ARM_COMPUTE_MISC_ITERABLE_H - -#include <iterator> - -namespace arm_compute -{ -namespace utils -{ -namespace iterable -{ -/** Reverse range iterable class - * - * @tparam T Type to create a reverse range on - */ -template <typename T> -class reverse_iterable -{ -public: - /** Default constructor - * - * @param[in] it Value to reverse iterate on - */ - explicit reverse_iterable(T &it) - : _it(it) - { - } - - /** Get beginning of iterator. - * - * @return beginning of iterator. - */ - typename T::reverse_iterator begin() - { - return _it.rbegin(); - } - - /** Get end of iterator. - * - * @return end of iterator. - */ - typename T::reverse_iterator end() - { - return _it.rend(); - } - - /** Get beginning of const iterator. - * - * @return beginning of const iterator. - */ - typename T::const_reverse_iterator cbegin() - { - return _it.rbegin(); - } - - /** Get end of const iterator. - * - * @return end of const iterator. - */ - typename T::const_reverse_iterator cend() - { - return _it.rend(); - } - -private: - T &_it; -}; - -/** Creates a reverse iterable for a given type - * - * @tparam T Type to create a reverse iterable on - * - * @param[in] val Iterable input - * - * @return Reverse iterable container - */ -template <typename T> -reverse_iterable<T> reverse_iterate(T &val) -{ - return reverse_iterable<T>(val); -} -} // namespace iterable -} // namespace utils -} // namespace arm_compute -#endif /* ARM_COMPUTE_MISC_ITERABLE_H */ diff --git a/arm_compute/core/utils/misc/MMappedFile.h b/arm_compute/core/utils/misc/MMappedFile.h index 7669c5cc96..3efdbc5bda 100644 --- a/arm_compute/core/utils/misc/MMappedFile.h +++ b/arm_compute/core/utils/misc/MMappedFile.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 ARM Limited. + * Copyright (c) 2019 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -24,7 +24,7 @@ #ifndef ARM_COMPUTE_MISC_MMAPPED_FILE_H #define ARM_COMPUTE_MISC_MMAPPED_FILE_H -#if !defined(BARE_METAL) +#if !defined(_WIN64) && !defined(BARE_METAL) #include <string> #include <utility> @@ -105,6 +105,6 @@ private: } // namespace mmap_io } // namespace utils } // namespace arm_compute -#endif // !defined(BARE_METAL) +#endif // !defined(_WIN64) &&!defined(BARE_METAL) #endif /* ARM_COMPUTE_MISC_MMAPPED_FILE_H */ diff --git a/arm_compute/core/utils/misc/Macros.h b/arm_compute/core/utils/misc/Macros.h index 6e8d7659ee..fa861fa442 100644 --- a/arm_compute/core/utils/misc/Macros.h +++ b/arm_compute/core/utils/misc/Macros.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 ARM Limited. + * Copyright (c) 2019 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -26,15 +26,16 @@ #if defined(__cplusplus) && (__cplusplus >= 201402L) -#define ARM_COMPUTE_DEPRECATED [[deprecated]] -#define ARM_COMPUTE_DEPRECATED_REL(rel) [[deprecated("Deprecated in : " #rel)]] +#define ARM_COMPUTE_DEPRECATED [[deprecated]] +#define ARM_COMPUTE_DEPRECATED_REL(rel) [[deprecated("Deprecated in : " #rel)]] #define ARM_COMPUTE_DEPRECATED_REL_REPLACE(rel, replace) [[deprecated("Deprecated in : " #rel " - Use : " #replace)]] #elif defined(__GNUC__) || defined(__clang__) -#define ARM_COMPUTE_DEPRECATED __attribute__((deprecated)) +#define ARM_COMPUTE_DEPRECATED __attribute__((deprecated)) #define ARM_COMPUTE_DEPRECATED_REL(rel) __attribute__((deprecated("Deprecated in : " #rel))) -#define ARM_COMPUTE_DEPRECATED_REL_REPLACE(rel, replace) __attribute__((deprecated("Deprecated in : " #rel " - Use : " #replace))) +#define ARM_COMPUTE_DEPRECATED_REL_REPLACE(rel, replace) \ + __attribute__((deprecated("Deprecated in : " #rel " - Use : " #replace))) #else // defined(__cplusplus) && (__cplusplus >= 201402L) diff --git a/arm_compute/core/utils/misc/Random.h b/arm_compute/core/utils/misc/Random.h deleted file mode 100644 index 9f5a128546..0000000000 --- a/arm_compute/core/utils/misc/Random.h +++ /dev/null @@ -1,98 +0,0 @@ -/* - * Copyright (c) 2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_MISC_RANDOM_H -#define ARM_COMPUTE_MISC_RANDOM_H - -#include "arm_compute/core/Error.h" - -#include <random> -#include <type_traits> - -namespace arm_compute -{ -namespace utils -{ -namespace random -{ -/** Uniform distribution within a given number of sub-ranges - * - * @tparam T Distribution primitive type - */ -template <typename T> -class RangedUniformDistribution -{ -public: - using DT = typename std::conditional<std::is_integral<T>::value, - std::uniform_int_distribution<T>, - std::uniform_real_distribution<float>>::type; - using result_type = T; - using range_pair = std::pair<result_type, result_type>; - -public: - /** Constructor - * - * @param[in] low lowest value in the range (inclusive) - * @param[in] high highest value in the range (inclusive for uniform_int_distribution, exclusive for uniform_real_distribution) - * @param[in] exclude_ranges Ranges to exclude from the generator - */ - RangedUniformDistribution(result_type low, result_type high, const std::vector<range_pair> &exclude_ranges) - : _distributions(), _selector() - { - result_type clow = low; - for(const auto &erange : exclude_ranges) - { - result_type epsilon = std::is_integral<result_type>::value ? 1 : static_cast<result_type>(std::numeric_limits<float>::epsilon()); - - ARM_COMPUTE_ERROR_ON(clow > erange.first || clow >= erange.second); - - _distributions.emplace_back(DT(clow, erange.first - epsilon)); - clow = erange.second + epsilon; - } - ARM_COMPUTE_ERROR_ON(clow > high); - _distributions.emplace_back(DT(clow, high)); - _selector = std::uniform_int_distribution<uint32_t>(0, _distributions.size() - 1); - } - /** Generate random number - * - * @tparam URNG Random number generator object type - * - * @param[in] g A uniform random number generator object, used as the source of randomness. - * - * @return A new random number. - */ - template <class URNG> - result_type operator()(URNG &g) - { - unsigned int rand_select = _selector(g); - return _distributions[rand_select](g); - } - -private: - std::vector<DT> _distributions; - std::uniform_int_distribution<uint32_t> _selector; -}; -} // namespace random -} // namespace utils -} // namespace arm_compute -#endif /* ARM_COMPUTE_MISC_RANDOM_H */ diff --git a/arm_compute/core/utils/misc/Requires.h b/arm_compute/core/utils/misc/Requires.h deleted file mode 100644 index 33c6fa3096..0000000000 --- a/arm_compute/core/utils/misc/Requires.h +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Copyright (c) 2018-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_UTILS_REQUIRES_H -#define ARM_COMPUTE_UTILS_REQUIRES_H - -namespace arm_compute -{ -namespace utils -{ -namespace requires -{ -// *INDENT-OFF* -// clang-format off -namespace detail -{ -enum class enabler -{ -}; -} // namespace arm_compute - -/** Requirements as template */ -#define REQUIRES_T(...) template <bool Cond = (__VA_ARGS__), typename std::enable_if<Cond, int>::type = 0> -/** Requirements as template argument */ -#define REQUIRES_TA(...) typename = typename std::enable_if<(__VA_ARGS__), arm_compute::utils::requires::detail::enabler>::type -// clang-format on -// *INDENT-ON* -} // namespace requires -} // namespace utils -} // namespace arm_compute -#endif /*ARM_COMPUTE_UTILS_REQUIRES_H */ diff --git a/arm_compute/core/utils/misc/Rounding.h b/arm_compute/core/utils/misc/Rounding.h deleted file mode 100644 index 650137a473..0000000000 --- a/arm_compute/core/utils/misc/Rounding.h +++ /dev/null @@ -1,205 +0,0 @@ -/* - * Copyright (c) 2018-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_UTILS_ROUNDING_H -#define ARM_COMPUTE_UTILS_ROUNDING_H - -#include "arm_compute/core/Error.h" -#include "arm_compute/core/utils/misc/Requires.h" -#include "arm_compute/core/utils/misc/Traits.h" -#include "support/ToolchainSupport.h" - -#include <cmath> - -namespace arm_compute -{ -namespace utils -{ -namespace rounding -{ -/** Rounding mode */ -enum class RoundingMode -{ - TO_ZERO, /**< Round towards zero */ - AWAY_FROM_ZERO, /**< Round away from zero */ - HALF_TO_ZERO, /**< Round half towards from zero */ - HALF_AWAY_FROM_ZERO, /**< Round half away from zero */ - HALF_UP, /**< Round half towards positive infinity */ - HALF_DOWN, /**< Round half towards negative infinity */ - HALF_EVEN /**< Round half towards nearest even */ -}; - -/** Round floating-point value with round to zero - * - * @tparam T Parameter type. Should be of floating point type. - * - * @param[in] value floating-point value to be rounded. - * - * @return Floating-point value of rounded @p value. 
- */ -template <typename T, REQUIRES_TA(traits::is_floating_point<T>::value)> -inline T round_to_zero(T value) -{ - T res = std::floor(std::fabs(value)); - return (value < 0.f) ? -res : res; -} - -/** Round floating-point value with round away from zero - * - * @tparam T Parameter type. Should be of floating point type. - * - * @param[in] value floating-point value to be rounded. - * - * @return Floating-point value of rounded @p value. - */ -template <typename T, REQUIRES_TA(traits::is_floating_point<T>::value)> -inline T round_away_from_zero(T value) -{ - T res = std::ceil(std::fabs(value)); - return (value < 0.f) ? -res : res; -} - -/** Round floating-point value with half value rounding towards zero. - * - * @tparam T Parameter type. Should be of floating point type. - * - * @param[in] value floating-point value to be rounded. - * - * @return Floating-point value of rounded @p value. - */ -template <typename T, REQUIRES_TA(traits::is_floating_point<T>::value)> -inline T round_half_to_zero(T value) -{ - T res = T(std::ceil(std::fabs(value) - 0.5f)); - return (value < 0.f) ? -res : res; -} - -/** Round floating-point value with half value rounding away from zero. - * - * @tparam T Parameter type. Should be of floating point type. - * - * @param[in] value floating-point value to be rounded. - * - * @return Floating-point value of rounded @p value. - */ -template <typename T, REQUIRES_TA(traits::is_floating_point<T>::value)> -inline T round_half_away_from_zero(T value) -{ - T res = T(std::floor(std::fabs(value) + 0.5f)); - return (value < 0.f) ? -res : res; -} - -/** Round floating-point value with half value rounding to positive infinity. - * - * @tparam T Parameter type. Should be of floating point type. - * - * @param[in] value floating-point value to be rounded. - * - * @return Floating-point value of rounded @p value. - */ -template <typename T, REQUIRES_TA(traits::is_floating_point<T>::value)> -inline T round_half_up(T value) -{ - return std::floor(value + 0.5f); -} - -/** Round floating-point value with half value rounding to negative infinity. - * - * @tparam T Parameter type. Should be of floating point type. - * - * @param[in] value floating-point value to be rounded. - * - * @return Floating-point value of rounded @p value. - */ -template <typename T, REQUIRES_TA(traits::is_floating_point<T>::value)> -inline T round_half_down(T value) -{ - return std::ceil(value - 0.5f); -} - -/** Round floating-point value with half value rounding to nearest even. - * - * @tparam T Parameter type. Should be of floating point type. - * - * @param[in] value floating-point value to be rounded. - * @param[in] epsilon precision. - * - * @return Floating-point value of rounded @p value. 
- */ -template <typename T, REQUIRES_TA(traits::is_floating_point<T>::value)> -inline T round_half_even(T value, T epsilon = std::numeric_limits<T>::epsilon()) -{ - T positive_value = std::abs(value); - T ipart = 0; - std::modf(positive_value, &ipart); - // If 'value' is exactly halfway between two integers - if(std::abs(positive_value - (ipart + 0.5f)) < epsilon) - { - // If 'ipart' is even then return 'ipart' - if(std::fmod(ipart, 2.f) < epsilon) - { - return support::cpp11::copysign(ipart, value); - } - // Else return the nearest even integer - return support::cpp11::copysign(std::ceil(ipart + 0.5f), value); - } - // Otherwise use the usual round to closest - return support::cpp11::copysign(support::cpp11::round(positive_value), value); -} - -/** Round floating-point value given a rounding mode - * - * @tparam T Parameter type. Should be of floating point type. - * - * @param[in] value floating-point value to be rounded. - * @param[in] rounding_mode Rounding mode to use. - * - * @return Floating-point value of rounded @p value. - */ -template <typename T, REQUIRES_TA(traits::is_floating_point<T>::value)> -inline T round(T value, RoundingMode rounding_mode) -{ - switch(rounding_mode) - { - case RoundingMode::TO_ZERO: - return round_to_zero(value); - case RoundingMode::AWAY_FROM_ZERO: - return round_away_from_zero(value); - case RoundingMode::HALF_TO_ZERO: - return round_half_to_zero(value); - case RoundingMode::HALF_AWAY_FROM_ZERO: - return round_half_away_from_zero(value); - case RoundingMode::HALF_UP: - return round_half_up(value); - case RoundingMode::HALF_DOWN: - return round_half_down(value); - case RoundingMode::HALF_EVEN: - return round_half_even(value); - default: - ARM_COMPUTE_ERROR("Unsupported rounding mode!"); - } -} -} // namespace rounding -} // namespace utils -} // namespace arm_compute -#endif /*ARM_COMPUTE_UTILS_ROUNDING_H */ diff --git a/arm_compute/core/utils/misc/SaturateCast.h b/arm_compute/core/utils/misc/SaturateCast.h deleted file mode 100644 index 0241c64b14..0000000000 --- a/arm_compute/core/utils/misc/SaturateCast.h +++ /dev/null @@ -1,218 +0,0 @@ -/* - * Copyright (c) 2018-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_UTILS_CAST_SATURATE_CAST_H -#define ARM_COMPUTE_UTILS_CAST_SATURATE_CAST_H - -#include "arm_compute/core/utils/misc/Rounding.h" -#include "arm_compute/core/utils/misc/Traits.h" -#include "arm_compute/core/utils/misc/Utility.h" - -namespace arm_compute -{ -namespace utils -{ -namespace cast -{ -// *INDENT-OFF* -// clang-format off -// same type -template<typename T, - typename U, - typename std::enable_if<std::is_same<T, U>::value, int >::type = 0 > -T saturate_cast(U v) -{ - return v; -} - -// signed -> signed widening/same_width -template<typename T, - typename U, - typename std::enable_if<std::is_integral<T>::value && - std::is_integral<U>::value && - std::is_signed<U>() && - std::is_signed<T>() && - !std::is_same<T, U>::value && - sizeof(T) >= sizeof(U), - int >::type = 0 > -inline T saturate_cast(U v) -{ - return static_cast<T>(v); -} -// signed -> signed narrowing -template<typename T, - typename U, - typename std::enable_if<std::is_integral<T>::value && - std::is_integral<U>::value && - std::is_signed<U>() && - std::is_signed<T>() && - !std::is_same<T, U>::value && - sizeof(T) < sizeof(U), - int >::type = 0 > -inline T saturate_cast(U v) -{ - return static_cast<T>(utility::clamp<U>(v, std::numeric_limits<T>::lowest(), std::numeric_limits<T>::max())); -} - -// unsigned -> signed widening -template<typename T, - typename U, - typename std::enable_if<std::is_integral<T>::value && - std::is_integral<U>::value && - std::is_unsigned<U>() && - std::is_signed<T>() && - !std::is_same<T, U>::value && - (sizeof(T) > sizeof(U)), - int >::type = 0 > -inline T saturate_cast(U v) -{ - return static_cast<T>(v); -} -// unsigned -> signed narrowing -template<typename T, - typename U, - typename std::enable_if<std::is_integral<T>::value && - std::is_integral<U>::value && - std::is_unsigned<U>() && - std::is_signed<T>() && - !std::is_same<T, U>::value && - sizeof(T) < sizeof(U), - int >::type = 0 > -inline T saturate_cast(U v) -{ - return static_cast<T>(std::min<U>(v, std::numeric_limits<T>::max())); -} -// unsigned -> signed same_width -template<typename T, - typename U, - typename std::enable_if<std::is_integral<T>::value && - std::is_integral<U>::value && - std::is_unsigned<U>() && - std::is_signed<T>() && - !std::is_same<T, U>::value && - sizeof(T) == sizeof(U), - int >::type = 0 > -inline T saturate_cast(U v) -{ - return static_cast<T>(std::min<U>(v, std::numeric_limits<T>::max())); -} - -// signed -> unsigned widening/same width -template<typename T, - typename U, - typename std::enable_if<std::is_integral<T>::value && - std::is_integral<U>::value && - std::is_signed<U>() && - std::is_unsigned<T>() && - !std::is_same<T, U>::value && - sizeof(T) >= sizeof(U), - int >::type = 0 > -inline T saturate_cast(U v) -{ - return static_cast<T>(std::max<U>(0, v)); -} - -// signed -> unsigned narrowing -template<typename T, - typename U, - typename std::enable_if<std::is_integral<T>::value && - std::is_integral<U>::value && - std::is_signed<U>() && - std::is_unsigned<T>() && - !std::is_same<T, U>::value && - sizeof(T) < sizeof(U), - int >::type = 0 > -inline T saturate_cast(U v) -{ - return static_cast<T>(utility::clamp<U>(v, 0, std::numeric_limits<T>::max())); -} - -// unsigned -> unsigned widening/same width -template<typename T, - typename U, - typename std::enable_if<std::is_integral<T>::value && - std::is_integral<U>::value && - std::is_unsigned<T>() && - std::is_unsigned<U>() && - !std::is_same<T, U>::value && - sizeof(T) >= sizeof(U), - int >::type = 0 > -inline T 
saturate_cast(U v) -{ - return static_cast<T>(v); -} - -// unsigned -> unsigned narrowing -template<typename T, - typename U, - typename std::enable_if<std::is_integral<T>::value && - std::is_integral<U>::value && - std::is_unsigned<T>() && - std::is_unsigned<U>() && - !std::is_same<T, U>::value && - sizeof(T) < sizeof(U), - int >::type = 0 > -inline T saturate_cast(U v) -{ - return static_cast<T>(utility::clamp<U>(v, std::numeric_limits<T>::lowest(), std::numeric_limits<T>::max())); -} - -// float -> int -template<typename T, - typename U, - typename std::enable_if<std::is_integral<T>::value && - traits::is_floating_point<U>::value, - int >::type = 0 > -inline T saturate_cast(U v) -{ - int32_t vi = utils::rounding::round_half_away_from_zero(v); - return saturate_cast<T>(vi); -} - -// int -> float -template<typename T, - typename U, - typename std::enable_if<traits::is_floating_point<T>::value && - std::is_integral<U>::value, - int >::type = 0 > -inline T saturate_cast(U v) -{ - return static_cast<T>(v); -} - -// float -> float -template<typename T, - typename U, - typename std::enable_if<traits::is_floating_point<T>::value && - traits::is_floating_point<U>::value, - int >::type = 0 > -inline T saturate_cast(U v) -{ - return static_cast<T>(v); -} -// clang-format on -// *INDENT-ON* -} // namespace cast -} // namespace utils -} // namespace arm_compute -#endif /* ARM_COMPUTE_UTILS_CAST_SATURATE_CAST_H */ diff --git a/arm_compute/core/utils/misc/ShapeCalculator.h b/arm_compute/core/utils/misc/ShapeCalculator.h index dfccec8b37..e97d81390e 100644 --- a/arm_compute/core/utils/misc/ShapeCalculator.h +++ b/arm_compute/core/utils/misc/ShapeCalculator.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 ARM Limited. + * Copyright (c) 2017-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,15 +21,16 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#ifndef ARM_COMPUTE_MISC_SHAPE_CALCULATOR_H -#define ARM_COMPUTE_MISC_SHAPE_CALCULATOR_H +#ifndef ACL_ARM_COMPUTE_CORE_UTILS_MISC_SHAPECALCULATOR_H +#define ACL_ARM_COMPUTE_CORE_UTILS_MISC_SHAPECALCULATOR_H #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensorInfo.h" #include "arm_compute/core/KernelDescriptors.h" #include "arm_compute/core/Utils.h" - #include "arm_compute/core/utils/helpers/tensor_transform.h" +#include "arm_compute/function_info/ConvolutionInfo.h" +#include "arm_compute/runtime/FunctionDescriptors.h" #include <cmath> @@ -47,28 +48,35 @@ namespace shape_calculator * * @return the calculated shape */ -inline TensorShape calculate_reduce_mean_shape(ITensor *input, const Coordinates &reduction_axis, bool keep_dims) +inline TensorShape calculate_reduce_mean_shape(ITensorInfo *input, const Coordinates &reduction_axis, bool keep_dims) { const int reduction_ops = reduction_axis.num_dimensions(); Coordinates axis_local = reduction_axis; - const int input_dims = input->info()->num_dimensions(); + const int input_dims = input->num_dimensions(); convert_negative_axis(axis_local, input_dims); - TensorShape out_shape = input->info()->tensor_shape(); + TensorShape out_shape = input->tensor_shape(); // Configure reshape layer if we want to drop the dimensions - if(!keep_dims) + if (!keep_dims) { // We have to sort the reduction axis vectors in order for remove_dimension // to work properly + +// Suppress warning produced by a compiler bug in GCC +// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=104165 +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Warray-bounds" std::sort(axis_local.begin(), axis_local.begin() + reduction_ops); - for(int i = 0; i < reduction_ops; ++i) +#pragma GCC diagnostic pop + + for (int i = 0; i < reduction_ops; ++i) { - out_shape.remove_dimension(axis_local[i] - i); + out_shape.remove_dimension(axis_local[i] - i, false); } return out_shape; } else { - for(int i = 0; i < reduction_ops; ++i) + for (int i = 0; i < reduction_ops; ++i) { out_shape.set(axis_local[i], 1); } @@ -84,7 +92,10 @@ inline TensorShape calculate_reduce_mean_shape(ITensor *input, const Coordinates * * @return the calculated shape */ -inline TensorShape compute_vector_to_tensor_output_shape(const TensorShape &input, size_t conv_w, size_t conv_h, const DataLayout &data_layout) +inline TensorShape compute_vector_to_tensor_output_shape(const TensorShape &input, + size_t conv_w, + size_t conv_h, + const DataLayout &data_layout) { const size_t idx_w = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); const size_t idx_h = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); @@ -126,10 +137,12 @@ inline TensorShape compute_reorg_output_shape(const ITensorInfo &input, int32_t const size_t idx_channel = get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::CHANNEL); ARM_COMPUTE_ERROR_ON(stride <= 0); - ARM_COMPUTE_ERROR_ON_MSG((input.tensor_shape()[idx_width] % stride != 0), "The width of the input tensor must be a multiple of stride"); - ARM_COMPUTE_ERROR_ON_MSG((input.tensor_shape()[idx_height] % stride != 0), "The height of the input tensor must be a multiple of stride"); + ARM_COMPUTE_ERROR_ON_MSG((input.tensor_shape()[idx_width] % stride != 0), + "The width of the input tensor must be a multiple of stride"); + ARM_COMPUTE_ERROR_ON_MSG((input.tensor_shape()[idx_height] % stride != 0), + "The height of the input tensor must be a multiple of stride"); - TensorShape output_shape{ input.tensor_shape() }; 
+ TensorShape output_shape{input.tensor_shape()}; output_shape.set(idx_width, output_shape[idx_width] / stride); output_shape.set(idx_height, output_shape[idx_height] / stride); @@ -146,7 +159,8 @@ inline TensorShape compute_reorg_output_shape(const ITensorInfo &input, int32_t * * @return the calculated shape of the reshaped weights */ -inline TensorShape compute_weights_reshaped_shape(const ITensorInfo &weights, bool has_bias = false, unsigned int num_groups = 1) +inline TensorShape +compute_weights_reshaped_shape(const ITensorInfo &weights, bool has_bias = false, unsigned int num_groups = 1) { // Number of groups greater than one are only supported for NCHW data layout, and the number of weights must be a multiple of it. ARM_COMPUTE_ERROR_ON(num_groups == 0); @@ -154,14 +168,14 @@ inline TensorShape compute_weights_reshaped_shape(const ITensorInfo &weights, bo ARM_COMPUTE_ERROR_ON((weights.dimension(3) % num_groups) != 0); // Calculate output shape - TensorShape weights_reshaped{ weights.tensor_shape() }; + TensorShape weights_reshaped{weights.tensor_shape()}; weights_reshaped.set(3, weights_reshaped[3] / num_groups); weights_reshaped.collapse(3); const size_t tmp_dim = weights_reshaped[0]; weights_reshaped.set(0, weights_reshaped[1]); weights_reshaped.set(1, tmp_dim + (has_bias ? 1 : 0)); - if(weights.num_dimensions() < 5) + if (weights.num_dimensions() < 5) { weights_reshaped.set(2, num_groups); } @@ -177,7 +191,9 @@ inline TensorShape compute_weights_reshaped_shape(const ITensorInfo &weights, bo * * @return the calculated shape */ -inline TensorShape compute_lhs_reshaped_shape(const ITensorInfo &a, const GEMMLHSMatrixInfo &lhs_info, bool reinterpret_input_as_3d = false) +inline TensorShape compute_lhs_reshaped_shape(const ITensorInfo &a, + const GEMMLHSMatrixInfo &lhs_info, + bool reinterpret_input_as_3d = false) { ARM_COMPUTE_ERROR_ON(lhs_info.m0 == 0); ARM_COMPUTE_ERROR_ON(lhs_info.k0 == 0); @@ -198,11 +214,11 @@ inline TensorShape compute_lhs_reshaped_shape(const ITensorInfo &a, const GEMMLH const unsigned int output_width = block_size * num_horiz_blocks * lhs_info.v0; const unsigned int output_height = std::ceil(num_vert_blocks / static_cast<float>(lhs_info.v0)); - TensorShape lhs_shape{ a.tensor_shape() }; + TensorShape lhs_shape{a.tensor_shape()}; lhs_shape.set(0, output_width); lhs_shape.set(1, output_height); - if((reinterpret_input_as_3d) && (lhs_shape.num_dimensions() > 2)) + if ((reinterpret_input_as_3d) && (lhs_shape.num_dimensions() > 2)) { // When the data format is NHWC and the shapes are Nx1x1 // the tensor shape num_dimensions is automatically set to 1 instead of 3. 
@@ -242,7 +258,7 @@ inline TensorShape compute_rhs_reshaped_shape(const ITensorInfo &a, const GEMMRH const unsigned int output_width = block_size * num_vert_blocks * rhs_info.h0; const unsigned int output_height = std::ceil(num_horiz_blocks / static_cast<float>(rhs_info.h0)); - TensorShape rhs_shape{ a.tensor_shape() }; + TensorShape rhs_shape{a.tensor_shape()}; rhs_shape.set(0, output_width); rhs_shape.set(1, output_height); @@ -257,14 +273,15 @@ inline TensorShape compute_rhs_reshaped_shape(const ITensorInfo &a, const GEMMRH * * @return the calculated shape */ -inline TensorShape compute_interleaved_shape(const ITensorInfo &a, int mult_interleave4x4_height = 1, bool reinterpret_input_as_3d = false) +inline TensorShape +compute_interleaved_shape(const ITensorInfo &a, int mult_interleave4x4_height = 1, bool reinterpret_input_as_3d = false) { // The interleaved output matrix will have the following shape: [ a_height * W, ceil(a_width / W) ] where W = 4 * mult_interleave4x4_height ARM_COMPUTE_ERROR_ON(mult_interleave4x4_height < 1); const int interleave_width = 4 * mult_interleave4x4_height; - TensorShape shape_interleaved_a{ a.tensor_shape() }; + TensorShape shape_interleaved_a{a.tensor_shape()}; shape_interleaved_a.set(0, a.dimension(0) * interleave_width); - if(reinterpret_input_as_3d) + if (reinterpret_input_as_3d) { const int M = a.dimension(1) * a.dimension(2); const int height = std::ceil(M / static_cast<float>(interleave_width)); @@ -274,7 +291,7 @@ inline TensorShape compute_interleaved_shape(const ITensorInfo &a, int mult_inte // the tensor shape num_dimensions is automatically set to 1 instead of 3. // To avoid failures by removing a dimension that doesn't exist // check if the number of dimensions is greater than 2. - if(shape_interleaved_a.num_dimensions() > 2) + if (shape_interleaved_a.num_dimensions() > 2) { shape_interleaved_a.remove_dimension(2); } @@ -287,30 +304,6 @@ inline TensorShape compute_interleaved_shape(const ITensorInfo &a, int mult_inte return shape_interleaved_a; } -/** Calculate the reshaped shape of the weights to use in depthwise convolution - * - * @param[in] input Input tensor info - * @param[in] info Depthwise convolution information to be used for reshaping. 
- * - * @return the calculated shape - */ -inline TensorShape compute_reshaped_depthwise_weights_shape(const ITensorInfo &input, const DepthwiseConvolutionReshapeInfo &info) -{ - const auto data_layout = input.data_layout(); - TensorShape weights_shape{}; - - const int width_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); - const int height_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); - const int channel_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL); - const size_t num_channels = input.dimension(channel_idx); - const size_t num_rows = input.dimension(height_idx); - const size_t num_cols = input.dimension(width_idx); - - weights_shape.set(0, num_rows * num_cols * info.c0); - weights_shape.set(1, DIV_CEIL(num_channels, info.c0)); - return weights_shape; -} - /** Calculate the transposed 1xW shape * * @param[in] b Input tensor info @@ -320,7 +313,7 @@ inline TensorShape compute_reshaped_depthwise_weights_shape(const ITensorInfo &i inline TensorShape compute_transpose1xW_shape(const ITensorInfo &b) { // The transpose1xW output matrix will have the following shape: [ b_height * 16, ceil(b_width / 16.0f) ] - TensorShape shape_transposed1xW_b{ b.tensor_shape() }; + TensorShape shape_transposed1xW_b{b.tensor_shape()}; shape_transposed1xW_b.set(0, b.dimension(1) * 16); shape_transposed1xW_b.set(1, std::ceil(b.dimension(0) / 16.f)); @@ -340,7 +333,7 @@ inline TensorShape compute_transpose1xW_with_element_size_shape(const ITensorInf // The transpose1xW output matrix will have the following shape: // [ b_height * W, ceil(b_width / W) ] where W = (16 / element size of the tensor) * mult_transpose1xW_width ARM_COMPUTE_ERROR_ON(mult_transpose1xW_width < 1); - TensorShape shape_transposed1xW_b{ b.tensor_shape() }; + TensorShape shape_transposed1xW_b{b.tensor_shape()}; const size_t transpose_width = (16 / b.element_size()) * mult_transpose1xW_width; shape_transposed1xW_b.set(0, b.dimension(1) * transpose_width); shape_transposed1xW_b.set(1, static_cast<size_t>(std::ceil(b.dimension(0) / static_cast<float>(transpose_width)))); @@ -356,8 +349,8 @@ inline TensorShape compute_transpose1xW_with_element_size_shape(const ITensorInf */ inline TensorShape compute_reductionA_shape(const ITensorInfo &b) { - TensorShape shape_vector_sum_col{ b.tensor_shape() }; - if(shape_vector_sum_col.num_dimensions() > 1) + TensorShape shape_vector_sum_col{b.tensor_shape()}; + if (shape_vector_sum_col.num_dimensions() > 1) { shape_vector_sum_col.remove_dimension(1); } @@ -373,9 +366,9 @@ inline TensorShape compute_reductionA_shape(const ITensorInfo &b) */ inline TensorShape compute_reductionB_shape(const ITensorInfo &a) { - TensorShape shape_vector_sum_row{ a.tensor_shape() }; + TensorShape shape_vector_sum_row{a.tensor_shape()}; shape_vector_sum_row.set(Window::DimX, a.dimension(1)); - if(shape_vector_sum_row.num_dimensions() > 1) + if (shape_vector_sum_row.num_dimensions() > 1) { shape_vector_sum_row.remove_dimension(1); } @@ -392,7 +385,10 @@ inline TensorShape compute_reductionB_shape(const ITensorInfo &a) * * @return the calculated shape */ -inline TensorShape compute_col2im_shape(const ITensorInfo &input, const Size2D &convolved_dims, bool batch_size_on_z, unsigned int num_groups = 1) +inline TensorShape compute_col2im_shape(const ITensorInfo &input, + const Size2D &convolved_dims, + bool batch_size_on_z, + unsigned int num_groups = 1) { ARM_COMPUTE_ERROR_ON(num_groups == 0); ARM_COMPUTE_ERROR_ON(input.tensor_shape()[1] != 
(convolved_dims.area())); @@ -403,10 +399,10 @@ inline TensorShape compute_col2im_shape(const ITensorInfo &input, const Size2D & const int height_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); const int channel_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL); - TensorShape col2im_shape{ input.tensor_shape() }; + TensorShape col2im_shape{input.tensor_shape()}; // If batches start on 3rd dimension shift dimensions right by 1 to retain upper tensor shape, // as first three will be override by H,W,C data - if(batch_size_on_z && num_groups == 1) + if (batch_size_on_z && num_groups == 1) { col2im_shape.shift_right(1); } @@ -425,29 +421,27 @@ inline TensorShape compute_col2im_shape(const ITensorInfo &input, const Size2D & */ inline TensorShape compute_transposed_shape(const ITensorInfo &input) { - TensorShape shape_transposed{ input.tensor_shape() }; + TensorShape shape_transposed{input.tensor_shape()}; - shape_transposed.set(0, input.dimension(1)); - shape_transposed.set(1, input.dimension(0)); + shape_transposed.set(0, input.dimension(1), false); + shape_transposed.set(1, input.dimension(0), false); return shape_transposed; } /** Calculate the depthwise convolution output shape of a tensor * - * @param[in] input Input tensor info - * @param[in] weights Weights tensor info - * @param[in] conv_info Padding and stride information to use for the convolution. - * @param[in] depth_multiplier Multiplier to apply to the input's depth in order to retrieve the output's depth. - * @param[in] dilation Dilation, in elements, across x and y. Defaults to (1, 1). + * @param[in] input Input tensor info + * @param[in] weights Weights tensor info + * @param[in] info Convolution info * * @return the calculated shape */ -inline TensorShape compute_depthwise_convolution_shape(const ITensorInfo &input, const ITensorInfo &weights, PadStrideInfo conv_info, unsigned int depth_multiplier, const Size2D &dilation = Size2D(1U, - 1U)) +inline TensorShape +compute_depthwise_convolution_shape(const ITensorInfo &input, const ITensorInfo &weights, const ConvolutionInfo &info) { - const TensorShape input_shape{ input.tensor_shape() }; - const TensorShape weights_shape{ weights.tensor_shape() }; + const TensorShape input_shape{input.tensor_shape()}; + const TensorShape weights_shape{weights.tensor_shape()}; const DataLayout data_layout = input.data_layout(); const int width_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); @@ -455,23 +449,54 @@ inline TensorShape compute_depthwise_convolution_shape(const ITensorInfo &input, const int channel_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL); const DataLayout weights_data_layout = weights.data_layout(); - const int weights_width_idx = get_data_layout_dimension_index(weights_data_layout, DataLayoutDimension::WIDTH); - const int weights_height_idx = get_data_layout_dimension_index(weights_data_layout, DataLayoutDimension::HEIGHT); + const int weights_width_idx = get_data_layout_dimension_index(weights_data_layout, DataLayoutDimension::WIDTH); + const int weights_height_idx = get_data_layout_dimension_index(weights_data_layout, DataLayoutDimension::HEIGHT); unsigned int output_width = 0; unsigned int output_height = 0; - std::tie(output_width, output_height) = scaled_dimensions(input_shape[width_idx], input_shape[height_idx], - weights_shape[weights_width_idx], weights_shape[weights_height_idx], - conv_info, dilation); + std::tie(output_width, output_height) 
= + scaled_dimensions(input_shape[width_idx], input_shape[height_idx], weights_shape[weights_width_idx], + weights_shape[weights_height_idx], info.pad_stride_info, info.dilation); - TensorShape output_shape{ input_shape }; + TensorShape output_shape{input_shape}; output_shape.set(width_idx, output_width); output_shape.set(height_idx, output_height); - output_shape.set(channel_idx, input_shape[channel_idx] * depth_multiplier); + output_shape.set(channel_idx, input_shape[channel_idx] * info.depth_multiplier); return output_shape; } +/** Calculate padding required for deconvolution + * + * @param[in] input Input tensor info + * @param[in] weights Weights tensor shape + * @param[in] sx Stride on x axis + * @param[in] sy Stride on y axis + * @param[in] out_dims Output shape dimensions + * + * @return the padding required + */ +inline std::pair<int32_t, int32_t> compute_deconvolution_padding(const ITensorInfo &input, + const ITensorInfo &weights, + int32_t sx, + int32_t sy, + std::pair<uint32_t, uint32_t> out_dims) +{ + const DataLayout data_layout = input.data_layout(); + const size_t idx_w = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); + const size_t idx_h = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); + + // Find the upsampled dimensions + int32_t out_x = (static_cast<int32_t>(input.dimension(idx_w)) - 1) * sx + 1; + int32_t out_y = (static_cast<int32_t>(input.dimension(idx_h)) - 1) * sy + 1; + + // Find the padding needed for the convolution with stride 1 in order to match output shape + int32_t padx = out_dims.first - (out_x - static_cast<int32_t>(weights.dimension(idx_w)) + 1); + int32_t pady = out_dims.second - (out_y - static_cast<int32_t>(weights.dimension(idx_h)) + 1); + + return std::make_pair(padx, pady); +} + /** Calculate the upsampled output shape used for deconvolution * * @param[in] input Input tensor info @@ -484,20 +509,28 @@ inline TensorShape compute_depthwise_convolution_shape(const ITensorInfo &input, * * @return the calculated shape */ -inline TensorShape compute_deconvolution_upsampled_shape(const ITensorInfo &input, const ITensorInfo &weights, unsigned int sx, unsigned int sy, - std::pair<unsigned int, unsigned int> &out_dims, uint32_t &padx, uint32_t &pady) +inline TensorShape compute_deconvolution_upsampled_shape(const ITensorInfo &input, + const ITensorInfo &weights, + unsigned int sx, + unsigned int sy, + std::pair<unsigned int, unsigned int> &out_dims, + uint32_t &padx, + uint32_t &pady) { + // Find the padding needed for the convolution with stride 1 in order to match output shape + const auto padxy = + compute_deconvolution_padding(input, weights, static_cast<int32_t>(sx), static_cast<int32_t>(sy), out_dims); + padx = static_cast<uint32_t>(padxy.first); + pady = static_cast<uint32_t>(padxy.second); + const DataLayout data_layout = input.data_layout(); const size_t idx_w = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); const size_t idx_h = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); // Find the upsampled dimensions - unsigned int out_x = (input.dimension(idx_w) - 1) * sx + 1; - unsigned int out_y = (input.dimension(idx_h) - 1) * sy + 1; + uint32_t out_x = (input.dimension(idx_w) - 1) * sx + 1; + uint32_t out_y = (input.dimension(idx_h) - 1) * sy + 1; - // Find the padding needed for the convolution with stride 1 in order to match output shape - padx = out_dims.first - (out_x - weights.dimension(idx_w) + 1); - pady = out_dims.second - (out_y - 
weights.dimension(idx_h) + 1); out_x += padx; out_y += pady; @@ -516,10 +549,12 @@ inline TensorShape compute_deconvolution_upsampled_shape(const ITensorInfo &inpu * * @return the calculated shape */ -inline TensorShape compute_deconvolution_output_shape(const std::pair<unsigned int, unsigned int> &out_dims, const ITensorInfo &input, const ITensorInfo &weights) +inline TensorShape compute_deconvolution_output_shape(const std::pair<unsigned int, unsigned int> &out_dims, + const ITensorInfo &input, + const ITensorInfo &weights) { - const TensorShape input_shape{ input.tensor_shape() }; - const TensorShape weights_shape{ weights.tensor_shape() }; + const TensorShape input_shape{input.tensor_shape()}; + const TensorShape weights_shape{weights.tensor_shape()}; const DataLayout data_layout = input.data_layout(); const int width_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); @@ -527,7 +562,7 @@ inline TensorShape compute_deconvolution_output_shape(const std::pair<unsigned i const int channel_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL); const int batch_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::BATCHES); - TensorShape out_shape{ input_shape }; + TensorShape out_shape{input_shape}; out_shape.set(width_idx, out_dims.first); out_shape.set(height_idx, out_dims.second); out_shape.set(channel_idx, weights_shape[batch_idx]); @@ -543,11 +578,18 @@ inline TensorShape compute_deconvolution_output_shape(const std::pair<unsigned i * @param[in] dilation Dilation, in elements, across x and y * @param[in] batch_size_on_z True if batch size is on z axis * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution + * @param[in] input_pad_right (Optional) When fast-math is selected, per element padding for the im2col matrix may be necessary * * @return the calculated shape */ -inline TensorShape compute_im2col_conv_shape(const ITensorInfo *input, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, const Size2D &dilation, bool batch_size_on_z, - unsigned int num_groups = 1) +inline TensorShape compute_im2col_conv_shape(const ITensorInfo *input, + const Size2D &kernel_dims, + const PadStrideInfo &conv_info, + bool has_bias, + const Size2D &dilation, + bool batch_size_on_z, + unsigned int num_groups = 1, + unsigned int input_pad_right = 0) { // The output shape will be the 3D shape [ out_channels * kernel_area, num_elems_per_out_channel, batches ] if batch_size_on_z == true // or the 4D shape [ out_channels * kernel_area / num_groups, num_elems_per_out_channel, num_groups, batches ] if batch_size_on_z == false @@ -556,17 +598,19 @@ inline TensorShape compute_im2col_conv_shape(const ITensorInfo *input, const Siz ARM_COMPUTE_ERROR_ON(num_groups > 1 && input->data_layout() != DataLayout::NCHW); ARM_COMPUTE_ERROR_ON(num_groups > 1 && batch_size_on_z); - TensorShape output_shape{ input->tensor_shape() }; + TensorShape output_shape{input->tensor_shape()}; const DataLayout data_layout = input->data_layout(); const int width_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); const int height_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); const int channel_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL); - std::pair<unsigned int, unsigned int> out_dims = scaled_dimensions(output_shape[width_idx], output_shape[height_idx], kernel_dims.width, kernel_dims.height, conv_info, dilation); - 
output_shape.set(0, (output_shape[channel_idx] / num_groups * kernel_dims.area() + (has_bias ? 1 : 0))); // NOLINT + std::pair<unsigned int, unsigned int> out_dims = scaled_dimensions( + output_shape[width_idx], output_shape[height_idx], kernel_dims.width, kernel_dims.height, conv_info, dilation); + output_shape.set(0, ((output_shape[channel_idx] + input_pad_right) / num_groups * kernel_dims.area() + + (has_bias ? 1 : 0))); // NOLINT output_shape.set(1, (out_dims.first * out_dims.second)); - if(batch_size_on_z && output_shape.num_dimensions() >= 3) + if (batch_size_on_z && output_shape.num_dimensions() >= 3) { output_shape.remove_dimension(2); } @@ -588,7 +632,7 @@ inline TensorShape compute_flatten_shape(const ITensorInfo *input) { // The output shape will be the flatten version of the input (i.e. [ width * height * channels, num_batches, ... ] ). Used for FlattenLayer and FullyConnectedLayer. - TensorShape output_shape{ input->tensor_shape() }; + TensorShape output_shape{input->tensor_shape()}; output_shape.collapse(3); @@ -610,7 +654,7 @@ inline TensorShape compute_softmax_shape(const ITensorInfo *input, size_t axis = // - [x,y,z,w] and axis 3 will return [x*y*z, w] TensorShape shape2D = input->tensor_shape(); - if(axis < input->num_dimensions()) + if (axis < input->num_dimensions()) { // Collapse from axis onward (this changes the shape) shape2D.collapse_from(axis); @@ -624,7 +668,7 @@ inline TensorShape compute_softmax_shape(const ITensorInfo *input, size_t axis = shape2D.collapse(shape2D.num_dimensions()); } - if(axis == 0) + if (axis == 0) { // If axis is zero the first dim should be one. Since // collapse is an inclusive operation we need to shift @@ -643,15 +687,17 @@ inline TensorShape compute_softmax_shape(const ITensorInfo *input, size_t axis = */ inline TensorShape compute_winograd_filter_transform_shape(const ITensorInfo &input, const WinogradInfo &winograd_info) { - TensorShape tensor_shape{ input.tensor_shape() }; + TensorShape tensor_shape{input.tensor_shape()}; const Size2D kernel_size = winograd_info.kernel_size; const Size2D output_tile_size = winograd_info.output_tile_size; - const Size2D input_tile_size = Size2D(output_tile_size.width + kernel_size.width - 1, output_tile_size.height + kernel_size.height - 1); + const Size2D input_tile_size = + Size2D(output_tile_size.width + kernel_size.width - 1, output_tile_size.height + kernel_size.height - 1); tensor_shape.remove_dimension(get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::WIDTH)); tensor_shape.set(Window::DimX, input.dimension(3)); - tensor_shape.set(Window::DimY, input.dimension(get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::CHANNEL))); + tensor_shape.set(Window::DimY, input.dimension(get_data_layout_dimension_index(input.data_layout(), + DataLayoutDimension::CHANNEL))); tensor_shape.set(Window::DimZ, input_tile_size.area()); return tensor_shape; @@ -669,23 +715,22 @@ inline TensorShape compute_winograd_input_transform_shape(const ITensorInfo &inp const PadStrideInfo conv_info = winograd_info.convolution_info; const Size2D kernel_size = winograd_info.kernel_size; const Size2D output_tile_size = winograd_info.output_tile_size; - const Size2D input_tile_size = Size2D(output_tile_size.width + kernel_size.width - 1, output_tile_size.height + kernel_size.height - 1); + const Size2D input_tile_size = + Size2D(output_tile_size.width + kernel_size.width - 1, output_tile_size.height + kernel_size.height - 1); const size_t idx_w = 
get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::WIDTH); const size_t idx_h = get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::HEIGHT); const size_t idx_c = get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::CHANNEL); // Compute the number of output tiles along the x and y direction of size "output_tile_size" - const Size2D num_tiles = compute_winograd_convolution_tiles(Size2D(input.tensor_shape()[idx_w], input.tensor_shape()[idx_h]), - kernel_size, - output_tile_size, - conv_info); + const Size2D num_tiles = compute_winograd_convolution_tiles( + Size2D(input.tensor_shape()[idx_w], input.tensor_shape()[idx_h]), kernel_size, output_tile_size, conv_info); const unsigned int width = input.tensor_shape()[idx_c]; const unsigned int height = num_tiles.area(); const unsigned int depth = input_tile_size.area(); - TensorShape output_shape{ input.tensor_shape() }; + TensorShape output_shape{input.tensor_shape()}; output_shape.set(0, width); output_shape.set(1, height); output_shape.set(2, depth); @@ -708,12 +753,12 @@ inline TensorShape compute_winograd_output_transform_shape(const ITensorInfo &in const DataLayout data_layout = winograd_info.output_data_layout; // Compute output shape - unsigned int output_width = 0; - unsigned int output_height = 0; + unsigned int output_width = 0; + unsigned int output_height = 0; std::tie(output_width, output_height) = scaled_dimensions(input_dimensions.width, input_dimensions.height, kernel_size.width, kernel_size.height, conv_info); - TensorShape tensor_shape{ input.tensor_shape() }; + TensorShape tensor_shape{input.tensor_shape()}; // Output dimension const unsigned int out_w = output_width; @@ -729,20 +774,21 @@ inline TensorShape compute_winograd_output_transform_shape(const ITensorInfo &in /** Calculate the deep convolution shape output shape of a tensor * - * @param[in] input Input tensor info - * @param[in] weights Weights tensor info - * @param[in] conv_info Contains padding and stride information + * @param[in] input_shape Input tensor shape + * @param[in] input_data_layout Input data layout + * @param[in] weights_shape Weights tensor shape + * @param[in] conv_info Contains padding and stride information * * @return the calculated shape */ -inline TensorShape compute_deep_convolution_shape(const ITensorInfo &input, const ITensorInfo &weights, PadStrideInfo conv_info) +inline TensorShape compute_deep_convolution_shape(const TensorShape &input_shape, + DataLayout input_data_layout, + const TensorShape &weights_shape, + const PadStrideInfo &conv_info) { - const TensorShape input_shape{ input.tensor_shape() }; - const TensorShape weights_shape{ weights.tensor_shape() }; - - const size_t idx_width = get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::WIDTH); - const size_t idx_height = get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::HEIGHT); - const size_t idx_channel = get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::CHANNEL); + const size_t idx_width = get_data_layout_dimension_index(input_data_layout, DataLayoutDimension::WIDTH); + const size_t idx_height = get_data_layout_dimension_index(input_data_layout, DataLayoutDimension::HEIGHT); + const size_t idx_channel = get_data_layout_dimension_index(input_data_layout, DataLayoutDimension::CHANNEL); const unsigned int input_width = input_shape[idx_width]; const unsigned int input_height = input_shape[idx_height]; @@ -751,9 +797,10 @@ inline TensorShape 
compute_deep_convolution_shape(const ITensorInfo &input, cons const unsigned int weights_out_channel = weights_shape[3]; unsigned int output_width = 0; unsigned int output_height = 0; - std::tie(output_width, output_height) = scaled_dimensions(input_width, input_height, weights_width, weights_height, conv_info); + std::tie(output_width, output_height) = + scaled_dimensions(input_width, input_height, weights_width, weights_height, conv_info); - TensorShape output_shape{ input_shape }; + TensorShape output_shape{input_shape}; output_shape.set(idx_width, output_width); output_shape.set(idx_height, output_height); output_shape.set(idx_channel, weights_out_channel); @@ -761,6 +808,53 @@ inline TensorShape compute_deep_convolution_shape(const ITensorInfo &input, cons return output_shape; } +/** Calculate the deep convolution shape output shape of a tensor + * + * @param[in] input Input tensor info + * @param[in] weights Weights tensor info + * @param[in] conv_info Contains padding and stride information + * + * @return the calculated shape + */ +inline TensorShape +compute_deep_convolution_shape(const ITensorInfo &input, const ITensorInfo &weights, const PadStrideInfo &conv_info) +{ + return compute_deep_convolution_shape(input.tensor_shape(), input.data_layout(), weights.tensor_shape(), conv_info); +} + +/** Calculate the indirect buffer output shape used by the indirect convolution function + * + * @param[in] input_shape Input tensor shape + * @param[in] input_data_layout Input data layout + * @param[in] weights_shape Weights tensor shape + * @param[in] conv_info Contains padding and stride information + * @param[in] desc Contains the direct/indirect convolution compute arguments, such as the tiling dimensions + * + * @return the calculated shape + */ +inline TensorShape compute_indirect_buffer_shape(const TensorShape &input_shape, + DataLayout input_data_layout, + const TensorShape &weights_shape, + const PadStrideInfo &conv_info, + const DirectConvComputeKernelInfo &desc) +{ + ARM_COMPUTE_ERROR_ON_MSG(input_data_layout != DataLayout::NHWC, "The data layout can only be NHWC"); + ARM_COMPUTE_ERROR_ON_MSG(desc.m0 <= 0 || desc.m0 > 8, "M0 can only be greater than 0 and less than or equal to 8"); + + const unsigned int m0 = desc.m0; + const unsigned int kw = weights_shape[1]; + const unsigned int kh = weights_shape[2]; + + TensorShape output_conv2d_shape = + compute_deep_convolution_shape(input_shape, input_data_layout, weights_shape, conv_info); + + const unsigned int output_w = m0 * kw * kh; + const unsigned int output_h = DIV_CEIL(output_conv2d_shape[1] * output_conv2d_shape[2], m0); + const unsigned int output_b = output_conv2d_shape[3]; + + return TensorShape(output_w, output_h, output_b); +} + /** Calculate the min/max shape output shape of a tensor * * @param[in] input Input tensor info @@ -769,7 +863,7 @@ inline TensorShape compute_deep_convolution_shape(const ITensorInfo &input, cons */ inline TensorShape compute_min_max_shape(const ITensorInfo *input) { - TensorShape output_shape{ input->tensor_shape() }; + TensorShape output_shape{input->tensor_shape()}; output_shape.set(Window::DimX, 2); output_shape.remove_dimension(1); output_shape.remove_dimension(1); @@ -786,29 +880,63 @@ inline TensorShape compute_min_max_shape(const ITensorInfo *input) */ inline TensorShape compute_pool_shape(const ITensorInfo &input, PoolingLayerInfo pool_info) { - unsigned int pooled_w = 0; - unsigned int pooled_h = 0; + int pooled_w = 0; + int pooled_h = 0; + + TensorShape output_shape{input.tensor_shape()}; 
- TensorShape output_shape{ input.tensor_shape() }; + const bool is_global_pooling = pool_info.is_global_pooling; + const int idx_width = get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::WIDTH); + const int idx_height = get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::HEIGHT); + const int input_width = input.tensor_shape()[idx_width]; + const int input_height = input.tensor_shape()[idx_height]; + const int pool_size_x = is_global_pooling ? output_shape[idx_width] : pool_info.pool_size.width; + const int pool_size_y = is_global_pooling ? output_shape[idx_height] : pool_info.pool_size.height; - const bool is_global_pooling = pool_info.is_global_pooling; - const unsigned int idx_width = get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::WIDTH); - const unsigned int idx_height = get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::HEIGHT); - const unsigned int pool_size_x = is_global_pooling ? output_shape[idx_width] : pool_info.pool_size.width; - const unsigned int pool_size_y = is_global_pooling ? output_shape[idx_height] : pool_info.pool_size.height; + std::tie(pooled_w, pooled_h) = + scaled_dimensions_signed(input_width, input_height, pool_size_x, pool_size_y, pool_info.pad_stride_info); - std::tie(pooled_w, pooled_h) = scaled_dimensions(output_shape[idx_width], - output_shape[idx_height], - pool_size_x, - pool_size_y, - pool_info.pad_stride_info); + ARM_COMPUTE_ERROR_ON_MSG((pooled_w < 1 || pooled_h < 1), "Calculated output dimension size is invalid"); - output_shape.set(idx_width, pooled_w); - output_shape.set(idx_height, pooled_h); + output_shape.set(idx_width, static_cast<size_t>(pooled_w)); + output_shape.set(idx_height, static_cast<size_t>(pooled_h)); return output_shape; } +/** Calculate the output unpool shape of a tensor + * + * @param[in] input Input tensor info + * @param[in] pool_info Pooling layer info + * + * @return the calculated shape + */ +inline TensorShape compute_unpool_shape(const ITensorInfo &input, PoolingLayerInfo pool_info) +{ + const unsigned int idx_width = get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::WIDTH); + const unsigned int idx_height = get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::HEIGHT); + const TensorShape input_shape = input.tensor_shape(); + ARM_COMPUTE_ERROR_ON(input_shape[idx_height] <= 1 || input_shape[idx_width] <= 1); + const PadStrideInfo pad_stride_info = pool_info.pad_stride_info; + const unsigned int stride_x = pad_stride_info.stride().first; + const unsigned int stride_y = pad_stride_info.stride().second; + + const int pad_left = pad_stride_info.pad_left(); + const int pad_top = pad_stride_info.pad_top(); + const int pad_right = pad_stride_info.pad_right(); + const int pad_bottom = pad_stride_info.pad_bottom(); + + TensorShape output_shape = input_shape; + const unsigned int out_width = + (input_shape[idx_width] - 1) * stride_x - pad_left - pad_right + pool_info.pool_size.width; + const unsigned int out_height = + (input_shape[idx_height] - 1) * stride_y - pad_top - pad_bottom + pool_info.pool_size.height; + + output_shape.set(idx_width, out_width); + output_shape.set(idx_height, out_height); + return output_shape; +} + /** Calculate the output roi align shape of a tensor * * @param[in] input Input tensor info @@ -817,9 +945,10 @@ inline TensorShape compute_pool_shape(const ITensorInfo &input, PoolingLayerInfo * * @return the calculated shape */ -inline TensorShape 
compute_roi_align_shape(const ITensorInfo &input, const ITensorInfo &rois, ROIPoolingLayerInfo pool_info) +inline TensorShape +compute_roi_align_shape(const ITensorInfo &input, const ITensorInfo &rois, ROIPoolingLayerInfo pool_info) { - TensorShape output_shape{ input.tensor_shape() }; + TensorShape output_shape{input.tensor_shape()}; const unsigned int idx_width = get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::WIDTH); const unsigned int idx_height = get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::HEIGHT); @@ -840,7 +969,7 @@ inline TensorShape compute_roi_align_shape(const ITensorInfo &input, const ITens */ inline TensorShape compute_rnn_shape(const ITensorInfo *input, const unsigned int batch_size) { - TensorShape output_shape{ input->tensor_shape() }; + TensorShape output_shape{input->tensor_shape()}; output_shape.set(1, batch_size); return output_shape; @@ -855,15 +984,21 @@ inline TensorShape compute_rnn_shape(const ITensorInfo *input, const unsigned in * * @return the calculated shape */ -inline TensorShape compute_mm_shape(const ITensorInfo &input0, const ITensorInfo &input1, bool is_interleaved_transposed, const GEMMReshapeInfo &reshape_info) +inline TensorShape compute_mm_shape(const ITensorInfo &input0, + const ITensorInfo &input1, + bool is_interleaved_transposed, + const GEMMReshapeInfo &reshape_info) { ARM_COMPUTE_ERROR_ON_MSG(input0.num_dimensions() > 4, "The number of dimensions for the matrix A must be <= 4"); - ARM_COMPUTE_ERROR_ON_MSG(is_interleaved_transposed && reshape_info.reinterpret_input_as_3d(), "The first input tensor cannot be reinterpreted as 3D if is_interleaved_transposed is true"); + ARM_COMPUTE_ERROR_ON_MSG( + is_interleaved_transposed && reshape_info.reinterpret_input_as_3d(), + "The first input tensor cannot be reinterpreted as 3D if is_interleaved_transposed is true"); const bool reinterpret_input_as_3d = reshape_info.reinterpret_input_as_3d(); const bool reinterpret_output_as_3d = reshape_info.depth_output_gemm3d() != 0; const int depth_output_gemm3d = reinterpret_output_as_3d ? reshape_info.depth_output_gemm3d() : 1; - const int m = reshape_info.reinterpret_input_as_3d() ? input0.dimension(1) * input0.dimension(2) : input0.dimension(1); + const int m = + reshape_info.reinterpret_input_as_3d() ? input0.dimension(1) * input0.dimension(2) : input0.dimension(1); // If the output of GEMM has to be reinterpreted as 3D, the number of input0 rows (M) is obtained collapsing the second and third // dimension of the output tensor @@ -872,7 +1007,7 @@ inline TensorShape compute_mm_shape(const ITensorInfo &input0, const ITensorInfo const int dim2 = reinterpret_input_as_3d ? input0.tensor_shape()[3] : input0.tensor_shape()[2]; const int dim3 = reinterpret_input_as_3d ? 1 : input0.tensor_shape()[3]; - TensorShape output_shape{ input0.tensor_shape() }; + TensorShape output_shape{input0.tensor_shape()}; output_shape.set(0, dim0); output_shape.set(1, dim1); @@ -885,15 +1020,14 @@ inline TensorShape compute_mm_shape(const ITensorInfo &input0, const ITensorInfo /** Calculate the matrix multiplication output shape of two tensors * - * @note Deprecated. 
Remove when GEMMReshapeInfo is not used anymore by any other kernels - * * @param[in] input0 First input tensor info * @param[in] input1 Second input tensor info * @param[in] gemm_info GEMM reshape info * * @return the calculated shape */ -inline TensorShape compute_mm_shape(const ITensorInfo &input0, const ITensorInfo &input1, const GEMMReshapeInfo &gemm_info) +inline TensorShape +compute_mm_shape(const ITensorInfo &input0, const ITensorInfo &input1, const GEMMReshapeInfo &gemm_info) { ARM_COMPUTE_UNUSED(input1); ARM_COMPUTE_ERROR_ON_MSG(input0.num_dimensions() > 4, "The number of dimensions for the matrix A must be <= 4"); @@ -902,9 +1036,9 @@ inline TensorShape compute_mm_shape(const ITensorInfo &input0, const ITensorInfo const bool reinterpret_output_as_3d = gemm_info.depth_output_gemm3d() != 0; const int depth_output_gemm3d = reinterpret_output_as_3d ? gemm_info.depth_output_gemm3d() : 1; - TensorShape output_shape{ input0.tensor_shape() }; + TensorShape output_shape{input0.tensor_shape()}; - if(!reinterpret_input_as_3d && !reinterpret_output_as_3d) + if (!reinterpret_input_as_3d && !reinterpret_output_as_3d) { output_shape.set(0, gemm_info.n()); output_shape.set(1, gemm_info.m()); @@ -931,7 +1065,8 @@ inline TensorShape compute_mm_shape(const ITensorInfo &input0, const ITensorInfo * * @return the calculated shape */ -inline TensorShape compute_mm_shape(const ITensorInfo &input0, const ITensorInfo &input1, const GEMMKernelInfo &gemm_info) +inline TensorShape +compute_mm_shape(const ITensorInfo &input0, const ITensorInfo &input1, const GEMMKernelInfo &gemm_info) { ARM_COMPUTE_UNUSED(input1); ARM_COMPUTE_ERROR_ON_MSG(input0.num_dimensions() > 4, "The number of dimensions for the matrix A must be <= 4"); @@ -940,9 +1075,9 @@ inline TensorShape compute_mm_shape(const ITensorInfo &input0, const ITensorInfo const bool reinterpret_output_as_3d = gemm_info.depth_output_gemm3d != 0; const unsigned int depth_output_gemm3d = reinterpret_output_as_3d ? 
gemm_info.depth_output_gemm3d : 1; - TensorShape output_shape{ input0.tensor_shape() }; + TensorShape output_shape{input0.tensor_shape()}; - if(!reinterpret_input_as_3d && !reinterpret_output_as_3d) + if (!reinterpret_input_as_3d && !reinterpret_output_as_3d) { output_shape.set(0, gemm_info.n); output_shape.set(1, gemm_info.m); @@ -963,20 +1098,50 @@ inline TensorShape compute_mm_shape(const ITensorInfo &input0, const ITensorInfo /** Calculate the matrix multiplication output shape of two tensors * + * @param[in] input0 First input tensor info + * @param[in] input1 Second input tensor info + * @param[in] matmul_info Batch MatMul Kernel info to know which matrix is transposed + * + * @return the calculated shape + */ +inline TensorShape +compute_matmul_shape(const TensorShape &input0, const TensorShape &input1, const MatMulKernelInfo &matmul_info) +{ + TensorShape output_shape{input0}; + + if (matmul_info.adj_lhs) + { + output_shape.set(1, input0[0]); // The vertical (M) dimension + } + + if (matmul_info.adj_rhs) + { + output_shape.set(0, input1[1]); // The horizontal (N) dimension + } + else + { + output_shape.set(0, input1[0]); // The horizontal (N) dimension + } + + return output_shape; +} +/** Calculate the matrix multiplication output shape of two tensors + * * @param[in] input Input tensor info * @param[in] gemm_3d_depth (Optional) GEMM 3d depth * @param[in] batch_size_on_z (Optional) True if batch size is on z axis * * @return the calculated shape */ -inline TensorShape compute_output_stage_shape(const ITensorInfo &input, unsigned int gemm_3d_depth = 1, bool batch_size_on_z = false) +inline TensorShape +compute_output_stage_shape(const ITensorInfo &input, unsigned int gemm_3d_depth = 1, bool batch_size_on_z = false) { ARM_COMPUTE_ERROR_ON(input.data_layout() != DataLayout::NHWC && gemm_3d_depth > 1); TensorShape output_shape = input.tensor_shape(); - if(gemm_3d_depth > 1) + if (gemm_3d_depth > 1) { - if(batch_size_on_z) + if (batch_size_on_z) { output_shape.shift_right(1); } @@ -1001,11 +1166,16 @@ inline TensorShape compute_output_stage_shape(const ITensorInfo &input, unsigned * @return the calculated shape */ inline TensorShape compute_strided_slice_shape(const ITensorInfo &input, - const Coordinates &starts, const Coordinates &ends, const Coordinates &strides, - int32_t begin_mask, int32_t end_mask, int32_t shrink_axis_mask) + const Coordinates &starts, + const Coordinates &ends, + const Coordinates &strides, + int32_t begin_mask, + int32_t end_mask, + int32_t shrink_axis_mask) { using namespace arm_compute::helpers::tensor_transform; - return compute_strided_slice_output_shape(input.tensor_shape(), starts, ends, strides, begin_mask, end_mask, shrink_axis_mask); + return compute_strided_slice_output_shape(input.tensor_shape(), starts, ends, strides, begin_mask, end_mask, + shrink_axis_mask); } /** Calculate the slice output shape of a tensor @@ -1016,60 +1186,72 @@ inline TensorShape compute_strided_slice_shape(const ITensorInfo &input, * * @return the calculated shape */ -inline TensorShape compute_slice_shape(const TensorShape &input_shape, const Coordinates &starts, const Coordinates &ends) +inline TensorShape +compute_slice_shape(const TensorShape &input_shape, const Coordinates &starts, const Coordinates &ends) { using namespace arm_compute::helpers::tensor_transform; - return compute_strided_slice_output_shape(input_shape, - starts, ends, BiStrides(), - 0, construct_slice_end_mask(ends), 0); + return compute_strided_slice_output_shape(input_shape, starts, ends, BiStrides(), 
0, construct_slice_end_mask(ends), + 0); } /** Calculate the batch to space output shape of a tensor * - * @param[in] input Input tensor info - * @param[in] block_x Block shape x value - * @param[in] block_y Block shape y value + * @param[in] data_layout Data layout + * @param[in] input Input tensor shape + * @param[in] block_x Block shape x value + * @param[in] block_y Block shape y value + * @param[in] crop_info Information about how the output shape is cropped after batch to space is performed * * @return the calculated shape */ -inline TensorShape compute_batch_to_space_shape(const ITensorInfo *input, const int block_x, const int block_y) +inline TensorShape compute_batch_to_space_shape( + DataLayout data_layout, const TensorShape &input, int block_x, int block_y, const CropInfo &crop_info = CropInfo{}) { - ARM_COMPUTE_ERROR_ON(block_x <= 0 || block_y <= 0); + ARM_COMPUTE_ERROR_ON(block_x < 1 || block_y < 1); - const DataLayout data_layout = input->data_layout(); - const int idx_width = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); - const int idx_height = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); - const int idx_batch = get_data_layout_dimension_index(data_layout, DataLayoutDimension::BATCHES); + const int idx_width = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); + const int idx_height = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); + const int idx_batch = get_data_layout_dimension_index(data_layout, DataLayoutDimension::BATCHES); + + TensorShape output_shape{input}; + + unsigned int new_width = input[idx_width] * static_cast<unsigned int>(block_x); + unsigned int new_height = input[idx_height] * static_cast<unsigned int>(block_y); + const unsigned int width_crop = crop_info.left + crop_info.right; + const unsigned int height_crop = crop_info.top + crop_info.bottom; + ARM_COMPUTE_ERROR_ON(new_width <= width_crop); + ARM_COMPUTE_ERROR_ON(new_height <= height_crop); + new_width -= width_crop; + new_height -= height_crop; - TensorShape output_shape{ input->tensor_shape() }; - output_shape.set(idx_width, input->tensor_shape()[idx_width] * block_x); - output_shape.set(idx_height, input->tensor_shape()[idx_height] * block_y); - output_shape.set(idx_batch, input->tensor_shape()[idx_batch] / (block_x * block_y)); + output_shape.set(idx_width, new_width); + output_shape.set(idx_height, new_height); + output_shape.set(idx_batch, input[idx_batch] / (block_x * block_y)); return output_shape; } /** Calculate the depth to space output shape of a tensor * - * @param[in] input Input tensor info - * @param[in] block Block shape value + * @param[in] input_shape Input tensor shape + * @param[in] data_layout Operation data layout + * @param[in] block Block shape value * * @return the calculated shape */ -inline TensorShape compute_depth_to_space_shape(const ITensorInfo *input, int block) +inline TensorShape compute_depth_to_space_shape(const TensorShape &input_shape, DataLayout data_layout, int block) { ARM_COMPUTE_ERROR_ON(block < 2); - const DataLayout data_layout = input->data_layout(); - const int idx_width = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); - const int idx_height = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); - const int idx_channel = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL); + const int idx_width = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); + const 
int idx_height = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); + const int idx_channel = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL); - TensorShape output_shape{ input->tensor_shape() }; - output_shape.set(idx_width, input->dimension(idx_width) * block); - output_shape.set(idx_height, input->dimension(idx_height) * block); - output_shape.set(idx_channel, input->dimension(idx_channel) / (block * block)); + TensorShape output_shape{input_shape}; + output_shape.set(idx_width, input_shape[idx_width] * block); + output_shape.set(idx_height, input_shape[idx_height] * block); + output_shape.set(idx_channel, input_shape[idx_channel] / (block * block)); return output_shape; } @@ -1087,10 +1269,10 @@ inline TensorShape compute_split_shape(const ITensorInfo *input, unsigned int ax TensorShape empty_shape; empty_shape.set(0, 0); - TensorShape out_shape{ input->tensor_shape() }; + TensorShape out_shape{input->tensor_shape()}; // Return empty shape if axis is invalid - if(axis > input->tensor_shape().num_dimensions()) + if (axis > input->tensor_shape().num_dimensions()) { return empty_shape; } @@ -1098,7 +1280,7 @@ inline TensorShape compute_split_shape(const ITensorInfo *input, unsigned int ax size_t axis_size = out_shape[axis]; // Return empty shape if num_split is not valid - if(axis_size % num_splits) + if (axis_size % num_splits) { return empty_shape; } @@ -1117,18 +1299,22 @@ inline TensorShape compute_split_shape(const ITensorInfo *input, unsigned int ax * * @return the calculated shape */ -inline TensorShape compute_space_to_batch_shape(const ITensorInfo *input, const int block_x, const int block_y, const Size2D &padding_left, const Size2D &padding_right) +inline TensorShape compute_space_to_batch_shape( + const ITensorInfo *input, int block_x, int block_y, const Size2D &padding_left, const Size2D &padding_right) { - TensorShape output_shape{ input->tensor_shape() }; + TensorShape output_shape{input->tensor_shape()}; const DataLayout data_layout = input->data_layout(); const int idx_width = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); const int idx_height = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); const int idx_batch = get_data_layout_dimension_index(data_layout, DataLayoutDimension::BATCHES); - output_shape.set(idx_width, input->tensor_shape()[idx_width] * block_x + padding_left.x() + padding_right.x()); - output_shape.set(idx_height, input->tensor_shape()[idx_height] * block_y + padding_left.y() + padding_right.y()); - output_shape.set(idx_batch, input->tensor_shape()[idx_batch] / (block_x * block_y)); + ARM_COMPUTE_ERROR_ON((input->tensor_shape()[idx_width] + padding_left.x() + padding_right.x()) % block_x != 0); + ARM_COMPUTE_ERROR_ON((input->tensor_shape()[idx_height] + padding_left.y() + padding_right.y()) % block_y != 0); + + output_shape.set(idx_width, (input->tensor_shape()[idx_width] + padding_left.x() + padding_right.x()) / block_x); + output_shape.set(idx_height, (input->tensor_shape()[idx_height] + padding_left.y() + padding_right.y()) / block_y); + output_shape.set(idx_batch, input->tensor_shape()[idx_batch] * block_x * block_y); return output_shape; } @@ -1142,16 +1328,16 @@ inline TensorShape compute_space_to_batch_shape(const ITensorInfo *input, const */ inline TensorShape compute_space_to_depth_shape(const ITensorInfo *input, int32_t block_shape) { - TensorShape output_shape{ input->tensor_shape() }; + TensorShape output_shape{input->tensor_shape()}; const 
DataLayout data_layout = input->data_layout(); const int idx_width = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); const int idx_height = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); const int idx_depth = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL); - output_shape.set(idx_width, input->tensor_shape()[idx_width] * block_shape); - output_shape.set(idx_height, input->tensor_shape()[idx_height] * block_shape); - output_shape.set(idx_depth, input->tensor_shape()[idx_depth] / (block_shape * block_shape)); + output_shape.set(idx_width, input->tensor_shape()[idx_width] / block_shape); + output_shape.set(idx_height, input->tensor_shape()[idx_height] / block_shape); + output_shape.set(idx_depth, input->tensor_shape()[idx_depth] * (block_shape * block_shape)); return output_shape; } @@ -1187,7 +1373,7 @@ inline TensorShape compute_prior_box_shape(const ITensorInfo &input, const Prior inline TensorShape compute_padded_shape(const TensorShape &input_shape, const PaddingList &padding) { TensorShape padded_shape = input_shape; - for(size_t dim = 0; dim < padding.size(); ++dim) + for (size_t dim = 0; dim < padding.size(); ++dim) { const auto &padding_pair = padding[dim]; const uint32_t shape_on_index = (padded_shape.num_dimensions() <= dim) ? 1 : input_shape[dim]; @@ -1206,7 +1392,7 @@ inline TensorShape compute_padded_shape(const TensorShape &input_shape, const Pa inline TensorShape compute_tiled_shape(const TensorShape &input_shape, const Multiples &multiples) { TensorShape tiled_shape = input_shape; - for(size_t dim = 0; dim < multiples.size(); ++dim) + for (size_t dim = 0; dim < multiples.size(); ++dim) { tiled_shape.set(dim, input_shape[dim] * multiples[dim]); } @@ -1223,9 +1409,9 @@ inline TensorShape compute_tiled_shape(const TensorShape &input_shape, const Mul */ inline TensorShape compute_reduced_shape(const TensorShape &input, unsigned int axis, bool keep_dims = true) { - TensorShape output_shape{ input }; + TensorShape output_shape{input}; - if(!keep_dims) + if (!keep_dims) { output_shape.remove_dimension(axis); } @@ -1318,14 +1504,14 @@ inline TensorShape calculate_concatenate_shape(const std::vector<T *> &input, si #if defined(ARM_COMPUTE_ASSERTS_ENABLED) // All dimensions must match except the axis one - for(unsigned int i = 0; i < MAX_DIMS; ++i) + for (unsigned int i = 0; i < MAX_DIMS; ++i) { - if(i == axis) + if (i == axis) { continue; } - for(const auto &tensor : input) + for (const auto &tensor : input) { ARM_COMPUTE_ERROR_ON(tensor == nullptr); const TensorShape shape = extract_shape(tensor); @@ -1336,7 +1522,7 @@ inline TensorShape calculate_concatenate_shape(const std::vector<T *> &input, si // Calculate output shape size_t new_size = 0; - for(const auto &tensor : input) + for (const auto &tensor : input) { const TensorShape shape = extract_shape(tensor); new_size += shape[axis]; @@ -1359,14 +1545,14 @@ inline TensorShape compute_stack_shape(const ITensorInfo &a, unsigned int axis, ARM_COMPUTE_ERROR_ON(axis > a.num_dimensions()); ARM_COMPUTE_ERROR_ON(a.num_dimensions() > 4); - TensorShape shape_out{ a.tensor_shape() }; + TensorShape shape_out{a.tensor_shape()}; shape_out.set(axis, num_tensors); unsigned int i_shift = 0; - for(unsigned int i = 0; i < a.num_dimensions(); ++i) + for (unsigned int i = 0; i < a.num_dimensions(); ++i) { - if(i == axis) + if (i == axis) { i_shift++; } @@ -1376,18 +1562,177 @@ inline TensorShape compute_stack_shape(const ITensorInfo &a, unsigned int axis, return 
shape_out; } -inline TensorShape compute_gather_shape(const TensorShape &input_shape, const TensorShape &indices_shape, uint32_t actual_axis) +/** Calculate the output shape of 3d Convolution + * + * @param[in] src Input tensor shape + * @param[in] weights Weights tensor shape + * @param[in] conv3d_info 3d Convolution Parameters object + * + * @return the calculated shape + */ +inline TensorShape +compute_conv3d_shape(const TensorShape &src, const TensorShape &weights, const Conv3dInfo &conv3d_info) +{ + // Weight tensor shape indices (D H W Cin Cout) + constexpr unsigned int weights_depth_dim = 4u; + constexpr unsigned int weights_height_dim = 3u; + constexpr unsigned int weights_width_dim = 2u; + constexpr unsigned int weights_CHout_dim = 0u; + + // Source/Destination Tensor shape indices (N D H W C) + constexpr unsigned int batch_dim = 4u; + constexpr unsigned int depth_dim = 3u; + constexpr unsigned int height_dim = 2u; + constexpr unsigned int width_dim = 1u; + constexpr unsigned int channel_dim = 0u; + + TensorShape output_shape{src}; + const size_t pad_left = conv3d_info.padding.left; + const size_t pad_right = conv3d_info.padding.right; + const size_t pad_top = conv3d_info.padding.top; + const size_t pad_bottom = conv3d_info.padding.bottom; + const size_t pad_front = conv3d_info.padding.front; + const size_t pad_back = conv3d_info.padding.back; + const size_t dilation_x = conv3d_info.dilation.width; + const size_t dilation_y = conv3d_info.dilation.height; + const size_t dilation_z = conv3d_info.dilation.depth; + const size_t stride_x = conv3d_info.stride.x(); + const size_t stride_y = conv3d_info.stride.y(); + const size_t stride_z = conv3d_info.stride.z(); + + int output_width_size = 0; + int output_height_size = 0; + int output_depth_size = 0; + + switch (conv3d_info.round_type) + { + case DimensionRoundingType::FLOOR: + output_width_size = + static_cast<int>(std::floor((static_cast<float>(src[width_dim] + pad_left + pad_right - + (dilation_x * (weights[weights_width_dim] - 1) + 1)) / + stride_x) + + 1)); + output_height_size = + static_cast<int>(std::floor((static_cast<float>(src[height_dim] + pad_top + pad_bottom - + (dilation_y * (weights[weights_height_dim] - 1) + 1)) / + stride_y) + + 1)); + output_depth_size = + static_cast<int>(std::floor((static_cast<float>(src[depth_dim] + pad_front + pad_back - + (dilation_z * (weights[weights_depth_dim] - 1) + 1)) / + stride_z) + + 1)); + break; + case DimensionRoundingType::CEIL: + output_width_size = + static_cast<int>(std::ceil((static_cast<float>(src[width_dim] + pad_left + pad_right - + (dilation_x * (weights[weights_width_dim] - 1) + 1)) / + stride_x) + + 1)); + output_height_size = + static_cast<int>(std::ceil((static_cast<float>(src[height_dim] + pad_top + pad_bottom - + (dilation_y * (weights[weights_height_dim] - 1) + 1)) / + stride_y) + + 1)); + output_depth_size = + static_cast<int>(std::ceil((static_cast<float>(src[depth_dim] + pad_front + pad_back - + (dilation_z * (weights[weights_depth_dim] - 1) + 1)) / + stride_z) + + 1)); + break; + default: + ARM_COMPUTE_ERROR("Unsupported rounding type"); + } + + output_shape.set(batch_dim, src[batch_dim]); + output_shape.set(width_dim, output_width_size); + output_shape.set(height_dim, output_height_size); + output_shape.set(depth_dim, output_depth_size); + output_shape.set(channel_dim, weights[weights_CHout_dim]); + return output_shape; +} + +/** Calculate the output pool3d shape of a tensor + * + * @param[in] src Input tensor info + * @param[in] pool3d_info Pooling layer info + * 
+ * @return the calculated shape + */ +inline TensorShape compute_pool3d_shape(const TensorShape &src, Pooling3dLayerInfo pool3d_info) +{ + TensorShape output_shape{src}; + + const auto data_layout = DataLayout::NDHWC; + const int idx_width = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); + const int idx_height = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); + const int idx_depth = get_data_layout_dimension_index(data_layout, DataLayoutDimension::DEPTH); + const int pool_size_width = pool3d_info.is_global_pooling ? src[idx_width] : pool3d_info.pool_size.width; + const int pool_size_height = pool3d_info.is_global_pooling ? src[idx_height] : pool3d_info.pool_size.height; + const int pool_size_depth = pool3d_info.is_global_pooling ? src[idx_depth] : pool3d_info.pool_size.depth; + int output_width = 0; + int output_height = 0; + int output_depth = 0; + + std::tie(output_width, output_height, output_depth) = + scaled_3d_dimensions_signed(src[idx_width], src[idx_height], src[idx_depth], pool_size_width, pool_size_height, + pool_size_depth, pool3d_info); + + ARM_COMPUTE_ERROR_ON_MSG((output_width < 1 || output_height < 1 || output_depth < 1), + "Calculated output dimension size is invalid"); + + output_shape.set(idx_width, static_cast<size_t>(output_width)); + output_shape.set(idx_height, static_cast<size_t>(output_height)); + output_shape.set(idx_depth, static_cast<size_t>(output_depth)); + + return output_shape; +} + +/** Calculate the gather output shape of a tensor + * + * @param[in] input_shape Input tensor shape + * @param[in] indices_shape Indices tensor shape. Only supports for 2d and 3d indices + * @param[in] actual_axis Axis to be used in the computation + * + * @note Let input_shape be (X,Y,Z) and indices shape (W,O,P) and axis 1 + * the new shape is computed by replacing the axis in the input shape with + * the indice shape so the output shape will be (X,W,O,P,Z) + * + * @return the calculated shape + */ +inline TensorShape +compute_gather_shape(const TensorShape &input_shape, const TensorShape &indices_shape, uint32_t actual_axis) { - ARM_COMPUTE_ERROR_ON(indices_shape.num_dimensions() > 1); - ARM_COMPUTE_ERROR_ON(input_shape.num_dimensions() > 4); - ARM_COMPUTE_ERROR_ON(actual_axis >= input_shape.num_dimensions()); + const auto input_num_dims = input_shape.num_dimensions(); + const auto indices_num_dims = indices_shape.num_dimensions(); + + ARM_COMPUTE_ERROR_ON(actual_axis >= input_num_dims); + ARM_COMPUTE_ERROR_ON(input_num_dims + indices_num_dims - 1 > Coordinates::num_max_dimensions); + + TensorShape output_shape; + size_t dim_no = 0; + + for (; dim_no < actual_axis; ++dim_no) + { + output_shape.set(dim_no, input_shape[dim_no]); + } + + for (; dim_no < actual_axis + indices_num_dims; ++dim_no) + { + output_shape.set(dim_no, indices_shape[dim_no - actual_axis]); + } + + for (; dim_no < input_num_dims + indices_num_dims - 1; ++dim_no) + { + output_shape.set(dim_no, input_shape[dim_no + 1 - indices_num_dims]); + } - TensorShape output_shape = input_shape; - output_shape[actual_axis] = indices_shape[0]; + ARM_COMPUTE_ERROR_ON(input_shape.total_size() * indices_shape.total_size() != + output_shape.total_size() * input_shape[actual_axis]); return output_shape; } } // namespace shape_calculator } // namespace misc } // namespace arm_compute -#endif /* ARM_COMPUTE_MISC_SHAPE_CALCULATOR_H */ +#endif // ACL_ARM_COMPUTE_CORE_UTILS_MISC_SHAPECALCULATOR_H diff --git a/arm_compute/core/utils/misc/Traits.h 
b/arm_compute/core/utils/misc/Traits.h index 1cbdbfe16f..944fcb95f9 100644 --- a/arm_compute/core/utils/misc/Traits.h +++ b/arm_compute/core/utils/misc/Traits.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 ARM Limited. + * Copyright (c) 2018-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,6 +24,8 @@ #ifndef ARM_COMPUTE_UTILS_TRAITS_TRAITS_H #define ARM_COMPUTE_UTILS_TRAITS_TRAITS_H +#include "arm_compute/core/Types.h" + #include <type_traits> namespace arm_compute diff --git a/arm_compute/core/utils/misc/Utility.h b/arm_compute/core/utils/misc/Utility.h index b2bb63f5c8..22f10d74cc 100644 --- a/arm_compute/core/utils/misc/Utility.h +++ b/arm_compute/core/utils/misc/Utility.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,8 +24,11 @@ #ifndef ARM_COMPUTE_MISC_UTILITY_H #define ARM_COMPUTE_MISC_UTILITY_H +#include "arm_compute/core/Error.h" + #include <algorithm> #include <array> +#include <cstdint> #include <limits> #include <numeric> #include <vector> @@ -41,7 +44,7 @@ struct index_sequence }; template <std::size_t N, std::size_t... S> -struct index_sequence_generator : index_sequence_generator < N - 1, N - 1, S... > +struct index_sequence_generator : index_sequence_generator<N - 1, N - 1, S...> { }; @@ -55,17 +58,17 @@ template <std::size_t N> using index_sequence_t = typename index_sequence_generator<N>::type; template <typename T, std::size_t N, T val, T... vals> -struct generate_array : generate_array < T, N - 1, val, val, vals... > +struct generate_array : generate_array<T, N - 1, val, val, vals...> { }; template <typename T, T val, T... vals> struct generate_array<T, 0, val, vals...> { - static constexpr std::array<T, sizeof...(vals)> value{ vals... }; + static constexpr std::array<T, sizeof...(vals)> value{vals...}; }; -template <typename T, T val, T... vals> +template <typename T, T val, T... vals> constexpr std::array<T, sizeof...(vals)> generate_array<T, 0, val, vals...>::value; /** @endcond */ @@ -76,7 +79,7 @@ template <std::size_t... S, typename T = std::array<typename std::iterator_traits<Iterator>::value_type, sizeof...(S)>> T make_array(Iterator first, index_sequence<S...>) { - return T{ { first[S]... } }; + return T{{first[S]...}}; } } // namespace detail @@ -84,7 +87,7 @@ template <std::size_t N, typename Iterator> std::array<typename std::iterator_traits<Iterator>::value_type, N> make_array(Iterator first, Iterator last) { ARM_COMPUTE_UNUSED(last); - return detail::make_array(first, index_sequence_t<N> {}); + return detail::make_array(first, index_sequence_t<N>{}); } /** Performs clamping among a lower and upper value. @@ -116,7 +119,7 @@ inline void for_each(F &&) * @param[in] args Remaining arguments */ template <typename F, typename T, typename... Ts> -inline void for_each(F &&func, T &&arg, Ts &&... args) +inline void for_each(F &&func, T &&arg, Ts &&...args) { func(std::forward<T>(arg)); for_each(std::forward<F>(func), std::forward<Ts>(args)...); @@ -140,9 +143,11 @@ inline T &&foldl(F &&, T &&value) * @param[in] values Remaining arguments */ template <typename F, typename T, typename U, typename... Us> -inline auto foldl(F &&func, T &&initial, U &&value, Us &&... 
values) -> decltype(func(std::forward<T>(initial), std::forward<U>(value))) +inline auto foldl(F &&func, T &&initial, U &&value, Us &&...values) + -> decltype(func(std::forward<T>(initial), std::forward<U>(value))) { - return foldl(std::forward<F>(func), func(std::forward<T>(initial), std::forward<U>(value)), std::forward<Us>(values)...); + return foldl(std::forward<F>(func), func(std::forward<T>(initial), std::forward<U>(value)), + std::forward<Us>(values)...); } /** Perform an index sort of a given vector. @@ -157,11 +162,7 @@ std::vector<size_t> sort_indices(const std::vector<T> &v) std::vector<size_t> idx(v.size()); std::iota(idx.begin(), idx.end(), 0); - std::sort(idx.begin(), idx.end(), - [&v](size_t i1, size_t i2) - { - return v[i1] < v[i2]; - }); + std::sort(idx.begin(), idx.end(), [&v](size_t i1, size_t i2) { return v[i1] < v[i2]; }); return idx; } @@ -175,7 +176,7 @@ std::vector<size_t> sort_indices(const std::vector<T> &v) */ inline bool endswith(const std::string &str, const std::string &suffix) { - if(str.size() < suffix.size()) + if (str.size() < suffix.size()) { return false; } @@ -202,12 +203,28 @@ inline bool check_aligned(void *ptr, const size_t alignment) */ inline std::string tolower(std::string string) { - std::transform(string.begin(), string.end(), string.begin(), [](unsigned char c) - { - return std::tolower(c); - }); + std::transform(string.begin(), string.end(), string.begin(), [](unsigned char c) { return std::tolower(c); }); return string; } + +/** Get environment variable as a string + * + * @note Return empty string on bare-metal + * + * @param[in] env_name Name of the Environment variable to retrieve + * + * @return Environment variable content, or empty string if the variable is undefined or on bare-metal + */ +inline std::string getenv(const std::string &env_name) +{ +#ifdef BARE_METAL + ARM_COMPUTE_UNUSED(env_name); + return std::string{}; +#else // BARE_METAL + const auto env_chr = std::getenv(env_name.c_str()); + return env_chr == nullptr ? std::string{} : std::string{env_chr}; +#endif // BARE_METAL +} } // namespace utility } // namespace arm_compute #endif /* ARM_COMPUTE_MISC_UTILITY_H */ |
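
Editor's note: the new compute_deconvolution_padding() helper in the ShapeCalculator.h hunk above derives the padding from the upsample-and-pad arithmetic shown there: zero-stuff the input by (stride - 1) between elements, then pad so a stride-1 convolution with the given kernel hits the requested output size. The following is a minimal standalone sketch of that arithmetic only, with illustrative names and plain integers rather than ITensorInfo; it is not the library implementation.

#include <cstdint>
#include <iostream>
#include <utility>

// Padding needed on x/y so that a stride-1 convolution over the upsampled
// (zero-stuffed) input produces the requested deconvolution output size.
std::pair<int32_t, int32_t> deconv_padding(int32_t in_w, int32_t in_h,
                                           int32_t kernel_w, int32_t kernel_h,
                                           int32_t stride_x, int32_t stride_y,
                                           int32_t out_w, int32_t out_h)
{
    // Upsampled dimensions: (in - 1) * stride + 1
    const int32_t up_w = (in_w - 1) * stride_x + 1;
    const int32_t up_h = (in_h - 1) * stride_y + 1;

    // Extra elements so a stride-1 convolution yields out_w x out_h
    const int32_t pad_x = out_w - (up_w - kernel_w + 1);
    const int32_t pad_y = out_h - (up_h - kernel_h + 1);
    return {pad_x, pad_y};
}

int main()
{
    // 4x4 input, 3x3 kernel, stride 2, requested 9x9 output:
    // the upsampled size is 7x7, so 4 elements of padding are needed per axis.
    const auto pad = deconv_padding(4, 4, 3, 3, 2, 2, 9, 9);
    std::cout << pad.first << " " << pad.second << '\n'; // prints: 4 4
    return 0;
}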
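
Editor's note: the reworked compute_gather_shape() in the hunk above generalises the old single-axis rule; per its new documentation, the gathered axis of the input shape is replaced by the whole indices shape (input (X,Y,Z), indices (W,O,P), axis 1 gives (X,W,O,P,Z)). Below is a small self-contained sketch of that rule using std::vector<size_t> instead of TensorShape; the helper name and the main() driver are illustrative, not part of the library API.

#include <cassert>
#include <cstddef>
#include <iostream>
#include <vector>

// Replace the gathered axis of the input shape with the full indices shape.
std::vector<std::size_t> gather_output_shape(const std::vector<std::size_t> &input,
                                             const std::vector<std::size_t> &indices,
                                             std::size_t axis)
{
    assert(axis < input.size());
    std::vector<std::size_t> out;
    out.reserve(input.size() + indices.size() - 1);
    out.insert(out.end(), input.begin(), input.begin() + axis);   // dims before the axis
    out.insert(out.end(), indices.begin(), indices.end());        // the whole indices shape
    out.insert(out.end(), input.begin() + axis + 1, input.end()); // dims after the axis
    return out;
}

int main()
{
    // Input shape (4,5,6), indices shape (2,3), axis 1 -> output (4,2,3,6).
    // Sanity check from the hunk: 120 * 6 == 144 * 5, i.e. total sizes agree
    // once the gathered axis extent (5) is factored out.
    const auto out = gather_output_shape({4, 5, 6}, {2, 3}, 1);
    for (auto d : out)
    {
        std::cout << d << ' '; // prints: 4 2 3 6
    }
    std::cout << '\n';
    return 0;
}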