aboutsummaryrefslogtreecommitdiff
path: root/src/core
diff options
context:
space:
mode:
author Mohammed Suhail Munshi <MohammedSuhail.Munshi@arm.com> 2022-09-20 11:49:23 +0100
committer Mohmun02 <MohammedSuhail.Munshi@arm.com> 2022-10-12 09:27:42 +0000
commit fa79fda2c797282de3589aaa69b06e065e8a21e0 (patch)
tree 91020783a702c9a3be16dad076bca10321fe04c2 /src/core
parent c8cc024603cb1db084227196a52e562bf251d339 (diff)
download ComputeLibrary-fa79fda2c797282de3589aaa69b06e065e8a21e0.tar.gz
Optimize Neon™ Logistic Activation
- Use a 1D execution window to improve the memory access pattern.
Resolves: [COMPMID-5465]
Signed-off-by: Mohammed Suhail Munshi <MohammedSuhail.Munshi@arm.com>
Change-Id: Ida30669ffa06eb002ca43a6edf15e25a6eaad2f6
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/8344
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Gunes Bayir <gunes.bayir@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core')
-rw-r--r-- src/core/helpers/WindowHelpers.cpp 55
-rw-r--r-- src/core/helpers/WindowHelpers.h 12
2 files changed, 61 insertions, 6 deletions
diff --git a/src/core/helpers/WindowHelpers.cpp b/src/core/helpers/WindowHelpers.cpp
index fa152c9f58..a4d46db352 100644
--- a/src/core/helpers/WindowHelpers.cpp
+++ b/src/core/helpers/WindowHelpers.cpp
@@ -234,15 +234,15 @@ Window calculate_max_window_horizontal(const ValidRegion &valid_region, const St
std::pair<Window, size_t> calculate_squashed_or_max_window(const ITensorInfo &src0, const ITensorInfo &src1)
{
- const auto &shape0 = src0.tensor_shape();
- const auto &shape1 = src1.tensor_shape();
- const auto &strides0 = src0.strides_in_bytes();
- const auto &strides1 = src1.strides_in_bytes();
- const auto num_dimensions = std::max(src0.num_dimensions(), src1.num_dimensions());
+ const auto &shape0 = src0.tensor_shape();
+ const auto &shape1 = src1.tensor_shape();
+ const auto &strides0 = src0.strides_in_bytes();
+ const auto &strides1 = src1.strides_in_bytes();
+ const auto num_dimensions = std::max(src0.num_dimensions(), src1.num_dimensions());
Window win;
size_t split_dimension = Window::DimY;
- size_t dim = 0;
+ size_t dim = 0;
size_t squashed_bytes = src0.element_size();
@@ -282,4 +282,47 @@ std::pair<Window, size_t> calculate_squashed_or_max_window(const ITensorInfo &sr
return std::make_pair(win, split_dimension);
}
+
+std::pair<Window, size_t> calculate_squashed_or_max_window(const ITensorInfo &src)
+{ // Single-tensor overload: returns {window, split dimension}; the window is 1D when every dimension can be squashed, else the max window.
+ const auto &shape = src.tensor_shape();
+ const auto &strides = src.strides_in_bytes();
+ const auto num_dimensions = src.num_dimensions();
+
+ Window win;
+ size_t split_dimension = Window::DimY; // Default; only replaced by DimX on the fully squashed path below.
+ size_t dim = 0;
+ size_t squashed_bytes = src.element_size(); // Byte extent covered by the dimensions squashed so far (starts at one element).
+
+ // Try to squash the low dimensions together: a dimension is squashable while its stride equals the bytes covered by the dimensions below it.
+ for(; dim < num_dimensions; ++dim)
+ {
+ if(strides[dim] != squashed_bytes)
+ {
+ break; // Stride mismatch: this dimension does not start right after the data squashed so far.
+ }
+ squashed_bytes *= shape[dim];
+ }
+ if(dim == num_dimensions) // The loop ran to completion, i.e. every dimension was squashed.
+ {
+ const auto squashed_elements = squashed_bytes / src.element_size(); // Convert the byte extent back to an element count.
+ split_dimension = Window::DimX;
+ // The input tensor can be interpreted as a 1D array: all elements go on x, every other dimension collapses to a single step.
+ win.set(0, Window::Dimension(0, squashed_elements, 1));
+ for(dim = 1; dim < Coordinates::num_max_dimensions; ++dim)
+ {
+ win.set(dim, Window::Dimension(0, 1, 1));
+ }
+ }
+ else
+ {
+ // Generate the max window: one unit-step dimension per shape entry.
+ for(dim = 0; dim < Coordinates::num_max_dimensions; ++dim) // NOTE(review): reads shape[] past num_dimensions; presumably unused extents read as 1 - confirm.
+ {
+ win.set(dim, Window::Dimension(0, shape[dim], 1));
+ }
+ }
+ return std::make_pair(win, split_dimension);
+}
+
} // namespace arm_compute
diff --git a/src/core/helpers/WindowHelpers.h b/src/core/helpers/WindowHelpers.h
index c9e5a135c0..eccf7f2d18 100644
--- a/src/core/helpers/WindowHelpers.h
+++ b/src/core/helpers/WindowHelpers.h
@@ -176,6 +176,18 @@ inline Window calculate_max_enlarged_window(const ITensorInfo &info, const Steps
return calculate_max_enlarged_window(info.valid_region(), steps, border_size);
}
+/** Calculate the squashed or maximum window for the given tensor.
+ *
+ * If the tensor data is laid out contiguously in memory, the tensor can be interpreted
+ * as a 1D array and all the dimensions are squashed together into the x-dimension.
+ * Otherwise, the max window for the given tensor shape is generated.
+ *
+ * @param[in] src Tensor info object defining the shape and byte strides of the input tensor.
+ *
+ * @return The window (squashed or maximum) the kernel can be executed on and the preferred split dimension (Window::DimX if squashed, Window::DimY otherwise).
+ */
+std::pair<Window, size_t> calculate_squashed_or_max_window(const ITensorInfo &src);
+
/** Calculate the squashed or maximum window for the given tensor shapes.
*
* If the tensor data resides continuously in the memory, the tensor can be interpreted