diff options
author | Mohammed Suhail Munshi <MohammedSuhail.Munshi@arm.com> | 2022-09-20 11:49:23 +0100 |
---|---|---|
committer | Mohmun02 <MohammedSuhail.Munshi@arm.com> | 2022-10-12 09:27:42 +0000 |
commit | fa79fda2c797282de3589aaa69b06e065e8a21e0 (patch) | |
tree | 91020783a702c9a3be16dad076bca10321fe04c2 /src/core/helpers/WindowHelpers.cpp | |
parent | c8cc024603cb1db084227196a52e562bf251d339 (diff) | |
download | ComputeLibrary-fa79fda2c797282de3589aaa69b06e065e8a21e0.tar.gz |
Optimize Neon™ Logistic Activation
- Use a 1D execution window to improve the memory access pattern.
Resolves: [COMPMID-5465]
Signed-off-by: Mohammed Suhail Munshi <MohammedSuhail.Munshi@arm.com>
Change-Id: Ida30669ffa06eb002ca43a6edf15e25a6eaad2f6
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/8344
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Gunes Bayir <gunes.bayir@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core/helpers/WindowHelpers.cpp')
-rw-r--r-- | src/core/helpers/WindowHelpers.cpp | 55 |
1 files changed, 49 insertions, 6 deletions
diff --git a/src/core/helpers/WindowHelpers.cpp b/src/core/helpers/WindowHelpers.cpp index fa152c9f58..a4d46db352 100644 --- a/src/core/helpers/WindowHelpers.cpp +++ b/src/core/helpers/WindowHelpers.cpp @@ -234,15 +234,15 @@ Window calculate_max_window_horizontal(const ValidRegion &valid_region, const St std::pair<Window, size_t> calculate_squashed_or_max_window(const ITensorInfo &src0, const ITensorInfo &src1) { - const auto &shape0 = src0.tensor_shape(); - const auto &shape1 = src1.tensor_shape(); - const auto &strides0 = src0.strides_in_bytes(); - const auto &strides1 = src1.strides_in_bytes(); - const auto num_dimensions = std::max(src0.num_dimensions(), src1.num_dimensions()); + const auto &shape0 = src0.tensor_shape(); + const auto &shape1 = src1.tensor_shape(); + const auto &strides0 = src0.strides_in_bytes(); + const auto &strides1 = src1.strides_in_bytes(); + const auto num_dimensions = std::max(src0.num_dimensions(), src1.num_dimensions()); Window win; size_t split_dimension = Window::DimY; - size_t dim = 0; + size_t dim = 0; size_t squashed_bytes = src0.element_size(); @@ -282,4 +282,47 @@ std::pair<Window, size_t> calculate_squashed_or_max_window(const ITensorInfo &sr return std::make_pair(win, split_dimension); } + +std::pair<Window, size_t> calculate_squashed_or_max_window(const ITensorInfo &src) +{ + const auto &shape = src.tensor_shape(); + const auto &strides = src.strides_in_bytes(); + const auto num_dimensions = src.num_dimensions(); + + Window win; + size_t split_dimension = Window::DimY; + size_t dim = 0; + size_t squashed_bytes = src.element_size(); + + // Try to squash the low dimensions together. + for(; dim < num_dimensions; ++dim) + { + if(strides[dim] != squashed_bytes) + { + break; + } + squashed_bytes *= shape[dim]; + } + if(dim == num_dimensions) + { + const auto squashed_elements = squashed_bytes / src.element_size(); + split_dimension = Window::DimX; + // The input tensor can be interpreted as 1D array. 
+ win.set(0, Window::Dimension(0, squashed_elements, 1)); + for(dim = 1; dim < Coordinates::num_max_dimensions; ++dim) + { + win.set(dim, Window::Dimension(0, 1, 1)); + } + } + else + { + // Generate the max window. + for(dim = 0; dim < Coordinates::num_max_dimensions; ++dim) + { + win.set(dim, Window::Dimension(0, shape[dim], 1)); + } + } + return std::make_pair(win, split_dimension); +} + } // namespace arm_compute |