about summary refs log tree commit diff
diff options
context:
space:
mode:
author SiCongLi <sicong.li@arm.com> 2021-02-22 14:28:33 +0000
committer SiCong Li <sicong.li@arm.com> 2021-03-08 09:39:03 +0000
commit c7b1e84ac5f3ada1b2f78c66979ef4d44804a955 (patch)
tree 3f2dd18121727cf8000a84ed0030523ce7c0e91e
parent b861074c74ea99222207a4a0a71954f8852f8704 (diff)
download ComputeLibrary-c7b1e84ac5f3ada1b2f78c66979ef4d44804a955.tar.gz
Remove usage of valid window region in NHWC CPU kernels - Part1
Replace all calculate_max_window(ValidRegion, ...) with calculate_max_window(TensorShape, ...) in CPU kernels

Resolves COMPMID-4152 (1/2)

Change-Id: I7403ea6b24b9e7889890839142a06439d6c8a499
Signed-off-by: SiCongLi <sicong.li@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5202
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
-rw-r--r-- src/core/NEON/kernels/NELogicalKernel.cpp 5
-rw-r--r-- src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.cpp 20
-rw-r--r-- src/core/NEON/kernels/NESelectKernel.cpp 2
-rw-r--r-- src/core/cpu/kernels/CpuAddKernel.cpp 9
-rw-r--r-- src/core/cpu/kernels/CpuElementwiseKernel.cpp 6
-rw-r--r-- src/core/cpu/kernels/CpuElementwiseUnaryKernel.cpp 6
-rw-r--r-- src/core/cpu/kernels/CpuSubKernel.cpp 9
-rw-r--r-- src/core/helpers/WindowHelpers.cpp 53
-rw-r--r-- src/core/helpers/WindowHelpers.h 15
9 files changed, 81 insertions, 44 deletions
diff --git a/src/core/NEON/kernels/NELogicalKernel.cpp b/src/core/NEON/kernels/NELogicalKernel.cpp
index d98694ffe1..e1c24da777 100644
--- a/src/core/NEON/kernels/NELogicalKernel.cpp
+++ b/src/core/NEON/kernels/NELogicalKernel.cpp
@@ -287,9 +287,8 @@ void NELogicalKernel::configure(const ITensorInfo *input1, const ITensorInfo *in
if(op != LogicalOperation::Not)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input2);
- const std::pair<TensorShape, ValidRegion> broadcast_pair = ITensorInfo::broadcast_shape_and_valid_region(*input1, *input2);
- out_shape = broadcast_pair.first;
- win = calculate_max_window(broadcast_pair.second, Steps());
+ out_shape = TensorShape::broadcast_shape(input1->tensor_shape(), input2->tensor_shape());
+ win = calculate_max_window(out_shape, Steps());
}
ICPPKernel::configure(win);
diff --git a/src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.cpp b/src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.cpp
index 6661326ea8..b287e18281 100644
--- a/src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.cpp
+++ b/src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2020 Arm Limited.
+ * Copyright (c) 2016-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -1486,9 +1486,7 @@ void NEPixelWiseMultiplicationKernel::configure(ITensorInfo *input1, ITensorInfo
ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input1, input2, output, scale, overflow_policy, rounding_policy));
- const std::pair<TensorShape, ValidRegion> broadcast_pair = ITensorInfo::broadcast_shape_and_valid_region(*input1, *input2);
- const TensorShape &out_shape = broadcast_pair.first;
- const ValidRegion &valid_region = broadcast_pair.second;
+ const TensorShape &out_shape = TensorShape::broadcast_shape(input1->tensor_shape(), input2->tensor_shape());
// Auto initialize output if not initialized
set_shape_if_empty(*output, out_shape);
@@ -1624,10 +1622,7 @@ void NEPixelWiseMultiplicationKernel::configure(ITensorInfo *input1, ITensorInfo
}
// Configure kernel window
- Coordinates coord;
- coord.set_num_dimensions(output->num_dimensions());
- output->set_valid_region(valid_region);
- Window win = calculate_max_window(valid_region, Steps());
+ Window win = calculate_max_window(out_shape);
INEKernel::configure(win);
}
@@ -1692,19 +1687,14 @@ void NEComplexPixelWiseMultiplicationKernel::configure(ITensorInfo *input1, ITen
ARM_COMPUTE_ERROR_ON_NULLPTR(input1, input2, output);
ARM_COMPUTE_ERROR_THROW_ON(validate_arguments_complex(input1, input2, output));
- const std::pair<TensorShape, ValidRegion> broadcast_pair = ITensorInfo::broadcast_shape_and_valid_region(*input1, *input2);
- const TensorShape &out_shape = broadcast_pair.first;
- const ValidRegion &valid_region = broadcast_pair.second;
+ const TensorShape &out_shape = TensorShape::broadcast_shape(input1->tensor_shape(), input2->tensor_shape());
// Auto initialize output if not initialized
const TensorInfo out_info(out_shape, input1->num_channels(), input1->data_type());
auto_init_if_empty(*output, out_info);
// Configure kernel window
- Coordinates coord;
- coord.set_num_dimensions(output->num_dimensions());
- output->set_valid_region(valid_region);
- Window win = calculate_max_window(valid_region, Steps());
+ Window win = calculate_max_window(out_shape);
INEKernel::configure(win);
}
diff --git a/src/core/NEON/kernels/NESelectKernel.cpp b/src/core/NEON/kernels/NESelectKernel.cpp
index 1d5f2b61a1..22cd442889 100644
--- a/src/core/NEON/kernels/NESelectKernel.cpp
+++ b/src/core/NEON/kernels/NESelectKernel.cpp
@@ -224,7 +224,7 @@ void NESelectKernel::configure(const ITensor *c, const ITensor *x, const ITensor
_function = it->second;
}
- Window win = calculate_max_window(x->info()->valid_region());
+ Window win = calculate_max_window(*x->info());
INEKernel::configure(win);
}
diff --git a/src/core/cpu/kernels/CpuAddKernel.cpp b/src/core/cpu/kernels/CpuAddKernel.cpp
index 31c7b2af60..fc88a7e22d 100644
--- a/src/core/cpu/kernels/CpuAddKernel.cpp
+++ b/src/core/cpu/kernels/CpuAddKernel.cpp
@@ -249,9 +249,7 @@ Status validate_arguments(const ITensorInfo &src0, const ITensorInfo &src1, cons
std::pair<Status, Window> validate_and_configure_window(const ITensorInfo &src0, const ITensorInfo &src1, ITensorInfo &dst)
{
- const std::pair<TensorShape, ValidRegion> broadcast_pair = ITensorInfo::broadcast_shape_and_valid_region(src0, src1);
- const TensorShape &out_shape = broadcast_pair.first;
- const ValidRegion &valid_region = broadcast_pair.second;
+ const TensorShape &out_shape = TensorShape::broadcast_shape(src0.tensor_shape(), src1.tensor_shape());
// Auto initialize dst if not initialized
{
@@ -287,12 +285,9 @@ std::pair<Status, Window> validate_and_configure_window(const ITensorInfo &src0,
}
}
- Window win = calculate_max_window(valid_region, Steps());
+ Window win = calculate_max_window(out_shape, Steps());
// CpuAddKernel doesn't need padding so update_window_and_padding() can be skipped
- Coordinates coord;
- coord.set_num_dimensions(dst.num_dimensions());
- dst.set_valid_region(valid_region);
return std::make_pair(Status{}, win);
}
} // namespace
diff --git a/src/core/cpu/kernels/CpuElementwiseKernel.cpp b/src/core/cpu/kernels/CpuElementwiseKernel.cpp
index ab915b9d72..1ac21acbc0 100644
--- a/src/core/cpu/kernels/CpuElementwiseKernel.cpp
+++ b/src/core/cpu/kernels/CpuElementwiseKernel.cpp
@@ -182,14 +182,12 @@ void CpuElementwiseKernel::configure_common(const ITensorInfo *input1, const ITe
ARM_COMPUTE_ERROR_ON_NULLPTR(input1, input2, output);
// Configure kernel window
- const std::pair<TensorShape, ValidRegion> broadcast_pair = ITensorInfo::broadcast_shape_and_valid_region(*input1, *input2);
- const TensorShape &out_shape = broadcast_pair.first;
- const ValidRegion &valid_region = broadcast_pair.second;
+ const TensorShape &out_shape = TensorShape::broadcast_shape(input1->tensor_shape(), input2->tensor_shape());
// Auto initialize output if not initialized
auto_init_if_empty(*output, out_shape, 1, input1->data_type());
- Window win = calculate_max_window(valid_region);
+ Window win = calculate_max_window(out_shape);
ICpuKernel::configure(win);
}
diff --git a/src/core/cpu/kernels/CpuElementwiseUnaryKernel.cpp b/src/core/cpu/kernels/CpuElementwiseUnaryKernel.cpp
index d2681bb060..2b5c11f8e1 100644
--- a/src/core/cpu/kernels/CpuElementwiseUnaryKernel.cpp
+++ b/src/core/cpu/kernels/CpuElementwiseUnaryKernel.cpp
@@ -113,14 +113,12 @@ void CpuElementwiseUnaryKernel::configure(ElementWiseUnary op, const ITensorInfo
ARM_COMPUTE_ERROR_THROW_ON(validate(op, input, output));
// Configure kernel window
- const std::pair<TensorShape, ValidRegion> broadcast_pair = ITensorInfo::broadcast_shape_and_valid_region(input);
- const TensorShape &out_shape = broadcast_pair.first;
- const ValidRegion &valid_region = broadcast_pair.second;
+ const TensorShape &out_shape = TensorShape::broadcast_shape(input.tensor_shape());
// Auto initialize output if not initialized
auto_init_if_empty(output, out_shape, 1, input.data_type());
- Window win = calculate_max_window(valid_region);
+ Window win = calculate_max_window(out_shape);
_op = op;
diff --git a/src/core/cpu/kernels/CpuSubKernel.cpp b/src/core/cpu/kernels/CpuSubKernel.cpp
index a03dcf2353..d7057bbe2b 100644
--- a/src/core/cpu/kernels/CpuSubKernel.cpp
+++ b/src/core/cpu/kernels/CpuSubKernel.cpp
@@ -201,9 +201,7 @@ void CpuSubKernel::configure(const ITensorInfo *src0, const ITensorInfo *src1, I
ARM_COMPUTE_ERROR_ON_NULLPTR(src0, src1, dst);
ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(*src0, *src1, *dst, policy));
- const std::pair<TensorShape, ValidRegion> broadcast_pair = ITensorInfo::broadcast_shape_and_valid_region(*src0, *src1);
- const TensorShape &out_shape = broadcast_pair.first;
- const ValidRegion &valid_region = broadcast_pair.second;
+ const TensorShape &out_shape = TensorShape::broadcast_shape(src0->tensor_shape(), src1->tensor_shape());
// Auto initialize dst if not initialized
set_shape_if_empty(*dst, out_shape);
@@ -211,10 +209,7 @@ void CpuSubKernel::configure(const ITensorInfo *src0, const ITensorInfo *src1, I
_policy = policy;
// CpuSubKernel doesn't need padding so update_window_and_padding() can be skipped
- Coordinates coord;
- coord.set_num_dimensions(dst->num_dimensions());
- dst->set_valid_region(valid_region);
- Window win = calculate_max_window(valid_region, Steps());
+ Window win = calculate_max_window(out_shape, Steps());
ICpuKernel::configure(win);
}
diff --git a/src/core/helpers/WindowHelpers.cpp b/src/core/helpers/WindowHelpers.cpp
index ba10eb9775..75ffb71b4b 100644
--- a/src/core/helpers/WindowHelpers.cpp
+++ b/src/core/helpers/WindowHelpers.cpp
@@ -1,5 +1,5 @@
/*
-* Copyright (c) 2020 Arm Limited.
+* Copyright (c) 2020-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -79,6 +79,57 @@ Window calculate_max_window(const ValidRegion &valid_region, const Steps &steps,
return window;
}
+Window calculate_max_window(const TensorShape &shape, const Steps &steps, bool skip_border, BorderSize border_size)
+{
+ if(!skip_border)
+ {
+ border_size = BorderSize(0);
+ }
+
+ Window window;
+
+ window.set(0, Window::Dimension(
+ // Skip the border left of the image
+ border_size.left,
+ // Skip the border right of the image
+ // Make sure the window width is a multiple of the step size
+ border_size.left + ceil_to_multiple(std::max(0, static_cast<int>(shape[0]) - static_cast<int>(border_size.left) - static_cast<int>(border_size.right)), steps[0]),
+ steps[0]));
+
+ size_t n = 1;
+
+ if(shape.num_dimensions() > 1)
+ {
+ window.set(1, Window::Dimension(
+ // Skip the border above the image
+ border_size.top,
+ // Skip the border below the image
+ border_size.top + ceil_to_multiple(std::max(0, static_cast<int>(shape[1]) - static_cast<int>(border_size.top) - static_cast<int>(border_size.bottom)), steps[1]),
+ steps[1]));
+
+ ++n;
+ }
+
+ if(shape.num_dimensions() > 2)
+ {
+ window.set(2, Window::Dimension(0, std::max<size_t>(1, shape[2]), steps[2]));
+
+ ++n;
+ }
+
+ for(; n < shape.num_dimensions(); ++n)
+ {
+ window.set(n, Window::Dimension(0, std::max<size_t>(1, shape[n])));
+ }
+
+ for(; n < Coordinates::num_max_dimensions; ++n)
+ {
+ window.set(n, Window::Dimension(0, 1));
+ }
+
+ return window;
+}
+
Window calculate_max_enlarged_window(const ValidRegion &valid_region, const Steps &steps, BorderSize border_size)
{
const Coordinates &anchor = valid_region.anchor;
diff --git a/src/core/helpers/WindowHelpers.h b/src/core/helpers/WindowHelpers.h
index 9bc2135b6d..9216c33f16 100644
--- a/src/core/helpers/WindowHelpers.h
+++ b/src/core/helpers/WindowHelpers.h
@@ -1,5 +1,5 @@
/*
-* Copyright (c) 2020 Arm Limited.
+* Copyright (c) 2020-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -107,6 +107,17 @@ Window calculate_max_window(const ValidRegion &valid_region, const Steps &steps
/** Calculate the maximum window for a given tensor shape and border setting
*
+ * @param[in] shape Shape of the tensor space
+ * @param[in] steps (Optional) Number of elements processed for each step.
+ * @param[in] skip_border (Optional) If true exclude the border region from the window.
+ * @param[in] border_size (Optional) Border size.
+ *
+ * @return The maximum window the kernel can be executed on.
+ */
+Window calculate_max_window(const TensorShape &shape, const Steps &steps = Steps(), bool skip_border = false, BorderSize border_size = BorderSize());
+
+/** Calculate the maximum window for a given tensor shape and border setting
+ *
* @param[in] info Tensor info object defining the shape of the object for which the window is created.
* @param[in] steps (Optional) Number of elements processed for each step.
* @param[in] skip_border (Optional) If true exclude the border region from the window.
@@ -116,7 +127,7 @@ Window calculate_max_window(const ValidRegion &valid_region, const Steps &steps
*/
inline Window calculate_max_window(const ITensorInfo &info, const Steps &steps = Steps(), bool skip_border = false, BorderSize border_size = BorderSize())
{
- return calculate_max_window(info.valid_region(), steps, skip_border, border_size);
+ return calculate_max_window(info.tensor_shape(), steps, skip_border, border_size);
}
/** Calculate the maximum window used by a horizontal kernel for a given tensor shape and border setting