aboutsummaryrefslogtreecommitdiff
path: root/src/core/NEON/kernels/NEPermuteKernel.cpp
diff options
context:
space:
mode:
authorPablo Tello <pablo.tello@arm.com>2018-12-05 17:36:30 +0000
committerGeorgios Pinitas <georgios.pinitas@arm.com>2018-12-12 18:48:48 +0000
commit35767bc09f21050a9767a91b086b327afc928a81 (patch)
tree0b4b3dad94566ae3c52f847103b2577ada5dd06d /src/core/NEON/kernels/NEPermuteKernel.cpp
parent1f8db2be160718979d38e3671a135d22e83cc5c2 (diff)
downloadComputeLibrary-35767bc09f21050a9767a91b086b327afc928a81.tar.gz
COMPMID-1697: NEPermute extended support for more cases.
Regardless of the input data layout, the NEPermute function now supports all permutations of 4D tensors. Added corresponding validation tests. Change-Id: I0f8f20c2c3716e908a18a59783be53efab80ef5b Reviewed-on: https://review.mlplatform.org/367 Reviewed-by: Giuseppe Rossini <giuseppe.rossini@arm.com> Tested-by: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core/NEON/kernels/NEPermuteKernel.cpp')
-rw-r--r--src/core/NEON/kernels/NEPermuteKernel.cpp140
1 files changed, 110 insertions, 30 deletions
diff --git a/src/core/NEON/kernels/NEPermuteKernel.cpp b/src/core/NEON/kernels/NEPermuteKernel.cpp
index 29e6d501a6..5a2f258d4e 100644
--- a/src/core/NEON/kernels/NEPermuteKernel.cpp
+++ b/src/core/NEON/kernels/NEPermuteKernel.cpp
@@ -43,6 +43,48 @@ using namespace arm_compute;
namespace
{
+inline bool is_permutation_supported(const PermutationVector &v)
+{
+ static const std::array<PermutationVector, 6> permutations3 =
+ {
+ PermutationVector(2U, 0U, 1U),
+ PermutationVector(1U, 2U, 0U),
+ PermutationVector(0U, 1U, 2U),
+ PermutationVector(0U, 2U, 1U),
+ PermutationVector(1U, 0U, 2U),
+ PermutationVector(2U, 1U, 0U),
+ };
+ static const std::array<PermutationVector, 24> permutations4 =
+ {
+ PermutationVector(0U, 1U, 2U, 3U),
+ PermutationVector(1U, 0U, 2U, 3U),
+ PermutationVector(2U, 0U, 1U, 3U),
+ PermutationVector(0U, 2U, 1U, 3U),
+ PermutationVector(1U, 2U, 0U, 3U),
+ PermutationVector(2U, 1U, 0U, 3U),
+ PermutationVector(2U, 1U, 3U, 0U),
+ PermutationVector(1U, 2U, 3U, 0U),
+ PermutationVector(3U, 2U, 1U, 0U),
+ PermutationVector(2U, 3U, 1U, 0U),
+ PermutationVector(1U, 3U, 2U, 0U),
+ PermutationVector(3U, 1U, 2U, 0U),
+ PermutationVector(3U, 0U, 2U, 1U),
+ PermutationVector(0U, 3U, 2U, 1U),
+ PermutationVector(2U, 3U, 0U, 1U),
+ PermutationVector(3U, 2U, 0U, 1U),
+ PermutationVector(0U, 2U, 3U, 1U),
+ PermutationVector(2U, 0U, 3U, 1U),
+ PermutationVector(1U, 0U, 3U, 2U),
+ PermutationVector(0U, 1U, 3U, 2U),
+ PermutationVector(3U, 1U, 0U, 2U),
+ PermutationVector(1U, 3U, 0U, 2U),
+ PermutationVector(0U, 3U, 1U, 2U),
+ PermutationVector(3U, 0U, 1U, 2U)
+ };
+
+ return (permutations3.end() != std::find(permutations3.begin(), permutations3.end(), v)) || (permutations4.end() != std::find(permutations4.begin(), permutations4.end(), v));
+}
+
Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const PermutationVector &perm)
{
//Note: ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input) is not needed here as this kernel doesn't use NEON FP16 instructions.
@@ -50,9 +92,8 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, c
DataType::U16, DataType::S16,
DataType::U32, DataType::S32,
DataType::F16, DataType::F32);
- ARM_COMPUTE_RETURN_ERROR_ON_MSG((perm != PermutationVector{ 2U, 0U, 1U })
- && (perm != PermutationVector{ 1U, 2U, 0U }),
- "Only [2, 0, 1] and [1, 2, 0] permutation is supported");
+
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(!is_permutation_supported(perm), "PermutationVector not supported.");
const TensorShape output_shape = misc::shape_calculator::compute_permutation_output_shape(*input, perm);
@@ -70,12 +111,20 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, c
template <typename T>
void NEPermuteKernel::run_permute(const Window &window)
{
+ const DataLayout input_layout = _input->info()->data_layout();
+
// Input window
Window window_in = window;
- window_in.set(Window::DimX, Window::Dimension(window.x().start(), window.x().end(), window.x().end() - window.x().start()));
- window_in.set(Window::DimY, Window::Dimension(window.y().start(), window.y().end(), window.y().end() - window.y().start()));
- window_in.set(Window::DimZ, Window::Dimension(window.z().start(), window.z().end(), window.z().end() - window.z().start()));
- window_in.set(3, Window::Dimension(window[3].start(), window[3].end(), window[3].end() - window[3].start()));
+
+ // we only support these two configs in arm_compute/core/NEON/kernels/convolution/common/shims.hpp, for all others
+ // we have to fall back to C++
+ if((input_layout == DataLayout::NCHW && _perm == PermutationVector{ 2U, 0U, 1U }) || (input_layout == DataLayout::NHWC && _perm == PermutationVector{ 1U, 2U, 0U }))
+ {
+ window_in.set(Window::DimX, Window::Dimension(window.x().start(), window.x().end(), window.x().end() - window.x().start()));
+ window_in.set(Window::DimY, Window::Dimension(window.y().start(), window.y().end(), window.y().end() - window.y().start()));
+ window_in.set(Window::DimZ, Window::Dimension(window.z().start(), window.z().end(), window.z().end() - window.z().start()));
+ window_in.set(3, Window::Dimension(window[3].start(), window[3].end(), window[3].end() - window[3].start()));
+ }
// Output window
Window window_out(window);
@@ -89,23 +138,53 @@ void NEPermuteKernel::run_permute(const Window &window)
Iterator in(_input, window_in);
Iterator out(_output, window_out);
- // CHW -> HWC
- if(_perm == PermutationVector{ 2U, 0U, 1U })
+ int in_row_stride = 0;
+ int in_col_stride = 0;
+ int in_channel_stride = 0;
+ int in_batch_stride = 0;
+ int n_cols = 0;
+ int n_rows = 0;
+ int n_channels = 0;
+ int n_batches = 0;
+
+ switch(input_layout)
{
- const int in_row_stride = _input->info()->strides_in_bytes().y() / sizeof(T);
- const int in_channel_stride = _input->info()->strides_in_bytes().z() / sizeof(T);
- const int in_batch_stride = _input->info()->strides_in_bytes()[3] / sizeof(T);
+ case DataLayout::NCHW:
+ {
+ in_row_stride = _input->info()->strides_in_bytes().y() / sizeof(T);
+ in_channel_stride = _input->info()->strides_in_bytes().z() / sizeof(T);
+ in_batch_stride = _input->info()->strides_in_bytes()[3] / sizeof(T);
+ n_cols = _input->info()->tensor_shape().x();
+ n_rows = window_in.y().step();
+ n_channels = _input->info()->tensor_shape().z();
+ n_batches = _input->info()->tensor_shape()[3];
+ break;
+ }
+ case DataLayout::NHWC:
+ {
+ in_col_stride = _input->info()->strides_in_bytes().y() / sizeof(T);
+ in_row_stride = _input->info()->strides_in_bytes().z() / sizeof(T);
+ in_batch_stride = _input->info()->strides_in_bytes()[3] / sizeof(T);
+ n_channels = _input->info()->tensor_shape().x();
+ n_cols = window_in.y().step();
+ n_rows = _input->info()->tensor_shape().z();
+ n_batches = _input->info()->tensor_shape()[3];
+ break;
+ }
+ default:
+ {
+ ARM_COMPUTE_ERROR("Invalid input data layout.");
+ break;
+ }
+ }
+ // CHW -> HWC
+ if(input_layout == DataLayout::NCHW && _perm == PermutationVector{ 2U, 0U, 1U })
+ {
const int out_channel_stride = _output->info()->strides_in_bytes().x() / sizeof(T);
const int out_col_stride = _output->info()->strides_in_bytes().y() / sizeof(T);
const int out_row_stride = _output->info()->strides_in_bytes().z() / sizeof(T);
const int out_batch_stride = _output->info()->strides_in_bytes()[3] / sizeof(T);
-
- const int n_cols = _input->info()->tensor_shape().x();
- const int n_rows = window_in.y().step();
- const int n_channels = _input->info()->tensor_shape().z();
- const int n_batches = _input->info()->tensor_shape()[3];
-
execute_window_loop(window_in, [&](const Coordinates & id)
{
const int idx = id[0] * out_col_stride + id[1] * out_row_stride + id[2] * out_channel_stride;
@@ -117,22 +196,12 @@ void NEPermuteKernel::run_permute(const Window &window)
in, out);
}
// HWC -> CHW
- else if(_perm == PermutationVector{ 1U, 2U, 0U })
+ else if(input_layout == DataLayout::NHWC && _perm == PermutationVector{ 1U, 2U, 0U })
{
- const int in_col_stride = _input->info()->strides_in_bytes().y() / sizeof(T);
- const int in_row_stride = _input->info()->strides_in_bytes().z() / sizeof(T);
- const int in_batch_stride = _input->info()->strides_in_bytes()[3] / sizeof(T);
-
const int out_col_stride = _output->info()->strides_in_bytes().x() / sizeof(T);
const int out_row_stride = _output->info()->strides_in_bytes().y() / sizeof(T);
const int out_channel_stride = _output->info()->strides_in_bytes().z() / sizeof(T);
const int out_batch_stride = _output->info()->strides_in_bytes()[3] / sizeof(T);
-
- const int n_channels = _input->info()->tensor_shape().x();
- const int n_cols = window_in.y().step();
- const int n_rows = _input->info()->tensor_shape().z();
- const int n_batches = _input->info()->tensor_shape()[3];
-
execute_window_loop(window_in, [&](const Coordinates & id)
{
const int idx = id[0] * out_channel_stride + id[1] * out_col_stride + id[2] * out_row_stride;
@@ -145,7 +214,18 @@ void NEPermuteKernel::run_permute(const Window &window)
}
else
{
- ARM_COMPUTE_ERROR("Unsupported permutation vector");
+ // All other cases fall back to C++
+ // Permute strides
+ Strides strides = _output->info()->strides_in_bytes();
+ Strides perm_strides = strides;
+ permute_strides(perm_strides, _perm);
+ const int perm_stride_3 = _input->info()->num_dimensions() >= 4 ? perm_strides[3] : 0;
+ execute_window_loop(window, [&](const Coordinates & id)
+ {
+ const int idx = id[0] * perm_strides[0] + id[1] * perm_strides[1] + id[2] * perm_strides[2] + id[3] * perm_stride_3;
+ *(reinterpret_cast<T *>(out.ptr() + idx)) = *(reinterpret_cast<const T *>(in.ptr()));
+ },
+ in, out);
}
}