about summary refs log tree commit diff
diff options
context:
space:
mode:
author: Michalis Spyrou <michalis.spyrou@arm.com> 2019-04-09 14:03:17 +0100
committer: Michalis Spyrou <michalis.spyrou@arm.com> 2019-04-11 09:16:00 +0000
commit: a50e702289af66944e860eafc7f3b32f6c5f30be (patch)
tree: 9803990dd7eed6686cbe01a3ca1b6ed90d555261
parent: 8d94269d7985b9cee67e52581e2f58b6c99d7f0d (diff)
download: ComputeLibrary-a50e702289af66944e860eafc7f3b32f6c5f30be.tar.gz
COMPMID-2012: Remove unnecessary templates from NEON kernels
Change-Id: I2deb26188c7de7c6ad10d2f51f83e729fed7e5e2
Signed-off-by: Michalis Spyrou <michalis.spyrou@arm.com>
Reviewed-on: https://review.mlplatform.org/c/961
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
-rw-r--r-- arm_compute/core/NEON/kernels/NEReorgLayerKernel.h 23
-rw-r--r-- arm_compute/core/NEON/kernels/NEStackLayerKernel.h 14
-rw-r--r-- arm_compute/core/NEON/kernels/NEWeightsReshapeKernel.h 11
-rw-r--r-- src/core/NEON/kernels/NEReorgLayerKernel.cpp 96
-rw-r--r-- src/core/NEON/kernels/NEReverseKernel.cpp 18
-rw-r--r-- src/core/NEON/kernels/NEStackLayerKernel.cpp 33
-rw-r--r-- src/core/NEON/kernels/NEWeightsReshapeKernel.cpp 128
7 files changed, 100 insertions, 223 deletions
diff --git a/arm_compute/core/NEON/kernels/NEReorgLayerKernel.h b/arm_compute/core/NEON/kernels/NEReorgLayerKernel.h
index 7e0fb4350d..076af4fd1c 100644
--- a/arm_compute/core/NEON/kernels/NEReorgLayerKernel.h
+++ b/arm_compute/core/NEON/kernels/NEReorgLayerKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018 ARM Limited.
+ * Copyright (c) 2018-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -75,24 +75,9 @@ public:
void run(const Window &window, const ThreadInfo &info) override;
private:
- /** Template function to run the reorg
- *
- * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()).
- */
- template <typename T>
- void run_reorg(const Window &window);
-
- /** Common signature for all the specialised reorg functions
- *
- * @param[in] window Region on which to execute the kernel.
- */
- using ReorgFunctionPtr = void (NEReorgLayerKernel::*)(const Window &window);
-
-private:
- ReorgFunctionPtr _func;
- const ITensor *_input;
- ITensor *_output;
- int32_t _stride;
+ const ITensor *_input;
+ ITensor *_output;
+ int32_t _stride;
};
} // namespace arm_compute
#endif /*__ARM_COMPUTE_NEREORGLAYERKERNEL_H__ */
diff --git a/arm_compute/core/NEON/kernels/NEStackLayerKernel.h b/arm_compute/core/NEON/kernels/NEStackLayerKernel.h
index 3a9e81fa94..42a0539c9f 100644
--- a/arm_compute/core/NEON/kernels/NEStackLayerKernel.h
+++ b/arm_compute/core/NEON/kernels/NEStackLayerKernel.h
@@ -84,24 +84,10 @@ public:
void run(const Window &window, const ThreadInfo &info) override;
private:
- /** Template function to run the stack
- *
- * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()).
- */
- template <typename T>
- void run_stack(const Window &window);
-
- /** Common signature for all the specialised stack functions
- *
- * @param[in] window Region on which to execute the kernel.
- */
- using StackFunctionPtr = void (NEStackLayerKernel::*)(const Window &window);
-
const ITensor *_input;
ITensor *_output;
unsigned int _axis;
unsigned int _idx_input;
- StackFunctionPtr _func;
};
} // namespace arm_compute
#endif /* __ARM_COMPUTE_NESTACKLAYERKERNEL_H__ */
diff --git a/arm_compute/core/NEON/kernels/NEWeightsReshapeKernel.h b/arm_compute/core/NEON/kernels/NEWeightsReshapeKernel.h
index 21f36f6c2b..bba18a8fa8 100644
--- a/arm_compute/core/NEON/kernels/NEWeightsReshapeKernel.h
+++ b/arm_compute/core/NEON/kernels/NEWeightsReshapeKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -99,12 +99,9 @@ public:
void run(const Window &window, const ThreadInfo &info) override;
private:
- using WeightsReshapeKernel = void(const ITensor *input, const ITensor *bias, ITensor *output, const Window &window);
-
- WeightsReshapeKernel *_func;
- const ITensor *_input;
- const ITensor *_bias;
- ITensor *_output;
+ const ITensor *_input;
+ const ITensor *_bias;
+ ITensor *_output;
};
} // namespace arm_compute
#endif /*__ARM_COMPUTE_NEWEIGHTSRESHAPEKERNEL_H__ */
diff --git a/src/core/NEON/kernels/NEReorgLayerKernel.cpp b/src/core/NEON/kernels/NEReorgLayerKernel.cpp
index 8baea2b990..ece5aa431c 100644
--- a/src/core/NEON/kernels/NEReorgLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEReorgLayerKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018 ARM Limited.
+ * Copyright (c) 2018-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -67,47 +67,8 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, i
}
} // namespace
-template <typename T>
-void NEReorgLayerKernel::run_reorg(const Window &window)
-{
- const DataLayout data_layout = _input->info()->data_layout();
- const size_t idx_w = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
- const size_t idx_h = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
- const size_t idx_c = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL);
-
- const unsigned int stride = _stride;
- const unsigned int out_c = _output->info()->tensor_shape()[idx_c] / (stride * stride);
- const uint8_t *in_ptr = _input->buffer();
-
- // Collapse
- Window collapsed_window = window.collapse_if_possible(window, 4);
-
- // Create Iterator
- Iterator out(_output, collapsed_window);
-
- // Perform reorg
- execute_window_loop(collapsed_window, [&](const Coordinates & id)
- {
- // Get spatial coords and channels
- const unsigned int w = id[idx_w];
- const unsigned int h = id[idx_h];
- const unsigned int c = id[idx_c];
-
- // Calculate mapping
- const unsigned int offset = c / out_c;
- Coordinates map_coords = id;
- map_coords.set(idx_w, w * stride + offset % stride);
- map_coords.set(idx_h, h * stride + offset / stride);
- map_coords.set(idx_c, c % out_c);
-
- // Perform mapping
- *(reinterpret_cast<T *>(out.ptr())) = *(reinterpret_cast<const T *>(in_ptr + _input->info()->offset_element_in_bytes(map_coords)));
- },
- out);
-}
-
NEReorgLayerKernel::NEReorgLayerKernel()
- : _func(nullptr), _input(nullptr), _output(nullptr), _stride(1)
+ : _input(nullptr), _output(nullptr), _stride(1)
{
}
@@ -122,27 +83,10 @@ void NEReorgLayerKernel::configure(const ITensor *input, ITensor *output, int32_
// Perform validation step
ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), stride));
- _func = nullptr;
_input = input;
_output = output;
_stride = stride;
- switch(input->info()->element_size())
- {
- case 1:
- _func = &NEReorgLayerKernel::run_reorg<uint8_t>;
- break;
- case 2:
- _func = &NEReorgLayerKernel::run_reorg<uint16_t>;
- break;
- case 4:
- _func = &NEReorgLayerKernel::run_reorg<uint32_t>;
- break;
- default:
- ARM_COMPUTE_ERROR("Element size not supported");
- break;
- }
-
// The NEReorgLayerKernel doesn't need padding so update_window_and_padding() can be skipped
output->info()->set_valid_region(ValidRegion(Coordinates(), output->info()->tensor_shape()));
@@ -164,9 +108,39 @@ void NEReorgLayerKernel::run(const Window &window, const ThreadInfo &info)
ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICPPKernel::window(), window);
- if(_func != nullptr)
+ const DataLayout data_layout = _input->info()->data_layout();
+ const size_t idx_w = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
+ const size_t idx_h = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
+ const size_t idx_c = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL);
+
+ const unsigned int stride = _stride;
+ const unsigned int out_c = _output->info()->tensor_shape()[idx_c] / (stride * stride);
+ const uint8_t *in_ptr = _input->buffer();
+
+ // Collapse
+ Window collapsed_window = window.collapse_if_possible(window, 4);
+
+ // Create Iterator
+ Iterator out(_output, collapsed_window);
+
+ // Perform reorg
+ execute_window_loop(collapsed_window, [&](const Coordinates & id)
{
- (this->*_func)(window);
- }
+ // Get spatial coords and channels
+ const unsigned int w = id[idx_w];
+ const unsigned int h = id[idx_h];
+ const unsigned int c = id[idx_c];
+
+ // Calculate mapping
+ const unsigned int offset = c / out_c;
+ Coordinates map_coords = id;
+ map_coords.set(idx_w, w * stride + offset % stride);
+ map_coords.set(idx_h, h * stride + offset / stride);
+ map_coords.set(idx_c, c % out_c);
+
+ // Perform mapping
+ std::memcpy(out.ptr(), in_ptr + _input->info()->offset_element_in_bytes(map_coords), _input->info()->element_size());
+ },
+ out);
}
} // namespace arm_compute
diff --git a/src/core/NEON/kernels/NEReverseKernel.cpp b/src/core/NEON/kernels/NEReverseKernel.cpp
index 62e48829f6..36398cf89a 100644
--- a/src/core/NEON/kernels/NEReverseKernel.cpp
+++ b/src/core/NEON/kernels/NEReverseKernel.cpp
@@ -189,31 +189,21 @@ void NEReverseKernel::run(const Window &window, const ThreadInfo &info)
switch(_input->info()->data_type())
{
case DataType::F32:
- run_reverse<float>(window, _input, _axis, _output);
+ case DataType::U32:
+ case DataType::S32:
+ run_reverse<uint32_t>(window, _input, _axis, _output);
break;
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
case DataType::F16:
- run_reverse<float16_t>(window, _input, _axis, _output);
- break;
#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
- case DataType::U32:
- run_reverse<uint32_t>(window, _input, _axis, _output);
- break;
- case DataType::S32:
- run_reverse<int32_t>(window, _input, _axis, _output);
- break;
case DataType::S16:
- run_reverse<int16_t>(window, _input, _axis, _output);
- break;
case DataType::U16:
run_reverse<uint16_t>(window, _input, _axis, _output);
break;
case DataType::QASYMM8:
case DataType::U8:
- run_reverse<uint8_t>(window, _input, _axis, _output);
- break;
case DataType::S8:
- run_reverse<int8_t>(window, _input, _axis, _output);
+ run_reverse<uint8_t>(window, _input, _axis, _output);
break;
default:
ARM_COMPUTE_ERROR("Data type not supported");
diff --git a/src/core/NEON/kernels/NEStackLayerKernel.cpp b/src/core/NEON/kernels/NEStackLayerKernel.cpp
index 0c33f36983..3447d59bcc 100644
--- a/src/core/NEON/kernels/NEStackLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEStackLayerKernel.cpp
@@ -87,7 +87,7 @@ inline Coordinates shift_from_axis_and_replace_coordinate(const Coordinates &id,
} // namespace
NEStackLayerKernel::NEStackLayerKernel()
- : _input(nullptr), _output(nullptr), _axis(), _idx_input(), _func(nullptr)
+ : _input(nullptr), _output(nullptr), _axis(), _idx_input()
{
}
@@ -101,22 +101,6 @@ void NEStackLayerKernel::configure(const ITensor *input, unsigned int axis, unsi
_axis = axis;
_idx_input = idx_input;
- switch(input->info()->element_size())
- {
- case 1:
- _func = &NEStackLayerKernel::run_stack<uint8_t>;
- break;
- case 2:
- _func = &NEStackLayerKernel::run_stack<uint16_t>;
- break;
- case 4:
- _func = &NEStackLayerKernel::run_stack<uint32_t>;
- break;
- default:
- ARM_COMPUTE_ERROR("Element size not supported");
- break;
- }
-
// Configure kernel window
auto win_config = validate_and_configure_window(input->info(), axis, num_tensors, output->info());
@@ -137,15 +121,6 @@ void NEStackLayerKernel::run(const Window &window, const ThreadInfo &info)
ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);
- if(_func != nullptr)
- {
- (this->*_func)(window);
- }
-}
-
-template <typename T>
-void NEStackLayerKernel::run_stack(const Window &window)
-{
Window window_out;
window_out.use_tensor_dimensions(_output->info()->tensor_shape());
@@ -160,9 +135,9 @@ void NEStackLayerKernel::run_stack(const Window &window)
execute_window_loop(window, [&](const Coordinates & id)
{
- Coordinates id_out = shift_from_axis_and_replace_coordinate(id, _axis, _idx_input);
- const int idx = id_out[0] * stride_x + id_out[1] * stride_y + id_out[2] * stride_z + id_out[3] * stride_w + id_out[4] * stride_k;
- *(reinterpret_cast<T *>(output.ptr() + idx)) = *(reinterpret_cast<const T *>(input.ptr()));
+ Coordinates id_out = shift_from_axis_and_replace_coordinate(id, _axis, _idx_input);
+ const int idx = id_out[0] * stride_x + id_out[1] * stride_y + id_out[2] * stride_z + id_out[3] * stride_w + id_out[4] * stride_k;
+ std::memcpy(output.ptr() + idx, input.ptr(), _input->info()->element_size());
},
input);
}
diff --git a/src/core/NEON/kernels/NEWeightsReshapeKernel.cpp b/src/core/NEON/kernels/NEWeightsReshapeKernel.cpp
index 4a0cf27592..624833adfb 100644
--- a/src/core/NEON/kernels/NEWeightsReshapeKernel.cpp
+++ b/src/core/NEON/kernels/NEWeightsReshapeKernel.cpp
@@ -34,59 +34,6 @@ using namespace arm_compute;
namespace
{
-template <typename T>
-void weights_reshape(const ITensor *input, const ITensor *bias, ITensor *output, const Window &window)
-{
- const unsigned int kernel_size_x = input->info()->dimension(0);
- const unsigned int kernel_size_y = input->info()->dimension(1);
- const unsigned int kernel_depth = input->info()->dimension(2);
- const unsigned int input_stride_x = input->info()->strides_in_bytes().x();
- const unsigned int input_stride_y = input->info()->strides_in_bytes().y();
- const unsigned int input_stride_z = input->info()->strides_in_bytes().z();
- const unsigned int output_stride_y = output->info()->strides_in_bytes().y();
-
- // Create iterators
- Iterator in(input, window);
- execute_window_loop(window, [&](const Coordinates & id)
- {
- // Get column index
- const int kernel_idx = id[3];
- const int kernel_idz = id[4];
-
- // Setup pointers
- const uint8_t *tmp_input_ptr = in.ptr();
- uint8_t *tmp_output_ptr = output->ptr_to_element(Coordinates(kernel_idx, 0, kernel_idz));
- const uint8_t *curr_input_row_ptr = tmp_input_ptr;
- const uint8_t *curr_input_depth_ptr = tmp_input_ptr;
-
- // Linearize volume
- for(unsigned int d = 0; d < kernel_depth; ++d)
- {
- for(unsigned int j = 0; j < kernel_size_y; ++j)
- {
- for(unsigned int i = 0; i < kernel_size_x; ++i)
- {
- *(reinterpret_cast<T *>(tmp_output_ptr)) = *(reinterpret_cast<const T *>(tmp_input_ptr));
- tmp_input_ptr += input_stride_x;
- tmp_output_ptr += output_stride_y;
- }
- curr_input_row_ptr += input_stride_y;
- tmp_input_ptr = curr_input_row_ptr;
- }
- curr_input_depth_ptr += input_stride_z;
- curr_input_row_ptr = curr_input_depth_ptr;
- tmp_input_ptr = curr_input_depth_ptr;
- }
-
- // Add bias
- if(bias != nullptr)
- {
- *(reinterpret_cast<T *>(tmp_output_ptr)) = *(reinterpret_cast<const T *>(bias->ptr_to_element(Coordinates(kernel_idx, kernel_idz))));
- }
- },
- in);
-}
-
TensorShape get_output_shape(const ITensorInfo *input, bool has_bias)
{
TensorShape output_shape{ input->tensor_shape() };
@@ -141,7 +88,7 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITen
} // namespace
NEWeightsReshapeKernel::NEWeightsReshapeKernel()
- : _func(nullptr), _input(nullptr), _bias(nullptr), _output(nullptr)
+ : _input(nullptr), _bias(nullptr), _output(nullptr)
{
}
@@ -161,30 +108,6 @@ void NEWeightsReshapeKernel::configure(const ITensor *input, const ITensor *bias
_bias = bias;
_output = output;
- switch(_input->info()->element_size())
- {
- case 4:
- {
- _func = &weights_reshape<uint32_t>;
- break;
- }
- case 2:
- {
- _func = &weights_reshape<uint16_t>;
- break;
- }
- case 1:
- {
- _func = &weights_reshape<uint8_t>;
- break;
- }
- default:
- {
- ARM_COMPUTE_ERROR_ON("Element size not supported");
- break;
- }
- }
-
// Configure kernel
auto win_config = validate_and_configure_window(input->info(), output->info());
ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
@@ -205,5 +128,52 @@ void NEWeightsReshapeKernel::run(const Window &window, const ThreadInfo &info)
ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);
- (*_func)(_input, _bias, _output, window);
+ const unsigned int kernel_size_x = _input->info()->dimension(0);
+ const unsigned int kernel_size_y = _input->info()->dimension(1);
+ const unsigned int kernel_depth = _input->info()->dimension(2);
+ const unsigned int input_stride_x = _input->info()->strides_in_bytes().x();
+ const unsigned int input_stride_y = _input->info()->strides_in_bytes().y();
+ const unsigned int input_stride_z = _input->info()->strides_in_bytes().z();
+ const unsigned int output_stride_y = _output->info()->strides_in_bytes().y();
+
+ // Create iterators
+ Iterator in(_input, window);
+ execute_window_loop(window, [&](const Coordinates & id)
+ {
+ // Get column index
+ const int kernel_idx = id[3];
+ const int kernel_idz = id[4];
+
+ // Setup pointers
+ const uint8_t *tmp_input_ptr = in.ptr();
+ uint8_t *tmp_output_ptr = _output->ptr_to_element(Coordinates(kernel_idx, 0, kernel_idz));
+ const uint8_t *curr_input_row_ptr = tmp_input_ptr;
+ const uint8_t *curr_input_depth_ptr = tmp_input_ptr;
+
+ // Linearize volume
+ for(unsigned int d = 0; d < kernel_depth; ++d)
+ {
+ for(unsigned int j = 0; j < kernel_size_y; ++j)
+ {
+ for(unsigned int i = 0; i < kernel_size_x; ++i)
+ {
+ std::memcpy(tmp_output_ptr, tmp_input_ptr, _input->info()->element_size());
+ tmp_input_ptr += input_stride_x;
+ tmp_output_ptr += output_stride_y;
+ }
+ curr_input_row_ptr += input_stride_y;
+ tmp_input_ptr = curr_input_row_ptr;
+ }
+ curr_input_depth_ptr += input_stride_z;
+ curr_input_row_ptr = curr_input_depth_ptr;
+ tmp_input_ptr = curr_input_depth_ptr;
+ }
+
+ // Add bias
+ if(_bias != nullptr)
+ {
+ std::memcpy(tmp_output_ptr, _bias->ptr_to_element(Coordinates(kernel_idx, kernel_idz)), _input->info()->element_size());
+ }
+ },
+ in);
}