aboutsummaryrefslogtreecommitdiff
path: root/src/core/NEON/kernels/NEDepthwiseWeightsReshapeKernel.cpp
diff options
context:
space:
mode:
author Georgios Pinitas <georgios.pinitas@arm.com> 2018-01-22 16:29:17 +0000
committer Anthony Barbier <anthony.barbier@arm.com> 2018-11-02 16:45:00 +0000
commit d05dce46a14a7b67f322328ecd95bf96bdd30bae (patch)
tree 6e001f539969a1a669241a72e78ff5a62998a984 /src/core/NEON/kernels/NEDepthwiseWeightsReshapeKernel.cpp
parent 5d9d019b2c7ca3dc59bfbb44b3169ee5cd71dc79 (diff)
download ComputeLibrary-d05dce46a14a7b67f322328ecd95bf96bdd30bae.tar.gz
COMPMID-791: Generic Depthwise Convolution Layer NEON QASYMM8
Change-Id: I33cf54e68f6c097ac58b6f16c3f9a720978f09cd
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/117289
Tested-by: Jenkins <bsgcomp@arm.com>
Reviewed-by: Michalis Spyrou <michalis.spyrou@arm.com>
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Diffstat (limited to 'src/core/NEON/kernels/NEDepthwiseWeightsReshapeKernel.cpp')
-rw-r--r-- src/core/NEON/kernels/NEDepthwiseWeightsReshapeKernel.cpp 110
1 files changed, 74 insertions, 36 deletions
diff --git a/src/core/NEON/kernels/NEDepthwiseWeightsReshapeKernel.cpp b/src/core/NEON/kernels/NEDepthwiseWeightsReshapeKernel.cpp
index 6585fdb8b8..36b17bfc4c 100644
--- a/src/core/NEON/kernels/NEDepthwiseWeightsReshapeKernel.cpp
+++ b/src/core/NEON/kernels/NEDepthwiseWeightsReshapeKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -37,16 +37,59 @@
using namespace arm_compute;
+namespace
+{
+template <typename T> // T only fixes the element width; configure() instantiates it with uint32_t / uint16_t / uint8_t
+void weights_reshape(const ITensor *input, const ITensor *bias, ITensor *output, const Window &window) // Copies every X-row of input into output row id.z(); bias may be nullptr
+{
+ const int input_w = input->info()->dimension(0); // Number of elements in one input row (dimension 0)
+ const int output_stride_x = output->info()->strides_in_bytes().x(); // Output stride along X, in bytes
+ const int output_stride_y = output->info()->strides_in_bytes().y(); // Output stride along Y, in bytes
+
+ Window window_in(window); // Start from the caller's window; X, Y and Z are overridden below
+ // X is consumed in one step (step == end; the row is copied by the inner for-loop), Y and Z advance one at a time
+ window_in.set(Window::DimX, Window::Dimension(0, input->info()->dimension(0), input->info()->dimension(0)));
+ window_in.set(Window::DimY, Window::Dimension(0, input->info()->dimension(1), 1));
+ window_in.set(Window::DimZ, Window::Dimension(0, input->info()->dimension(2), 1));
+
+ // Setup output window: X and Y are left empty (0, 0, 0); the destination address is computed by hand in the loop below
+ Window window_out;
+ window_out.set(Window::DimX, Window::Dimension(0, 0, 0));
+ window_out.set(Window::DimY, Window::Dimension(0, 0, 0));
+
+ Iterator in(input, window_in);
+ Iterator out(output, window_out);
+ // For each (y, z) visited in the input: copy one X-row into output row z and, if a bias tensor was given, write its element z
+ execute_window_loop(window_in, [&](const Coordinates & id)
+ {
+ auto input_ptr = reinterpret_cast<T *>(in.ptr()); // Source cursor; advanced input_w times by the loop below
+ auto output_ptr = reinterpret_cast<T *>(out.ptr() + id.y() * input_w * output_stride_x + id.z() * output_stride_y); // Byte offset: y * input_w elements along X, plus z rows along Y
+ // Element-wise copy of the current input row
+ for(int i = 0; i < input_w; ++i, ++input_ptr)
+ {
+ *(output_ptr + i) = *input_ptr;
+ }
+ // NOTE(review): the bias lands input_w past the *current* row, i.e. element (y + 1) * input_w of output row z; only for the last y is that the trailing slot - earlier writes are presumably overwritten by later rows, confirm iteration order
+ if(bias != nullptr)
+ {
+ *(output_ptr + input_w) = *(reinterpret_cast<T *>(bias->ptr_to_element(Coordinates(id.z())))); // Bias element indexed by id.z()
+ }
+ },
+ in, out);
+}
+} // namespace
+
NEDepthwiseWeightsReshapeKernel::NEDepthwiseWeightsReshapeKernel()
- : _input(nullptr), _output(nullptr), _biases(nullptr)
+ : _func(nullptr), _input(nullptr), _output(nullptr), _biases(nullptr)
{
}
void NEDepthwiseWeightsReshapeKernel::configure(const ITensor *input, ITensor *output, const ITensor *biases)
{
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32);
+ ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16, DataType::F32);
ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT(input, output);
+ ARM_COMPUTE_ERROR_ON(is_data_type_quantized_asymmetric(input->info()->data_type()) && (biases != nullptr));
ARM_COMPUTE_ERROR_ON(input->info()->dimension(2) != output->info()->dimension(1));
ARM_COMPUTE_ERROR_ON(output->info()->dimension(0) != (input->info()->dimension(0) * input->info()->dimension(1) + ((biases != nullptr) ? 1 : 0)));
@@ -62,6 +105,30 @@ void NEDepthwiseWeightsReshapeKernel::configure(const ITensor *input, ITensor *o
_output = output;
_biases = biases;
+ switch(_input->info()->element_size()) // Select the copy routine by element width only: the reshape moves raw elements, so the value type does not matter
+ {
+ case 4:
+ {
+ _func = &weights_reshape<uint32_t>; // 4-byte elements
+ break;
+ }
+ case 2:
+ {
+ _func = &weights_reshape<uint16_t>; // 2-byte elements
+ break;
+ }
+ case 1:
+ {
+ _func = &weights_reshape<uint8_t>; // 1-byte elements
+ break;
+ }
+ default:
+ {
+ ARM_COMPUTE_ERROR("Element size not supported"); // Unconditional error: ARM_COMPUTE_ERROR_ON takes a condition, not a message, and would leave _func null when assertions are compiled out
+ break;
+ }
+ }
+
// Configure kernel window
Window win = calculate_max_window(*input->info(), Steps());
// The NEDepthwiseWeightsReshapeKernel doesn't need padding so update_window_and_padding() can be skipped
@@ -74,39 +141,10 @@ void NEDepthwiseWeightsReshapeKernel::run(const Window &window, const ThreadInfo
{
ARM_COMPUTE_UNUSED(info);
ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
+ ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);
- const int input_w = _input->info()->dimension(0);
- const int output_stride_x = _output->info()->strides_in_bytes().x();
- const int output_stride_y = _output->info()->strides_in_bytes().y();
-
- Window window_in(window);
- // The first three dimensions of the input are increased by the inner loops
- window_in.set(Window::DimX, Window::Dimension(0, _input->info()->dimension(0), _input->info()->dimension(0)));
- window_in.set(Window::DimY, Window::Dimension(0, _input->info()->dimension(1), 1));
- window_in.set(Window::DimZ, Window::Dimension(0, _input->info()->dimension(2), 1));
-
- // Setup output window
- Window window_out;
- window_out.set(Window::DimX, Window::Dimension(0, 0, 0));
- window_out.set(Window::DimY, Window::Dimension(0, 0, 0));
-
- Iterator in(_input, window_in);
- Iterator out(_output, window_out);
-
- execute_window_loop(window_in, [&](const Coordinates & id)
+ if(_func != nullptr)
{
- auto input_ptr = reinterpret_cast<float *>(in.ptr());
- auto output_ptr = reinterpret_cast<float *>(out.ptr() + id.y() * input_w * output_stride_x + id.z() * output_stride_y);
-
- for(int i = 0; i < input_w; ++i, ++input_ptr)
- {
- *(output_ptr + i) = *input_ptr;
- }
-
- if(_biases != nullptr)
- {
- *(output_ptr + input_w) = *(reinterpret_cast<float *>(_biases->ptr_to_element(Coordinates(id.z()))));
- }
- },
- in, out);
+ (*_func)(_input, _biases, _output, window);
+ }
}