aboutsummaryrefslogtreecommitdiff
path: root/src/core/CL/kernels/CLDequantizationLayerKernel.cpp
diff options
context:
space:
mode:
author Georgios Pinitas <georgios.pinitas@arm.com> 2019-02-18 20:08:02 +0000
committer Giuseppe Rossini <giuseppe.rossini@arm.com> 2019-03-05 11:44:18 +0000
commit 574775c7fa78a094bbeb7f9f87aca832936884e2 (patch)
tree a405e7a265865acc1348860514de28de2835ce24 /src/core/CL/kernels/CLDequantizationLayerKernel.cpp
parent 79fa9a22022824735986f74557bf38095eb2284d (diff)
download ComputeLibrary-574775c7fa78a094bbeb7f9f87aca832936884e2.tar.gz
COMPMID-1937: Adds support for DequantizationLayer for NEON/CL.
Change-Id: I4b73edd176a277294e0e42e642460bc61210778a
Signed-off-by: Georgios Pinitas <georgios.pinitas@arm.com>
Reviewed-on: https://review.mlplatform.org/c/744
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Giuseppe Rossini <giuseppe.rossini@arm.com>
Diffstat (limited to 'src/core/CL/kernels/CLDequantizationLayerKernel.cpp')
-rw-r--r-- src/core/CL/kernels/CLDequantizationLayerKernel.cpp 93
1 files changed, 45 insertions, 48 deletions
diff --git a/src/core/CL/kernels/CLDequantizationLayerKernel.cpp b/src/core/CL/kernels/CLDequantizationLayerKernel.cpp
index d4c1bec5f4..78cc5596dd 100644
--- a/src/core/CL/kernels/CLDequantizationLayerKernel.cpp
+++ b/src/core/CL/kernels/CLDequantizationLayerKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -26,6 +26,7 @@
#include "arm_compute/core/AccessWindowStatic.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
+#include "arm_compute/core/CL/CLValidate.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Utils.h"
@@ -36,74 +37,78 @@ using namespace arm_compute;
namespace
{
-Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *min_max)
+Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output)
{
- ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output, min_max);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
- ARM_COMPUTE_RETURN_ERROR_ON(input->num_dimensions() < 3);
+ ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8);
if(output->tensor_shape().total_size() > 0)
{
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(output);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::F16, DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output);
}
return Status{};
}
-std::tuple<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *output, ITensorInfo *min_max)
+std::tuple<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *output)
{
+ // Configure kernel window
+ Window win = calculate_max_window(*input, Steps());
+
// Output tensor auto initialization if not yet initialized
auto_init_if_empty(*output, input->tensor_shape(), 1, DataType::F32);
- constexpr unsigned int num_elems_processed_per_iteration = 4;
-
- // Configure window
- Window win = calculate_max_window(*input, Steps(num_elems_processed_per_iteration));
- AccessWindowHorizontal input_access(input, 0, num_elems_processed_per_iteration);
- AccessWindowHorizontal output_access(output, 0, num_elems_processed_per_iteration);
- AccessWindowStatic min_max_access(min_max, 0, 0, 2, min_max->dimension(1));
-
- // Update window and padding
- bool window_changed = update_window_and_padding(win, input_access, output_access, min_max_access);
+ // CLDequantizationLayerKernel doesn't need padding so update_window_and_padding() can be skipped
+ Coordinates coord;
+ coord.set_num_dimensions(output->num_dimensions());
+ output->set_valid_region(ValidRegion(coord, output->tensor_shape()));
- output_access.set_valid_region(win, input->valid_region());
-
- Status err = (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{};
- return std::make_tuple(err, win);
+ return std::make_tuple(Status{}, win);
}
} // namespace
CLDequantizationLayerKernel::CLDequantizationLayerKernel()
- : _input(nullptr), _output(nullptr), _min_max(nullptr)
+ : _input(nullptr), _output(nullptr)
{
}
-void CLDequantizationLayerKernel::configure(const ICLTensor *input, ICLTensor *output, const ICLTensor *min_max)
+void CLDequantizationLayerKernel::configure(const ICLTensor *input, ICLTensor *output)
{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input, output, min_max);
- ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), min_max->info()));
-
- _input = input;
- _output = output;
- _min_max = min_max;
+ ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
+ ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info()));
- // Create kernel
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("dequantization_layer"));
+ _input = input;
+ _output = output;
- // Configure kernel window
- auto win_config = validate_and_configure_window(input->info(), output->info(), min_max->info());
+ const int vec_size_x = 16 / output->info()->element_size();
+ const int output_width_x = output->info()->tensor_shape().x();
+ const bool multi_access_x = (output_width_x / vec_size_x > 0);
- ARM_COMPUTE_ERROR_THROW_ON(std::get<0>(win_config));
+ // Create and update the window (if needed)
+ Window win = calculate_max_window(*output->info());
+ if(multi_access_x)
+ {
+ win.set(Window::DimX,
+ Window::Dimension(win.x().start(), ceil_to_multiple(win.x().end(), vec_size_x), vec_size_x));
+ }
+ ICLKernel::configure_internal(win);
- ICLKernel::configure_internal(std::get<1>(win_config));
+ // Create kernel
+ CLBuildOptions build_opts;
+ build_opts.add_option("-DSCALE=" + float_to_string_with_full_precision(input->info()->quantization_info().scale));
+ build_opts.add_option("-DOFFSET=" + support::cpp11::to_string(input->info()->quantization_info().offset));
+ build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(vec_size_x));
+ build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(output->info()->data_type()));
+ build_opts.add_option_if(multi_access_x, "-DLAST_ACCESSED_X=" + support::cpp11::to_string(std::max<int>(output_width_x - vec_size_x, 0)));
+ _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("dequantization_layer", build_opts.options()));
}
-Status CLDequantizationLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *min_max)
+Status CLDequantizationLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *output)
{
- ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, min_max));
- ARM_COMPUTE_RETURN_ON_ERROR(std::get<0>(validate_and_configure_window(input->clone().get(), output->clone().get(), min_max->clone().get())));
-
+ ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output));
+ ARM_COMPUTE_RETURN_ON_ERROR(std::get<0>(validate_and_configure_window(input->clone().get(), output->clone().get())));
return Status{};
}
@@ -115,20 +120,12 @@ void CLDequantizationLayerKernel::run(const Window &window, cl::CommandQueue &qu
Window window_collapsed = window.collapse_if_possible(ICLKernel::window(), 3);
Window slice = window_collapsed.first_slice_window_3D();
- Window min_max_window = window;
- min_max_window.set(Window::DimX, Window::Dimension(0, 0, 0));
- min_max_window.set(Window::DimY, Window::Dimension(0, _min_max->info()->dimension(1), 1));
- min_max_window.set(Window::DimZ, Window::Dimension(0, 0, 0));
-
- Window min_max_slice = min_max_window.first_slice_window_1D();
-
do
{
unsigned int idx = 0;
add_3D_tensor_argument(idx, _input, slice);
add_3D_tensor_argument(idx, _output, slice);
- add_1D_tensor_argument(idx, _min_max, min_max_slice);
enqueue(queue, *this, slice);
}
- while(window_collapsed.slide_window_slice_3D(slice) && min_max_window.slide_window_slice_1D(min_max_slice));
+ while(window_collapsed.slide_window_slice_3D(slice));
}