aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author Xinghang Zhou <xinghang.zhou@arm.com> 2018-01-17 11:23:39 +0800
committer Anthony Barbier <anthony.barbier@arm.com> 2018-11-02 16:45:00 +0000
commit 33ff9ef467153eef05b700820d859515a52481f4 (patch)
tree 87f0ac284aeeac696f0652bdb4489177ef89a48e /src
parent 1c0d0ffb99814749d5c48df282dc212cb939094a (diff)
download ComputeLibrary-33ff9ef467153eef05b700820d859515a52481f4.tar.gz
APPBROWSER-400: Implement the tensorshift kernel for fixing DC's alignment issue on OpenGL ES
Change-Id: I7a8489bb0fddc72899ea165e414ee87bdbfb45b3 Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/118106 Reviewed-by: Joel Liang <joel.liang@arm.com> Tested-by: Jenkins <bsgcomp@arm.com> Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Diffstat (limited to 'src')
-rw-r--r-- src/core/GLES_COMPUTE/GCKernelLibrary.cpp 5
-rw-r--r-- src/core/GLES_COMPUTE/cs_shaders/tensor_shift.cs 134
-rw-r--r-- src/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.cpp 22
-rw-r--r-- src/core/GLES_COMPUTE/kernels/GCTensorShiftKernel.cpp 108
-rw-r--r-- src/runtime/GLES_COMPUTE/functions/GCDirectConvolutionLayer.cpp 19
-rw-r--r-- src/runtime/GLES_COMPUTE/functions/GCTensorShift.cpp 40
6 files changed, 321 insertions, 7 deletions
diff --git a/src/core/GLES_COMPUTE/GCKernelLibrary.cpp b/src/core/GLES_COMPUTE/GCKernelLibrary.cpp
index 0b9cd3f4ee..d4ce3888fd 100644
--- a/src/core/GLES_COMPUTE/GCKernelLibrary.cpp
+++ b/src/core/GLES_COMPUTE/GCKernelLibrary.cpp
@@ -190,6 +190,7 @@ void GCKernel::update_shader_params()
const std::map<std::string, std::string> GCKernelLibrary::_shader_program_map =
{
{ "absdiff", "absdiff.cs" },
+ { "tensorshift", "tensor_shift.cs" },
{ "direct_convolution1x1", "direct_convolution1x1.cs" },
{ "direct_convolution3x3", "direct_convolution3x3.cs" },
{ "direct_convolution5x5", "direct_convolution5x5.cs" },
@@ -235,6 +236,10 @@ const std::map<std::string, std::string> GCKernelLibrary::_program_source_map =
#include "./cs_shaders/absdiff.csembed"
},
{
+ "tensor_shift.cs",
+#include "./cs_shaders/tensor_shift.csembed"
+ },
+ {
"convolution_layer.cs",
#include "./cs_shaders/convolution_layer.csembed"
},
diff --git a/src/core/GLES_COMPUTE/cs_shaders/tensor_shift.cs b/src/core/GLES_COMPUTE/cs_shaders/tensor_shift.cs
new file mode 100644
index 0000000000..a0af315c76
--- /dev/null
+++ b/src/core/GLES_COMPUTE/cs_shaders/tensor_shift.cs
@@ -0,0 +1,134 @@
+/*
+ * Copyright (c) 2017-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+layout(local_size_x = LOCAL_SIZE_X, local_size_y = LOCAL_SIZE_Y, local_size_z = LOCAL_SIZE_Z) in;
+
+#include "helpers_cs.h"
+
+#if defined(DATA_TYPE_FP16)
+precision mediump float;
+#endif // DATA_TYPE_FP16
+
+/** This kernel shifts WIDTH elements, starting at the current iterator position, "pad_x" elements to the right in place and writes zeros to the first "pad_x" elements.
+ *
+ * @note The data type must be passed at compile time using "#define DATA_TYPE_NAME", e.g. "#define DATA_TYPE_FP32". Only DATA_TYPE_FP16 and DATA_TYPE_FP32 are accepted; anything else hits the #error at the end of the file.
+ * @note The number of elements to move must be passed at compile time using "#define WIDTH n", e.g. "#define WIDTH 1"
+ *
+ * @param[in,out] src_ptr Pointer to the source tensor slice, which is read and overwritten in place. Supported data types: F16/F32
+ * @param[in] src_attrs The attributes of the source tensor
+ * @param[in] pad_x The number of elements to shift by in the x dimension; also the number of leading elements that are zeroed
+ */
+SHADER_PARAMS_DECLARATION
+{
+ Tensor3DAttributes src_attrs;
+ uint pad_x;
+};
+
+#if defined(DATA_TYPE_FP16)
+TENSOR_DECLARATION(1, srcBuffer, uint, src_ptr, src_shift, 2, restrict);
+
+void main() // FP16 path: moves WIDTH half elements pad_x positions to the right in place, one pair of halves per access, then zeroes the first pad_x elements.
+{
+ Tensor3DIterator src_iter = CONVERT_TO_TENSOR3D_ITERATOR(src_attrs, src_shift);
+ int n = int(pad_x) % 2; // Parity of the shift: an odd shift makes every destination pair straddle two source pairs.
+
+ if(n == 1)
+ {
+ int i = 0; // Element index of the first half of the destination pair being written.
+ if((WIDTH % 2) == 1)
+ {
+ i = WIDTH + int(pad_x) - 2; // WIDTH + pad_x is even here, so the last destination pair is filled entirely from source data.
+ }
+ else
+ {
+ vec2 s0_end = LOAD_UNPACK2_HALF(src_ptr, TENSOR_OFFSET_ADVANCE_IN_BYTES(src_iter, (2 * (WIDTH - 2)))); // Byte offsets are 2 * element index.
+ vec2 s_end = vec2(s0_end.y, 0.f); // Last source element (WIDTH - 1) goes first in the final pair; the other half is cleared. Assumes .x is the lower-index element of a pair.
+ STORE_PACK2_HALF(src_ptr, TENSOR_OFFSET_ADVANCE_IN_BYTES(src_iter, (2 * (WIDTH + int(pad_x) - 1))), s_end);
+ i = WIDTH + int(pad_x) - 3;
+ }
+ for(; i >= (int(pad_x) + 1); i = i - 2) // Right to left, so every source pair is read before a store can overwrite it.
+ {
+ vec2 s0 = LOAD_UNPACK2_HALF(src_ptr, TENSOR_OFFSET_ADVANCE_IN_BYTES(src_iter, (2 * (i - int(pad_x) - 1))));
+ vec2 s1 = LOAD_UNPACK2_HALF(src_ptr, TENSOR_OFFSET_ADVANCE_IN_BYTES(src_iter, (2 * (i - int(pad_x) + 1))));
+ vec2 s = vec2(s0.y, s1.x); // Source elements (i - pad_x, i - pad_x + 1) taken from two neighbouring pairs.
+ STORE_PACK2_HALF(src_ptr, TENSOR_OFFSET_ADVANCE_IN_BYTES(src_iter, (2 * i)), s);
+ }
+ vec2 s0_origin = LOAD_UNPACK2_CURRENT_ITEM_HALF(src_ptr, src_iter); // Fix: read source element 0 before the zero-fill below clears it; the previous read after the loop only saw element 0 when pad_x == 1.
+ for(int j = 0; j < (int(pad_x) - 1); j = j + 2) // Zero the (pad_x - 1) / 2 whole pairs that precede the boundary pair.
+ {
+ vec2 s_origin = vec2(0.f);
+ STORE_PACK2_CURRENT_ITEM_HALF(src_ptr, src_iter, s_origin);
+ TENSOR_ITERATOR_ADVANCE_IN_BYTES(src_iter, 4); // 4 bytes = one pair of halves.
+ }
+ vec2 s_origin = vec2(0.f, s0_origin.x); // Boundary pair at elements (pad_x - 1, pad_x): last zero, then source element 0.
+ STORE_PACK2_CURRENT_ITEM_HALF(src_ptr, src_iter, s_origin);
+ }
+ else
+ {
+ int i = 0; // Element index of the first half of the destination pair being written.
+ if((WIDTH % 2) == 0)
+ {
+ i = WIDTH + int(pad_x) - 2; // Even shift and even WIDTH: source and destination pairs line up exactly.
+ }
+ else
+ {
+ vec2 s0_end = LOAD_UNPACK2_HALF(src_ptr, TENSOR_OFFSET_ADVANCE_IN_BYTES(src_iter, (2 * (WIDTH - 1))));
+ vec2 s_end = vec2(s0_end.x, 0.f); // Odd WIDTH: the last source element sits alone in its pair; move it and clear the other half.
+ STORE_PACK2_HALF(src_ptr, TENSOR_OFFSET_ADVANCE_IN_BYTES(src_iter, (2 * (WIDTH + int(pad_x) - 1))), s_end);
+ i = WIDTH + int(pad_x) - 3;
+ }
+ for(; i >= (int(pad_x)); i = i - 2) // Right to left whole-pair copy from element i - pad_x to element i.
+ {
+ vec2 s = LOAD_UNPACK2_HALF(src_ptr, TENSOR_OFFSET_ADVANCE_IN_BYTES(src_iter, (2 * (i - int(pad_x)))));
+ STORE_PACK2_HALF(src_ptr, TENSOR_OFFSET_ADVANCE_IN_BYTES(src_iter, (2 * i)), s);
+ }
+ for(int j = 0; j < int(pad_x); j = j + 2) // Zero the pad_x / 2 leading pairs.
+ {
+ vec2 s = vec2(0.f);
+ STORE_PACK2_CURRENT_ITEM_HALF(src_ptr, src_iter, s);
+ TENSOR_ITERATOR_ADVANCE_IN_BYTES(src_iter, 4);
+ }
+ }
+}
+#elif defined(DATA_TYPE_FP32)
+TENSOR_DECLARATION(1, srcBuffer, float, src_ptr, src_shift, 2, restrict);
+
+void main() // FP32 path: moves WIDTH float elements pad_x positions to the right in place, then zeroes the first pad_x elements.
+{
+ Tensor3DIterator src_iter = CONVERT_TO_TENSOR3D_ITERATOR(src_attrs, src_shift);
+
+ for(int i = (WIDTH + int(pad_x) - 1); i >= int(pad_x); i--) // Highest destination index first, so each source element is read before a store can overwrite it.
+ {
+ float sorigin = LOAD(src_ptr, TENSOR_OFFSET_ADVANCE(src_iter, (i - int(pad_x)))); // Source element i - pad_x ...
+ STORE(src_ptr, TENSOR_OFFSET_ADVANCE(src_iter, i), sorigin); // ... lands at element i.
+ }
+ for(int j = 0; j < int(pad_x); j++) // Zero-fill the pad_x elements vacated at the start.
+ {
+ STORE_CURRENT_ITEM(src_ptr, src_iter, 0.f);
+ TENSOR_ITERATOR_ADVANCE_IN_BYTES(src_iter, 4); // 4 bytes = one float element.
+ }
+}
+#else /* DATA_TYPE_FP16 */
+#error Data type not supported
+#endif /* DATA_TYPE_FP16 */
diff --git a/src/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.cpp
index 23f1c2eada..fd461c53cd 100644
--- a/src/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.cpp
+++ b/src/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.cpp
@@ -317,12 +317,20 @@ void GCDirectConvolutionLayerKernel<kernel_size>::configure(const IGCTensor *inp
const int output_padding_bottom = ceil_to_multiple(output_height, num_elems_written_per_iteration_y * _lws[1]) - output_height;
// Calculate input right and bottom border
- const int input_width = input->info()->dimension(0);
- const int input_height = input->info()->dimension(1);
- const int upper_bound_w = ceil_to_multiple(((output_width + output_padding_right) * _conv_stride_x + (kernel_size - 1)), num_elems_read_per_iteration_x * _lws[0]) - _conv_pad_x - input_width;
- const int upper_bound_h = ceil_to_multiple(((output_height + output_padding_bottom) * _conv_stride_y + (kernel_size - 1)), num_elems_read_per_iteration_y * _lws[1]) - _conv_pad_y - input_height;
- const int padding_right = std::max(upper_bound_w, _conv_pad_x);
- const int padding_bottom = std::max(upper_bound_h, _conv_pad_y);
+ const int input_width = input->info()->dimension(0);
+ const int input_height = input->info()->dimension(1);
+ const int input_total_width = std::max(int(input->info()->padding().left), int(_conv_pad_x)) + input_width + std::max(int(input->info()->padding().right), int(_conv_pad_x));
+ const int input_total_height = std::max(int(input->info()->padding().top), int(_conv_pad_y)) + input_height + std::max(int(input->info()->padding().bottom), int(_conv_pad_y));
+ const int padding_right1 = ceil_to_multiple(input_total_width, num_elems_read_per_iteration_x * _lws[0]) - input_width - _conv_pad_x;
+ const int padding_bottom1 = ceil_to_multiple(input_total_height, num_elems_read_per_iteration_y * _lws[1]) - input_height - _conv_pad_y;
+
+ const int upper_bound_w = ceil_to_multiple(((output_width + output_padding_right) * _conv_stride_x + (kernel_size - 1)), num_elems_read_per_iteration_x * _lws[0]) - _conv_pad_x - input_width;
+ const int upper_bound_h = ceil_to_multiple(((output_height + output_padding_bottom) * _conv_stride_y + (kernel_size - 1)), num_elems_read_per_iteration_y * _lws[1]) - _conv_pad_y - input_height;
+ const int padding_right2 = std::max(upper_bound_w, _conv_pad_x);
+ const int padding_bottom2 = std::max(upper_bound_h, _conv_pad_y);
+
+ const int padding_right = std::max(padding_right1, padding_right2);
+ const int padding_bottom = std::max(padding_bottom1, padding_bottom2);
BorderSize border = BorderSize(0, output_padding_right, output_padding_bottom, 0);
@@ -406,6 +414,8 @@ void GCDirectConvolutionLayerKernel<kernel_size>::run(const Window &window)
add_1D_tensor_argument(idx1, _bias, 4, slice_bias);
}
+ slice.shift(Window::DimX, -(_output->info()->padding()).left);
+
do
{
unsigned int idx = 0;
diff --git a/src/core/GLES_COMPUTE/kernels/GCTensorShiftKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCTensorShiftKernel.cpp
new file mode 100644
index 0000000000..c2182171a6
--- /dev/null
+++ b/src/core/GLES_COMPUTE/kernels/GCTensorShiftKernel.cpp
@@ -0,0 +1,108 @@
+/*
+ * Copyright (c) 2017-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/GLES_COMPUTE/kernels/GCTensorShiftKernel.h"
+
+#include "arm_compute/core/AccessWindowStatic.h"
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h"
+#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h"
+#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h"
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/IAccessWindow.h"
+#include "arm_compute/core/ITensor.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/core/Validate.h"
+#include "support/ToolchainSupport.h"
+
+using namespace arm_compute;
+using namespace arm_compute::gles_compute;
+
+GCTensorShiftKernel::GCTensorShiftKernel() // Default state: no tensor bound yet and a local work-group size of 1x1x1.
+ : _input(nullptr), _lws(gles::NDRange(1U, 1U, 1U))
+{
+}
+
+void GCTensorShiftKernel::configure(IGCTensor *input) // Builds the "tensorshift" shader for @p input (single-channel F16 or F32) and configures the execution window.
+{
+ ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32);
+
+ _input = input; // Kept for run(), which reads the tensor's padding and binds the tensor as the shader argument.
+
+ std::set<std::string> options; // "#define" lines passed to create_kernel() below.
+ options.emplace("#define LOCAL_SIZE_X " + support::cpp11::to_string(_lws[0]));
+ options.emplace("#define LOCAL_SIZE_Y " + support::cpp11::to_string(_lws[1]));
+ options.emplace("#define LOCAL_SIZE_Z " + support::cpp11::to_string(_lws[2]));
+ options.emplace("#define WIDTH " + support::cpp11::to_string(input->info()->dimension(0))); // WIDTH is the tensor's dimension 0, fixed at configure time.
+
+ std::string dt_name = (input->info()->data_type() == DataType::F32) ? "DATA_TYPE_FP32" : "DATA_TYPE_FP16"; // Any type other than F32 selects the FP16 shader path.
+ options.emplace(("#define " + dt_name));
+
+ unsigned int num_elems_written_per_iteration_x = input->info()->dimension(0) + input->info()->padding().left + input->info()->padding().right; // X step: dimension 0 plus left and right padding as they are at this point.
+ unsigned int num_elems_written_per_iteration_y = 1;
+ unsigned int num_elems_written_per_iteration_z = 1;
+
+ std::stringstream kernel_name;
+ kernel_name << "tensorshift";
+
+ _kernel = static_cast<GCKernel>(GCKernelLibrary::get().create_kernel(kernel_name.str(), options));
+
+ Window win = calculate_max_enlarged_window(*input->info(), Steps(num_elems_written_per_iteration_x, num_elems_written_per_iteration_y, num_elems_written_per_iteration_z));
+ AccessWindowHorizontal input_access(input->info(), 0, num_elems_written_per_iteration_x); // Declares access to num_elems_written_per_iteration_x elements from x offset 0.
+
+ update_window_and_padding(win, input_access); // NOTE(review): presumably may enlarge the tensor padding after WIDTH and the x step were computed - confirm.
+
+ IGCKernel::configure(win);
+}
+
+void GCTensorShiftKernel::run(const Window &window) // Enqueues the shift shader once per 3D slice of @p window; enqueues nothing when the tensor has no left padding.
+{
+ ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
+ ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window);
+
+ _kernel.use();
+
+ // Get the first 3D slice and move its X range back by the tensor's left padding
+ Window slice = window.first_slice_window_3D();
+ slice.shift(Window::DimX, -(_input->info()->padding()).left); // NOTE(review): if padding().left is an unsigned type the negation wraps before being converted to shift()'s parameter type - confirm.
+
+ do
+ {
+ unsigned int idx = 0; // Running shader-argument index, advanced by the calls below.
+
+ add_3D_tensor_argument(idx, _input, 1, slice);
+
+ const PaddingSize &padding1 = _input->info()->padding();
+
+ if(int(padding1.left) == 0) // Nothing to shift: leave the loop without enqueueing. NOTE(review): the condition does not depend on the slice, so it could be tested before the loop.
+ {
+ break;
+ }
+
+ _kernel.set_argument(idx++, static_cast<unsigned int>(padding1.left)); // Left padding is passed as the shader's pad_x parameter.
+
+ _kernel.update_shader_params();
+ enqueue(*this, slice, _lws);
+ }
+ while(window.slide_window_slice_3D(slice));
+}
diff --git a/src/runtime/GLES_COMPUTE/functions/GCDirectConvolutionLayer.cpp b/src/runtime/GLES_COMPUTE/functions/GCDirectConvolutionLayer.cpp
index ae9dd51b8e..769733ca66 100644
--- a/src/runtime/GLES_COMPUTE/functions/GCDirectConvolutionLayer.cpp
+++ b/src/runtime/GLES_COMPUTE/functions/GCDirectConvolutionLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -27,10 +27,16 @@
#include "arm_compute/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/PixelValue.h"
+#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Utils.h"
+#include "arm_compute/runtime/GLES_COMPUTE/GCScheduler.h"
#include "support/ToolchainSupport.h"
using namespace arm_compute;
+GCDirectConvolutionLayer::GCDirectConvolutionLayer() // Starts with no convolution kernel selected and default-constructed border and shift handlers.
+ : _kernel(nullptr), _border_handler(), _shift_handler()
+{
+}
void GCDirectConvolutionLayer::configure(const IGCTensor *input, const IGCTensor *weights, const IGCTensor *biases, IGCTensor *output, const PadStrideInfo &conv_info)
{
@@ -61,4 +67,15 @@ void GCDirectConvolutionLayer::configure(const IGCTensor *input, const IGCTensor
}
_border_handler.configure(input, _kernel->border_size(), BorderMode::CONSTANT, PixelValue(0));
+
+ _shift_handler.configure(output);
+}
+
+void GCDirectConvolutionLayer::run() // Dispatches border fill, convolution and output shift in that order, with a memory barrier between consecutive stages.
+{
+ GCScheduler::get().dispatch(_border_handler, false); // NOTE(review): meaning of the 'false' argument is not visible here - confirm against GCScheduler::dispatch.
+ GCScheduler::get().memory_barrier();
+ GCScheduler::get().dispatch(*_kernel); // The convolution kernel selected in configure().
+ GCScheduler::get().memory_barrier();
+ GCScheduler::get().dispatch(_shift_handler); // Shift stage, configured on the output tensor in configure().
}
diff --git a/src/runtime/GLES_COMPUTE/functions/GCTensorShift.cpp b/src/runtime/GLES_COMPUTE/functions/GCTensorShift.cpp
new file mode 100644
index 0000000000..93496f4b74
--- /dev/null
+++ b/src/runtime/GLES_COMPUTE/functions/GCTensorShift.cpp
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2017-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/GLES_COMPUTE/functions/GCTensorShift.h"
+
+#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h"
+#include "arm_compute/core/GLES_COMPUTE/kernels/GCTensorShiftKernel.h"
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/PixelValue.h"
+#include "arm_compute/core/Utils.h"
+#include "support/ToolchainSupport.h"
+
+using namespace arm_compute;
+
+void GCTensorShift::configure(IGCTensor *input) // Creates a GCTensorShiftKernel, configures it on @p input and stores it as this function's kernel.
+{
+ auto k = arm_compute::support::cpp14::make_unique<GCTensorShiftKernel>();
+ k->configure(input);
+ _kernel = std::move(k); // Ownership of the configured kernel moves into _kernel.
+}