From 33ff9ef467153eef05b700820d859515a52481f4 Mon Sep 17 00:00:00 2001 From: Xinghang Zhou Date: Wed, 17 Jan 2018 11:23:39 +0800 Subject: APPBROWSER-400: Implement the tensorshift kernel for fixing DC's alignment issue on OpenGL ES Change-Id: I7a8489bb0fddc72899ea165e414ee87bdbfb45b3 Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/118106 Reviewed-by: Joel Liang Tested-by: Jenkins Reviewed-by: Anthony Barbier --- arm_compute/core/GLES_COMPUTE/GCKernels.h | 1 + .../GLES_COMPUTE/kernels/GCTensorShiftKernel.h | 83 +++++++ arm_compute/runtime/GLES_COMPUTE/GCFunctions.h | 1 + .../functions/GCDirectConvolutionLayer.h | 25 +- .../runtime/GLES_COMPUTE/functions/GCTensorShift.h | 51 ++++ src/core/GLES_COMPUTE/GCKernelLibrary.cpp | 5 + src/core/GLES_COMPUTE/cs_shaders/tensor_shift.cs | 134 ++++++++++ .../kernels/GCDirectConvolutionLayerKernel.cpp | 22 +- .../GLES_COMPUTE/kernels/GCTensorShiftKernel.cpp | 108 +++++++++ .../functions/GCDirectConvolutionLayer.cpp | 19 +- .../GLES_COMPUTE/functions/GCTensorShift.cpp | 40 +++ tests/datasets/ShapeDatasets.h | 22 ++ .../DirectConvolutionLayerTensorShift.cpp | 90 +++++++ .../DirectConvolutionLayerTensorShiftFixture.h | 269 +++++++++++++++++++++ 14 files changed, 859 insertions(+), 11 deletions(-) create mode 100644 arm_compute/core/GLES_COMPUTE/kernels/GCTensorShiftKernel.h create mode 100644 arm_compute/runtime/GLES_COMPUTE/functions/GCTensorShift.h create mode 100644 src/core/GLES_COMPUTE/cs_shaders/tensor_shift.cs create mode 100644 src/core/GLES_COMPUTE/kernels/GCTensorShiftKernel.cpp create mode 100644 src/runtime/GLES_COMPUTE/functions/GCTensorShift.cpp create mode 100644 tests/validation/GLES_COMPUTE/DirectConvolutionLayerTensorShift.cpp create mode 100644 tests/validation/fixtures/DirectConvolutionLayerTensorShiftFixture.h diff --git a/arm_compute/core/GLES_COMPUTE/GCKernels.h b/arm_compute/core/GLES_COMPUTE/GCKernels.h index a1f3c278c4..40312d121a 100644 --- a/arm_compute/core/GLES_COMPUTE/GCKernels.h +++ 
b/arm_compute/core/GLES_COMPUTE/GCKernels.h @@ -47,6 +47,7 @@ #include "arm_compute/core/GLES_COMPUTE/kernels/GCPoolingLayerKernel.h" #include "arm_compute/core/GLES_COMPUTE/kernels/GCScaleKernel.h" #include "arm_compute/core/GLES_COMPUTE/kernels/GCSoftmaxLayerKernel.h" +#include "arm_compute/core/GLES_COMPUTE/kernels/GCTensorShiftKernel.h" #include "arm_compute/core/GLES_COMPUTE/kernels/GCTransposeKernel.h" #include "arm_compute/core/GLES_COMPUTE/kernels/GCWeightsReshapeKernel.h" diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCTensorShiftKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCTensorShiftKernel.h new file mode 100644 index 0000000000..5f108764b4 --- /dev/null +++ b/arm_compute/core/GLES_COMPUTE/kernels/GCTensorShiftKernel.h @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2017-2018 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_GCTENSORSHIFTKERNEL_H__ +#define __ARM_COMPUTE_GCTENSORSHIFTKERNEL_H__ + +#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h" +#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h" + +namespace arm_compute +{ +class IGCTensor; +/** Interface for the kernel to shift valid data on a tensor. + * + * For example shifting 3x3 valid data with padding of 1 to right: + * @f[ + * \left( \begin{array}{ccccc} + * 0 & 0 & 0 & 0 & 0 \\ + * a00 & a01 & a02 & 0 & 0 \\ + * a10 & a11 & a12 & 0 & 0 \\ + * a20 & a21 & a22 & 0 & 0 \\ + * 0 & 0 & 0 & 0 & 0 \\ + * \end{array} \right) + * = + * \left( \begin{array}{ccccc} + * 0 & 0 & 0 & 0 & 0 \\ + * 0 & a00 & a01 & a02 & 0 \\ + * 0 & a10 & a11 & a12 & 0 \\ + * 0 & a20 & a21 & a22 & 0 \\ + * 0 & 0 & 0 & 0 & 0 \\ + * \end{array} \right) + * @f] + */ +class GCTensorShiftKernel : public IGCKernel +{ +public: + /** Default constructor */ + GCTensorShiftKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + GCTensorShiftKernel(const GCTensorShiftKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + GCTensorShiftKernel &operator=(const GCTensorShiftKernel &) = delete; + /** Allow instances of this class to be moved */ + GCTensorShiftKernel(GCTensorShiftKernel &&) = default; + /** Allow instances of this class to be moved */ + GCTensorShiftKernel &operator=(GCTensorShiftKernel &&) = default; + /** Default destructor */ + ~GCTensorShiftKernel() = default; + /** Set the input of the kernel. + * + * @param[in,out] input Source tensor. 
Data types supported: F16/F32 + */ + void configure(IGCTensor *input); + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + IGCTensor *_input; + gles::NDRange _lws; +}; +} +#endif /*__ARM_COMPUTE_GCTENSORSHIFTKERNEL_H__ */ diff --git a/arm_compute/runtime/GLES_COMPUTE/GCFunctions.h b/arm_compute/runtime/GLES_COMPUTE/GCFunctions.h index bbd8218722..6f338568c2 100644 --- a/arm_compute/runtime/GLES_COMPUTE/GCFunctions.h +++ b/arm_compute/runtime/GLES_COMPUTE/GCFunctions.h @@ -45,6 +45,7 @@ #include "arm_compute/runtime/GLES_COMPUTE/functions/GCPoolingLayer.h" #include "arm_compute/runtime/GLES_COMPUTE/functions/GCScale.h" #include "arm_compute/runtime/GLES_COMPUTE/functions/GCSoftmaxLayer.h" +#include "arm_compute/runtime/GLES_COMPUTE/functions/GCTensorShift.h" #include "arm_compute/runtime/GLES_COMPUTE/functions/GCTranspose.h" #endif /* __ARM_COMPUTE_GCFUNCTIONS_H__ */ diff --git a/arm_compute/runtime/GLES_COMPUTE/functions/GCDirectConvolutionLayer.h b/arm_compute/runtime/GLES_COMPUTE/functions/GCDirectConvolutionLayer.h index 5472bdb9ea..c6b948be1f 100644 --- a/arm_compute/runtime/GLES_COMPUTE/functions/GCDirectConvolutionLayer.h +++ b/arm_compute/runtime/GLES_COMPUTE/functions/GCDirectConvolutionLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. 
 * * SPDX-License-Identifier: MIT * * @@ -24,9 +24,12 @@ #ifndef __ARM_COMPUTE_GCDIRECTCONVOLUTIONLAYER_H__ #define __ARM_COMPUTE_GCDIRECTCONVOLUTIONLAYER_H__ +#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h" #include "arm_compute/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.h" +#include "arm_compute/core/GLES_COMPUTE/kernels/GCFillBorderKernel.h" +#include "arm_compute/core/GLES_COMPUTE/kernels/GCTensorShiftKernel.h" #include "arm_compute/core/Types.h" -#include "arm_compute/runtime/GLES_COMPUTE/IGCSimpleFunction.h" +#include "arm_compute/runtime/IFunction.h" +#include <memory> @@ -34,14 +37,20 @@ namespace arm_compute { class IGCTensor; -/** Basic function to execute direct convolution function: +/** Basic function to execute direct convolution function. This function calls the following kernels: + * + * -# @ref GCDirectConvolutionLayerKernel + * -# @ref GCFillBorderKernel + * -# @ref GCTensorShiftKernel * * @note Supported kernel size: 1x1, 3x3, and 5x5 * @note This OpenGL ES implementation works with stride_x = 1 and 2 */ -class GCDirectConvolutionLayer : public IGCSimpleFunction +class GCDirectConvolutionLayer : public IFunction { public: + /** Default constructor */ + GCDirectConvolutionLayer(); /** Set the input and output tensors. * * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM], @@ -54,6 +63,14 @@ public: * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. 
 */ void configure(const IGCTensor *input, const IGCTensor *weights, const IGCTensor *biases, IGCTensor *output, const PadStrideInfo &conv_info); + + // Inherited methods overridden: + void run() override final; + +private: + std::unique_ptr<IGCKernel> _kernel; + GCFillBorderKernel _border_handler; + GCTensorShiftKernel _shift_handler; }; } #endif /* __ARM_COMPUTE_GCDIRECTCONVOLUTIONLAYER_H__ */ diff --git a/arm_compute/runtime/GLES_COMPUTE/functions/GCTensorShift.h b/arm_compute/runtime/GLES_COMPUTE/functions/GCTensorShift.h new file mode 100644 index 0000000000..dfcec57044 --- /dev/null +++ b/arm_compute/runtime/GLES_COMPUTE/functions/GCTensorShift.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2017-2018 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_GCTENSORSHIFT_H__ +#define __ARM_COMPUTE_GCTENSORSHIFT_H__ + +#include "arm_compute/core/GLES_COMPUTE/kernels/GCTensorShiftKernel.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/GLES_COMPUTE/IGCSimpleFunction.h" + +#include <memory> + +namespace arm_compute +{ +class IGCTensor; + +/** Basic function to execute shift function for tensor. This function applies to fix alignment issue on OpenGL ES: + * + * @note This alignment issue is introduced by limits of compute shader which requires 32/64/128bit alignment for data access on OpenGL ES + */ +class GCTensorShift : public IGCSimpleFunction +{ +public: + /** Initialise the kernel's input, output. + * + * @param[in,out] input Source tensor. Data types supported: F16/F32. + */ + void configure(IGCTensor *input); +}; +} +#endif /* __ARM_COMPUTE_GCTENSORSHIFT_H__ */ diff --git a/src/core/GLES_COMPUTE/GCKernelLibrary.cpp b/src/core/GLES_COMPUTE/GCKernelLibrary.cpp index 0b9cd3f4ee..d4ce3888fd 100644 --- a/src/core/GLES_COMPUTE/GCKernelLibrary.cpp +++ b/src/core/GLES_COMPUTE/GCKernelLibrary.cpp @@ -190,6 +190,7 @@ void GCKernel::update_shader_params() const std::map<std::string, std::string> GCKernelLibrary::_shader_program_map = { { "absdiff", "absdiff.cs" }, + { "tensorshift", "tensor_shift.cs" }, { "direct_convolution1x1", "direct_convolution1x1.cs" }, { "direct_convolution3x3", "direct_convolution3x3.cs" }, { "direct_convolution5x5", "direct_convolution5x5.cs" }, @@ -233,6 +234,10 @@ const std::map<std::string, std::string> GCKernelLibrary::_program_source_map = { "absdiff.cs", #include "./cs_shaders/absdiff.csembed" + }, + { + "tensor_shift.cs", +#include "./cs_shaders/tensor_shift.csembed" }, { "convolution_layer.cs", diff --git a/src/core/GLES_COMPUTE/cs_shaders/tensor_shift.cs b/src/core/GLES_COMPUTE/cs_shaders/tensor_shift.cs new file mode 100644 index 0000000000..a0af315c76 --- /dev/null +++ b/src/core/GLES_COMPUTE/cs_shaders/tensor_shift.cs @@ -0,0 +1,134 @@ +/* + * Copyright (c) 2017-2018 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +layout(local_size_x = LOCAL_SIZE_X, local_size_y = LOCAL_SIZE_Y, local_size_z = LOCAL_SIZE_Z) in; + +#include "helpers_cs.h" + +#if defined(DATA_TYPE_FP16) +precision mediump float; +#endif // DATA_TYPE_FP16 + +/** This kernel performs a shift to move "pad_x" columns to the right. + * + * @note The data type must be passed at compile time using "#define DATA_TYPE_NAME". e.g. "#define DATA_TYPE_FP32" + * @note The width must be passed at compile time using "#define WIDTH n" e.g. "#define WIDTH 1" + * + * @param[in,out] src_ptr Pointer to the source tensor slice. 
Supported data types: F16/F32 + * @param[in] src_attrs The attributes of the source tensor + * @param[in] pad_x The padding of the source tensor in x dimension + */ +SHADER_PARAMS_DECLARATION +{ + Tensor3DAttributes src_attrs; + uint pad_x; +}; + +#if defined(DATA_TYPE_FP16) +TENSOR_DECLARATION(1, srcBuffer, uint, src_ptr, src_shift, 2, restrict); + +void main() +{ + Tensor3DIterator src_iter = CONVERT_TO_TENSOR3D_ITERATOR(src_attrs, src_shift); + int n = int(pad_x) % 2; + + if(n == 1) + { + int i = 0; + if((WIDTH % 2) == 1) + { + i = WIDTH + int(pad_x) - 2; + } + else + { + vec2 s0_end = LOAD_UNPACK2_HALF(src_ptr, TENSOR_OFFSET_ADVANCE_IN_BYTES(src_iter, (2 * (WIDTH - 2)))); + vec2 s_end = vec2(s0_end.y, 0.f); + STORE_PACK2_HALF(src_ptr, TENSOR_OFFSET_ADVANCE_IN_BYTES(src_iter, (2 * (WIDTH + int(pad_x) - 1))), s_end); + i = WIDTH + int(pad_x) - 3; + } + for(; i >= (int(pad_x) + 1); i = i - 2) + { + vec2 s0 = LOAD_UNPACK2_HALF(src_ptr, TENSOR_OFFSET_ADVANCE_IN_BYTES(src_iter, (2 * (i - int(pad_x) - 1)))); + vec2 s1 = LOAD_UNPACK2_HALF(src_ptr, TENSOR_OFFSET_ADVANCE_IN_BYTES(src_iter, (2 * (i - int(pad_x) + 1)))); + vec2 s = vec2(s0.y, s1.x); + STORE_PACK2_HALF(src_ptr, TENSOR_OFFSET_ADVANCE_IN_BYTES(src_iter, (2 * i)), s); + } + for(int j = 0; j < (int(pad_x) - 1); j = j + 2) + { + vec2 s_origin = vec2(0.f); + STORE_PACK2_CURRENT_ITEM_HALF(src_ptr, src_iter, s_origin); + TENSOR_ITERATOR_ADVANCE_IN_BYTES(src_iter, 4); + } + vec2 s0_origin = LOAD_UNPACK2_CURRENT_ITEM_HALF(src_ptr, src_iter); + vec2 s_origin = vec2(0.f, s0_origin.x); + STORE_PACK2_CURRENT_ITEM_HALF(src_ptr, src_iter, s_origin); + } + else + { + int i = 0; + if((WIDTH % 2) == 0) + { + i = WIDTH + int(pad_x) - 2; + } + else + { + vec2 s0_end = LOAD_UNPACK2_HALF(src_ptr, TENSOR_OFFSET_ADVANCE_IN_BYTES(src_iter, (2 * (WIDTH - 1)))); + vec2 s_end = vec2(s0_end.x, 0.f); + STORE_PACK2_HALF(src_ptr, TENSOR_OFFSET_ADVANCE_IN_BYTES(src_iter, (2 * (WIDTH + int(pad_x) - 1))), s_end); + i = WIDTH + int(pad_x) - 3; 
+ } + for(; i >= (int(pad_x)); i = i - 2) + { + vec2 s = LOAD_UNPACK2_HALF(src_ptr, TENSOR_OFFSET_ADVANCE_IN_BYTES(src_iter, (2 * (i - int(pad_x))))); + STORE_PACK2_HALF(src_ptr, TENSOR_OFFSET_ADVANCE_IN_BYTES(src_iter, (2 * i)), s); + } + for(int j = 0; j < int(pad_x); j = j + 2) + { + vec2 s = vec2(0.f); + STORE_PACK2_CURRENT_ITEM_HALF(src_ptr, src_iter, s); + TENSOR_ITERATOR_ADVANCE_IN_BYTES(src_iter, 4); + } + } +} +#elif defined(DATA_TYPE_FP32) +TENSOR_DECLARATION(1, srcBuffer, float, src_ptr, src_shift, 2, restrict); + +void main() +{ + Tensor3DIterator src_iter = CONVERT_TO_TENSOR3D_ITERATOR(src_attrs, src_shift); + + for(int i = (WIDTH + int(pad_x) - 1); i >= int(pad_x); i--) + { + float sorigin = LOAD(src_ptr, TENSOR_OFFSET_ADVANCE(src_iter, (i - int(pad_x)))); + STORE(src_ptr, TENSOR_OFFSET_ADVANCE(src_iter, i), sorigin); + } + for(int j = 0; j < int(pad_x); j++) + { + STORE_CURRENT_ITEM(src_ptr, src_iter, 0.f); + TENSOR_ITERATOR_ADVANCE_IN_BYTES(src_iter, 4); + } +} +#else /* DATA_TYPE_FP16 */ +#error Data type not supported +#endif /* DATA_TYPE_FP16 */ diff --git a/src/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.cpp index 23f1c2eada..fd461c53cd 100644 --- a/src/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.cpp +++ b/src/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.cpp @@ -317,12 +317,20 @@ void GCDirectConvolutionLayerKernel::configure(const IGCTensor *inp const int output_padding_bottom = ceil_to_multiple(output_height, num_elems_written_per_iteration_y * _lws[1]) - output_height; // Calculate input right and bottom border - const int input_width = input->info()->dimension(0); - const int input_height = input->info()->dimension(1); - const int upper_bound_w = ceil_to_multiple(((output_width + output_padding_right) * _conv_stride_x + (kernel_size - 1)), num_elems_read_per_iteration_x * _lws[0]) - _conv_pad_x - input_width; - const int upper_bound_h = 
ceil_to_multiple(((output_height + output_padding_bottom) * _conv_stride_y + (kernel_size - 1)), num_elems_read_per_iteration_y * _lws[1]) - _conv_pad_y - input_height; - const int padding_right = std::max(upper_bound_w, _conv_pad_x); - const int padding_bottom = std::max(upper_bound_h, _conv_pad_y); + const int input_width = input->info()->dimension(0); + const int input_height = input->info()->dimension(1); + const int input_total_width = std::max(int(input->info()->padding().left), int(_conv_pad_x)) + input_width + std::max(int(input->info()->padding().right), int(_conv_pad_x)); + const int input_total_height = std::max(int(input->info()->padding().top), int(_conv_pad_y)) + input_height + std::max(int(input->info()->padding().bottom), int(_conv_pad_y)); + const int padding_right1 = ceil_to_multiple(input_total_width, num_elems_read_per_iteration_x * _lws[0]) - input_width - _conv_pad_x; + const int padding_bottom1 = ceil_to_multiple(input_total_height, num_elems_read_per_iteration_y * _lws[1]) - input_height - _conv_pad_y; + + const int upper_bound_w = ceil_to_multiple(((output_width + output_padding_right) * _conv_stride_x + (kernel_size - 1)), num_elems_read_per_iteration_x * _lws[0]) - _conv_pad_x - input_width; + const int upper_bound_h = ceil_to_multiple(((output_height + output_padding_bottom) * _conv_stride_y + (kernel_size - 1)), num_elems_read_per_iteration_y * _lws[1]) - _conv_pad_y - input_height; + const int padding_right2 = std::max(upper_bound_w, _conv_pad_x); + const int padding_bottom2 = std::max(upper_bound_h, _conv_pad_y); + + const int padding_right = std::max(padding_right1, padding_right2); + const int padding_bottom = std::max(padding_bottom1, padding_bottom2); BorderSize border = BorderSize(0, output_padding_right, output_padding_bottom, 0); @@ -406,6 +414,8 @@ void GCDirectConvolutionLayerKernel::run(const Window &window) add_1D_tensor_argument(idx1, _bias, 4, slice_bias); } + slice.shift(Window::DimX, -(_output->info()->padding()).left); 
+ do { unsigned int idx = 0; diff --git a/src/core/GLES_COMPUTE/kernels/GCTensorShiftKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCTensorShiftKernel.cpp new file mode 100644 index 0000000000..c2182171a6 --- /dev/null +++ b/src/core/GLES_COMPUTE/kernels/GCTensorShiftKernel.cpp @@ -0,0 +1,108 @@ +/* + * Copyright (c) 2017-2018 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/core/GLES_COMPUTE/kernels/GCTensorShiftKernel.h" + +#include "arm_compute/core/AccessWindowStatic.h" +#include "arm_compute/core/Error.h" +#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h" +#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h" +#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/IAccessWindow.h" +#include "arm_compute/core/ITensor.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/core/Validate.h" +#include "support/ToolchainSupport.h" + +using namespace arm_compute; +using namespace arm_compute::gles_compute; + +GCTensorShiftKernel::GCTensorShiftKernel() + : _input(nullptr), _lws(gles::NDRange(1U, 1U, 1U)) +{ +} + +void GCTensorShiftKernel::configure(IGCTensor *input) +{ + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32); + + _input = input; + + std::set<std::string> options; + options.emplace("#define LOCAL_SIZE_X " + support::cpp11::to_string(_lws[0])); + options.emplace("#define LOCAL_SIZE_Y " + support::cpp11::to_string(_lws[1])); + options.emplace("#define LOCAL_SIZE_Z " + support::cpp11::to_string(_lws[2])); + options.emplace("#define WIDTH " + support::cpp11::to_string(input->info()->dimension(0))); + + std::string dt_name = (input->info()->data_type() == DataType::F32) ? "DATA_TYPE_FP32" : "DATA_TYPE_FP16"; + options.emplace(("#define " + dt_name)); + + unsigned int num_elems_written_per_iteration_x = input->info()->dimension(0) + input->info()->padding().left + input->info()->padding().right; + unsigned int num_elems_written_per_iteration_y = 1; + unsigned int num_elems_written_per_iteration_z = 1; + + std::stringstream kernel_name; + kernel_name << "tensorshift"; + + _kernel = static_cast<GCKernel>(GCKernelLibrary::get().create_kernel(kernel_name.str(), options)); + + Window win = calculate_max_enlarged_window(*input->info(), Steps(num_elems_written_per_iteration_x, num_elems_written_per_iteration_y, num_elems_written_per_iteration_z)); + AccessWindowHorizontal input_access(input->info(), 0, num_elems_written_per_iteration_x); + + update_window_and_padding(win, input_access); + + IGCKernel::configure(win); +} + +void GCTensorShiftKernel::run(const Window &window) +{ + ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); + ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window); + + _kernel.use(); + + // Get initial windows + Window slice = window.first_slice_window_3D(); + slice.shift(Window::DimX, -(_input->info()->padding()).left); + + do + { + unsigned int idx = 0; + + add_3D_tensor_argument(idx, _input, 1, slice); + + const PaddingSize &padding1 = _input->info()->padding(); + + if(int(padding1.left) == 0) + { + break; + } + + _kernel.set_argument(idx++, static_cast<unsigned int>(padding1.left)); + + _kernel.update_shader_params(); + enqueue(*this, slice, _lws); + } + while(window.slide_window_slice_3D(slice)); +} diff --git a/src/runtime/GLES_COMPUTE/functions/GCDirectConvolutionLayer.cpp b/src/runtime/GLES_COMPUTE/functions/GCDirectConvolutionLayer.cpp index ae9dd51b8e..769733ca66 100644 --- a/src/runtime/GLES_COMPUTE/functions/GCDirectConvolutionLayer.cpp +++ b/src/runtime/GLES_COMPUTE/functions/GCDirectConvolutionLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. 
* * SPDX-License-Identifier: MIT * @@ -27,10 +27,16 @@ #include "arm_compute/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/PixelValue.h" +#include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Utils.h" +#include "arm_compute/runtime/GLES_COMPUTE/GCScheduler.h" #include "support/ToolchainSupport.h" using namespace arm_compute; +GCDirectConvolutionLayer::GCDirectConvolutionLayer() + : _kernel(nullptr), _border_handler(), _shift_handler() +{ +} void GCDirectConvolutionLayer::configure(const IGCTensor *input, const IGCTensor *weights, const IGCTensor *biases, IGCTensor *output, const PadStrideInfo &conv_info) { @@ -61,4 +67,15 @@ void GCDirectConvolutionLayer::configure(const IGCTensor *input, const IGCTensor } _border_handler.configure(input, _kernel->border_size(), BorderMode::CONSTANT, PixelValue(0)); + + _shift_handler.configure(output); +} + +void GCDirectConvolutionLayer::run() +{ + GCScheduler::get().dispatch(_border_handler, false); + GCScheduler::get().memory_barrier(); + GCScheduler::get().dispatch(*_kernel); + GCScheduler::get().memory_barrier(); + GCScheduler::get().dispatch(_shift_handler); } diff --git a/src/runtime/GLES_COMPUTE/functions/GCTensorShift.cpp b/src/runtime/GLES_COMPUTE/functions/GCTensorShift.cpp new file mode 100644 index 0000000000..93496f4b74 --- /dev/null +++ b/src/runtime/GLES_COMPUTE/functions/GCTensorShift.cpp @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2017-2018 ARM Limited. 
+ * * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/GLES_COMPUTE/functions/GCTensorShift.h" + +#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h" +#include "arm_compute/core/GLES_COMPUTE/kernels/GCTensorShiftKernel.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/PixelValue.h" +#include "arm_compute/core/Utils.h" +#include "support/ToolchainSupport.h" + +using namespace arm_compute; + +void GCTensorShift::configure(IGCTensor *input) +{ + auto k = arm_compute::support::cpp14::make_unique<GCTensorShiftKernel>(); + k->configure(input); + _kernel = std::move(k); +} diff --git a/tests/datasets/ShapeDatasets.h b/tests/datasets/ShapeDatasets.h index dbcd9d5000..9114f514aa 100644 --- a/tests/datasets/ShapeDatasets.h +++ b/tests/datasets/ShapeDatasets.h @@ -387,6 +387,28 @@ public: } }; +/** Data set containing small tensor shapes for direct convolution. 
*/ +class SmallDirectConvolutionTensorShiftShapes final : public ShapeDataset +{ +public: + SmallDirectConvolutionTensorShiftShapes() + : ShapeDataset("InputShape", + { + // Batch size 1 + TensorShape{ 35U, 35U, 3U }, + TensorShape{ 32U, 37U, 3U }, + // Batch size 4 + TensorShape{ 32U, 37U, 3U, 4U }, + // Batch size 8 + TensorShape{ 32U, 37U, 3U, 8U }, + TensorShape{ 33U, 35U, 3U, 8U }, + // Arbitrary batch size + TensorShape{ 32U, 37U, 3U, 8U } + }) + { + } +}; + /** Data set containing 2D tensor shapes for DepthConcatenateLayer. */ class DepthConcatenateLayerShapes final : public ShapeDataset { diff --git a/tests/validation/GLES_COMPUTE/DirectConvolutionLayerTensorShift.cpp b/tests/validation/GLES_COMPUTE/DirectConvolutionLayerTensorShift.cpp new file mode 100644 index 0000000000..45fb76cad5 --- /dev/null +++ b/tests/validation/GLES_COMPUTE/DirectConvolutionLayerTensorShift.cpp @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2017-2018 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/GLES_COMPUTE/GCTensor.h" +#include "arm_compute/runtime/GLES_COMPUTE/GCTensorAllocator.h" +#include "arm_compute/runtime/GLES_COMPUTE/functions/GCDirectConvolutionLayer.h" +#include "tests/GLES_COMPUTE/GCAccessor.h" +#include "tests/PaddingCalculator.h" +#include "tests/datasets/ShapeDatasets.h" +#include "tests/framework/Asserts.h" +#include "tests/framework/Macros.h" +#include "tests/framework/datasets/Datasets.h" +#include "tests/validation/Validation.h" +#include "tests/validation/fixtures/DirectConvolutionLayerTensorShiftFixture.h" + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +namespace +{ +RelativeTolerance<half> tolerance_fp16(half(0.2)); /**< Tolerance for floating point tests */ +RelativeTolerance<float> tolerance_fp32(0.02f); /**< Tolerance for floating point tests */ +constexpr float tolerance_num = 0.07f; /**< Tolerance number */ + +/** Direct convolution data set. */ +const auto data = combine(datasets::SmallDirectConvolutionTensorShiftShapes(), + combine(framework::dataset::make("StrideX", 1, 3), + combine(framework::dataset::make("StrideY", 1, 3), + combine(concat(combine(framework::dataset::make("PadX", 0), + combine(framework::dataset::make("PadY", 0), + framework::dataset::make("KernelSize", 1))), + combine(framework::dataset::make("PadX", 0, 2), + combine(framework::dataset::make("PadY", 0, 2), + framework::dataset::make("KernelSize", { 3, 5 })))), + framework::dataset::make("NumKernels", { 3 }))))); +} // namespace + +TEST_SUITE(GC) +TEST_SUITE(DirectConvolutionLayerTensorShift) + +template <typename T> +using GCDirectConvolutionLayerTensorShiftFixture = DirectConvolutionValidationTensorShiftFixture<GCTensor, GCAccessor, GCDirectConvolutionLayer, T>; + +TEST_SUITE(Float) +TEST_SUITE(FP16) +FIXTURE_DATA_TEST_CASE(Run, GCDirectConvolutionLayerTensorShiftFixture<half>, framework::DatasetMode::ALL, combine(data, framework::dataset::make("DataType", DataType::F16))) +{ + // Validate output + validate(GCAccessor(_target), _reference, tolerance_fp16, tolerance_num); +} +TEST_SUITE_END() + +TEST_SUITE(FP32) +FIXTURE_DATA_TEST_CASE(Run, GCDirectConvolutionLayerTensorShiftFixture<float>, framework::DatasetMode::ALL, combine(data, framework::dataset::make("DataType", DataType::F32))) +{ + // Validate output + validate(GCAccessor(_target), _reference, tolerance_fp32); +} +TEST_SUITE_END() +TEST_SUITE_END() + +TEST_SUITE_END() +TEST_SUITE_END() +} // namespace validation +} // namespace test +} // namespace arm_compute diff --git a/tests/validation/fixtures/DirectConvolutionLayerTensorShiftFixture.h b/tests/validation/fixtures/DirectConvolutionLayerTensorShiftFixture.h new file mode 100644 index 0000000000..d810a765cb --- /dev/null +++ b/tests/validation/fixtures/DirectConvolutionLayerTensorShiftFixture.h @@ -0,0 +1,269 @@ +/* + * Copyright (c) 2017-2018 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/core/TensorShape.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/GLES_COMPUTE/GCScheduler.h" +#include "tests/AssetsLibrary.h" +#include "tests/Globals.h" +#include "tests/IAccessor.h" +#include "tests/framework/Asserts.h" +#include "tests/framework/Fixture.h" +#include "tests/validation/Helpers.h" +#include "tests/validation/fixtures/ConvolutionLayerFixture.h" +#include "tests/validation/reference/ConvolutionLayer.h" + +#include + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +template +class DirectConvolutionValidationGenericTensorShiftFixture : public framework::Fixture +{ +public: + using TBias = typename std::conditional::type, uint8_t>::value, int32_t, T>::type; + +public: + template + void setup(TensorShape input_shape, int stride_x, int stride_y, int pad_x, int pad_y, unsigned int kernel_size, unsigned int num_kernels, + DataType data_type, int fractional_bits, QuantizationInfo quantization_info) + { + _fractional_bits = fractional_bits; + _quantization_info = quantization_info; + _data_type = data_type; + + const TensorShape weights_shape(kernel_size, kernel_size, input_shape.z(), num_kernels); + const TensorShape bias_shape(num_kernels); + const PadStrideInfo info(stride_x, stride_y, pad_x, pad_y, DimensionRoundingType::FLOOR); + const TensorShape output_shape = get_output_shape(input_shape, weights_shape, info); + const DataType bias_data_type = is_data_type_quantized_asymmetric(data_type) ? 
DataType::S32 : data_type; + + _target = compute_target(input_shape, weights_shape, bias_shape, output_shape, info, data_type, bias_data_type, fractional_bits, quantization_info); + _reference = compute_reference(input_shape, weights_shape, bias_shape, output_shape, info, data_type, bias_data_type, fractional_bits, quantization_info); + } + + template + void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, PadStrideInfo info, + DataType data_type, int fractional_bits, QuantizationInfo quantization_info) + { + _fractional_bits = fractional_bits; + _quantization_info = quantization_info; + _data_type = data_type; + + const DataType bias_data_type = is_data_type_quantized_asymmetric(data_type) ? DataType::S32 : data_type; + + _target = compute_target(input_shape, weights_shape, bias_shape, output_shape, info, data_type, bias_data_type, fractional_bits, quantization_info); + _reference = compute_reference(input_shape, weights_shape, bias_shape, output_shape, info, data_type, bias_data_type, fractional_bits, quantization_info); + } + +protected: + template + void fill(U &&tensor, int i) + { + switch(tensor.data_type()) + { + case DataType::QASYMM8: + { + std::uniform_int_distribution distribution(0, 50); + library->fill(tensor, distribution, i); + break; + } + case DataType::F16: + case DataType::F32: + { + std::uniform_real_distribution<> distribution(-1.0f, 1.0f); + library->fill(tensor, distribution, i); + break; + } + case DataType::S32: + { + std::uniform_int_distribution distribution(-5, 5); + library->fill(tensor, distribution, i); + break; + } + default: + library->fill_tensor_uniform(tensor, i); + } + } + + TensorType compute_target(const TensorShape &input_shape, const TensorShape &weights_shape, const TensorShape &bias_shape, const TensorShape &output_shape, const PadStrideInfo &info, + DataType data_type, DataType bias_data_type, int fixed_point_position, QuantizationInfo quantization_info) + { + // 
Create tensors + TensorType src = create_tensor(input_shape, data_type, 1, fixed_point_position, quantization_info); + TensorType weights = create_tensor(weights_shape, data_type, 1, fixed_point_position, quantization_info); + TensorType bias = create_tensor(bias_shape, bias_data_type, 1, fixed_point_position, quantization_info); + TensorType dst = create_tensor(output_shape, data_type, 1, fixed_point_position, quantization_info); + + TensorShape output_shape1 = get_output_shape(output_shape, weights_shape, info); + TensorType dst1 = create_tensor(output_shape1, data_type, 1, fixed_point_position, quantization_info); + + // Create and configure function + FunctionType conv; + conv.configure(&src, &weights, &bias, &dst, info); + FunctionType conv1; + conv1.configure(&dst, &weights, &bias, &dst1, info); + + ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(weights.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(bias.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(dst1.info()->is_resizable(), framework::LogLevel::ERRORS); + + // Allocate tensors + src.allocator()->allocate(); + weights.allocator()->allocate(); + bias.allocator()->allocate(); + dst.allocator()->allocate(); + dst1.allocator()->allocate(); + + ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(!weights.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(!bias.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(!dst1.info()->is_resizable(), framework::LogLevel::ERRORS); + + // Fill tensors + fill(AccessorType(src), 0); + fill(AccessorType(weights), 1); + fill(AccessorType(bias), 2); + + // Compute NEConvolutionLayer function + 
GCScheduler::get().memory_barrier(); + conv.run(); + GCScheduler::get().memory_barrier(); + conv1.run(); + + return dst1; + } + + SimpleTensor compute_reference(const TensorShape &input_shape, const TensorShape &weights_shape, const TensorShape &bias_shape, const TensorShape &output_shape, const PadStrideInfo &info, + DataType data_type, DataType bias_data_type, int fixed_point_position, QuantizationInfo quantization_info) + { + // Create reference + SimpleTensor src{ input_shape, data_type, 1, fixed_point_position, quantization_info }; + SimpleTensor weights{ weights_shape, data_type, 1, fixed_point_position, quantization_info }; + SimpleTensor bias{ bias_shape, bias_data_type, 1, fixed_point_position, quantization_info }; + + SimpleTensor dst{ output_shape, data_type, 1, fixed_point_position, quantization_info }; + TensorShape output_shape1 = get_output_shape(output_shape, weights_shape, info); + + // Fill reference + fill(src, 0); + fill(weights, 1); + fill(bias, 2); + + dst = reference::convolution_layer(src, weights, bias, output_shape, info); + return reference::convolution_layer(dst, weights, bias, output_shape1, info); + } + + TensorType _target{}; + SimpleTensor _reference{}; + int _fractional_bits{}; + QuantizationInfo _quantization_info{}; + DataType _data_type{}; + +private: + TensorShape get_output_shape(TensorShape in_shape, TensorShape kernel_shape, const PadStrideInfo &info) + { + TensorShape out_shape(in_shape); + const std::pair scaled_dims = scaled_dimensions(in_shape.x(), + in_shape.y(), + kernel_shape.x(), + kernel_shape.y(), + info); + out_shape.set(0, scaled_dims.first); + out_shape.set(1, scaled_dims.second); + out_shape.set(2, kernel_shape[3]); + return out_shape; + } +}; + +template +class DirectConvolutionValidationTensorShiftFixture : public DirectConvolutionValidationGenericTensorShiftFixture +{ +public: + template + void setup(TensorShape input_shape, int stride_x, int stride_y, int pad_x, int pad_y, unsigned int kernel_size, unsigned 
int num_kernels, DataType data_type) + { + DirectConvolutionValidationGenericTensorShiftFixture::setup(input_shape, stride_x, stride_y, pad_x, pad_y, kernel_size, num_kernels, data_type, 0, + QuantizationInfo()); + } +}; + +template +class DirectConvolutionValidationFixedPointTensorShiftFixture : public DirectConvolutionValidationGenericTensorShiftFixture +{ +public: + template + void setup(TensorShape input_shape, int stride_x, int stride_y, int pad_x, int pad_y, unsigned int kernel_size, unsigned int num_kernels, DataType data_type, int fractional_bits) + { + DirectConvolutionValidationGenericTensorShiftFixture::setup(input_shape, stride_x, stride_y, pad_x, pad_y, kernel_size, num_kernels, data_type, + fractional_bits, + QuantizationInfo()); + } +}; + +template +class DirectConvolutionValidationQuantizedTensorShiftFixture : public DirectConvolutionValidationGenericTensorShiftFixture +{ +public: + template + void setup(TensorShape input_shape, int stride_x, int stride_y, int pad_x, int pad_y, unsigned int kernel_size, unsigned int num_kernels, DataType data_type, QuantizationInfo quantization_info) + { + DirectConvolutionValidationGenericTensorShiftFixture::setup(input_shape, stride_x, stride_y, pad_x, pad_y, kernel_size, num_kernels, data_type, 0, + quantization_info); + } +}; + +template +class DirectConvolutionValidationWithTensorShapesQuantizedTensorShiftFixture : public DirectConvolutionValidationGenericTensorShiftFixture +{ +public: + template + void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, PadStrideInfo info, + DataType data_type, QuantizationInfo quantization_info) + { + DirectConvolutionValidationGenericTensorShiftFixture::setup(input_shape, weights_shape, bias_shape, output_shape, info, data_type, 0, quantization_info); + } +}; + +template +class DirectConvolutionValidationWithTensorShapesTensorShiftFixture : public DirectConvolutionValidationGenericTensorShiftFixture +{ +public: + 
template + void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, PadStrideInfo info, + DataType data_type) + { + DirectConvolutionValidationGenericTensorShiftFixture::setup(input_shape, weights_shape, bias_shape, output_shape, info, data_type, 0, QuantizationInfo()); + } +}; + +} // namespace validation +} // namespace test +} // namespace arm_compute -- cgit v1.2.1