From ceb889efc302464efd7fd20001d8a89a06c4e0bd Mon Sep 17 00:00:00 2001 From: Michalis Spyrou Date: Mon, 17 Sep 2018 18:24:41 +0100 Subject: COMPMID-1588 Create UpsampleKernel for YOLOLayer Change-Id: Ic1f9e85306a0a0b1459c9f9aa35bd629deea1710 Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/148797 Tested-by: bsgcomp Reviewed-by: Anthony Barbier Reviewed-by: Georgios Pinitas --- arm_compute/core/CL/CLKernels.h | 1 + .../core/CL/kernels/CLUpsampleLayerKernel.h | 79 ++++++++++ arm_compute/core/utils/misc/ShapeCalculator.h | 15 ++ arm_compute/runtime/CL/CLFunctions.h | 1 + arm_compute/runtime/CL/functions/CLUpsampleLayer.h | 83 +++++++++++ src/core/CL/CLKernelLibrary.cpp | 6 + src/core/CL/cl_kernels/upsample_layer.cl | 135 +++++++++++++++++ src/core/CL/kernels/CLUpsampleLayerKernel.cpp | 163 +++++++++++++++++++++ src/runtime/CL/functions/CLUpsampleLayer.cpp | 57 +++++++ tests/validation/CL/UpsampleLayer.cpp | 148 +++++++++++++++++++ tests/validation/fixtures/UpsampleLayerFixture.h | 116 +++++++++++++++ tests/validation/reference/UpsampleLayer.cpp | 86 +++++++++++ tests/validation/reference/UpsampleLayer.h | 45 ++++++ 13 files changed, 935 insertions(+) create mode 100644 arm_compute/core/CL/kernels/CLUpsampleLayerKernel.h create mode 100644 arm_compute/runtime/CL/functions/CLUpsampleLayer.h create mode 100644 src/core/CL/cl_kernels/upsample_layer.cl create mode 100644 src/core/CL/kernels/CLUpsampleLayerKernel.cpp create mode 100644 src/runtime/CL/functions/CLUpsampleLayer.cpp create mode 100644 tests/validation/CL/UpsampleLayer.cpp create mode 100644 tests/validation/fixtures/UpsampleLayerFixture.h create mode 100644 tests/validation/reference/UpsampleLayer.cpp create mode 100644 tests/validation/reference/UpsampleLayer.h diff --git a/arm_compute/core/CL/CLKernels.h b/arm_compute/core/CL/CLKernels.h index 88658013fa..f39d7b71f6 100644 --- a/arm_compute/core/CL/CLKernels.h +++ b/arm_compute/core/CL/CLKernels.h @@ -118,6 +118,7 @@ #include "arm_compute/core/CL/kernels/CLTableLookupKernel.h" #include "arm_compute/core/CL/kernels/CLThresholdKernel.h" #include "arm_compute/core/CL/kernels/CLTransposeKernel.h" +#include "arm_compute/core/CL/kernels/CLUpsampleLayerKernel.h" #include "arm_compute/core/CL/kernels/CLWarpAffineKernel.h" #include "arm_compute/core/CL/kernels/CLWarpPerspectiveKernel.h" #include "arm_compute/core/CL/kernels/CLWeightsReshapeKernel.h" diff --git a/arm_compute/core/CL/kernels/CLUpsampleLayerKernel.h b/arm_compute/core/CL/kernels/CLUpsampleLayerKernel.h new file mode 100644 index 0000000000..dcd4f1bdb4 --- /dev/null +++ b/arm_compute/core/CL/kernels/CLUpsampleLayerKernel.h @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2018 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLUPSAMPLELAYERKERNEL_H__ +#define __ARM_COMPUTE_CLUPSAMPLELAYERKERNEL_H__ + +#include "arm_compute/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the UpsampleLayer kernel on OpenCL. */ +class CLUpsampleLayerKernel : public ICLKernel +{ +public: + /** Constructor */ + CLUpsampleLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLUpsampleLayerKernel(const CLUpsampleLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLUpsampleLayerKernel &operator=(const CLUpsampleLayerKernel &) = delete; + /** Default Move Constructor. */ + CLUpsampleLayerKernel(CLUpsampleLayerKernel &&) = default; + /** Default move assignment operator */ + CLUpsampleLayerKernel &operator=(CLUpsampleLayerKernel &&) = default; + /** Default destructor */ + ~CLUpsampleLayerKernel() = default; + + /** Initialise the kernel's input and output. + * + * @param[in] input Source tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. + * @param[out] output Destination tensor. Data types supported: same as @p input. + * @param[in] info Contains stride information described in @ref Size2D. + * @param[in] upsampling_policy Defines the policy to fill the intermediate pixels. + */ + void configure(const ICLTensor *input, ICLTensor *output, const Size2D &info, const InterpolationPolicy upsampling_policy); + /** Static function to check if given info will lead to a valid configuration of @ref CLUpsampleLayerKernel + * + * @param[in] input Source tensor info. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. + * @param[in] output Destination tensor info. Data types supported: same as @p input. + * @param[in] info Contains stride information described in @ref Size2D. + * @param[in] upsampling_policy Defines the policy to fill the intermediate pixels. 
+ * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &info, const InterpolationPolicy upsampling_policy); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; + ICLTensor *_output; + Size2D _info; + unsigned int _num_elems_processed_per_iteration_input_x; +}; +} // namespace arm_compute +#endif /*__ARM_COMPUTE_CLUPSAMPLELAYERKERNEL_H__ */ diff --git a/arm_compute/core/utils/misc/ShapeCalculator.h b/arm_compute/core/utils/misc/ShapeCalculator.h index 6d8e15b8b2..1fdf5b8ff5 100644 --- a/arm_compute/core/utils/misc/ShapeCalculator.h +++ b/arm_compute/core/utils/misc/ShapeCalculator.h @@ -597,6 +597,21 @@ inline TensorShape compute_padded_shape(const TensorShape &input_shape, const Pa return padded_shape; } +inline TensorShape compute_upsample_shape(const ITensorInfo &input, const Size2D &info) +{ + const DataLayout data_layout = input.data_layout(); + const int idx_width = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); + const int idx_height = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); + + TensorShape scale_out_shape(input.tensor_shape()); + const unsigned int out_x = input.dimension(idx_width) * info.x(); + const unsigned int out_y = input.dimension(idx_height) * info.y(); + scale_out_shape.set(idx_width, out_x); + scale_out_shape.set(idx_height, out_y); + + return scale_out_shape; +} + template inline TensorShape extract_shape(T *data) { diff --git a/arm_compute/runtime/CL/CLFunctions.h b/arm_compute/runtime/CL/CLFunctions.h index 4619aa5602..8dadcc1e87 100644 --- a/arm_compute/runtime/CL/CLFunctions.h +++ b/arm_compute/runtime/CL/CLFunctions.h @@ -120,6 +120,7 @@ #include "arm_compute/runtime/CL/functions/CLTableLookup.h" #include "arm_compute/runtime/CL/functions/CLThreshold.h" #include "arm_compute/runtime/CL/functions/CLTranspose.h" +#include "arm_compute/runtime/CL/functions/CLUpsampleLayer.h" #include "arm_compute/runtime/CL/functions/CLWarpAffine.h" #include "arm_compute/runtime/CL/functions/CLWarpPerspective.h" #include "arm_compute/runtime/CL/functions/CLWidthConcatenateLayer.h" diff --git a/arm_compute/runtime/CL/functions/CLUpsampleLayer.h b/arm_compute/runtime/CL/functions/CLUpsampleLayer.h new file mode 100644 index 0000000000..df504fb01a --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLUpsampleLayer.h @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2018 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLUPSAMPLELAYER_H__
+#define __ARM_COMPUTE_CLUPSAMPLELAYER_H__
+
+#include "arm_compute/runtime/IFunction.h"
+
+#include "arm_compute/core/CL/kernels/CLUpsampleLayerKernel.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/IFunction.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Basic function to run @ref CLUpsampleLayerKernel */
+class CLUpsampleLayer : public IFunction
+{
+public:
+    /** Default constructor */
+    CLUpsampleLayer();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    CLUpsampleLayer(const CLUpsampleLayer &) = delete;
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    CLUpsampleLayer &operator=(const CLUpsampleLayer &) = delete;
+    /** Allow instances of this class to be moved */
+    CLUpsampleLayer(CLUpsampleLayer &&) = default;
+    /** Allow instances of this class to be moved */
+    CLUpsampleLayer &operator=(CLUpsampleLayer &&) = default;
+    /** Default destructor */
+    virtual ~CLUpsampleLayer() = default;
+
+    /** Initialize the function's source, destination, stride information and upsampling policy.
+     *
+     * @param[in]  input             Source tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
+     * @param[out] output            Destination tensor. Data types supported: same as @p input.
+     * @param[in]  info              Contains stride information described in @ref Size2D.
+     * @param[in]  upsampling_policy Defines the policy to fill the intermediate pixels.
+     */
+    void configure(ICLTensor *input, ICLTensor *output,
+                   const Size2D &info, const InterpolationPolicy upsampling_policy);
+    /** Static function to check if given info will lead to a valid configuration of @ref CLUpsampleLayer
+     *
+     * @param[in] input             Source tensor info. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
+     * @param[in] output            Destination tensor info. Data types supported: same as @p input.
+     * @param[in] info              Contains stride information described in @ref Size2D.
+     * @param[in] upsampling_policy Defines the policy to fill the intermediate pixels.
+ * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, + const Size2D &info, const InterpolationPolicy upsampling_policy); + + // Inherited methods overridden: + void run() override; + +private: + CLUpsampleLayerKernel _upsample; + ICLTensor *_output; +}; +} // namespace arm_compute +#endif /* __ARM_COMPUTE_CLUPSAMPLELAYER_H__ */ diff --git a/src/core/CL/CLKernelLibrary.cpp b/src/core/CL/CLKernelLibrary.cpp index 391a0bb6d5..7e8ef6b22d 100644 --- a/src/core/CL/CLKernelLibrary.cpp +++ b/src/core/CL/CLKernelLibrary.cpp @@ -391,6 +391,8 @@ const std::map CLKernelLibrary::_kernel_program_map = { "UYVY422_to_NV12_bt709", "color_convert.cl" }, { "UYVY422_to_RGB888_bt709", "color_convert.cl" }, { "UYVY422_to_RGBA8888_bt709", "color_convert.cl" }, + { "upsample_layer_nchw", "upsample_layer.cl" }, + { "upsample_layer_nhwc", "upsample_layer.cl" }, { "warp_affine_nearest_neighbour", "warp_affine.cl" }, { "warp_affine_bilinear", "warp_affine.cl" }, { "warp_perspective_nearest_neighbour", "warp_perspective.cl" }, @@ -545,6 +547,10 @@ const std::map CLKernelLibrary::_program_source_map = { "copy_tensor.cl", #include "./cl_kernels/copy_tensor.clembed" + }, + { + "upsample_layer.cl", +#include "./cl_kernels/upsample_layer.clembed" }, { "deconvolution_layer.cl", diff --git a/src/core/CL/cl_kernels/upsample_layer.cl b/src/core/CL/cl_kernels/upsample_layer.cl new file mode 100644 index 0000000000..65912f585f --- /dev/null +++ b/src/core/CL/cl_kernels/upsample_layer.cl @@ -0,0 +1,135 @@ +/* + * Copyright (c) 2018 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "helpers.h" + +/** This function applies upsample on an input image. (NCHW) + * + * @attention The following variables must be passed at compile time: + * -# -DDATA_TYPE = Tensor data type. Supported data types: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32 + * -# -DVEC_SIZE_IN = Input vector size + * -# -DVEC_SIZE_OUT = Output vector size + * -# -DLAST_ACCESSED_X_IN = The input element that is on the X border (threads trying to set this, might need to step back a bit) + * -# -DLAST_ACCESSED_X_OUT = The output element that is on the X border (threads trying to set this, might need to step back a bit) + * + * @param[in] src_ptr Pointer to the source image. 
Supported data types: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32 + * @param[in] src_stride_x Stride of the source image in X dimension (in bytes) + * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] src_stride_y Stride of the source image in Y dimension (in bytes) + * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] src_stride_z Stride of the source tensor in Z dimension (in bytes) + * @param[in] src_step_z src_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source image + * @param[out] dst_ptr Pointer to the destination image. Supported data types: same as @p src_ptr + * @param[in] dst_stride_x Stride of the destination image in X dimension (in bytes) + * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] dst_stride_y Stride of the destination image in Y dimension (in bytes) + * @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] dst_stride_z Stride of the source tensor in Z dimension (in bytes) + * @param[in] dst_step_z dst_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination image + */ +__kernel void upsample_layer_nchw( + TENSOR3D_DECLARATION(src), + TENSOR3D_DECLARATION(dst)) +{ + Tensor3D src = CONVERT_TO_TENSOR3D_STRUCT(src); + Tensor3D dst = CONVERT_TO_TENSOR3D_STRUCT(dst); + +#if defined(VEC_SIZE_IN) && defined(VEC_SIZE_OUT) && defined(LAST_ACCESSED_X_IN) && defined(LAST_ACCESSED_X_OUT) + // Check if access on width gets out of bounds + // If it does shift access vector to access elements within bounds + const int xi_in = (int)(get_global_id(0) * VEC_SIZE_IN); + const int xi_out = (int)(get_global_id(0) * VEC_SIZE_OUT); + src.ptr -= max(xi_in - (int)LAST_ACCESSED_X_IN, 0) * src_stride_x; + dst.ptr -= max(xi_out - (int)LAST_ACCESSED_X_OUT, 0) * dst_stride_x; + + VEC_DATA_TYPE(DATA_TYPE, 8) + data = vload8(0, (__global DATA_TYPE *)src.ptr); + + VEC_DATA_TYPE(DATA_TYPE, 16) + data_out = (VEC_DATA_TYPE(DATA_TYPE, 16))(data.s0, data.s0, data.s1, data.s1, data.s2, data.s2, data.s3, data.s3, data.s4, data.s4, data.s5, data.s5, data.s6, data.s6, data.s7, data.s7); + + vstore16(data_out, 0, (__global DATA_TYPE *)dst.ptr); + vstore16(data_out, 0, (__global DATA_TYPE *)tensor3D_offset(&dst, 0, 1, 0)); +#else // !defined(VEC_SIZE_IN) && defined(VEC_SIZE_OUT) && defined(LAST_ACCESSED_X_IN) && defined(LAST_ACCESSED_X_OUT) + *((__global DATA_TYPE *)tensor3D_offset(&dst, 0, 0, 0)) = *((__global DATA_TYPE *)src.ptr); + *((__global DATA_TYPE *)tensor3D_offset(&dst, 0, 1, 0)) = *((__global DATA_TYPE *)src.ptr); +#endif // defined(VEC_SIZE_IN) && defined(VEC_SIZE_OUT) && defined(LAST_ACCESSED_X_IN) && defined(LAST_ACCESSED_X_OUT) +} + +/** This function applies upsample on an input image. (NHWC) + * + * @attention The following variables must be passed at compile time: + * -# -DDATA_TYPE = Tensor data type. 
Supported data types: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32 + * -# -DVEC_SIZE_IN = Input vector size + * -# -DVEC_SIZE_OUT = Output vector size + * -# -DLAST_ACCESSED_X_IN = The input element that is on the X border (threads trying to set this, might need to step back a bit) + * -# -DLAST_ACCESSED_X_OUT = The output element that is on the X border (threads trying to set this, might need to step back a bit) + * + * @param[in] src_ptr Pointer to the source image. Supported data types: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32 + * @param[in] src_stride_x Stride of the source image in X dimension (in bytes) + * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] src_stride_y Stride of the source image in Y dimension (in bytes) + * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] src_stride_z Stride of the source tensor in Z dimension (in bytes) + * @param[in] src_step_z src_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source image + * @param[out] dst_ptr Pointer to the destination image. Supported data types: same as @p src_ptr + * @param[in] dst_stride_x Stride of the destination image in X dimension (in bytes) + * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] dst_stride_y Stride of the destination image in Y dimension (in bytes) + * @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] dst_stride_z Stride of the source tensor in Z dimension (in bytes) + * @param[in] dst_step_z dst_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination image + */ +__kernel void upsample_layer_nhwc( + TENSOR3D_DECLARATION(src), + TENSOR3D_DECLARATION(dst)) +{ + Tensor3D src = CONVERT_TO_TENSOR3D_STRUCT(src); + Tensor3D dst = CONVERT_TO_TENSOR3D_STRUCT(dst); + +#if defined(VEC_SIZE_IN) && defined(VEC_SIZE_OUT) && defined(LAST_ACCESSED_X_IN) && defined(LAST_ACCESSED_X_OUT) + // Check if access on width gets out of bounds + // If it does shift access vector to access elements within bounds + const int xi_in = (int)(get_global_id(0) * VEC_SIZE_IN); + const int xi_out = (int)(get_global_id(0) * VEC_SIZE_OUT); + src.ptr -= max(xi_in - (int)LAST_ACCESSED_X_IN, 0) * src_stride_x; + dst.ptr -= max(xi_out - (int)LAST_ACCESSED_X_OUT, 0) * dst_stride_x; + + VEC_DATA_TYPE(DATA_TYPE, 16) + data = vload16(0, (__global DATA_TYPE *)src.ptr); + + vstore16(data, 0, (__global DATA_TYPE *)tensor3D_offset(&dst, 0, 0, 0)); + vstore16(data, 0, (__global DATA_TYPE *)tensor3D_offset(&dst, 0, 1, 0)); + vstore16(data, 0, (__global DATA_TYPE *)tensor3D_offset(&dst, 0, 0, 1)); + vstore16(data, 0, (__global DATA_TYPE *)tensor3D_offset(&dst, 0, 1, 1)); +#else // !defined(VEC_SIZE_IN) && defined(VEC_SIZE_OUT) && defined(LAST_ACCESSED_X_IN) && defined(LAST_ACCESSED_X_OUT) + *((__global DATA_TYPE *)tensor3D_offset(&dst, 0, 0, 0)) = *((__global DATA_TYPE *)src.ptr); + *((__global DATA_TYPE *)tensor3D_offset(&dst, 0, 1, 0)) = *((__global DATA_TYPE *)src.ptr); + *((__global DATA_TYPE *)tensor3D_offset(&dst, 0, 0, 1)) = *((__global DATA_TYPE *)src.ptr); + *((__global DATA_TYPE *)tensor3D_offset(&dst, 0, 1, 1)) = *((__global DATA_TYPE *)src.ptr); +#endif // defined(VEC_SIZE_IN) && 
defined(VEC_SIZE_OUT) && defined(LAST_ACCESSED_X_IN) && defined(LAST_ACCESSED_X_OUT) +} diff --git a/src/core/CL/kernels/CLUpsampleLayerKernel.cpp b/src/core/CL/kernels/CLUpsampleLayerKernel.cpp new file mode 100644 index 0000000000..ee3fa1141d --- /dev/null +++ b/src/core/CL/kernels/CLUpsampleLayerKernel.cpp @@ -0,0 +1,163 @@ +/* + * Copyright (c) 2018 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/core/CL/kernels/CLUpsampleLayerKernel.h" + +#include "arm_compute/core/AccessWindowStatic.h" +#include "arm_compute/core/CL/CLHelpers.h" +#include "arm_compute/core/CL/CLKernelLibrary.h" +#include "arm_compute/core/CL/ICLTensor.h" +#include "arm_compute/core/Error.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/Validate.h" +#include "arm_compute/core/Window.h" +#include "arm_compute/core/utils/misc/ShapeCalculator.h" + +namespace arm_compute +{ +CLUpsampleLayerKernel::CLUpsampleLayerKernel() + : _input(nullptr), _output(nullptr), _info(), _num_elems_processed_per_iteration_input_x() +{ +} + +Status CLUpsampleLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &info, const InterpolationPolicy upsampling_policy) +{ + ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output); + ARM_COMPUTE_UNUSED(upsampling_policy); + + DataLayout data_layout = input->data_layout(); + const int idx_width = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); + const int idx_height = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); + + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(input, output); + ARM_COMPUTE_RETURN_ERROR_ON(output->dimension(idx_width) != info.x() * input->dimension(idx_width)); + ARM_COMPUTE_RETURN_ERROR_ON(output->dimension(idx_height) != info.y() * input->dimension(idx_height)); + ARM_COMPUTE_RETURN_ERROR_ON_MSG(info.x() != 2 || info.y() != 2, "Only stride 2 is supported"); + ARM_COMPUTE_RETURN_ERROR_ON_MSG(upsampling_policy != InterpolationPolicy::NEAREST_NEIGHBOR, "Only nearest neighbor policy supported"); + + return Status{}; +} + +void CLUpsampleLayerKernel::configure(const ICLTensor *input, ICLTensor *output, const Size2D &info, const InterpolationPolicy upsampling_policy) +{ + ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); + ARM_COMPUTE_UNUSED(upsampling_policy); + + _input = input; + _output = output; + _info = info; 
+ _num_elems_processed_per_iteration_input_x = 1; + + const DataLayout data_layout = input->info()->data_layout(); + + TensorShape output_shape = misc::shape_calculator::compute_upsample_shape(*input->info(), info); + auto_init_if_empty(*output->info(), output_shape, 1, input->info()->data_type()); + output->info()->set_data_layout(data_layout); + + unsigned int num_elems_processed_per_iteration_x = 16; + const int output_width_x = output->info()->dimension(0); + const bool multi_access_x = ((output_width_x / num_elems_processed_per_iteration_x) > 0); + + // Perform validation step + ARM_COMPUTE_ERROR_THROW_ON(CLUpsampleLayerKernel::validate(input->info(), output->info(), info, upsampling_policy)); + + Window win{}; + + switch(data_layout) + { + case DataLayout::NCHW: + { + win = calculate_max_window(*output->info()); + win.set(Window::DimY, Window::Dimension(win.y().start(), win.y().end(), info.y())); + if(multi_access_x) + { + _num_elems_processed_per_iteration_input_x = num_elems_processed_per_iteration_x / info.x(); + win.set(Window::DimX, Window::Dimension(win.x().start(), ceil_to_multiple(win.x().end(), num_elems_processed_per_iteration_x), num_elems_processed_per_iteration_x)); + } + break; + } + case DataLayout::NHWC: + { + win = calculate_max_window(*output->info()); + win.set(Window::DimY, Window::Dimension(win.y().start(), win.y().end(), info.x())); + win.set(Window::DimZ, Window::Dimension(win.z().start(), win.z().end(), info.y())); + if(multi_access_x) + { + _num_elems_processed_per_iteration_input_x = num_elems_processed_per_iteration_x; + win.set(Window::DimX, Window::Dimension(win.x().start(), ceil_to_multiple(win.x().end(), + num_elems_processed_per_iteration_x), + num_elems_processed_per_iteration_x)); + } + break; + } + default: + ARM_COMPUTE_ERROR("Not implemented"); + } + + // Create kernel + CLBuildOptions build_opts; + build_opts.add_option(("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type()))); + build_opts.add_option_if(multi_access_x, "-DVEC_SIZE_IN=" + support::cpp11::to_string(_num_elems_processed_per_iteration_input_x)); + build_opts.add_option_if(multi_access_x, "-DVEC_SIZE_OUT=" + support::cpp11::to_string(num_elems_processed_per_iteration_x)); + build_opts.add_option_if(multi_access_x, "-DLAST_ACCESSED_X_IN=" + support::cpp11::to_string(std::max(_input->info()->dimension(0) - _num_elems_processed_per_iteration_input_x, 0))); + build_opts.add_option_if(multi_access_x, "-DLAST_ACCESSED_X_OUT=" + support::cpp11::to_string(std::max(output_width_x - num_elems_processed_per_iteration_x, 0))); + _kernel = static_cast(CLKernelLibrary::get().create_kernel("upsample_layer_" + lower_string(string_from_data_layout(input->info()->data_layout())), build_opts.options())); + + ICLKernel::configure_internal(win); +} + +void CLUpsampleLayerKernel::run(const Window &window, cl::CommandQueue &queue) +{ + ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); + ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window); + + Window collapsed_window = window.collapse_if_possible(ICLKernel::window(), Window::DimZ); + Window slice_out = collapsed_window.first_slice_window_3D(); + Window slice_in = collapsed_window.first_slice_window_3D(); + + DataLayout data_layout = _input->info()->data_layout(); + switch(data_layout) + { + case DataLayout::NCHW: + slice_in.set(Window::DimX, Window::Dimension(0, _input->info()->dimension(0), _num_elems_processed_per_iteration_input_x)); + slice_in.set(Window::DimY, Window::Dimension(0, _input->info()->dimension(1), 1)); + 
break; + case DataLayout::NHWC: + slice_in.set(Window::DimY, Window::Dimension(0, _input->info()->dimension(1), 1)); + slice_in.set(Window::DimZ, Window::Dimension(0, _input->info()->dimension(2), 1)); + break; + default: + ARM_COMPUTE_ERROR("Not implemented"); + } + + do + { + unsigned int idx = 0; + add_3D_tensor_argument(idx, _input, slice_in); + add_3D_tensor_argument(idx, _output, slice_out); + enqueue(queue, *this, slice_out); + } + while(collapsed_window.slide_window_slice_3D(slice_out) && collapsed_window.slide_window_slice_3D(slice_in)); +} +} // namespace arm_compute diff --git a/src/runtime/CL/functions/CLUpsampleLayer.cpp b/src/runtime/CL/functions/CLUpsampleLayer.cpp new file mode 100644 index 0000000000..1dad3250a2 --- /dev/null +++ b/src/runtime/CL/functions/CLUpsampleLayer.cpp @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2018 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/CL/functions/CLUpsampleLayer.h" + +#include "arm_compute/core/CL/OpenCL.h" +#include "arm_compute/core/Utils.h" +#include "arm_compute/runtime/CL/CLScheduler.h" + +namespace arm_compute +{ +CLUpsampleLayer::CLUpsampleLayer() // NOLINT + : _upsample(), + _output(nullptr) +{ +} + +Status CLUpsampleLayer::validate(const ITensorInfo *input, const ITensorInfo *output, + const Size2D &info, const InterpolationPolicy upsampling_policy) +{ + return CLUpsampleLayerKernel::validate(input, output, info, upsampling_policy); +} + +void CLUpsampleLayer::configure(ICLTensor *input, ICLTensor *output, + const Size2D &info, const InterpolationPolicy upsampling_policy) +{ + ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); + + _output = output; + _upsample.configure(input, _output, info, upsampling_policy); +} + +void CLUpsampleLayer::run() +{ + CLScheduler::get().enqueue(_upsample, false); +} +} // namespace arm_compute diff --git a/tests/validation/CL/UpsampleLayer.cpp b/tests/validation/CL/UpsampleLayer.cpp new file mode 100644 index 0000000000..31ad8c1717 --- /dev/null +++ b/tests/validation/CL/UpsampleLayer.cpp @@ -0,0 +1,148 @@ +/* + * Copyright (c) 2018 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/CLTensor.h" +#include "arm_compute/runtime/CL/CLTensorAllocator.h" +#include "arm_compute/runtime/CL/functions/CLUpsampleLayer.h" +#include "tests/CL/CLAccessor.h" +#include "tests/PaddingCalculator.h" +#include "tests/datasets/ShapeDatasets.h" +#include "tests/framework/Asserts.h" +#include "tests/framework/Macros.h" +#include "tests/framework/datasets/Datasets.h" +#include "tests/validation/Validation.h" +#include "tests/validation/fixtures/UpsampleLayerFixture.h" + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +namespace +{ +constexpr AbsoluteTolerance tolerance(0.001f); +} // namespace + +TEST_SUITE(CL) +TEST_SUITE(UpsampleLayer) + +DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, (combine(datasets::SmallShapes(), framework::dataset::make("DataType", DataType::F32))), + input_shape, data_type) +{ + InterpolationPolicy upsampling_policy = InterpolationPolicy::NEAREST_NEIGHBOR; + Size2D info = Size2D(2, 2); + + // Create tensors + CLTensor src = create_tensor(input_shape, data_type, 1); + CLTensor dst; + + ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS); + + // Create and configure function + CLUpsampleLayer upsample; + upsample.configure(&src, &dst, info, upsampling_policy); + + // Validate valid region + const ValidRegion src_valid_region = shape_to_valid_region(src.info()->tensor_shape()); + const ValidRegion dst_valid_region = shape_to_valid_region(dst.info()->tensor_shape()); + + validate(src.info()->valid_region(), src_valid_region); + validate(dst.info()->valid_region(), dst_valid_region); +} + +// *INDENT-OFF* +// clang-format off +DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip( + framework::dataset::make("InputInfo", { TensorInfo(TensorShape(10U, 10U, 2U), 1, DataType::F32), // Mismatching data type + TensorInfo(TensorShape(10U, 10U, 2U), 1, DataType::F32), // Invalid output shape + TensorInfo(TensorShape(10U, 10U, 2U), 1, DataType::F32), // Invalid stride + TensorInfo(TensorShape(10U, 10U, 2U), 1, DataType::F32), // Invalid policy + TensorInfo(TensorShape(10U, 10U, 2U), 1, DataType::F32), + }), + framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(20U, 20U, 2U), 1, DataType::F16), + TensorInfo(TensorShape(20U, 10U, 2U), 1, 
DataType::F32), + TensorInfo(TensorShape(20U, 20U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(20U, 20U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(20U, 20U, 2U), 1, DataType::F32), + })), + framework::dataset::make("PadInfo", { Size2D(2, 2), + Size2D(2, 2), + Size2D(1, 1), + Size2D(2, 2), + Size2D(2, 2), + })), + framework::dataset::make("UpsamplingPolicy", { InterpolationPolicy::NEAREST_NEIGHBOR, + InterpolationPolicy::NEAREST_NEIGHBOR, + InterpolationPolicy::NEAREST_NEIGHBOR, + InterpolationPolicy::BILINEAR, + InterpolationPolicy::NEAREST_NEIGHBOR, + })), + framework::dataset::make("Expected", { false, false, false, false, true })), + input_info, output_info, pad_info, upsampling_policy, expected) +{ + bool is_valid = bool(CLUpsampleLayer::validate(&input_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), pad_info, upsampling_policy)); + ARM_COMPUTE_EXPECT(is_valid == expected, framework::LogLevel::ERRORS); +} +// clang-format on +// *INDENT-ON* + +template +using CLUpsampleLayerFixture = UpsampleLayerFixture; + +TEST_SUITE(Float) +TEST_SUITE(FP32) +FIXTURE_DATA_TEST_CASE(RunSmall, CLUpsampleLayerFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallShapes(), + framework::dataset::make("DataType", DataType::F32)), + framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), + framework::dataset::make("PadInfo", { Size2D(2, 2) })), + framework::dataset::make("UpsamplingPolicy", { InterpolationPolicy::NEAREST_NEIGHBOR }))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance); +} +TEST_SUITE_END() // FP32 + +TEST_SUITE(FP16) + +FIXTURE_DATA_TEST_CASE(RunSmall, CLUpsampleLayerFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallShapes(), + framework::dataset::make("DataType", + DataType::F16)), + framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), + framework::dataset::make("PadInfo", { Size2D(2, 2) })), + framework::dataset::make("UpsamplingPolicy", { InterpolationPolicy::NEAREST_NEIGHBOR }))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance); +} + +TEST_SUITE_END() // FP16 +TEST_SUITE_END() // Float + +TEST_SUITE_END() // UpsampleLayer +TEST_SUITE_END() // CL +} // namespace validation +} // namespace test +} // namespace arm_compute diff --git a/tests/validation/fixtures/UpsampleLayerFixture.h b/tests/validation/fixtures/UpsampleLayerFixture.h new file mode 100644 index 0000000000..8fc3565e18 --- /dev/null +++ b/tests/validation/fixtures/UpsampleLayerFixture.h @@ -0,0 +1,116 @@ +/* + * Copyright (c) 2018 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/core/TensorShape.h" +#include "arm_compute/core/Types.h" +#include "tests/AssetsLibrary.h" +#include "tests/Globals.h" +#include "tests/IAccessor.h" +#include "tests/framework/Asserts.h" +#include "tests/framework/Fixture.h" +#include "tests/validation/Helpers.h" +#include "tests/validation/reference/UpsampleLayer.h" + +#include + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +template +class UpsampleLayerFixture : public framework::Fixture +{ +public: + template + void setup(TensorShape input_shape, DataType data_type, DataLayout data_layout, + Size2D info, const InterpolationPolicy &upsampling_policy) + { + _data_type = data_type; + + _target = compute_target(input_shape, info, upsampling_policy, data_type, data_layout); + _reference = compute_reference(input_shape, info, upsampling_policy, data_type); + } + +protected: + template + void fill(U &&tensor, int i) + { + library->fill_tensor_uniform(tensor, i); + } + + TensorType compute_target(TensorShape input_shape, + const Size2D &info, const InterpolationPolicy &upsampling_policy, DataType data_type, DataLayout data_layout) + { + if(data_layout == DataLayout::NHWC) + { + permute(input_shape, PermutationVector(2U, 0U, 1U)); + } + + // Create tensors + TensorType src = create_tensor(input_shape, data_type, 1, QuantizationInfo(), data_layout); + TensorType dst; + + // Create and configure function + FunctionType upsample; + upsample.configure(&src, &dst, info, upsampling_policy); + + ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS); + + // Allocate tensors + src.allocator()->allocate(); + dst.allocator()->allocate(); + + ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS); + + // Fill tensors + fill(AccessorType(src), 0); + + // Compute DeconvolutionLayer function + upsample.run(); + + return dst; + } + + SimpleTensor compute_reference(const TensorShape &input_shape, + const Size2D &info, const InterpolationPolicy &upsampling_policy, DataType data_type) + { + // Create reference + SimpleTensor src{ input_shape, data_type }; + + // Fill reference + fill(src, 0); + + return reference::upsample_layer(src, info, upsampling_policy); + } + + TensorType _target{}; + SimpleTensor _reference{}; + DataType _data_type{}; +}; +} // namespace validation +} // namespace test +} // namespace arm_compute diff --git a/tests/validation/reference/UpsampleLayer.cpp b/tests/validation/reference/UpsampleLayer.cpp new file mode 100644 index 0000000000..3a340d0905 --- /dev/null +++ b/tests/validation/reference/UpsampleLayer.cpp @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2018 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "UpsampleLayer.h" + +#include "tests/validation/Helpers.h" + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +namespace reference +{ +template +SimpleTensor upsample_layer(const SimpleTensor &src, + const Size2D &info, const InterpolationPolicy upsampling_policy) +{ + ARM_COMPUTE_ERROR_ON(upsampling_policy != InterpolationPolicy::NEAREST_NEIGHBOR); + ARM_COMPUTE_UNUSED(upsampling_policy); + + TensorShape output_shape = src.shape(); + output_shape.set(0, src.shape().x() * info.x()); + output_shape.set(1, src.shape().y() * info.y()); + + // Create reference + const int stride_x = info.x(); + const int stride_y = info.y(); + int width_out = output_shape.x(); + int height_out = output_shape.y(); + SimpleTensor out{ output_shape, src.data_type(), 1, src.quantization_info() }; + + const int width_in = src.shape().x(); + const int height_in = src.shape().y(); + const int num_2d_slices = src.shape().total_size() / (width_in * height_in); + + for(int slice = 0; slice < num_2d_slices; ++slice) + { + const int offset_slice_in = slice * width_in * height_in; + const int offset_slice_out = slice * height_out * width_out; + for(int y = 0; y < height_out; ++y) + { + for(int x = 0; x < width_out; ++x) + { + const int out_offset = y * width_out + x; + const int in_offset = (y / stride_y) * width_in + x / stride_x; + + T *_out = out.data() + offset_slice_out + out_offset; + const T *in = src.data() + offset_slice_in + in_offset; + *_out = *in; + } + } + } + + return out; +} + +template SimpleTensor upsample_layer(const SimpleTensor &src, + const Size2D &info, const InterpolationPolicy upsampling_policy); +template SimpleTensor upsample_layer(const SimpleTensor &src, + const Size2D &info, const InterpolationPolicy upsampling_policy); +} // namespace reference +} // namespace validation +} // namespace test +} // namespace arm_compute diff --git a/tests/validation/reference/UpsampleLayer.h b/tests/validation/reference/UpsampleLayer.h new file mode 100644 index 0000000000..fc1da39186 --- /dev/null +++ b/tests/validation/reference/UpsampleLayer.h @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2018 ARM Limited. 
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_TEST_UPSAMPLE_LAYER_H__
+#define __ARM_COMPUTE_TEST_UPSAMPLE_LAYER_H__
+
+#include "tests/SimpleTensor.h"
+#include "tests/validation/Helpers.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+template <typename T>
+SimpleTensor<T> upsample_layer(const SimpleTensor<T> &src,
+                               const Size2D &info, const InterpolationPolicy upsampling_policy);
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_TEST_UPSAMPLE_LAYER_H__ */
-- 
cgit v1.2.1
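Appended for illustration (not part of the patch): a minimal sketch of how the new CLUpsampleLayer public API introduced above could be driven from application code. It assumes an Arm Compute Library build with OpenCL support; the tensor shape, the default_init() scheduler setup and the main() wrapper are illustrative choices rather than anything taken from the patch. Note that CLUpsampleLayerKernel::validate() only accepts a 2x2 stride with InterpolationPolicy::NEAREST_NEIGHBOR.

#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/functions/CLUpsampleLayer.h"

using namespace arm_compute;

int main()
{
    // Create the OpenCL context, command queue and kernel cache
    CLScheduler::get().default_init();

    // NCHW source: 16x16 spatial size, 3 channels. The destination info is
    // auto-initialised by configure() to 32x32x3 via compute_upsample_shape().
    CLTensor src{}, dst{};
    src.allocator()->init(TensorInfo(TensorShape(16U, 16U, 3U), 1, DataType::F32));

    CLUpsampleLayer upsample{};
    upsample.configure(&src, &dst, Size2D(2, 2), InterpolationPolicy::NEAREST_NEIGHBOR);

    src.allocator()->allocate();
    dst.allocator()->allocate();

    // ... fill src here, e.g. through src.map()/src.unmap() ...

    upsample.run();            // enqueues CLUpsampleLayerKernel
    CLScheduler::get().sync(); // wait for the OpenCL queue to drain

    return 0;
}

Each input element is replicated into a 2x2 block of the output, which is the nearest-neighbour behaviour checked against tests/validation/reference/UpsampleLayer.cpp above.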