aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMichalis Spyrou <michalis.spyrou@arm.com>2017-11-23 09:49:51 +0000
committerAnthony Barbier <anthony.barbier@arm.com>2018-11-02 16:42:33 +0000
commit780db4eb6a9e3dee565d14f36d772038cd3253da (patch)
tree53490d6a03bdeb26d77bc8840d1dbf6027e81f5c
parentd7ba5397b676c966cb5069c7187a12a0c35305f5 (diff)
downloadComputeLibrary-780db4eb6a9e3dee565d14f36d772038cd3253da.tar.gz
COMPMID-471 Implement Deconvolution on OpenCL
Change-Id: Ie00c6b08a51d30c5ce2637d40ee3d165b8a68686 Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/110311 Reviewed-by: Pablo Tello <pablo.tello@arm.com> Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com> Tested-by: Jenkins <bsgcomp@arm.com>
-rw-r--r--arm_compute/core/CL/CLKernels.h3
-rw-r--r--arm_compute/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.h80
-rw-r--r--arm_compute/core/NEON/NEKernels.h3
-rw-r--r--arm_compute/core/NEON/kernels/NEDeconvolutionLayerUpsampleKernel.h72
-rw-r--r--arm_compute/core/Utils.h28
-rw-r--r--arm_compute/core/utils/misc/ShapeCalculator.h15
-rw-r--r--arm_compute/runtime/CL/CLFunctions.h4
-rw-r--r--arm_compute/runtime/CL/functions/CLDeconvolutionLayer.h103
-rw-r--r--arm_compute/runtime/CL/functions/CLDeconvolutionLayerUpsample.h85
-rw-r--r--arm_compute/runtime/CL/functions/CLDirectConvolutionLayer.h3
-rw-r--r--arm_compute/runtime/NEON/NEFunctions.h3
-rw-r--r--arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h61
-rw-r--r--arm_compute/runtime/NEON/functions/NEDeconvolutionLayerUpsample.h72
-rw-r--r--src/core/CL/CLKernelLibrary.cpp7
-rw-r--r--src/core/CL/cl_kernels/deconvolution_layer.cl50
-rw-r--r--src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.cpp117
-rw-r--r--src/core/NEON/kernels/NEDeconvolutionLayerUpsampleKernel.cpp165
-rw-r--r--src/core/Utils.cpp32
-rw-r--r--src/runtime/CL/functions/CLDeconvolutionLayer.cpp132
-rw-r--r--src/runtime/CL/functions/CLDeconvolutionLayerUpsample.cpp64
-rw-r--r--src/runtime/NEON/functions/NEDeconvolutionLayer.cpp105
-rw-r--r--src/runtime/NEON/functions/NEDeconvolutionLayerUpsample.cpp121
-rw-r--r--tests/datasets/ShapeDatasets.h4
-rw-r--r--tests/validation/CL/DeconvolutionLayer.cpp192
-rw-r--r--tests/validation/NEON/DeconvolutionLayer.cpp14
-rw-r--r--tests/validation/fixtures/DeconvolutionLayerFixture.h37
-rw-r--r--tests/validation/reference/DeconvolutionLayer.cpp72
-rw-r--r--tests/validation/reference/DeconvolutionLayer.h4
28 files changed, 1016 insertions, 632 deletions
diff --git a/arm_compute/core/CL/CLKernels.h b/arm_compute/core/CL/CLKernels.h
index 9da0e5ab3a..64687fb26a 100644
--- a/arm_compute/core/CL/CLKernels.h
+++ b/arm_compute/core/CL/CLKernels.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016, 2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -42,6 +42,7 @@
#include "arm_compute/core/CL/kernels/CLCol2ImKernel.h"
#include "arm_compute/core/CL/kernels/CLColorConvertKernel.h"
#include "arm_compute/core/CL/kernels/CLConvolutionKernel.h"
+#include "arm_compute/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.h"
#include "arm_compute/core/CL/kernels/CLDepthConcatenateLayerKernel.h"
#include "arm_compute/core/CL/kernels/CLDepthConvertLayerKernel.h"
#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3Kernel.h"
diff --git a/arm_compute/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.h b/arm_compute/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.h
new file mode 100644
index 0000000000..8867ca1c37
--- /dev/null
+++ b/arm_compute/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.h
@@ -0,0 +1,80 @@
+/*
+ * Copyright (c) 2017, 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLDECONVOLUTIONLAYERUPSAMPLEKERNEL_H__
+#define __ARM_COMPUTE_CLDECONVOLUTIONLAYERUPSAMPLEKERNEL_H__
+
+#include "arm_compute/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the Deconvolution layer kernel on OpenCL.
+ */
+class CLDeconvolutionLayerUpsampleKernel : public ICLKernel
+{
+public:
+ /** Constructor */
+ CLDeconvolutionLayerUpsampleKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLDeconvolutionLayerUpsampleKernel(const CLDeconvolutionLayerUpsampleKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLDeconvolutionLayerUpsampleKernel &operator=(const CLDeconvolutionLayerUpsampleKernel &) = delete;
+ /** Default Move Constructor. */
+ CLDeconvolutionLayerUpsampleKernel(CLDeconvolutionLayerUpsampleKernel &&) = default;
+ /** Default move assignment operator. */
+ CLDeconvolutionLayerUpsampleKernel &operator=(CLDeconvolutionLayerUpsampleKernel &&) = default;
+ /** Default destructor */
+ ~CLDeconvolutionLayerUpsampleKernel() = default;
+
+ /** Initialise the kernel's input and output.
+ *
+ * @param[in] input Source tensor. Data types supported: F32.
+ * @param[out] output Destination tensor. Data types supported: F32. All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane.
+ * @param[in] inner_border Top and right inner border sizes. These rows and columns will be filled with zero.
+ * @param[in] info Contains padding and stride information described in @ref PadStrideInfo.
+ */
+ void configure(const ICLTensor *input, ICLTensor *output, const BorderSize &inner_border, const PadStrideInfo &info);
+ /** Static function to check if given info will lead to a valid configuration of @ref CLDeconvolutionLayerUpsample
+ *
+ * @param[in] input Source tensor info. Data types supported: F32.
+ * @param[in] output Destination tensor info. Data types supported: F32. All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane.
+ * @param[in] inner_border Top and right inner border sizes. These rows and columns will be filled with zero.
+ * @param[in] info Contains padding and stride information described in @ref PadStrideInfo.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output, const BorderSize &inner_border, const PadStrideInfo &info);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLTensor *_input;
+ ICLTensor *_output;
+ BorderSize _inner_border;
+ PadStrideInfo _info;
+};
+} // namespace arm_compute
+#endif /*__ARM_COMPUTE_CLDECONVOLUTIONLAYERUPSAMPLEKERNEL_H__ */
diff --git a/arm_compute/core/NEON/NEKernels.h b/arm_compute/core/NEON/NEKernels.h
index 8a4cf7abeb..d5c4c340ee 100644
--- a/arm_compute/core/NEON/NEKernels.h
+++ b/arm_compute/core/NEON/NEKernels.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016, 2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -43,7 +43,6 @@
#include "arm_compute/core/NEON/kernels/NEColorConvertKernel.h"
#include "arm_compute/core/NEON/kernels/NEConvolutionKernel.h"
#include "arm_compute/core/NEON/kernels/NECumulativeDistributionKernel.h"
-#include "arm_compute/core/NEON/kernels/NEDeconvolutionLayerUpsampleKernel.h"
#include "arm_compute/core/NEON/kernels/NEDepthConcatenateLayerKernel.h"
#include "arm_compute/core/NEON/kernels/NEDepthConvertLayerKernel.h"
#include "arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.h"
diff --git a/arm_compute/core/NEON/kernels/NEDeconvolutionLayerUpsampleKernel.h b/arm_compute/core/NEON/kernels/NEDeconvolutionLayerUpsampleKernel.h
deleted file mode 100644
index 707564683f..0000000000
--- a/arm_compute/core/NEON/kernels/NEDeconvolutionLayerUpsampleKernel.h
+++ /dev/null
@@ -1,72 +0,0 @@
-/*
- * Copyright (c) 2017 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef __ARM_COMPUTE_NEDECONVOLUTIONLAYERKERNEL_H__
-#define __ARM_COMPUTE_NEDECONVOLUTIONLAYERKERNEL_H__
-
-#include "arm_compute/core/NEON/INEKernel.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** NEON kernel to perform scaling on a tensor */
-class NEDeconvolutionLayerUpsampleKernel : public INEKernel
-{
-public:
- /** Default constructor */
- NEDeconvolutionLayerUpsampleKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEDeconvolutionLayerUpsampleKernel(const NEDeconvolutionLayerUpsampleKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEDeconvolutionLayerUpsampleKernel &operator=(const NEDeconvolutionLayerUpsampleKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEDeconvolutionLayerUpsampleKernel(NEDeconvolutionLayerUpsampleKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEDeconvolutionLayerUpsampleKernel &operator=(NEDeconvolutionLayerUpsampleKernel &&) = default;
- /** Default destructor */
- ~NEDeconvolutionLayerUpsampleKernel() = default;
-
- /** Initialise the kernel's inputs, output and interpolation policy
- *
- * @param[in] input Source tensor. Data types supported: F32.
- * @param[in] offsets Offset to access the pixel with NEAREST interpolation or the top-left pixel with BILINEAR interpolation in the input tensor. Data type supported: S32.
- * @param[out] output Destination tensor. Data types supported: F32. All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane.
- */
- void configure(const ITensor *input, const ITensor *offsets, ITensor *output);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
- BorderSize border_size() const override;
-
-private:
- /** Function to perform scale using nearest interpolation on the given window */
- void scale_nearest(const Window &window);
-
- const ITensor *_offsets;
- const ITensor *_input;
- ITensor *_output;
-};
-} // arm_compute
-#endif /*__ARM_COMPUTE_NEDECONVOLUTIONLAYERKERNEL_H__ */
diff --git a/arm_compute/core/Utils.h b/arm_compute/core/Utils.h
index f78add13f9..51967b1762 100644
--- a/arm_compute/core/Utils.h
+++ b/arm_compute/core/Utils.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016, 2017, 2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -614,25 +614,23 @@ TensorShape deconvolution_output_shape(const std::pair<unsigned int, unsigned in
/** Returns expected width and height of the deconvolution's output tensor.
*
- * @param[in] in_width Width of input tensor (Number of columns)
- * @param[in] in_height Height of input tensor (Number of rows)
- * @param[in] kernel_width Kernel width.
- * @param[in] kernel_height Kernel height.
- * @param[in] padx X axis padding.
- * @param[in] pady Y axis padding.
- * @param[in] ax The number of zeros added to right edge of the input.
- * @param[in] ay The number of zeros added to top edge of the input.
- * @param[in] upscalex How much to scale the X axis.
- * @param[in] upscaley How much to scale the Y axis.
- * @param[in] round Rounding policy to be used when computing the output's dimensions.
+ * @param[in] in_width Width of input tensor (Number of columns)
+ * @param[in] in_height Height of input tensor (Number of rows)
+ * @param[in] kernel_width Kernel width.
+ * @param[in] kernel_height Kernel height.
+ * @param[in] padx X axis padding.
+ * @param[in] pady Y axis padding.
+ * @param[in] inner_border_right The number of zeros added to right edge of the input.
+ * @param[in] inner_border_top The number of zeros added to top edge of the input.
+ * @param[in] stride_x X axis input stride.
+ * @param[in] stride_y Y axis input stride.
*
* @return A pair with the new width in the first position and the new height in the second.
*/
-
const std::pair<unsigned int, unsigned int> deconvolution_output_dimensions(unsigned int in_width, unsigned int in_height,
unsigned int kernel_width, unsigned int kernel_height,
- unsigned int padx, unsigned int pady, unsigned int ax, unsigned int ay,
- float upscalex, float upscaley, DimensionRoundingType round);
+ unsigned int padx, unsigned int pady, unsigned int inner_border_right, unsigned int inner_border_top,
+ unsigned int stride_x, unsigned int stride_y);
/** Returns expected width and height of output scaled tensor depending on dimensions rounding mode.
*
diff --git a/arm_compute/core/utils/misc/ShapeCalculator.h b/arm_compute/core/utils/misc/ShapeCalculator.h
index f31eb3d336..c7667f2c7b 100644
--- a/arm_compute/core/utils/misc/ShapeCalculator.h
+++ b/arm_compute/core/utils/misc/ShapeCalculator.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017, 2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -106,7 +106,8 @@ inline TensorShape compute_depthwise_convolution_shape(const ITensorInfo &input,
unsigned int output_width = 0;
unsigned int output_height = 0;
- std::tie(output_width, output_height) = scaled_dimensions(input_shape.x(), input_shape.y(), weights_shape.x(), weights_shape.y(), conv_info);
+ std::tie(output_width, output_height) = scaled_dimensions(input_shape.x(), input_shape.y(), weights_shape.x(),
+ weights_shape.y(), conv_info);
TensorShape output_shape{ input_shape };
output_shape.set(0, output_width);
@@ -114,6 +115,16 @@ inline TensorShape compute_depthwise_convolution_shape(const ITensorInfo &input,
return output_shape;
}
+inline TensorShape compute_deconvolution_shape(const ITensorInfo &input, unsigned int sx, unsigned int sy, unsigned int inner_border_right, unsigned int inner_border_top, const PadStrideInfo &info)
+{
+ TensorShape scale_out_shape(input.tensor_shape());
+ const unsigned int out_x = input.dimension(0) + (input.dimension(0) - 1) * (sx - 1) + inner_border_right + 2 * info.pad().first;
+ const unsigned int out_y = input.dimension(1) + (input.dimension(1) - 1) * (sy - 1) + inner_border_top + 2 * info.pad().second;
+ scale_out_shape.set(0, out_x);
+ scale_out_shape.set(1, out_y);
+
+ return scale_out_shape;
+}
} // namespace shape_calculator
} // namespace misc
} // namespace arm_compute
diff --git a/arm_compute/runtime/CL/CLFunctions.h b/arm_compute/runtime/CL/CLFunctions.h
index f6ecef7a51..1154ab79aa 100644
--- a/arm_compute/runtime/CL/CLFunctions.h
+++ b/arm_compute/runtime/CL/CLFunctions.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016, 2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -42,6 +42,8 @@
#include "arm_compute/runtime/CL/functions/CLColorConvert.h"
#include "arm_compute/runtime/CL/functions/CLConvolution.h"
#include "arm_compute/runtime/CL/functions/CLConvolutionLayer.h"
+#include "arm_compute/runtime/CL/functions/CLDeconvolutionLayer.h"
+#include "arm_compute/runtime/CL/functions/CLDeconvolutionLayerUpsample.h"
#include "arm_compute/runtime/CL/functions/CLDepthConcatenateLayer.h"
#include "arm_compute/runtime/CL/functions/CLDepthConvertLayer.h"
#include "arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h"
diff --git a/arm_compute/runtime/CL/functions/CLDeconvolutionLayer.h b/arm_compute/runtime/CL/functions/CLDeconvolutionLayer.h
new file mode 100644
index 0000000000..e98cc9b3d6
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLDeconvolutionLayer.h
@@ -0,0 +1,103 @@
+/*
+ * Copyright (c) 2017, 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLDECONVOLUTIONLAYER_H__
+#define __ARM_COMPUTE_CLDECONVOLUTIONLAYER_H__
+
+#include "arm_compute/runtime/CL/functions/CLDeconvolutionLayerUpsample.h"
+#include "arm_compute/runtime/CL/functions/CLDirectConvolutionLayer.h"
+
+#include "arm_compute/runtime/CL/CLMemoryGroup.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/IFunction.h"
+#include "arm_compute/runtime/IMemoryManager.h"
+
+#include <memory>
+
+namespace arm_compute
+{
+class ICLTensor;
+/** Function to run the deconvolution layer.
+ *
+ * Deconvolution Layer is the backward pass of Convolution Layer. First we transform the input depending on the stride and pad info and then perform a 1x1
+ * convolution pass. Input stride defines how many zeroes we should put between each element of the input, pad is the amount of padding and finally a is a user
+ * specified value where a < stride - 1, that increases the padding top and right of the input image.
+ *
+ * The relation between input to output is as follows:
+ * width_output = round((width_input − 1) ∗ (stride_x - 1) − 2 ∗ padding_x + kernel_x + inner_border_right )
+ * height_output = round((height_input − 1) ∗ (stride_y - 1) − 2 ∗ padding_y + kernel_y + inner_border_top )
+ *
+ * where:
+ * width_input is the size of the first input dimension.
+ * height_input is the size of the second input dimension.
+ * width_output is the size of the first output dimension.
+ * height_output is the size of the second output dimension.
+ * kernel_x and kernel_y are the convolution sizes in x and y.
+ * inner_border_right and inner_border_top the number of zeros added to the right and top edges of the input.
+ * stride_x and stride_y is the input stride of the first and second dimension.
+ *
+ */
+class CLDeconvolutionLayer : public IFunction
+{
+public:
+ /** Constructor */
+ CLDeconvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+ /** Set the input, weights, biases and output tensors.
+ *
+ * @param[in,out] input Input tensor. 3 lower dimensions represent a single input, and an optional 4th dimension for batch of inputs. Data types supported: F32.
+ * @param[in] weights The 4d weights with dimensions [width, height, OFM, IFM]. Data type supported: Same as @p input.
+ * @param[in] bias (Optional) The biases have one dimension. Data type supported: Same as @p input.
+ * @param[out] output Output tensor. The output has the same number of dimensions as the @p input.
+ * @param[in] info Contains padding and policies to be used in the deconvolution, this is decribed in @ref PadStrideInfo.
+ * @param[in] inner_border_right The number of zeros added to right edge of the input.
+ * @param[in] inner_border_top The number of zeros added to top edge of the input.
+ *
+ */
+ void configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *bias, ICLTensor *output, const PadStrideInfo &info,
+ unsigned int inner_border_right, unsigned int inner_border_top);
+ /** Static function to check if given info will lead to a valid configuration of @ref CLDeconvolutionLayer
+ *
+ * @param[in] input Input tensor info. 3 lower dimensions represent a single input, and an optional 4th dimension for batch of inputs. Data types supported: F32.
+ * @param[in] weights The 4d weights info with dimensions [width, height, OFM, IFM]. Data type supported: Same as @p input.
+ * @param[in] bias (Optional) The biases have one dimension. Data type supported: Same as @p input.
+ * @param[in] output Output tensor info. The output has the same number of dimensions as the @p input.
+ * @param[in] info Contains padding and policies to be used in the deconvolution, this is decribed in @ref PadStrideInfo.
+ * @param[in] inner_border_right The number of zeros added to right edge of the input.
+ * @param[in] inner_border_top The number of zeros added to top edge of the input.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *bias, ITensorInfo *output, const PadStrideInfo &info,
+ unsigned int inner_border_right, unsigned int inner_border_top);
+
+ // Inherited methods overridden:
+ void run() override;
+
+private:
+ CLMemoryGroup _memory_group;
+ CLDeconvolutionLayerUpsample _scale_f;
+ CLDirectConvolutionLayer _conv_f;
+ CLTensor _scaled_output;
+};
+}
+#endif /* __ARM_COMPUTE_CLDECONVOLUTIONLAYER_H__ */ \ No newline at end of file
diff --git a/arm_compute/runtime/CL/functions/CLDeconvolutionLayerUpsample.h b/arm_compute/runtime/CL/functions/CLDeconvolutionLayerUpsample.h
new file mode 100644
index 0000000000..74ee4efb2c
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLDeconvolutionLayerUpsample.h
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2017, 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLDECONVOLUTIONLAYERUPSAMPLE_H__
+#define __ARM_COMPUTE_CLDECONVOLUTIONLAYERUPSAMPLE_H__
+
+#include "arm_compute/runtime/IFunction.h"
+
+#include "arm_compute/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/CLMemoryGroup.h"
+#include "arm_compute/runtime/IFunction.h"
+#include "arm_compute/runtime/IMemoryManager.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Basic function to run @ref CLDeconvolutionLayerUpsampleKernel */
+class CLDeconvolutionLayerUpsample : public IFunction
+{
+public:
+ /** Default constructor */
+ CLDeconvolutionLayerUpsample();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLDeconvolutionLayerUpsample(const CLDeconvolutionLayerUpsample &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLDeconvolutionLayerUpsample &operator=(const CLDeconvolutionLayerUpsample &) = delete;
+ /** Allow instances of this class to be moved */
+ CLDeconvolutionLayerUpsample(CLDeconvolutionLayerUpsample &&) = default;
+ /** Allow instances of this class to be moved */
+ CLDeconvolutionLayerUpsample &operator=(CLDeconvolutionLayerUpsample &&) = default;
+ /** Default destructor */
+ virtual ~CLDeconvolutionLayerUpsample() = default;
+
+ /** Initialize the function's source, destination, interpolation type and border_mode.
+ *
+ * @param[in, out] input Source tensor. Data type supported: F32.
+ * @param[out] output Destination tensor. Data type supported: F32.
+ * @param[in] inner_border The number of zeros added to right and top edges of the input.
+ * @param[in] info Contains padding and policies to be used in the deconvolution.
+ */
+ void configure(ICLTensor *input, ICLTensor *output, const BorderSize &inner_border,
+ const PadStrideInfo &info);
+ /** Static function to check if given info will lead to a valid configuration of @ref CLDeconvolutionLayerUpsample
+ *
+ * @param[in] input Source tensor info. Data type supported: F32.
+ * @param[in] output Destination tensor info. Data type supported: F32.
+ * @param[in] inner_border The number of zeros added to right and top edges of the input.
+ * @param[in] info Contains padding and policies to be used in the deconvolution.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output, const BorderSize &inner_border,
+ const PadStrideInfo &info);
+
+ // Inherited methods overridden:
+ void run() override;
+
+private:
+ CLDeconvolutionLayerUpsampleKernel _upsample;
+ ICLTensor *_output;
+};
+}
+#endif /* __ARM_COMPUTE_CLDECONVOLUTIONLAYERUPSAMPLE_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLDirectConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLDirectConvolutionLayer.h
index f31a45be90..205c90c478 100644
--- a/arm_compute/runtime/CL/functions/CLDirectConvolutionLayer.h
+++ b/arm_compute/runtime/CL/functions/CLDirectConvolutionLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017, 2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -40,6 +40,7 @@ class ICLTensor;
class CLDirectConvolutionLayer : public IFunction
{
public:
+ /** Default constructor */
CLDirectConvolutionLayer();
/** Set the input and output tensors.
*
diff --git a/arm_compute/runtime/NEON/NEFunctions.h b/arm_compute/runtime/NEON/NEFunctions.h
index 08852cf368..d09fcb280c 100644
--- a/arm_compute/runtime/NEON/NEFunctions.h
+++ b/arm_compute/runtime/NEON/NEFunctions.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016, 2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -44,7 +44,6 @@
#include "arm_compute/runtime/NEON/functions/NEConvolution.h"
#include "arm_compute/runtime/NEON/functions/NEConvolutionLayer.h"
#include "arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h"
-#include "arm_compute/runtime/NEON/functions/NEDeconvolutionLayerUpsample.h"
#include "arm_compute/runtime/NEON/functions/NEDepthConcatenateLayer.h"
#include "arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h"
#include "arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h"
diff --git a/arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h
index 8757bc63aa..091a928db6 100644
--- a/arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017, 2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,7 +24,6 @@
#ifndef __ARM_COMPUTE_NEDECONVOLUTIONLAYER_H__
#define __ARM_COMPUTE_NEDECONVOLUTIONLAYER_H__
-#include "arm_compute/runtime/NEON/functions/NEDeconvolutionLayerUpsample.h"
#include "arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h"
#include "arm_compute/core/Types.h"
@@ -39,13 +38,13 @@ namespace arm_compute
{
/** Function to run the deconvolution layer.
*
- * The operation is similar to convolution but it's implemented by up-sampling the inputs with zeros insertions between the inputs and convolving
- * the kernels on the up-sampled result.
+ * Deconvolution Layer is the backward pass of Convolution Layer. First we transform the input depending on the stride and pad info and then perfrom a 1x1
+ * convolution pass. Input stride defines how many zeroes we should put between each element of the input, pad is the amount of padding and finaly a is a user
+ * specified value where a < stride - 1 that increases the padding top and right of the input image.
*
- * Before the Deconvolution is done, up-scaling the first 2D with zeros is performed. The relation between input to
- * output is as follows:
- * width_output = round((width_input − 1) ∗ upscale_x − 2 ∗ padding_x + kernel_x + a_x )
- * height_output = round((height_input − 1) ∗ upscale_y − 2 ∗ padding_y + kernel_y + a_y )
+ * The relation between input to output is as follows:
+ * width_output = round((width_input − 1) ∗ (stride_x - 1) − 2 ∗ padding_x + kernel_x + inner_border_right )
+ * height_output = round((height_input − 1) ∗ (stride_y - 1) − 2 ∗ padding_y + kernel_y + inner_border_top )
*
* where
* width is the size of the first input dimension.
@@ -53,44 +52,54 @@ namespace arm_compute
* width_output is the size of the first output dimension.
* height_output is the size of the second output dimension.
* kernel_x and kernel_y are the convolution sizes in x and y.
- * ax and ay the number of zeros added to the top and right edges of the input.
- * upscale_x and upscale_y how much to scale the X and Y axis.
+ * inner_border_right and inner_border_top the number of zeros added to the top and right edges of the input.
+ * stride_x and stride_y is the input stride of the first and second dimension.
*
* This function calls the following NEON kernels:
*
- * -# @ref NEDeconvolutionLayerUpsampleKernel
* -# @ref NEDirectConvolutionLayer
*
*/
class NEDeconvolutionLayer : public IFunction
{
public:
- /** Constructor */
+ /** Default constructor */
NEDeconvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEDeconvolutionLayer(const NEDeconvolutionLayer &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEDeconvolutionLayer &operator=(const NEDeconvolutionLayer &) = delete;
+ /** Allow instances of this class to be moved */
+ NEDeconvolutionLayer(NEDeconvolutionLayer &&) = default;
+ /** Allow instances of this class to be moved */
+ NEDeconvolutionLayer &operator=(NEDeconvolutionLayer &&) = default;
+ /** Default destructor */
+ virtual ~NEDeconvolutionLayer() = default;
/** Set the input, weights, biases and output tensors.
*
- * @param[in,out] input Input tensor. 3 lower dimensions represent a single input, and an optional 4th dimension for batch of inputs. Data types supported: F32.
- * @param[in] weights The 4d weights with dimensions [width, height, OFM, IFM]. Data type supported: Same as @p input.
- * @param[in] bias Optional, ignored if NULL. The biases have one dimension. Data type supported: Same as @p input.
- * @param[out] output Output tensor. The output has the same number of dimensions as the @p input.
- * @param[in] info Contains padding and policies to be used in the deconvolution, this is decribed in @ref PadStrideInfo.
- * @param[in] ax The number of zeros added to right edge of the input.
- * @param[in] ay The number of zeros added to top edge of the input.
- * @param[in] upscalex How much to scale the X axis.
- * @param[in] upscaley How much to scale the Y axis.
+ * @param[in,out] input Input tensor. 3 lower dimensions represent a single input, and an optional 4th dimension for batch of inputs. Data types supported: F32.
+ * @param[in] weights The 4d weights with dimensions [width, height, OFM, IFM]. Data type supported: Same as @p input.
+ * @param[in] bias Optional, ignored if NULL. The biases have one dimension. Data type supported: Same as @p input.
+ * @param[out] output Output tensor. The output has the same number of dimensions as the @p input.
+ * @param[in] info Contains padding and policies to be used in the deconvolution, this is decribed in @ref PadStrideInfo.
+ * @param[in] inner_border_right The number of zeros added to right edge of the input.
+ * @param[in] inner_border_top The number of zeros added to top edge of the input.
*
*/
void configure(ITensor *input, const ITensor *weights, const ITensor *bias, ITensor *output, const PadStrideInfo &info,
- unsigned int ax, unsigned int ay, float upscalex, float upscaley);
+ unsigned int inner_border_right, unsigned int inner_border_top);
// Inherited methods overridden:
void run() override;
private:
- MemoryGroup _memory_group;
- NEDeconvolutionLayerUpsample _scale_f;
- NEDirectConvolutionLayer _conv_f;
- Tensor _scaled_output;
+ MemoryGroup _memory_group;
+ NEDirectConvolutionLayer _conv_f;
+ Tensor _scaled_output;
+ ITensor *_input;
+ PadStrideInfo _info;
+ std::pair<unsigned int, unsigned int> _inner_border;
};
} // arm_compute
#endif /* __ARM_COMPUTE_NEDECONVOLUTIONLAYER_H__ */
diff --git a/arm_compute/runtime/NEON/functions/NEDeconvolutionLayerUpsample.h b/arm_compute/runtime/NEON/functions/NEDeconvolutionLayerUpsample.h
deleted file mode 100644
index d2ac12a58a..0000000000
--- a/arm_compute/runtime/NEON/functions/NEDeconvolutionLayerUpsample.h
+++ /dev/null
@@ -1,72 +0,0 @@
-/*
- * Copyright (c) 2016, 2017 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef __ARM_COMPUTE_NEDECONVOLUTIONUPSAMPLE_H__
-#define __ARM_COMPUTE_NEDECONVOLUTIONUPSAMPLE_H__
-
-#include "arm_compute/core/NEON/kernels/NEDeconvolutionLayerUpsampleKernel.h"
-#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/runtime/IMemoryManager.h"
-#include "arm_compute/runtime/MemoryGroup.h"
-#include "arm_compute/runtime/Tensor.h"
-
-#include <cstdint>
-#include <memory>
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Basic function to run @ref NEDeconvolutionLayerUpsampleKernel */
-class NEDeconvolutionLayerUpsample : public IFunction
-{
-public:
- /** Constructor
- *
- * Initialize NEDeconvolutionLayerUpsample
- */
- NEDeconvolutionLayerUpsample(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
- /** Initialize the function's source, destination, interpolation type and border_mode.
- *
- * @param[in, out] input Source tensor. Data type supported: F32.
- * @param[out] output Destination tensor. Data type supported: F32.
- * @param[in] a Top and right inner border sizes. These rows and columns will be filled with zero.
- * @param[in] iz The number of zeros to be inserted between each input sample
- * @param[in] info Contains padding and policies to be used in the deconvolution, this is decribed in @ref PadStrideInfo.
- */
- void configure(ITensor *input, ITensor *output, const std::pair<unsigned int, unsigned int> &a,
- const std::pair<unsigned int, unsigned int> &iz, const PadStrideInfo &info);
-
- // Inherited methods overridden:
- void run() override;
-
-private:
- MemoryGroup _memory_group;
- Tensor _offsets;
- NEFillBorderKernel _border_handler;
- NEDeconvolutionLayerUpsampleKernel _upsample;
-};
-} // arm_compute
-#endif /*__ARM_COMPUTE_NEDECONVOLUTIONUPSAMPLE_H__ */
diff --git a/src/core/CL/CLKernelLibrary.cpp b/src/core/CL/CLKernelLibrary.cpp
index de75518a05..352b89baa5 100644
--- a/src/core/CL/CLKernelLibrary.cpp
+++ b/src/core/CL/CLKernelLibrary.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016, 2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -186,6 +186,7 @@ const std::map<std::string, std::string> CLKernelLibrary::_kernel_program_map =
{ "copy_plane", "channel_extract.cl" },
{ "copy_planes_3p", "channel_combine.cl" },
{ "copy_to_keypoint", "fast_corners.cl" },
+ { "deconvolution_upsample", "deconvolution_layer.cl" },
{ "depthwise_convolution_3x3", "depthwise_convolution.cl" },
{ "depthwise_convolution_3x3_quantized", "depthwise_convolution_quantized.cl" },
{ "depthwise_im2col", "depthwise_convolution.cl" },
@@ -421,6 +422,10 @@ const std::map<std::string, std::string> CLKernelLibrary::_program_source_map =
#include "./cl_kernels/convolution_rectangle.clembed"
},
{
+ "deconvolution_layer.cl",
+#include "./cl_kernels/deconvolution_layer.clembed"
+ },
+ {
"depth_convert.cl",
#include "./cl_kernels/depth_convert.clembed"
},
diff --git a/src/core/CL/cl_kernels/deconvolution_layer.cl b/src/core/CL/cl_kernels/deconvolution_layer.cl
new file mode 100644
index 0000000000..2514ddc8cc
--- /dev/null
+++ b/src/core/CL/cl_kernels/deconvolution_layer.cl
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2017, 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "helpers.h"
+
+/** This function applies upsample on an input image.
+ *
+ * @param[in] src_ptr Pointer to the source image. Supported data types: F32
+ * @param[in] src_stride_x Stride of the source image in X dimension (in bytes)
+ * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
+ * @param[in] src_stride_y Stride of the source image in Y dimension (in bytes)
+ * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes)
+ * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source image
+ * @param[out] dst_ptr Pointer to the destination image. Supported data types: F32
+ * @param[in] dst_stride_x Stride of the destination image in X dimension (in bytes)
+ * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes)
+ * @param[in] dst_stride_y Stride of the destination image in Y dimension (in bytes)
+ * @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes)
+ * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination image
+ */
+__kernel void deconvolution_upsample(
+ IMAGE_DECLARATION(src),
+ IMAGE_DECLARATION(dst))
+{
+ Image src = CONVERT_TO_IMAGE_STRUCT(src);
+ Image dst = CONVERT_TO_IMAGE_STRUCT(dst);
+
+ // Store result
+ *((__global float *)dst.ptr) = *((__global float *)src.ptr);
+}
diff --git a/src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.cpp b/src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.cpp
new file mode 100644
index 0000000000..5c08d5bee2
--- /dev/null
+++ b/src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.cpp
@@ -0,0 +1,117 @@
+/*
+ * Copyright (c) 2017, 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.h"
+
+#include "arm_compute/core/CL/CLHelpers.h"
+#include "arm_compute/core/CL/CLKernelLibrary.h"
+#include "arm_compute/core/CL/ICLTensor.h"
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/Validate.h"
+#include "arm_compute/core/Window.h"
+
+using namespace arm_compute;
+
+CLDeconvolutionLayerUpsampleKernel::CLDeconvolutionLayerUpsampleKernel()
+ : _input(nullptr), _output(nullptr), _inner_border(), _info()
+{
+}
+
+Status CLDeconvolutionLayerUpsampleKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const BorderSize &inner_border,
+ const PadStrideInfo &info)
+{
+ ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
+ ARM_COMPUTE_UNUSED(info);
+
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
+ ARM_COMPUTE_RETURN_ERROR_ON(output->dimension(0) == 0);
+ ARM_COMPUTE_RETURN_ERROR_ON(output->dimension(1) == 0);
+
+ for(size_t i = 2; i < Coordinates::num_max_dimensions; ++i)
+ {
+ ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(i) != output->dimension(i));
+ }
+
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(inner_border.right > info.stride().first - 1, "inner_border_right must be smaller that stride_x");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(inner_border.top > info.stride().second - 1, "inner_border_top must be smaller that stride_y");
+
+ return Status{};
+}
+
+void CLDeconvolutionLayerUpsampleKernel::configure(const ICLTensor *input, ICLTensor *output, const BorderSize &inner_border,
+ const PadStrideInfo &info)
+{
+ ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
+
+ _input = input;
+ _output = output;
+ _inner_border = inner_border;
+ _info = info;
+
+ // Perform validation step
+ ARM_COMPUTE_ERROR_THROW_ON(CLDeconvolutionLayerUpsampleKernel::validate(input->info(), output->info(), inner_border, info));
+
+ // Create kernel
+ _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("deconvolution_upsample"));
+
+ constexpr unsigned int num_elems_processed_per_iteration = 1;
+
+ // Configure kernel window
+ Window win = calculate_max_window(*output->info(), Steps(num_elems_processed_per_iteration));
+
+ AccessWindowHorizontal output_access(output->info(), 0, 0, num_elems_processed_per_iteration);
+ output_access.set_valid_region(win, ValidRegion(Coordinates(), output->info()->tensor_shape()));
+
+ ICLKernel::configure(win);
+}
+
+void CLDeconvolutionLayerUpsampleKernel::run(const Window &window, cl::CommandQueue &queue)
+{
+ ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
+ ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
+
+ const int out_start_x = _info.pad().first;
+ const int out_end_x = _output->info()->dimension(0) - _inner_border.right - _info.pad().first + _info.stride().first - 1;
+ const int out_step_x = _info.stride().first;
+
+ const int out_start_y = _inner_border.top + _info.pad().second;
+ const int out_end_y = _output->info()->dimension(1) - _info.pad().second + _info.stride().second - 1;
+ const int out_step_y = _info.stride().second;
+
+ Window slice_out = window.first_slice_window_2D();
+ slice_out.set(Window::DimX, Window::Dimension(out_start_x, out_end_x, out_step_x));
+ slice_out.set(Window::DimY, Window::Dimension(out_start_y, out_end_y, out_step_y));
+
+ Window slice_in = window.first_slice_window_2D();
+
+ do
+ {
+ unsigned int idx = 0;
+ add_2D_tensor_argument(idx, _input, slice_in);
+ add_2D_tensor_argument(idx, _output, slice_out);
+ enqueue(queue, *this, slice_out);
+ }
+ while(window.slide_window_slice_2D(slice_in) && window.slide_window_slice_2D(slice_out));
+}
diff --git a/src/core/NEON/kernels/NEDeconvolutionLayerUpsampleKernel.cpp b/src/core/NEON/kernels/NEDeconvolutionLayerUpsampleKernel.cpp
deleted file mode 100644
index 71db2e9782..0000000000
--- a/src/core/NEON/kernels/NEDeconvolutionLayerUpsampleKernel.cpp
+++ /dev/null
@@ -1,165 +0,0 @@
-/*
- * Copyright (c) 2016, 2017 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/core/NEON/kernels/NEDeconvolutionLayerUpsampleKernel.h"
-
-#include "arm_compute/core/AccessWindowStatic.h"
-#include "arm_compute/core/Coordinates.h"
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/ITensor.h"
-#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/Window.h"
-
-#include <arm_neon.h>
-#include <cstddef>
-#include <cstdint>
-
-using namespace arm_compute;
-
-NEDeconvolutionLayerUpsampleKernel::NEDeconvolutionLayerUpsampleKernel()
- : _offsets(nullptr), _input(nullptr), _output(nullptr)
-{
-}
-
-BorderSize NEDeconvolutionLayerUpsampleKernel::border_size() const
-{
- return BorderSize(1);
-}
-
-void NEDeconvolutionLayerUpsampleKernel::configure(const ITensor *input, const ITensor *offsets, ITensor *output)
-{
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::F32);
- ARM_COMPUTE_ERROR_ON(output->info()->dimension(0) == 0);
- ARM_COMPUTE_ERROR_ON(output->info()->dimension(1) == 0);
-
- for(size_t i = 2; i < Coordinates::num_max_dimensions; ++i)
- {
- ARM_COMPUTE_ERROR_ON(input->info()->dimension(i) != output->info()->dimension(i));
- }
-
- _input = input;
- _output = output;
- _offsets = offsets;
-
- constexpr unsigned int num_elems_processed_per_iteration = 16;
- const int border_offset = border_size().left;
-
- // Configure kernel window
- Window win = calculate_max_window(*output->info(), Steps(num_elems_processed_per_iteration));
-
- AccessWindowRectangle input_access(input->info(), -border_offset, -border_offset, input->info()->dimension(0) + border_offset, input->info()->dimension(1) + border_offset);
- AccessWindowHorizontal offsets_access(offsets->info(), 0, num_elems_processed_per_iteration);
- AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration);
-
- update_window_and_padding(win, input_access, offsets_access, output_access);
-
- output_access.set_valid_region(win, ValidRegion(Coordinates(), output->info()->tensor_shape()));
-
- INEKernel::configure(win);
-}
-
-void NEDeconvolutionLayerUpsampleKernel::scale_nearest(const Window &window)
-{
- const size_t input_stride = _input->info()->strides_in_bytes()[1];
-
- // Compute the ratio between source height and destination height
- const auto hr = static_cast<float>(_input->info()->dimension(1)) / static_cast<float>(_output->info()->dimension(1));
-
- // Don't increment in X and Y direction for the input tensor
- // A pointer to the start of this plane is needed as base for the precomputed offsets
- Window win_in(window);
- win_in.set(Window::DimX, Window::Dimension(0, 0, 0));
- win_in.set(Window::DimY, Window::Dimension(0, 0, 0));
-
- Window win_off;
- win_off.set(Window::DimX, window[Window::DimX]);
- win_off.set(Window::DimY, window[Window::DimY]);
-
- for(size_t d = Window::DimZ; d < _offsets->info()->num_dimensions(); ++d)
- {
- win_off.set(d, Window::Dimension(0, 0, 0));
- }
-
- Iterator in(_input, win_in);
- Iterator out(_output, window);
- Iterator offsets(_offsets, win_off);
-
- switch(_input->info()->data_type())
- {
- case DataType::F32:
- {
- float32x4x4_t tmp =
- {
- {
- vdupq_n_f32(0),
- vdupq_n_f32(0)
- }
- };
- execute_window_loop(window, [&](const Coordinates & id)
- {
- const auto offsets_ptr = reinterpret_cast<const int32_t *>(offsets.ptr());
-
- const size_t in_yi = (id.y() + 0.5f) * hr;
- const size_t offset_row = in_yi * input_stride;
-
- tmp.val[0] = vsetq_lane_f32(*reinterpret_cast<const float *>(in.ptr() + offsets_ptr[0] + offset_row), tmp.val[0], 0);
- tmp.val[0] = vsetq_lane_f32(*reinterpret_cast<const float *>(in.ptr() + offsets_ptr[4] + offset_row), tmp.val[0], 1);
- tmp.val[0] = vsetq_lane_f32(*reinterpret_cast<const float *>(in.ptr() + offsets_ptr[8] + offset_row), tmp.val[0], 2);
- tmp.val[0] = vsetq_lane_f32(*reinterpret_cast<const float *>(in.ptr() + offsets_ptr[12] + offset_row), tmp.val[0], 3);
-
- tmp.val[1] = vsetq_lane_f32(*reinterpret_cast<const float *>(in.ptr() + offsets_ptr[1] + offset_row), tmp.val[1], 0);
- tmp.val[1] = vsetq_lane_f32(*reinterpret_cast<const float *>(in.ptr() + offsets_ptr[5] + offset_row), tmp.val[1], 1);
- tmp.val[1] = vsetq_lane_f32(*reinterpret_cast<const float *>(in.ptr() + offsets_ptr[9] + offset_row), tmp.val[1], 2);
- tmp.val[1] = vsetq_lane_f32(*reinterpret_cast<const float *>(in.ptr() + offsets_ptr[13] + offset_row), tmp.val[1], 3);
-
- tmp.val[2] = vsetq_lane_f32(*reinterpret_cast<const float *>(in.ptr() + offsets_ptr[2] + offset_row), tmp.val[2], 0);
- tmp.val[2] = vsetq_lane_f32(*reinterpret_cast<const float *>(in.ptr() + offsets_ptr[6] + offset_row), tmp.val[2], 1);
- tmp.val[2] = vsetq_lane_f32(*reinterpret_cast<const float *>(in.ptr() + offsets_ptr[10] + offset_row), tmp.val[2], 2);
- tmp.val[2] = vsetq_lane_f32(*reinterpret_cast<const float *>(in.ptr() + offsets_ptr[14] + offset_row), tmp.val[2], 3);
-
- tmp.val[3] = vsetq_lane_f32(*reinterpret_cast<const float *>(in.ptr() + offsets_ptr[3] + offset_row), tmp.val[3], 0);
- tmp.val[3] = vsetq_lane_f32(*reinterpret_cast<const float *>(in.ptr() + offsets_ptr[7] + offset_row), tmp.val[3], 1);
- tmp.val[3] = vsetq_lane_f32(*reinterpret_cast<const float *>(in.ptr() + offsets_ptr[11] + offset_row), tmp.val[3], 2);
- tmp.val[3] = vsetq_lane_f32(*reinterpret_cast<const float *>(in.ptr() + offsets_ptr[15] + offset_row), tmp.val[3], 3);
-
- vst4q_f32(reinterpret_cast<float *>(out.ptr()), tmp);
- },
- in, offsets, out);
- break;
- }
- default:
- ARM_COMPUTE_ERROR("Not supported");
- break;
- }
-}
-
-void NEDeconvolutionLayerUpsampleKernel::run(const Window &window, const ThreadInfo &info)
-{
- ARM_COMPUTE_UNUSED(info);
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);
- scale_nearest(window);
-}
diff --git a/src/core/Utils.cpp b/src/core/Utils.cpp
index 76d0b0f059..a8249c4840 100644
--- a/src/core/Utils.cpp
+++ b/src/core/Utils.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016, 2017, 2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -261,29 +261,17 @@ TensorShape arm_compute::deconvolution_output_shape(const std::pair<unsigned int
const std::pair<unsigned int, unsigned int> arm_compute::deconvolution_output_dimensions(
unsigned int in_width, unsigned int in_height, unsigned int kernel_width, unsigned int kernel_height, unsigned int padx, unsigned int pady,
- unsigned int ax, unsigned int ay, float upscalex, float upscaley, DimensionRoundingType round)
+ unsigned int inner_border_right, unsigned int inner_border_top, unsigned int stride_x, unsigned int stride_y)
{
ARM_COMPUTE_ERROR_ON(in_width < 1 || in_height < 1);
- ARM_COMPUTE_ERROR_ON(((in_width - 1) * upscalex + kernel_width + ax) < 2.f * padx);
- ARM_COMPUTE_ERROR_ON(((in_height - 1) * upscaley + kernel_height + ay) < 2.f * pady);
- const float fw = (in_width - 1) * upscalex - 2.f * padx + kernel_width + ax;
- const float fh = (in_height - 1) * upscaley - 2.f * pady + kernel_height + ay;
- int w = 0;
- int h = 0;
- switch(round)
- {
- case DimensionRoundingType::FLOOR:
- w = std::floor(fw);
- h = std::floor(fh);
- break;
- case DimensionRoundingType::CEIL:
- w = std::ceil(fw);
- h = std::ceil(fh);
- break;
- default:
- ARM_COMPUTE_ERROR("Not supported");
- break;
- }
+ ARM_COMPUTE_ERROR_ON(((in_width - 1) * stride_x + kernel_width + inner_border_right) < 2 * padx);
+ ARM_COMPUTE_ERROR_ON(((in_height - 1) * stride_y + kernel_height + inner_border_top) < 2 * pady);
+ const int padx_deconv = (kernel_width - padx - 1);
+ const int pady_deconv = (kernel_height - pady - 1);
+ ARM_COMPUTE_ERROR_ON(padx_deconv < 0);
+ ARM_COMPUTE_ERROR_ON(pady_deconv < 0);
+ const int w = stride_x * (in_width - 1) + kernel_width + inner_border_right - 2 * padx_deconv;
+ const int h = stride_y * (in_height - 1) + kernel_height + inner_border_top - 2 * pady_deconv;
return std::make_pair<unsigned int, unsigned int>(w, h);
}
diff --git a/src/runtime/CL/functions/CLDeconvolutionLayer.cpp b/src/runtime/CL/functions/CLDeconvolutionLayer.cpp
new file mode 100644
index 0000000000..1c55722344
--- /dev/null
+++ b/src/runtime/CL/functions/CLDeconvolutionLayer.cpp
@@ -0,0 +1,132 @@
+/*
+ * Copyright (c) 2017, 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/CL/functions/CLDeconvolutionLayer.h"
+
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/Utils.h"
+#include "arm_compute/core/Validate.h"
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+
+#include <memory>
+#include <tuple>
+
+using namespace arm_compute;
+using namespace arm_compute::misc::shape_calculator;
+
+CLDeconvolutionLayer::CLDeconvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager) // NOLINT
+ : _memory_group(std::move(memory_manager)),
+ _scale_f(),
+ _conv_f(),
+ _scaled_output()
+{
+}
+
+Status CLDeconvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *bias, ITensorInfo *output, const PadStrideInfo &info,
+ unsigned int inner_border_right, unsigned int inner_border_top)
+{
+ ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(0) != weights->dimension(1));
+ ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(0) < 1);
+
+ const unsigned int stride_x = info.stride().first;
+ const unsigned int stride_y = info.stride().second;
+
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(inner_border_right > stride_x - 1, "inner_border_right must be smaller than stride_x");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(inner_border_top > stride_y - 1, "inner_border_top must be smaller than stride_y");
+
+ auto out_dims = deconvolution_output_dimensions(input->dimension(0), input->dimension(1), weights->dimension(0), weights->dimension(1),
+ info.pad().first, info.pad().second, inner_border_right, inner_border_top, stride_x, stride_y);
+
+ const TensorShape output_shape = deconvolution_output_shape(out_dims, input->tensor_shape(), weights->tensor_shape());
+
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output, weights, bias);
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT(input, output, weights, bias);
+
+ if(bias != nullptr)
+ {
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, bias);
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT(input, bias);
+ }
+
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->dimension(Window::DimX) != output_shape.x(), "Output's width is invalid.");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->dimension(Window::DimY) != output_shape.y(), "Output's height is invalid.");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->dimension(Window::DimZ) != output_shape.z(), "Output's depth is invalid.");
+
+ TensorInfo scale_out_info(input->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(compute_deconvolution_shape(*input, stride_x, stride_y, inner_border_right, inner_border_top,
+ info)));
+ const PadStrideInfo conv_info(1, 1, 0, 0, 0, 0, DimensionRoundingType::CEIL);
+
+ ARM_COMPUTE_RETURN_ON_ERROR(CLDeconvolutionLayerUpsample::validate(input, &scale_out_info, BorderSize(inner_border_right, inner_border_top), info));
+ ARM_COMPUTE_RETURN_ON_ERROR(CLDirectConvolutionLayer::validate(&scale_out_info, weights, bias, output, conv_info));
+
+ return Status{};
+}
+
+void CLDeconvolutionLayer::configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *bias, ICLTensor *output, const PadStrideInfo &info,
+ unsigned int inner_border_right, unsigned int inner_border_top)
+{
+ ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
+
+ const unsigned int stride_x = info.stride().first;
+ const unsigned int stride_y = info.stride().second;
+
+ auto out_dims = deconvolution_output_dimensions(input->info()->dimension(0), input->info()->dimension(1), weights->info()->dimension(0), weights->info()->dimension(1),
+ info.pad().first, info.pad().second, inner_border_top, inner_border_right, stride_x, stride_y);
+
+ const TensorShape output_shape = deconvolution_output_shape(out_dims, input->info()->tensor_shape(), weights->info()->tensor_shape());
+
+ // Output auto initialization if not yet initialized
+ auto_init_if_empty(*output->info(), output_shape, 1, input->info()->data_type(), input->info()->fixed_point_position());
+
+ // Perform validation step
+ ARM_COMPUTE_ERROR_THROW_ON(CLDeconvolutionLayer::validate(input->info(), weights->info(), bias == nullptr ? nullptr : bias->info(), output->info(), info, inner_border_right, inner_border_top));
+
+ _memory_group.manage(&_scaled_output);
+
+ // configure scale function
+ // Init and allocate intermmidiate tensor for output, same size as input but the first two axis are the same as the output tensor
+ TensorShape scale_out_shape(input->info()->tensor_shape());
+ const unsigned int out_x = input->info()->dimension(0) + (input->info()->dimension(0) - 1) * (stride_x - 1) + inner_border_right + 2 * info.pad().first;
+ const unsigned int out_y = input->info()->dimension(1) + (input->info()->dimension(1) - 1) * (stride_y - 1) + inner_border_top + 2 * info.pad().second;
+ scale_out_shape.set(0, out_x);
+ scale_out_shape.set(1, out_y);
+ TensorInfo scale_out_info(scale_out_shape, 1, input->info()->data_type(), input->info()->fixed_point_position());
+ _scaled_output.allocator()->init(scale_out_info);
+
+ _scale_f.configure(input, &_scaled_output, BorderSize(inner_border_top, inner_border_right), info);
+
+ // setup the function to convolve the upscaled output
+ const PadStrideInfo conv_info(1, 1, 0, 0, 0, 0, DimensionRoundingType::CEIL);
+ _conv_f.configure(&_scaled_output, weights, bias, output, conv_info);
+ _scaled_output.allocator()->allocate();
+}
+
+void CLDeconvolutionLayer::run()
+{
+ _memory_group.acquire();
+ _scale_f.run();
+ _conv_f.run();
+ _memory_group.release();
+}
diff --git a/src/runtime/CL/functions/CLDeconvolutionLayerUpsample.cpp b/src/runtime/CL/functions/CLDeconvolutionLayerUpsample.cpp
new file mode 100644
index 0000000000..13a24f8ba4
--- /dev/null
+++ b/src/runtime/CL/functions/CLDeconvolutionLayerUpsample.cpp
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2017, 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/CL/functions/CLDeconvolutionLayerUpsample.h"
+
+#include "arm_compute/core/CL/OpenCL.h"
+#include "arm_compute/core/Utils.h"
+#include "arm_compute/runtime/CL/CLScheduler.h"
+
+#include <cmath>
+#include <memory>
+#include <tuple>
+
+using namespace arm_compute;
+
+CLDeconvolutionLayerUpsample::CLDeconvolutionLayerUpsample() // NOLINT
+ : _upsample(),
+ _output(nullptr)
+{
+}
+
+Status CLDeconvolutionLayerUpsample::validate(const ITensorInfo *input, const ITensorInfo *output, const BorderSize &inner_border,
+ const PadStrideInfo &info)
+{
+ return CLDeconvolutionLayerUpsampleKernel::validate(input, output, inner_border, info);
+}
+
+void CLDeconvolutionLayerUpsample::configure(ICLTensor *input, ICLTensor *output, const BorderSize &inner_border,
+ const PadStrideInfo &info)
+{
+ ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
+
+ _output = output;
+ _upsample.configure(input, _output, inner_border, info);
+}
+
+void CLDeconvolutionLayerUpsample::run()
+{
+ _output->map(CLScheduler::get().queue(), true);
+ memset(_output->buffer(), 0, _output->info()->total_size());
+ _output->unmap(CLScheduler::get().queue());
+
+ CLScheduler::get().enqueue(_upsample, false);
+}
diff --git a/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp b/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp
index 7b4e77b296..c4bca11d14 100644
--- a/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp
+++ b/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017, 2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,38 +24,41 @@
#include "arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h"
#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/PixelValue.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
using namespace arm_compute;
+using namespace arm_compute::misc::shape_calculator;
NEDeconvolutionLayer::NEDeconvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager) // NOLINT
: _memory_group(std::move(memory_manager)),
- _scale_f(),
_conv_f(),
- _scaled_output()
+ _scaled_output(),
+ _input(nullptr),
+ _info(),
+ _inner_border()
{
}
void NEDeconvolutionLayer::configure(ITensor *input, const ITensor *weights, const ITensor *bias, ITensor *output, const PadStrideInfo &info,
- unsigned int ax, unsigned int ay, float upscalex, float upscaley)
+ unsigned int inner_border_right, unsigned int inner_border_top)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(output);
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32);
ARM_COMPUTE_ERROR_ON(weights->info()->dimension(0) != weights->info()->dimension(1));
- ARM_COMPUTE_ERROR_ON(weights->info()->dimension(0) < 1);
+ ARM_COMPUTE_ERROR_ON(weights->info()->dimension(0) != 1 && weights->info()->dimension(0) != 3 && weights->info()->dimension(0) != 5);
- auto out_dims = deconvolution_output_dimensions(input->info()->dimension(0), input->info()->dimension(1), weights->info()->dimension(0), weights->info()->dimension(1),
- info.pad().first, info.pad().second, ax, ay, upscalex, upscaley, info.round());
+ _input = input;
+ _info = info;
+ _inner_border = std::make_pair(inner_border_right, inner_border_top);
- const TensorShape output_shape = deconvolution_output_shape(out_dims, input->info()->tensor_shape(), weights->info()->tensor_shape());
-
- // Output auto initialization if not yet initialized
- auto_init_if_empty(*output->info(), output_shape, 1, input->info()->data_type(), input->info()->fixed_point_position());
+ const unsigned int stride_x = info.stride().first;
+ const unsigned int stride_y = info.stride().second;
+ auto out_dims = deconvolution_output_dimensions(input->info()->dimension(0), input->info()->dimension(1), weights->info()->dimension(0), weights->info()->dimension(1),
+ info.pad().first, info.pad().second, inner_border_right, inner_border_top, stride_x, stride_y);
- ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output, weights, bias);
- ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT(input, output, weights, bias);
+ const TensorShape output_shape = deconvolution_output_shape(out_dims, input->info()->tensor_shape(), weights->info()->tensor_shape());
ARM_COMPUTE_ERROR_ON_MSG(output->info()->dimension(Window::DimX) != output_shape.x(), "Output's width is invalid.");
ARM_COMPUTE_ERROR_ON_MSG(output->info()->dimension(Window::DimY) != output_shape.y(), "Output's height is invalid.");
@@ -64,51 +67,51 @@ void NEDeconvolutionLayer::configure(ITensor *input, const ITensor *weights, con
_memory_group.manage(&_scaled_output);
// configure scale function
- //Init and allocate intermmidiate tensor for output, same size as input but the first two axis are the same as the output tensor
- TensorShape scale_out_shape(input->info()->tensor_shape());
- scale_out_shape.set(0, output->info()->dimension(0));
- scale_out_shape.set(1, output->info()->dimension(1));
- TensorInfo scale_out_info(scale_out_shape, 1, input->info()->data_type(), input->info()->fixed_point_position());
+ // Init and allocate intermmidiate tensor for output, same size as input but the first two axis are the same as the output tensor
+ const TensorInfo scale_out_info(compute_deconvolution_shape(*input->info(), stride_x, stride_y, inner_border_right, inner_border_top, info), 1, input->info()->data_type(),
+ input->info()->fixed_point_position());
_scaled_output.allocator()->init(scale_out_info);
- const unsigned int kernel_size = weights->info()->dimension(0);
- // Padding for the upsampled image is calculated with the equiation: p' = k - p - 1, where k is kernel size and p is the input padding
- ARM_COMPUTE_ERROR_ON(info.pad().first > (kernel_size - 1));
- const unsigned int tr_px = kernel_size - info.pad().first - 1;
- const unsigned int tr_py = kernel_size - info.pad().second - 1;
- const unsigned int tr_stride = 1;
- const PadStrideInfo transposed_info(tr_stride, tr_stride, tr_px, tr_py);
- _scale_f.configure(input, &_scaled_output, std::make_pair(ax, ay), std::make_pair(info.stride().first - 1u, info.stride().second - 1u), transposed_info);
+
// setup the function to convolve the upscaled output
- switch(kernel_size)
- {
- case 1:
- {
- _conv_f.configure(&_scaled_output, weights, bias, output, PadStrideInfo(1, 1, 0, 0, DimensionRoundingType::CEIL));
- break;
- }
- case 3:
- {
- _conv_f.configure(&_scaled_output, weights, bias, output, PadStrideInfo(1, 1, 1, 1, DimensionRoundingType::CEIL));
- break;
- }
- case 5:
- {
- _conv_f.configure(&_scaled_output, weights, bias, output, PadStrideInfo(1, 1, 2, 2, DimensionRoundingType::CEIL));
- break;
- }
- default:
- {
- ARM_COMPUTE_ERROR("Not supported");
- break;
- }
- }
+ const PadStrideInfo conv_info(1, 1, 0, 0, 0, 0, DimensionRoundingType::CEIL);
+ _conv_f.configure(&_scaled_output, weights, bias, output, conv_info);
_scaled_output.allocator()->allocate();
}
void NEDeconvolutionLayer::run()
{
_memory_group.acquire();
- _scale_f.run();
+
+ // Initialize _scaled_output buffer
+ const int width_in = _input->info()->dimension(0);
+ const int height_in = _input->info()->dimension(1);
+ const int width_scaled = _scaled_output.info()->dimension(0);
+ const int height_scaled = _scaled_output.info()->dimension(1);
+ const int num_2d_slices = _input->info()->tensor_shape().total_size() / (width_in * height_in);
+ const int stride_x = _info.stride().first;
+ const int stride_y = _info.stride().second;
+
+ std::fill_n(reinterpret_cast<float *>(_scaled_output.buffer()), _scaled_output.info()->tensor_shape().total_size(), 0.f);
+
+ // scaled_output is the input for the forward convolution. We copy the input elements to scaled_output
+ // and insert rows and columns with zeroes depending on the stride values.
+ for(int slice = 0; slice < num_2d_slices; ++slice)
+ {
+ const int start_x = _info.pad().first;
+ const int start_y = _inner_border.second + _info.pad().second;
+ const int end_y = height_scaled - _info.pad().second;
+ const int end_x = width_scaled - _inner_border.first - _info.pad().first;
+
+ for(int yi = start_y, in_y = 0; yi < end_y; yi += stride_y, in_y++)
+ {
+ for(int xi = start_x, in_x = 0; xi < end_x; xi += stride_x, in_x++)
+ {
+ const auto in = *(reinterpret_cast<float *>(_input->buffer() + _input->info()->offset_element_in_bytes(Coordinates(in_x, in_y, slice))));
+ *(reinterpret_cast<float *>(_scaled_output.buffer() + _scaled_output.info()->offset_element_in_bytes(Coordinates(xi, yi, slice)))) = in;
+ }
+ }
+ }
+
_conv_f.run();
_memory_group.release();
}
diff --git a/src/runtime/NEON/functions/NEDeconvolutionLayerUpsample.cpp b/src/runtime/NEON/functions/NEDeconvolutionLayerUpsample.cpp
deleted file mode 100644
index 63f17bcb5a..0000000000
--- a/src/runtime/NEON/functions/NEDeconvolutionLayerUpsample.cpp
+++ /dev/null
@@ -1,121 +0,0 @@
-/*
- * Copyright (c) 2016, 2017 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/runtime/NEON/functions/NEDeconvolutionLayerUpsample.h"
-
-#include "arm_compute/core/Coordinates.h"
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/ITensor.h"
-#include "arm_compute/core/NEON/kernels/NEDeconvolutionLayerUpsampleKernel.h"
-#include "arm_compute/core/PixelValue.h"
-#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Window.h"
-#include "arm_compute/runtime/NEON/NEScheduler.h"
-#include "arm_compute/runtime/TensorAllocator.h"
-#include "support/ToolchainSupport.h"
-
-#include <cmath>
-#include <cstddef>
-#include <utility>
-
-using namespace arm_compute;
-
-namespace
-{
-inline void precompute_offsets(ITensor *offsets, float wr, size_t input_element_size, const std::pair<unsigned int, unsigned int> &a,
- const std::pair<unsigned int, unsigned int> &iz, const PadStrideInfo &info)
-{
- ARM_COMPUTE_ERROR_ON(nullptr == offsets);
- Window win;
- const int padx = info.pad().first;
- const int pady = info.pad().second;
- const int ax = a.first;
- const int ay = a.second;
- const int offset_width = offsets->info()->dimension(0);
- const int offset_height = offsets->info()->dimension(1);
- // The values of ax and ay denote the number of ZEROS to be added on the top and right inner border of the image.
- // Step value along the XY axis will depend on the number of zeros to be inserted between samples (number of zeros + 1).
- // Pre-compute the X offset, Y's stride is unknown at this point so we can't precompute Y's offsets
- for(int yi = ay; yi < (offset_height - pady); yi += (1 + iz.second))
- {
- for(int xi = padx; xi < (offset_width - ax); xi += (1 + iz.first))
- {
- int *ptr = reinterpret_cast<int *>(offsets->ptr_to_element(Coordinates(xi, yi)));
- const size_t in_xi = (xi + 0.5f) * wr;
- *reinterpret_cast<int32_t *>(ptr) = in_xi * input_element_size;
- }
- }
-}
-} // namespace
-
-NEDeconvolutionLayerUpsample::NEDeconvolutionLayerUpsample(std::shared_ptr<IMemoryManager> memory_manager) // NOLINT
- : _memory_group(std::move(memory_manager)),
- _offsets(),
- _border_handler(),
- _upsample()
-{
-}
-
-void NEDeconvolutionLayerUpsample::configure(ITensor *input, ITensor *output, const std::pair<unsigned int, unsigned int> &a,
- const std::pair<unsigned int, unsigned int> &iz, const PadStrideInfo &info)
-{
- ARM_COMPUTE_ERROR_ON(nullptr == input);
- ARM_COMPUTE_ERROR_ON(nullptr == output);
-
- for(size_t i = 2; i < Coordinates::num_max_dimensions; ++i)
- {
- ARM_COMPUTE_ERROR_ON(input->info()->dimension(i) != output->info()->dimension(i));
- }
-
- // Get the tensor shape
- const TensorShape shape(output->info()->dimension(0), output->info()->dimension(1));
-
- // Compute the ratio between source width/height and destination width/height
- const auto wr = static_cast<float>(input->info()->dimension(0)) / static_cast<float>(output->info()->dimension(0));
- const auto hr = static_cast<float>(input->info()->dimension(1)) / static_cast<float>(output->info()->dimension(1));
- ARM_COMPUTE_UNUSED(hr);
- // Get the element size of the input image
- const size_t input_element_size = input->info()->element_size();
-
- TensorInfo tensor_info_offsets(shape, Format::S32);
- _offsets.allocator()->init(tensor_info_offsets);
-
- _upsample.configure(input, &_offsets, output);
-
- // Allocate once the configure methods have been called
- _offsets.allocator()->allocate();
- // Pre-compute offsets for nearest interpolation
- std::fill_n(reinterpret_cast<int32_t *>(_offsets.buffer()), _offsets.info()->total_size() / sizeof(int32_t), -1 * input_element_size);
- precompute_offsets(&_offsets, wr, input_element_size, a, iz, info);
-
- _border_handler.configure(input, _upsample.border_size(), BorderMode::CONSTANT, PixelValue(0));
-}
-
-void NEDeconvolutionLayerUpsample::run()
-{
- NEScheduler::get().schedule(&_border_handler, Window::DimZ);
- _memory_group.acquire();
- NEScheduler::get().schedule(&_upsample, Window::DimY);
- _memory_group.release();
-}
diff --git a/tests/datasets/ShapeDatasets.h b/tests/datasets/ShapeDatasets.h
index 58fba07bf8..a5e03c737f 100644
--- a/tests/datasets/ShapeDatasets.h
+++ b/tests/datasets/ShapeDatasets.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017, 2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -239,7 +239,7 @@ public:
SmallDeconvolutionShapes()
: ShapeDataset("InputShape",
{
- TensorShape{ 2U, 3U, 3U, 2U },
+ TensorShape{ 4U, 3U, 3U, 2U },
TensorShape{ 5U, 5U, 3U },
TensorShape{ 11U, 13U, 4U, 3U }
})
diff --git a/tests/validation/CL/DeconvolutionLayer.cpp b/tests/validation/CL/DeconvolutionLayer.cpp
new file mode 100644
index 0000000000..59e85537e5
--- /dev/null
+++ b/tests/validation/CL/DeconvolutionLayer.cpp
@@ -0,0 +1,192 @@
+/*
+ * Copyright (c) 2017, 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/CL/CLTensorAllocator.h"
+#include "arm_compute/runtime/CL/functions/CLDeconvolutionLayer.h"
+#include "tests/CL/CLAccessor.h"
+#include "tests/PaddingCalculator.h"
+#include "tests/datasets/ShapeDatasets.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/fixtures/DeconvolutionLayerFixture.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace
+{
+constexpr AbsoluteTolerance<float> tolerance_fp32(0.001f); /**< Tolerance for floating point tests */
+
+const auto data3x3 = datasets::SmallDeconvolutionShapes() * framework::dataset::make("StrideX", 1, 4) * framework::dataset::make("StrideY", 1, 4) * framework::dataset::make("PadX", 0, 2)
+ * framework::dataset::make("PadY", 0, 2) * framework::dataset::make("ax", 0) * framework::dataset::make("ay", 0) * framework::dataset::make("NumKernels", { 1, 3 });
+
+const auto data1x1 = datasets::SmallDeconvolutionShapes() * framework::dataset::make("StrideX", 1, 4) * framework::dataset::make("StrideY", 1, 4) * framework::dataset::make("PadX", 0, 1)
+ * framework::dataset::make("PadY", 0, 1) * framework::dataset::make("ax", 0) * framework::dataset::make("ay", 0) * framework::dataset::make("NumKernels", { 1, 3 });
+
+} // namespace
+
+TEST_SUITE(CL)
+TEST_SUITE(DeconvolutionLayer)
+
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, (combine(datasets::SmallDeconvolutionShapes(), framework::dataset::make("DataType", DataType::F32))),
+ input_shape, data_type)
+{
+ // Create shapes
+ const unsigned int kernel_size_x = 3;
+ const unsigned int kernel_size_y = 3;
+ const unsigned int num_kernels = 1;
+ const TensorShape weights_shape(kernel_size_x, kernel_size_y, input_shape.z(), num_kernels);
+ const TensorShape bias_shape(num_kernels);
+ auto out_dim = deconvolution_output_dimensions(input_shape.x(), input_shape.y(), kernel_size_x, kernel_size_y, 1, 1, 0, 0, 1, 1);
+ TensorShape output_shape = deconvolution_output_shape(out_dim, input_shape, weights_shape);
+
+ // Create tensors
+ CLTensor src = create_tensor<CLTensor>(input_shape, data_type, 1);
+ CLTensor weights = create_tensor<CLTensor>(weights_shape, data_type, 1);
+ CLTensor bias = create_tensor<CLTensor>(bias_shape, data_type, 1);
+ CLTensor dst = create_tensor<CLTensor>(output_shape, data_type, 1);
+
+ ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
+ ARM_COMPUTE_EXPECT(weights.info()->is_resizable(), framework::LogLevel::ERRORS);
+ ARM_COMPUTE_EXPECT(bias.info()->is_resizable(), framework::LogLevel::ERRORS);
+ ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+ // Create and configure function
+ CLDeconvolutionLayer deconv;
+ deconv.configure(&src, &weights, &bias, &dst, PadStrideInfo(1, 1, 1, 1, DimensionRoundingType::CEIL), 0, 0);
+
+ // Validate valid region
+ const ValidRegion src_valid_region = shape_to_valid_region(input_shape);
+ const ValidRegion weights_valid_region = shape_to_valid_region(weights_shape);
+ const ValidRegion bias_valid_region = shape_to_valid_region(bias_shape);
+ const ValidRegion dst_valid_region = shape_to_valid_region(output_shape);
+
+ validate(src.info()->valid_region(), src_valid_region);
+ validate(weights.info()->valid_region(), weights_valid_region);
+ validate(bias.info()->valid_region(), bias_valid_region);
+ validate(dst.info()->valid_region(), dst_valid_region);
+}
+
+// *INDENT-OFF*
+// clang-format off
+DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip(zip(
+ framework::dataset::make("InputInfo", { TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0), // Mismatching data type
+ TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0), // Invalid weights shape
+ TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::QS8, 4), // Non supported data type
+ TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 11), // Invalid bias shape
+ TensorInfo(TensorShape(13U, 11U, 4U, 3U), 1, DataType::F32, 0), // Window shrink
+ TensorInfo(TensorShape(32U, 16U, 2U), 1, DataType::F32, 0),
+ }),
+ framework::dataset::make("WeightsInfo", { TensorInfo(TensorShape(3U, 3U, 2U, 2U), 1, DataType::F16, 0),
+ TensorInfo(TensorShape(3U, 3U, 2U, 4U), 1, DataType::F32, 0),
+ TensorInfo(TensorShape(3U, 3U, 2U, 2U), 1, DataType::QS8, 5),
+ TensorInfo(TensorShape(3U, 2U, 2U, 2U), 1, DataType::F32, 11),
+ TensorInfo(TensorShape(3U, 3U, 4U), 1, DataType::F32, 0),
+ TensorInfo(TensorShape(1U, 1U, 2U, 4U), 1, DataType::F32, 0),
+ })),
+ framework::dataset::make("BiasInfo", { TensorInfo(TensorShape(1U), 1, DataType::F16, 0),
+ TensorInfo(TensorShape(1U), 1, DataType::F32, 0),
+ TensorInfo(TensorShape(1U), 1, DataType::F32, 5),
+ TensorInfo(TensorShape(25U, 11U), 1, DataType::F32, 11),
+ TensorInfo(TensorShape(1U), 1, DataType::F32, 0),
+ TensorInfo(TensorShape(4U), 1, DataType::F32, 0),
+ })),
+ framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F16, 0),
+ TensorInfo(TensorShape(25U, 10U, 2U), 1, DataType::F32, 0),
+ TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F32, 5),
+ TensorInfo(TensorShape(13U, 13U, 2U), 1, DataType::F32, 0),
+ TensorInfo(TensorShape(11U, 9U, 1U, 3U), 1, DataType::F32, 0),
+ TensorInfo(TensorShape(32U, 16U, 4U), 1, DataType::F32, 0),
+ })),
+ framework::dataset::make("PadStrideInfo", { PadStrideInfo(1, 1, 0, 0),
+ PadStrideInfo(1, 1, 0, 0),
+ PadStrideInfo(1, 1, 0, 0),
+ PadStrideInfo(1, 1, 0, 0),
+ PadStrideInfo(1, 1, 1, 1),
+ PadStrideInfo(1, 1, 0, 0),
+ })),
+ framework::dataset::make("ax", { 1U,
+ 1U,
+ 1U,
+ 1U,
+ 0U,
+ 0U,
+ })),
+ framework::dataset::make("ay", { 1U,
+ 1U,
+ 1U,
+ 1U,
+ 0U,
+ 0U,
+ })),
+ framework::dataset::make("Expected", { false, false, false, false, false, true })),
+ input_info, weights_info, bias_info, output_info, pad_info, ax, ay, expected)
+{
+ bool is_valid = bool(CLDeconvolutionLayer::validate(&input_info.clone()->set_is_resizable(false), &weights_info.clone()->set_is_resizable(false), &bias_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), pad_info, ax, ay));
+ ARM_COMPUTE_EXPECT(is_valid == expected, framework::LogLevel::ERRORS);
+}
+// clang-format on
+// *INDENT-ON*
+
+template <typename T>
+using CLDeconvolutionLayerFixture3x3 = DeconvolutionValidationFixture<CLTensor, CLAccessor, CLDeconvolutionLayer, T, 3, 3>;
+
+template <typename T>
+using CLDeconvolutionLayerFixture1x1 = DeconvolutionValidationFixture<CLTensor, CLAccessor, CLDeconvolutionLayer, T, 1, 1>;
+
+TEST_SUITE(Float)
+
+TEST_SUITE(FP32)
+TEST_SUITE(W3x3)
+
+FIXTURE_DATA_TEST_CASE(Run, CLDeconvolutionLayerFixture3x3<float>, framework::DatasetMode::ALL, combine(data3x3, framework::dataset::make("DataType", DataType::F32)))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_fp32);
+}
+TEST_SUITE_END()
+
+TEST_SUITE(W1x1)
+FIXTURE_DATA_TEST_CASE(Run, CLDeconvolutionLayerFixture1x1<float>, framework::DatasetMode::ALL, combine(data1x1, framework::dataset::make("DataType", DataType::F32)))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_fp32);
+}
+TEST_SUITE_END()
+
+TEST_SUITE_END()
+TEST_SUITE_END()
+
+TEST_SUITE_END()
+TEST_SUITE_END()
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/NEON/DeconvolutionLayer.cpp b/tests/validation/NEON/DeconvolutionLayer.cpp
index 751a96558a..9573784d86 100644
--- a/tests/validation/NEON/DeconvolutionLayer.cpp
+++ b/tests/validation/NEON/DeconvolutionLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017, 2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -44,15 +44,11 @@ namespace
{
constexpr AbsoluteTolerance<float> tolerance_fp32(0.001f); /**< Tolerance for floating point tests */
-const auto data3x3 = datasets::SmallDeconvolutionShapes() * framework::dataset::make("StrideX", 1, 4) * framework::dataset::make("StrideY", 1, 4) * framework::dataset::make("PadX", 0,
- 2)
- * framework::dataset::make("PadY", 0, 2) * framework::dataset::make("ax", 1, 3) * framework::dataset::make("ay", 1, 3) * framework::dataset::make("NumKernels", { 1, 3 })
- *framework::dataset::make("ux", 1, 4) *framework::dataset::make("uy", 1, 4);
+const auto data3x3 = datasets::SmallDeconvolutionShapes() * framework::dataset::make("StrideX", 1, 4) * framework::dataset::make("StrideY", 1, 4) * framework::dataset::make("PadX", 0, 2)
+ * framework::dataset::make("PadY", 0, 2) * framework::dataset::make("ax", 0) * framework::dataset::make("ay", 0) * framework::dataset::make("NumKernels", { 1, 3 });
-const auto data1x1 = datasets::SmallDeconvolutionShapes() * framework::dataset::make("StrideX", 1, 4) * framework::dataset::make("StrideY", 1, 4) * framework::dataset::make("PadX", 0,
- 1)
- * framework::dataset::make("PadY", 0, 1) * framework::dataset::make("ax", 1, 3) * framework::dataset::make("ay", 1, 3) * framework::dataset::make("NumKernels", { 1, 3 })
- *framework::dataset::make("ux", 1, 4) *framework::dataset::make("uy", 1, 4);
+const auto data1x1 = datasets::SmallDeconvolutionShapes() * framework::dataset::make("StrideX", 1, 4) * framework::dataset::make("StrideY", 1, 4) * framework::dataset::make("PadX", 0, 1)
+ * framework::dataset::make("PadY", 0, 1) * framework::dataset::make("ax", 0) * framework::dataset::make("ay", 0) * framework::dataset::make("NumKernels", { 1, 3 });
} // namespace
diff --git a/tests/validation/fixtures/DeconvolutionLayerFixture.h b/tests/validation/fixtures/DeconvolutionLayerFixture.h
index e98f5e93c0..f2455f31ac 100644
--- a/tests/validation/fixtures/DeconvolutionLayerFixture.h
+++ b/tests/validation/fixtures/DeconvolutionLayerFixture.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017, 2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -43,20 +43,15 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class DeconvolutionLayerFixtureBase : public framework::Fixture
{
public:
- /*
- *
- * @param[in] a The number of zeros added to right and bottom edges of the input.
- * @param[in] u How much to scale the X and Y axis.
- */
template <typename...>
void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, PadStrideInfo info,
- const std::pair<unsigned int, unsigned int> &a, const std::pair<unsigned int, unsigned int> &u, DataType data_type, int fractional_bits)
+ const std::pair<unsigned int, unsigned int> &inner_border, DataType data_type, int fractional_bits)
{
_fractional_bits = fractional_bits;
_data_type = data_type;
- _target = compute_target(input_shape, weights_shape, bias_shape, output_shape, info, a, u, data_type, fractional_bits);
- _reference = compute_reference(input_shape, weights_shape, bias_shape, output_shape, info, a, data_type, fractional_bits);
+ _target = compute_target(input_shape, weights_shape, bias_shape, output_shape, info, inner_border, data_type, fractional_bits);
+ _reference = compute_reference(input_shape, weights_shape, bias_shape, output_shape, info, inner_border, data_type, fractional_bits);
}
protected:
@@ -75,13 +70,9 @@ protected:
library->fill_tensor_uniform(tensor, i);
}
}
- /*
- *
- * @param[in] a The number of zeros added to right and bottom edges of the input.
- * @param[in] u How much to scale the X and Y axis.
- */
+
TensorType compute_target(const TensorShape &input_shape, const TensorShape &weights_shape, const TensorShape &bias_shape, const TensorShape &output_shape,
- const PadStrideInfo &info, const std::pair<unsigned int, unsigned int> &a, const std::pair<float, float> &u, DataType data_type, int fixed_point_position)
+ const PadStrideInfo &info, const std::pair<unsigned int, unsigned int> &inner_border, DataType data_type, int fixed_point_position)
{
// Create tensors
TensorType src = create_tensor<TensorType>(input_shape, data_type, 1, fixed_point_position);
@@ -91,7 +82,7 @@ protected:
// Create and configure function
FunctionType conv;
- conv.configure(&src, &weights, &bias, &dst, info, a.first, a.second, u.first, u.second);
+ conv.configure(&src, &weights, &bias, &dst, info, inner_border.first, inner_border.second);
ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
ARM_COMPUTE_EXPECT(weights.info()->is_resizable(), framework::LogLevel::ERRORS);
@@ -121,7 +112,7 @@ protected:
}
SimpleTensor<T> compute_reference(const TensorShape &input_shape, const TensorShape &weights_shape, const TensorShape &bias_shape, const TensorShape &output_shape,
- const PadStrideInfo &info, const std::pair<unsigned int, unsigned int> a, DataType data_type, int fixed_point_position)
+ const PadStrideInfo &info, const std::pair<unsigned int, unsigned int> inner_border, DataType data_type, int fixed_point_position)
{
// Create reference
SimpleTensor<T> src{ input_shape, data_type, 1, fixed_point_position };
@@ -133,7 +124,7 @@ protected:
fill(weights, 1);
fill(bias, 2);
- return reference::deconvolution_layer<T>(src, weights, bias, output_shape, info, a);
+ return reference::deconvolution_layer<T>(src, weights, bias, output_shape, info, inner_border);
}
TensorType _target{};
@@ -148,18 +139,16 @@ class DeconvolutionValidationFixture : public DeconvolutionLayerFixtureBase<Tens
public:
template <typename...>
void setup(TensorShape input_shape, unsigned int sx, unsigned int sy, unsigned int padx, unsigned int pady,
- unsigned int ax, unsigned int ay, unsigned int ux, unsigned int uy, unsigned int num_kernels, DataType data_type)
+ unsigned int inner_border_right, unsigned int inner_border_top, unsigned int num_kernels, DataType data_type)
{
ARM_COMPUTE_ERROR_ON_MSG(kernel_size_x != kernel_size_y, "Only square kernels supported");
const TensorShape weights_shape(kernel_size_x, kernel_size_y, input_shape.z(), num_kernels);
const TensorShape bias_shape(num_kernels);
const PadStrideInfo info(sx, sy, padx, pady, DimensionRoundingType::CEIL);
- const std::pair<unsigned int, unsigned int> a(ax, ay);
- const std::pair<float, float> u(ux, uy);
- auto out_dim = deconvolution_output_dimensions(input_shape.x(), input_shape.y(), kernel_size_x, kernel_size_y, padx, pady, a.first, a.second, u.first, u.second,
- DimensionRoundingType::CEIL);
+ const std::pair<unsigned int, unsigned int> inner_border(inner_border_right, inner_border_top);
+ auto out_dim = deconvolution_output_dimensions(input_shape.x(), input_shape.y(), kernel_size_x, kernel_size_y, padx, pady, inner_border.first, inner_border.second, sx, sy);
TensorShape output_shape = deconvolution_output_shape(out_dim, input_shape, weights_shape);
- DeconvolutionLayerFixtureBase<TensorType, AccessorType, FunctionType, T>::setup(input_shape, weights_shape, bias_shape, output_shape, info, a, u, data_type, 0);
+ DeconvolutionLayerFixtureBase<TensorType, AccessorType, FunctionType, T>::setup(input_shape, weights_shape, bias_shape, output_shape, info, inner_border, data_type, 0);
}
};
diff --git a/tests/validation/reference/DeconvolutionLayer.cpp b/tests/validation/reference/DeconvolutionLayer.cpp
index 82c2188ade..0cf1087346 100644
--- a/tests/validation/reference/DeconvolutionLayer.cpp
+++ b/tests/validation/reference/DeconvolutionLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017, 2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -39,26 +39,27 @@ SimpleTensor<T> deconvolution_layer(const SimpleTensor<T> &src, const SimpleTens
const PadStrideInfo &info, const std::pair<unsigned int, unsigned int> &a)
{
// Create reference
+ const int stride_x = info.stride().first;
+ const int stride_y = info.stride().second;
TensorShape scaled_shape = src.shape();
- scaled_shape.set(0, output_shape.x());
- scaled_shape.set(1, output_shape.y());
+ int out_x = src.shape().x() + (src.shape().x() - 1) * (stride_x - 1) + a.first + 2 * info.pad().first;
+ int out_y = src.shape().y() + (src.shape().y() - 1) * (stride_y - 1) + a.second + 2 * info.pad().second;
+ scaled_shape.set(0, out_x);
+ scaled_shape.set(1, out_y);
SimpleTensor<T> scaled{ scaled_shape, src.data_type(), 1, src.fixed_point_position() };
- const int width_in = src.shape().x();
- const int height_in = src.shape().y();
- const int width_scaled = scaled.shape().x();
- const int height_scaled = scaled.shape().y();
- const int num_2d_slices = src.shape().total_size() / (width_in * height_in);
- const float width_ratio = static_cast<float>(width_in) / static_cast<float>(width_scaled);
- const float height_ratio = static_cast<float>(height_in) / static_cast<float>(height_scaled);
- const int ax = a.first; // The number of zeros added to right edge of the input.
- const int ay = a.second; // The number of zeros added to bottom edge of the input.
- const unsigned int kernel_size = weights.shape().x();
- ARM_COMPUTE_ERROR_ON(info.pad().first > (kernel_size - 1));
- const int transposed_convolution_padx = kernel_size - info.pad().first - 1;
- const int transposed_convolution_pady = kernel_size - info.pad().second - 1;
- const int stridex = info.stride().first;
- const int stridey = info.stride().second;
+ const int width_in = src.shape().x();
+ const int height_in = src.shape().y();
+ const int width_scaled = scaled.shape().x();
+ const int height_scaled = scaled.shape().y();
+ const int num_2d_slices = src.shape().total_size() / (width_in * height_in);
+ const int ax = a.first; // The number of zeros added to right edge of the input.
+ const int ay = a.second; // The number of zeros added to top edge of the input.
+ ARM_COMPUTE_ERROR_ON(info.pad().first > (weights.shape().x() - 1));
+
+ ARM_COMPUTE_ERROR_ON_MSG(ax > stride_x - 1, "ax must be smaller than stride_x");
+ ARM_COMPUTE_ERROR_ON_MSG(ay > stride_y - 1, "ay must be smaller than stride_y");
+
for(int j = 0; j < scaled.num_elements(); ++j)
{
scaled[j] = T(0);
@@ -68,34 +69,23 @@ SimpleTensor<T> deconvolution_layer(const SimpleTensor<T> &src, const SimpleTens
{
const int offset_slice_in = slice * width_in * height_in;
const int offset_slice_out = slice * width_scaled * height_scaled;
- for(int yi = ay; yi < height_scaled; yi += stridey)
+ const int start_x = info.pad().first;
+ const int start_y = ay + info.pad().second;
+ const int end_y = height_scaled - info.pad().second;
+ const int end_x = width_scaled - ax - info.pad().first;
+
+ for(int yi = start_y, in_y = 0; yi < end_y; yi += stride_y, in_y++)
{
- for(int xi = transposed_convolution_padx; xi < width_scaled; xi += stridex)
+ for(int xi = start_x, in_x = 0; xi < end_x; xi += stride_x, in_x++)
{
- const float x_src = (xi + 0.5f) * width_ratio - 0.5f;
- const float y_src = (yi + 0.5f) * height_ratio - 0.5f;
- T *out = scaled.data() + offset_slice_out + xi + yi * width_scaled;
- const bool in_bounds = x_src > -1 && y_src > -1 && x_src < width_in && y_src < height_in;
- const bool in_axy = xi < transposed_convolution_padx || xi >= (width_scaled - ax) // this is checking if the x coordinate is in the padded left/right area
- || yi < ay || yi >= (height_scaled - transposed_convolution_pady); // like above but top and bottom padding in the upscaled XY plane
- if(!in_axy)
- {
- if(in_bounds)
- {
- const int in_scaled_x = (x_src < 0.f) ? static_cast<int>(x_src - 0.5f) : static_cast<int>(x_src + 0.5f);
- const int in_scaled_y = (y_src < 0.f) ? static_cast<int>(y_src - 0.5f) : static_cast<int>(y_src + 0.5f);
- const T *in = src.data() + offset_slice_in + in_scaled_x + in_scaled_y * width_in;
- *out = *in;
- }
- else
- {
- *out = T(0);
- }
- }
+ const T *in = src.data() + offset_slice_in + in_y * width_in + in_x;
+ T *out = scaled.data() + offset_slice_out + xi + yi * width_scaled;
+ *out = *in;
}
}
}
- const PadStrideInfo conv_info(1, 1, 1, 1, DimensionRoundingType::CEIL);
+
+ const PadStrideInfo conv_info(1, 1, 0, 0, 0, 0, DimensionRoundingType::CEIL);
return convolution_layer(scaled, weights, bias, output_shape, conv_info);
}
diff --git a/tests/validation/reference/DeconvolutionLayer.h b/tests/validation/reference/DeconvolutionLayer.h
index 8222e32027..c0bc1fa928 100644
--- a/tests/validation/reference/DeconvolutionLayer.h
+++ b/tests/validation/reference/DeconvolutionLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017, 2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -42,7 +42,7 @@ namespace reference
* bias Optional, ignored if NULL. The biases have one dimension. Data type supported: Same as @p input.
* output_shape Output tensor shape. The output has the same number of dimensions as the @p input.
* info Contains padding and policies to be used in the deconvolution, this is decribed in @ref PadStrideInfo.
- * a The number of zeros added to right edge of the input.
+ * a The number of zeros added to right and top edges of the input.
*
*/
template <typename T>