From 780db4eb6a9e3dee565d14f36d772038cd3253da Mon Sep 17 00:00:00 2001
From: Michalis Spyrou <michalis.spyrou@arm.com>
Date: Thu, 23 Nov 2017 09:49:51 +0000
Subject: COMPMID-471 Implement Deconvolution on OpenCL

Change-Id: Ie00c6b08a51d30c5ce2637d40ee3d165b8a68686
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/110311
Reviewed-by: Pablo Tello <pablo.tello@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Tested-by: Jenkins <bsgcomp@arm.com>
---
 arm_compute/core/CL/CLKernels.h                    |   3 +-
 .../kernels/CLDeconvolutionLayerUpsampleKernel.h   |  80 +++++++++
 arm_compute/core/NEON/NEKernels.h                  |   3 +-
 .../kernels/NEDeconvolutionLayerUpsampleKernel.h   |  72 --------
 arm_compute/core/Utils.h                           |  28 ++-
 arm_compute/core/utils/misc/ShapeCalculator.h      |  15 +-
 arm_compute/runtime/CL/CLFunctions.h               |   4 +-
 .../runtime/CL/functions/CLDeconvolutionLayer.h    | 103 +++++++++++
 .../CL/functions/CLDeconvolutionLayerUpsample.h    |  85 +++++++++
 .../CL/functions/CLDirectConvolutionLayer.h        |   3 +-
 arm_compute/runtime/NEON/NEFunctions.h             |   3 +-
 .../runtime/NEON/functions/NEDeconvolutionLayer.h  |  61 ++++---
 .../NEON/functions/NEDeconvolutionLayerUpsample.h  |  72 --------
 src/core/CL/CLKernelLibrary.cpp                    |   7 +-
 src/core/CL/cl_kernels/deconvolution_layer.cl      |  50 ++++++
 .../kernels/CLDeconvolutionLayerUpsampleKernel.cpp | 117 +++++++++++++
 .../kernels/NEDeconvolutionLayerUpsampleKernel.cpp | 165 ------------------
 src/core/Utils.cpp                                 |  32 ++--
 src/runtime/CL/functions/CLDeconvolutionLayer.cpp  | 132 ++++++++++++++
 .../CL/functions/CLDeconvolutionLayerUpsample.cpp  |  64 +++++++
 .../NEON/functions/NEDeconvolutionLayer.cpp        | 105 +++++------
 .../functions/NEDeconvolutionLayerUpsample.cpp     | 121 -------------
 tests/datasets/ShapeDatasets.h                     |   4 +-
 tests/validation/CL/DeconvolutionLayer.cpp         | 192 +++++++++++++++++++++
 tests/validation/NEON/DeconvolutionLayer.cpp       |  14 +-
 .../fixtures/DeconvolutionLayerFixture.h           |  37 ++--
 tests/validation/reference/DeconvolutionLayer.cpp  |  72 ++++----
 tests/validation/reference/DeconvolutionLayer.h    |   4 +-
 28 files changed, 1016 insertions(+), 632 deletions(-)
 create mode 100644 arm_compute/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.h
 delete mode 100644 arm_compute/core/NEON/kernels/NEDeconvolutionLayerUpsampleKernel.h
 create mode 100644 arm_compute/runtime/CL/functions/CLDeconvolutionLayer.h
 create mode 100644 arm_compute/runtime/CL/functions/CLDeconvolutionLayerUpsample.h
 delete mode 100644 arm_compute/runtime/NEON/functions/NEDeconvolutionLayerUpsample.h
 create mode 100644 src/core/CL/cl_kernels/deconvolution_layer.cl
 create mode 100644 src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.cpp
 delete mode 100644 src/core/NEON/kernels/NEDeconvolutionLayerUpsampleKernel.cpp
 create mode 100644 src/runtime/CL/functions/CLDeconvolutionLayer.cpp
 create mode 100644 src/runtime/CL/functions/CLDeconvolutionLayerUpsample.cpp
 delete mode 100644 src/runtime/NEON/functions/NEDeconvolutionLayerUpsample.cpp
 create mode 100644 tests/validation/CL/DeconvolutionLayer.cpp

diff --git a/arm_compute/core/CL/CLKernels.h b/arm_compute/core/CL/CLKernels.h
index 9da0e5ab3a..64687fb26a 100644
--- a/arm_compute/core/CL/CLKernels.h
+++ b/arm_compute/core/CL/CLKernels.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016, 2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -42,6 +42,7 @@
 #include "arm_compute/core/CL/kernels/CLCol2ImKernel.h"
 #include "arm_compute/core/CL/kernels/CLColorConvertKernel.h"
 #include "arm_compute/core/CL/kernels/CLConvolutionKernel.h"
+#include "arm_compute/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.h"
 #include "arm_compute/core/CL/kernels/CLDepthConcatenateLayerKernel.h"
 #include "arm_compute/core/CL/kernels/CLDepthConvertLayerKernel.h"
 #include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3Kernel.h"
diff --git a/arm_compute/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.h b/arm_compute/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.h
new file mode 100644
index 0000000000..8867ca1c37
--- /dev/null
+++ b/arm_compute/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.h
@@ -0,0 +1,80 @@
+/*
+ * Copyright (c) 2017, 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLDECONVOLUTIONLAYERUPSAMPLEKERNEL_H__
+#define __ARM_COMPUTE_CLDECONVOLUTIONLAYERUPSAMPLEKERNEL_H__
+
+#include "arm_compute/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the Deconvolution layer kernel on OpenCL.
+ */
+class CLDeconvolutionLayerUpsampleKernel : public ICLKernel
+{
+public:
+    /** Constructor */
+    CLDeconvolutionLayerUpsampleKernel();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    CLDeconvolutionLayerUpsampleKernel(const CLDeconvolutionLayerUpsampleKernel &) = delete;
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    CLDeconvolutionLayerUpsampleKernel &operator=(const CLDeconvolutionLayerUpsampleKernel &) = delete;
+    /** Default Move Constructor. */
+    CLDeconvolutionLayerUpsampleKernel(CLDeconvolutionLayerUpsampleKernel &&) = default;
+    /** Default move assignment operator. */
+    CLDeconvolutionLayerUpsampleKernel &operator=(CLDeconvolutionLayerUpsampleKernel &&) = default;
+    /** Default destructor */
+    ~CLDeconvolutionLayerUpsampleKernel() = default;
+
+    /** Initialise the kernel's input and output.
+     *
+     * @param[in]  input        Source tensor. Data types supported: F32.
+     * @param[out] output       Destination tensor. Data types supported: F32. All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane.
+     * @param[in]  inner_border Top and right inner border sizes. These rows and columns will be filled with zero.
+     * @param[in]  info         Contains padding and stride information described in @ref PadStrideInfo.
+     */
+    void configure(const ICLTensor *input, ICLTensor *output, const BorderSize &inner_border, const PadStrideInfo &info);
+    /** Static function to check if given info will lead to a valid configuration of @ref CLDeconvolutionLayerUpsample
+     *
+     * @param[in] input        Source tensor info. Data types supported: F32.
+     * @param[in] output       Destination tensor info. Data types supported: F32. All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane.
+     * @param[in] inner_border Top and right inner border sizes. These rows and columns will be filled with zero.
+     * @param[in] info         Contains padding and stride information described in @ref PadStrideInfo.
+     *
+     * @return a status
+     */
+    static Status validate(const ITensorInfo *input, const ITensorInfo *output, const BorderSize &inner_border, const PadStrideInfo &info);
+
+    // Inherited methods overridden:
+    void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+    const ICLTensor *_input;
+    ICLTensor       *_output;
+    BorderSize       _inner_border;
+    PadStrideInfo    _info;
+};
+} // namespace arm_compute
+#endif /*__ARM_COMPUTE_CLDECONVOLUTIONLAYERUPSAMPLEKERNEL_H__ */
diff --git a/arm_compute/core/NEON/NEKernels.h b/arm_compute/core/NEON/NEKernels.h
index 8a4cf7abeb..d5c4c340ee 100644
--- a/arm_compute/core/NEON/NEKernels.h
+++ b/arm_compute/core/NEON/NEKernels.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016, 2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -43,7 +43,6 @@
 #include "arm_compute/core/NEON/kernels/NEColorConvertKernel.h"
 #include "arm_compute/core/NEON/kernels/NEConvolutionKernel.h"
 #include "arm_compute/core/NEON/kernels/NECumulativeDistributionKernel.h"
-#include "arm_compute/core/NEON/kernels/NEDeconvolutionLayerUpsampleKernel.h"
 #include "arm_compute/core/NEON/kernels/NEDepthConcatenateLayerKernel.h"
 #include "arm_compute/core/NEON/kernels/NEDepthConvertLayerKernel.h"
 #include "arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.h"
diff --git a/arm_compute/core/NEON/kernels/NEDeconvolutionLayerUpsampleKernel.h b/arm_compute/core/NEON/kernels/NEDeconvolutionLayerUpsampleKernel.h
deleted file mode 100644
index 707564683f..0000000000
--- a/arm_compute/core/NEON/kernels/NEDeconvolutionLayerUpsampleKernel.h
+++ /dev/null
@@ -1,72 +0,0 @@
-/*
- * Copyright (c) 2017 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef __ARM_COMPUTE_NEDECONVOLUTIONLAYERKERNEL_H__
-#define __ARM_COMPUTE_NEDECONVOLUTIONLAYERKERNEL_H__
-
-#include "arm_compute/core/NEON/INEKernel.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** NEON kernel to perform scaling on a tensor */
-class NEDeconvolutionLayerUpsampleKernel : public INEKernel
-{
-public:
-    /** Default constructor */
-    NEDeconvolutionLayerUpsampleKernel();
-    /** Prevent instances of this class from being copied (As this class contains pointers) */
-    NEDeconvolutionLayerUpsampleKernel(const NEDeconvolutionLayerUpsampleKernel &) = delete;
-    /** Prevent instances of this class from being copied (As this class contains pointers) */
-    NEDeconvolutionLayerUpsampleKernel &operator=(const NEDeconvolutionLayerUpsampleKernel &) = delete;
-    /** Allow instances of this class to be moved */
-    NEDeconvolutionLayerUpsampleKernel(NEDeconvolutionLayerUpsampleKernel &&) = default;
-    /** Allow instances of this class to be moved */
-    NEDeconvolutionLayerUpsampleKernel &operator=(NEDeconvolutionLayerUpsampleKernel &&) = default;
-    /** Default destructor */
-    ~NEDeconvolutionLayerUpsampleKernel() = default;
-
-    /** Initialise the kernel's inputs, output and interpolation policy
-     *
-     * @param[in]  input   Source tensor. Data types supported: F32.
-     * @param[in]  offsets Offset to access the pixel with NEAREST interpolation or the top-left pixel with BILINEAR interpolation in the input tensor. Data type supported: S32.
-     * @param[out] output  Destination tensor. Data types supported: F32. All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane.
-     */
-    void configure(const ITensor *input, const ITensor *offsets, ITensor *output);
-
-    // Inherited methods overridden:
-    void run(const Window &window, const ThreadInfo &info) override;
-    BorderSize border_size() const override;
-
-private:
-    /** Function to perform scale using nearest interpolation on the given window */
-    void scale_nearest(const Window &window);
-
-    const ITensor *_offsets;
-    const ITensor *_input;
-    ITensor       *_output;
-};
-} // arm_compute
-#endif /*__ARM_COMPUTE_NEDECONVOLUTIONLAYERKERNEL_H__ */
diff --git a/arm_compute/core/Utils.h b/arm_compute/core/Utils.h
index f78add13f9..51967b1762 100644
--- a/arm_compute/core/Utils.h
+++ b/arm_compute/core/Utils.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016, 2017, 2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -614,25 +614,23 @@ TensorShape deconvolution_output_shape(const std::pair<unsigned int, unsigned in
 
 /** Returns expected width and height of the deconvolution's output tensor.
  *
- * @param[in] in_width      Width of input tensor (Number of columns)
- * @param[in] in_height     Height of input tensor (Number of rows)
- * @param[in] kernel_width  Kernel width.
- * @param[in] kernel_height Kernel height.
- * @param[in] padx          X axis padding.
- * @param[in] pady          Y axis padding.
- * @param[in] ax            The number of zeros added to right edge of the input.
- * @param[in] ay            The number of zeros added to top edge of the input.
- * @param[in] upscalex      How much to scale the X axis.
- * @param[in] upscaley      How much to scale the Y axis.
- * @param[in] round         Rounding policy to be used when computing the output's dimensions.
+ * @param[in] in_width           Width of input tensor (Number of columns)
+ * @param[in] in_height          Height of input tensor (Number of rows)
+ * @param[in] kernel_width       Kernel width.
+ * @param[in] kernel_height      Kernel height.
+ * @param[in] padx               X axis padding.
+ * @param[in] pady               Y axis padding.
+ * @param[in] inner_border_right The number of zeros added to right edge of the input.
+ * @param[in] inner_border_top   The number of zeros added to top edge of the input.
+ * @param[in] stride_x           X axis input stride.
+ * @param[in] stride_y           Y axis input stride.
  *
  * @return A pair with the new width in the first position and the new height in the second.
  */
-
 const std::pair<unsigned int, unsigned int> deconvolution_output_dimensions(unsigned int in_width, unsigned int in_height,
                                                                             unsigned int kernel_width, unsigned int kernel_height,
-                                                                            unsigned int padx, unsigned int pady, unsigned int ax, unsigned int ay,
-                                                                            float upscalex, float upscaley, DimensionRoundingType round);
+                                                                            unsigned int padx, unsigned int pady, unsigned int inner_border_right, unsigned int inner_border_top,
+                                                                            unsigned int stride_x, unsigned int stride_y);
 
 /** Returns expected width and height of output scaled tensor depending on dimensions rounding mode.
  *
diff --git a/arm_compute/core/utils/misc/ShapeCalculator.h b/arm_compute/core/utils/misc/ShapeCalculator.h
index f31eb3d336..c7667f2c7b 100644
--- a/arm_compute/core/utils/misc/ShapeCalculator.h
+++ b/arm_compute/core/utils/misc/ShapeCalculator.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017, 2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -106,7 +106,8 @@ inline TensorShape compute_depthwise_convolution_shape(const ITensorInfo &input,
 
     unsigned int output_width  = 0;
     unsigned int output_height = 0;
-    std::tie(output_width, output_height) = scaled_dimensions(input_shape.x(), input_shape.y(), weights_shape.x(), weights_shape.y(), conv_info);
+    std::tie(output_width, output_height) = scaled_dimensions(input_shape.x(), input_shape.y(), weights_shape.x(),
+                                                              weights_shape.y(), conv_info);
 
     TensorShape output_shape{ input_shape };
     output_shape.set(0, output_width);
@@ -114,6 +115,16 @@ inline TensorShape compute_depthwise_convolution_shape(const ITensorInfo &input,
 
     return output_shape;
 }
+inline TensorShape compute_deconvolution_shape(const ITensorInfo &input, unsigned int sx, unsigned int sy, unsigned int inner_border_right, unsigned int inner_border_top, const PadStrideInfo &info)
+{
+    TensorShape        scale_out_shape(input.tensor_shape());
+    const unsigned int out_x = input.dimension(0) + (input.dimension(0) - 1) * (sx - 1) + inner_border_right + 2 * info.pad().first;
+    const unsigned int out_y = input.dimension(1) + (input.dimension(1) - 1) * (sy - 1) + inner_border_top + 2 * info.pad().second;
+    scale_out_shape.set(0, out_x);
+    scale_out_shape.set(1, out_y);
+
+    return scale_out_shape;
+}
 } // namespace shape_calculator
 } // namespace misc
 } // namespace arm_compute
diff --git a/arm_compute/runtime/CL/CLFunctions.h b/arm_compute/runtime/CL/CLFunctions.h
index f6ecef7a51..1154ab79aa 100644
--- a/arm_compute/runtime/CL/CLFunctions.h
+++ b/arm_compute/runtime/CL/CLFunctions.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016, 2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -42,6 +42,8 @@
 #include "arm_compute/runtime/CL/functions/CLColorConvert.h"
 #include "arm_compute/runtime/CL/functions/CLConvolution.h"
 #include "arm_compute/runtime/CL/functions/CLConvolutionLayer.h"
+#include "arm_compute/runtime/CL/functions/CLDeconvolutionLayer.h"
+#include "arm_compute/runtime/CL/functions/CLDeconvolutionLayerUpsample.h"
 #include "arm_compute/runtime/CL/functions/CLDepthConcatenateLayer.h"
 #include "arm_compute/runtime/CL/functions/CLDepthConvertLayer.h"
 #include "arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h"
diff --git a/arm_compute/runtime/CL/functions/CLDeconvolutionLayer.h b/arm_compute/runtime/CL/functions/CLDeconvolutionLayer.h
new file mode 100644
index 0000000000..e98cc9b3d6
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLDeconvolutionLayer.h
@@ -0,0 +1,103 @@
+/*
+ * Copyright (c) 2017, 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLDECONVOLUTIONLAYER_H__
+#define __ARM_COMPUTE_CLDECONVOLUTIONLAYER_H__
+
+#include "arm_compute/runtime/CL/functions/CLDeconvolutionLayerUpsample.h"
+#include "arm_compute/runtime/CL/functions/CLDirectConvolutionLayer.h"
+
+#include "arm_compute/runtime/CL/CLMemoryGroup.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/IFunction.h"
+#include "arm_compute/runtime/IMemoryManager.h"
+
+#include <memory>
+
+namespace arm_compute
+{
+class ICLTensor;
+/** Function to run the deconvolution layer.
+ *
+ * Deconvolution Layer is the backward pass of Convolution Layer. First we transform the input depending on the stride and pad info and then perform a 1x1
+ * convolution pass. Input stride defines how many zeroes we should put between each element of the input, pad is the amount of padding and finally a is a user
+ * specified value where a < stride - 1, that increases the padding top and right of the input image.
+ *
+ *  The relation between input to output is as follows:
+ *      width_output = round((width_input − 1) ∗ (stride_x - 1) − 2 ∗ padding_x + kernel_x + inner_border_right )
+ *      height_output = round((height_input − 1) ∗ (stride_y - 1) − 2 ∗ padding_y + kernel_y + inner_border_top )
+ *
+ *  where:
+ *      width_input is the size of the first input dimension.
+ *      height_input is the size of the second input dimension.
+ *      width_output is the size of the first output dimension.
+ *      height_output is the size of the second output dimension.
+ *      kernel_x and kernel_y are the convolution sizes in x and y.
+ *      inner_border_right and inner_border_top the number of zeros added to the right and top edges of the input.
+ *      stride_x and stride_y is the input stride of the first and second dimension.
+ *
+ */
+class CLDeconvolutionLayer : public IFunction
+{
+public:
+    /** Constructor */
+    CLDeconvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+    /** Set the input, weights, biases and output tensors.
+     *
+     * @param[in,out] input              Input tensor. 3 lower dimensions represent a single input, and an optional 4th dimension for batch of inputs. Data types supported: F32.
+     * @param[in]     weights            The 4d weights with dimensions [width, height, OFM, IFM]. Data type supported: Same as @p input.
+     * @param[in]     bias               (Optional) The biases have one dimension. Data type supported: Same as @p input.
+     * @param[out]    output             Output tensor. The output has the same number of dimensions as the @p input.
+     * @param[in]     info               Contains padding and policies to be used in the deconvolution, this is decribed in @ref PadStrideInfo.
+     * @param[in]     inner_border_right The number of zeros added to right edge of the input.
+     * @param[in]     inner_border_top   The number of zeros added to top edge of the input.
+     *
+     */
+    void configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *bias, ICLTensor *output, const PadStrideInfo &info,
+                   unsigned int inner_border_right, unsigned int inner_border_top);
+    /** Static function to check if given info will lead to a valid configuration of @ref CLDeconvolutionLayer
+     *
+     * @param[in] input              Input tensor info. 3 lower dimensions represent a single input, and an optional 4th dimension for batch of inputs. Data types supported: F32.
+     * @param[in] weights            The 4d weights info with dimensions [width, height, OFM, IFM]. Data type supported: Same as @p input.
+     * @param[in] bias               (Optional) The biases have one dimension. Data type supported: Same as @p input.
+     * @param[in] output             Output tensor info. The output has the same number of dimensions as the @p input.
+     * @param[in] info               Contains padding and policies to be used in the deconvolution, this is decribed in @ref PadStrideInfo.
+     * @param[in] inner_border_right The number of zeros added to right edge of the input.
+     * @param[in] inner_border_top   The number of zeros added to top edge of the input.
+     *
+     * @return a status
+     */
+    static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *bias, ITensorInfo *output, const PadStrideInfo &info,
+                           unsigned int inner_border_right, unsigned int inner_border_top);
+
+    // Inherited methods overridden:
+    void run() override;
+
+private:
+    CLMemoryGroup                _memory_group;
+    CLDeconvolutionLayerUpsample _scale_f;
+    CLDirectConvolutionLayer     _conv_f;
+    CLTensor                     _scaled_output;
+};
+}
+#endif /* __ARM_COMPUTE_CLDECONVOLUTIONLAYER_H__ */
\ No newline at end of file
diff --git a/arm_compute/runtime/CL/functions/CLDeconvolutionLayerUpsample.h b/arm_compute/runtime/CL/functions/CLDeconvolutionLayerUpsample.h
new file mode 100644
index 0000000000..74ee4efb2c
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLDeconvolutionLayerUpsample.h
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2017, 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLDECONVOLUTIONLAYERUPSAMPLE_H__
+#define __ARM_COMPUTE_CLDECONVOLUTIONLAYERUPSAMPLE_H__
+
+#include "arm_compute/runtime/IFunction.h"
+
+#include "arm_compute/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/CLMemoryGroup.h"
+#include "arm_compute/runtime/IFunction.h"
+#include "arm_compute/runtime/IMemoryManager.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Basic function to run @ref CLDeconvolutionLayerUpsampleKernel */
+class CLDeconvolutionLayerUpsample : public IFunction
+{
+public:
+    /** Default constructor */
+    CLDeconvolutionLayerUpsample();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    CLDeconvolutionLayerUpsample(const CLDeconvolutionLayerUpsample &) = delete;
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    CLDeconvolutionLayerUpsample &operator=(const CLDeconvolutionLayerUpsample &) = delete;
+    /** Allow instances of this class to be moved */
+    CLDeconvolutionLayerUpsample(CLDeconvolutionLayerUpsample &&) = default;
+    /** Allow instances of this class to be moved */
+    CLDeconvolutionLayerUpsample &operator=(CLDeconvolutionLayerUpsample &&) = default;
+    /** Default destructor */
+    virtual ~CLDeconvolutionLayerUpsample() = default;
+
+    /** Initialize the function's source, destination, interpolation type and border_mode.
+     *
+     * @param[in, out] input        Source tensor. Data type supported: F32.
+     * @param[out]     output       Destination tensor. Data type supported: F32.
+     * @param[in]      inner_border The number of zeros added to right and top edges of the input.
+     * @param[in]      info         Contains padding and policies to be used in the deconvolution.
+     */
+    void configure(ICLTensor *input, ICLTensor *output, const BorderSize &inner_border,
+                   const PadStrideInfo &info);
+    /** Static function to check if given info will lead to a valid configuration of @ref CLDeconvolutionLayerUpsample
+     *
+     * @param[in] input        Source tensor info. Data type supported: F32.
+     * @param[in] output       Destination tensor info. Data type supported: F32.
+     * @param[in] inner_border The number of zeros added to right and top edges of the input.
+     * @param[in] info         Contains padding and policies to be used in the deconvolution.
+     *
+     * @return a status
+     */
+    static Status validate(const ITensorInfo *input, const ITensorInfo *output, const BorderSize &inner_border,
+                           const PadStrideInfo &info);
+
+    // Inherited methods overridden:
+    void run() override;
+
+private:
+    CLDeconvolutionLayerUpsampleKernel _upsample;
+    ICLTensor                         *_output;
+};
+}
+#endif /* __ARM_COMPUTE_CLDECONVOLUTIONLAYERUPSAMPLE_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLDirectConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLDirectConvolutionLayer.h
index f31a45be90..205c90c478 100644
--- a/arm_compute/runtime/CL/functions/CLDirectConvolutionLayer.h
+++ b/arm_compute/runtime/CL/functions/CLDirectConvolutionLayer.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017, 2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -40,6 +40,7 @@ class ICLTensor;
 class CLDirectConvolutionLayer : public IFunction
 {
 public:
+    /** Default constructor */
     CLDirectConvolutionLayer();
     /** Set the input and output tensors.
      *
diff --git a/arm_compute/runtime/NEON/NEFunctions.h b/arm_compute/runtime/NEON/NEFunctions.h
index 08852cf368..d09fcb280c 100644
--- a/arm_compute/runtime/NEON/NEFunctions.h
+++ b/arm_compute/runtime/NEON/NEFunctions.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016, 2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -44,7 +44,6 @@
 #include "arm_compute/runtime/NEON/functions/NEConvolution.h"
 #include "arm_compute/runtime/NEON/functions/NEConvolutionLayer.h"
 #include "arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h"
-#include "arm_compute/runtime/NEON/functions/NEDeconvolutionLayerUpsample.h"
 #include "arm_compute/runtime/NEON/functions/NEDepthConcatenateLayer.h"
 #include "arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h"
 #include "arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h"
diff --git a/arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h
index 8757bc63aa..091a928db6 100644
--- a/arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017, 2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -24,7 +24,6 @@
 #ifndef __ARM_COMPUTE_NEDECONVOLUTIONLAYER_H__
 #define __ARM_COMPUTE_NEDECONVOLUTIONLAYER_H__
 
-#include "arm_compute/runtime/NEON/functions/NEDeconvolutionLayerUpsample.h"
 #include "arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h"
 
 #include "arm_compute/core/Types.h"
@@ -39,13 +38,13 @@ namespace arm_compute
 {
 /** Function to run the deconvolution layer.
  *
- *  The operation is similar to convolution but it's implemented by up-sampling the inputs with zeros insertions between the inputs and convolving
- *  the kernels on the up-sampled result.
+ * Deconvolution Layer is the backward pass of Convolution Layer. First we transform the input depending on the stride and pad info and then perfrom a 1x1
+ * convolution pass. Input stride defines how many zeroes we should put between each element of the input, pad is the amount of padding and finaly a is a user
+ * specified value where a < stride - 1 that increases the padding top and right of the input image.
  *
- *  Before the Deconvolution is done, up-scaling the first 2D with zeros is performed. The relation between input to
- *  output is as follows:
- *      width_output = round((width_input − 1) ∗ upscale_x − 2 ∗ padding_x + kernel_x + a_x )
- *      height_output = round((height_input − 1) ∗ upscale_y − 2 ∗ padding_y + kernel_y + a_y )
+ *  The relation between input to output is as follows:
+ *       width_output = round((width_input − 1) ∗ (stride_x - 1) − 2 ∗ padding_x + kernel_x + inner_border_right )
+ *       height_output = round((height_input − 1) ∗ (stride_y - 1) − 2 ∗ padding_y + kernel_y + inner_border_top )
  *
  *  where
  *      width is the size of the first input dimension.
@@ -53,44 +52,54 @@ namespace arm_compute
  *      width_output is the size of the first output dimension.
  *      height_output is the size of the second output dimension.
  *      kernel_x and kernel_y are the convolution sizes in x and y.
- *      ax and ay the number of zeros added to the top and right edges of the input.
- *      upscale_x and upscale_y how much to scale the X and Y axis.
+ *      inner_border_right and inner_border_top the number of zeros added to the top and right edges of the input.
+ *      stride_x and stride_y is the input stride of the first and second dimension.
  *
  *  This function calls the following NEON kernels:
  *
- * -# @ref NEDeconvolutionLayerUpsampleKernel
  * -# @ref NEDirectConvolutionLayer
  *
  */
 class NEDeconvolutionLayer : public IFunction
 {
 public:
-    /** Constructor */
+    /** Default constructor */
     NEDeconvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NEDeconvolutionLayer(const NEDeconvolutionLayer &) = delete;
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NEDeconvolutionLayer &operator=(const NEDeconvolutionLayer &) = delete;
+    /** Allow instances of this class to be moved */
+    NEDeconvolutionLayer(NEDeconvolutionLayer &&) = default;
+    /** Allow instances of this class to be moved */
+    NEDeconvolutionLayer &operator=(NEDeconvolutionLayer &&) = default;
+    /** Default destructor */
+    virtual ~NEDeconvolutionLayer() = default;
     /** Set the input, weights, biases and output tensors.
      *
-     * @param[in,out] input    Input tensor. 3 lower dimensions represent a single input, and an optional 4th dimension for batch of inputs. Data types supported: F32.
-     * @param[in]     weights  The 4d weights with dimensions [width, height, OFM, IFM]. Data type supported: Same as @p input.
-     * @param[in]     bias     Optional, ignored if NULL. The biases have one dimension. Data type supported: Same as @p input.
-     * @param[out]    output   Output tensor. The output has the same number of dimensions as the @p input.
-     * @param[in]     info     Contains padding and policies to be used in the deconvolution, this is decribed in @ref PadStrideInfo.
-     * @param[in]     ax       The number of zeros added to right edge of the input.
-     * @param[in]     ay       The number of zeros added to top edge of the input.
-     * @param[in]     upscalex How much to scale the X axis.
-     * @param[in]     upscaley How much to scale the Y axis.
+     * @param[in,out] input              Input tensor. 3 lower dimensions represent a single input, and an optional 4th dimension for batch of inputs. Data types supported: F32.
+     * @param[in]     weights            The 4d weights with dimensions [width, height, OFM, IFM]. Data type supported: Same as @p input.
+     * @param[in]     bias               Optional, ignored if NULL. The biases have one dimension. Data type supported: Same as @p input.
+     * @param[out]    output             Output tensor. The output has the same number of dimensions as the @p input.
+     * @param[in]     info               Contains padding and policies to be used in the deconvolution, this is decribed in @ref PadStrideInfo.
+     * @param[in]     inner_border_right The number of zeros added to right edge of the input.
+     * @param[in]     inner_border_top   The number of zeros added to top edge of the input.
      *
      */
     void configure(ITensor *input, const ITensor *weights, const ITensor *bias, ITensor *output, const PadStrideInfo &info,
-                   unsigned int ax, unsigned int ay, float upscalex, float upscaley);
+                   unsigned int inner_border_right, unsigned int inner_border_top);
 
     // Inherited methods overridden:
     void run() override;
 
 private:
-    MemoryGroup                  _memory_group;
-    NEDeconvolutionLayerUpsample _scale_f;
-    NEDirectConvolutionLayer     _conv_f;
-    Tensor                       _scaled_output;
+    MemoryGroup              _memory_group;
+    NEDirectConvolutionLayer _conv_f;
+    Tensor                   _scaled_output;
+    ITensor                 *_input;
+    PadStrideInfo            _info;
+    std::pair<unsigned int, unsigned int> _inner_border;
 };
 } // arm_compute
 #endif /* __ARM_COMPUTE_NEDECONVOLUTIONLAYER_H__ */
diff --git a/arm_compute/runtime/NEON/functions/NEDeconvolutionLayerUpsample.h b/arm_compute/runtime/NEON/functions/NEDeconvolutionLayerUpsample.h
deleted file mode 100644
index d2ac12a58a..0000000000
--- a/arm_compute/runtime/NEON/functions/NEDeconvolutionLayerUpsample.h
+++ /dev/null
@@ -1,72 +0,0 @@
-/*
- * Copyright (c) 2016, 2017 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef __ARM_COMPUTE_NEDECONVOLUTIONUPSAMPLE_H__
-#define __ARM_COMPUTE_NEDECONVOLUTIONUPSAMPLE_H__
-
-#include "arm_compute/core/NEON/kernels/NEDeconvolutionLayerUpsampleKernel.h"
-#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/runtime/IMemoryManager.h"
-#include "arm_compute/runtime/MemoryGroup.h"
-#include "arm_compute/runtime/Tensor.h"
-
-#include <cstdint>
-#include <memory>
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Basic function to run @ref NEDeconvolutionLayerUpsampleKernel */
-class NEDeconvolutionLayerUpsample : public IFunction
-{
-public:
-    /** Constructor
-     *
-     * Initialize NEDeconvolutionLayerUpsample
-     */
-    NEDeconvolutionLayerUpsample(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
-    /** Initialize the function's source, destination, interpolation type and border_mode.
-     *
-     * @param[in, out] input  Source tensor. Data type supported: F32.
-     * @param[out]     output Destination tensor. Data type supported: F32.
-     * @param[in]      a      Top and right inner border sizes. These rows and columns will be filled with zero.
-     * @param[in]      iz     The number of zeros to be inserted between each input sample
-     * @param[in]      info   Contains padding and policies to be used in the deconvolution, this is decribed in @ref PadStrideInfo.
-     */
-    void configure(ITensor *input, ITensor *output, const std::pair<unsigned int, unsigned int> &a,
-                   const std::pair<unsigned int, unsigned int> &iz, const PadStrideInfo &info);
-
-    // Inherited methods overridden:
-    void run() override;
-
-private:
-    MemoryGroup                        _memory_group;
-    Tensor                             _offsets;
-    NEFillBorderKernel                 _border_handler;
-    NEDeconvolutionLayerUpsampleKernel _upsample;
-};
-} // arm_compute
-#endif /*__ARM_COMPUTE_NEDECONVOLUTIONUPSAMPLE_H__ */
diff --git a/src/core/CL/CLKernelLibrary.cpp b/src/core/CL/CLKernelLibrary.cpp
index de75518a05..352b89baa5 100644
--- a/src/core/CL/CLKernelLibrary.cpp
+++ b/src/core/CL/CLKernelLibrary.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016, 2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -186,6 +186,7 @@ const std::map<std::string, std::string> CLKernelLibrary::_kernel_program_map =
     { "copy_plane", "channel_extract.cl" },
     { "copy_planes_3p", "channel_combine.cl" },
     { "copy_to_keypoint", "fast_corners.cl" },
+    { "deconvolution_upsample", "deconvolution_layer.cl" },
     { "depthwise_convolution_3x3", "depthwise_convolution.cl" },
     { "depthwise_convolution_3x3_quantized", "depthwise_convolution_quantized.cl" },
     { "depthwise_im2col", "depthwise_convolution.cl" },
@@ -419,6 +420,10 @@ const std::map<std::string, std::string> CLKernelLibrary::_program_source_map =
     {
         "convolution_rectangle.cl",
 #include "./cl_kernels/convolution_rectangle.clembed"
+    },
+    {
+        "deconvolution_layer.cl",
+#include "./cl_kernels/deconvolution_layer.clembed"
     },
     {
         "depth_convert.cl",
diff --git a/src/core/CL/cl_kernels/deconvolution_layer.cl b/src/core/CL/cl_kernels/deconvolution_layer.cl
new file mode 100644
index 0000000000..2514ddc8cc
--- /dev/null
+++ b/src/core/CL/cl_kernels/deconvolution_layer.cl
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2017, 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "helpers.h"
+
+/** This function applies upsample on an input image.
+ *
+ * @param[in]  src_ptr                           Pointer to the source image. Supported data types: F32
+ * @param[in]  src_stride_x                      Stride of the source image in X dimension (in bytes)
+ * @param[in]  src_step_x                        src_stride_x * number of elements along X processed per workitem(in bytes)
+ * @param[in]  src_stride_y                      Stride of the source image in Y dimension (in bytes)
+ * @param[in]  src_step_y                        src_stride_y * number of elements along Y processed per workitem(in bytes)
+ * @param[in]  src_offset_first_element_in_bytes The offset of the first element in the source image
+ * @param[out] dst_ptr                           Pointer to the destination image. Supported data types: F32
+ * @param[in]  dst_stride_x                      Stride of the destination image in X dimension (in bytes)
+ * @param[in]  dst_step_x                        dst_stride_x * number of elements along X processed per workitem(in bytes)
+ * @param[in]  dst_stride_y                      Stride of the destination image in Y dimension (in bytes)
+ * @param[in]  dst_step_y                        dst_stride_y * number of elements along Y processed per workitem(in bytes)
+ * @param[in]  dst_offset_first_element_in_bytes The offset of the first element in the destination image
+ */
+__kernel void deconvolution_upsample(
+    IMAGE_DECLARATION(src),
+    IMAGE_DECLARATION(dst))
+{
+    Image src = CONVERT_TO_IMAGE_STRUCT(src);
+    Image dst = CONVERT_TO_IMAGE_STRUCT(dst);
+
+    // Store result
+    *((__global float *)dst.ptr) = *((__global float *)src.ptr);
+}
diff --git a/src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.cpp b/src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.cpp
new file mode 100644
index 0000000000..5c08d5bee2
--- /dev/null
+++ b/src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.cpp
@@ -0,0 +1,117 @@
+/*
+ * Copyright (c) 2017, 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.h"
+
+#include "arm_compute/core/CL/CLHelpers.h"
+#include "arm_compute/core/CL/CLKernelLibrary.h"
+#include "arm_compute/core/CL/ICLTensor.h"
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/Validate.h"
+#include "arm_compute/core/Window.h"
+
+using namespace arm_compute;
+
+CLDeconvolutionLayerUpsampleKernel::CLDeconvolutionLayerUpsampleKernel()
+    : _input(nullptr), _output(nullptr), _inner_border(), _info()
+{
+}
+
+Status CLDeconvolutionLayerUpsampleKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const BorderSize &inner_border,
+                                                    const PadStrideInfo &info)
+{
+    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
+    ARM_COMPUTE_UNUSED(info);
+
+    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32);
+    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
+    ARM_COMPUTE_RETURN_ERROR_ON(output->dimension(0) == 0);
+    ARM_COMPUTE_RETURN_ERROR_ON(output->dimension(1) == 0);
+
+    for(size_t i = 2; i < Coordinates::num_max_dimensions; ++i)
+    {
+        ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(i) != output->dimension(i));
+    }
+
+    ARM_COMPUTE_RETURN_ERROR_ON_MSG(inner_border.right > info.stride().first - 1, "inner_border_right must be smaller that stride_x");
+    ARM_COMPUTE_RETURN_ERROR_ON_MSG(inner_border.top > info.stride().second - 1, "inner_border_top must be smaller that stride_y");
+
+    return Status{};
+}
+
+void CLDeconvolutionLayerUpsampleKernel::configure(const ICLTensor *input, ICLTensor *output, const BorderSize &inner_border,
+                                                   const PadStrideInfo &info)
+{
+    ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
+
+    _input        = input;
+    _output       = output;
+    _inner_border = inner_border;
+    _info         = info;
+
+    // Perform validation step
+    ARM_COMPUTE_ERROR_THROW_ON(CLDeconvolutionLayerUpsampleKernel::validate(input->info(), output->info(), inner_border, info));
+
+    // Create kernel
+    _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("deconvolution_upsample"));
+
+    constexpr unsigned int num_elems_processed_per_iteration = 1;
+
+    // Configure kernel window
+    Window win = calculate_max_window(*output->info(), Steps(num_elems_processed_per_iteration));
+
+    AccessWindowHorizontal output_access(output->info(), 0, 0, num_elems_processed_per_iteration);
+    output_access.set_valid_region(win, ValidRegion(Coordinates(), output->info()->tensor_shape()));
+
+    ICLKernel::configure(win);
+}
+
+void CLDeconvolutionLayerUpsampleKernel::run(const Window &window, cl::CommandQueue &queue)
+{
+    ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
+    ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
+
+    const int out_start_x = _info.pad().first;
+    const int out_end_x   = _output->info()->dimension(0) - _inner_border.right - _info.pad().first + _info.stride().first - 1;
+    const int out_step_x  = _info.stride().first;
+
+    const int out_start_y = _inner_border.top + _info.pad().second;
+    const int out_end_y   = _output->info()->dimension(1) - _info.pad().second + _info.stride().second - 1;
+    const int out_step_y  = _info.stride().second;
+
+    Window slice_out = window.first_slice_window_2D();
+    slice_out.set(Window::DimX, Window::Dimension(out_start_x, out_end_x, out_step_x));
+    slice_out.set(Window::DimY, Window::Dimension(out_start_y, out_end_y, out_step_y));
+
+    Window slice_in = window.first_slice_window_2D();
+
+    do
+    {
+        unsigned int idx = 0;
+        add_2D_tensor_argument(idx, _input, slice_in);
+        add_2D_tensor_argument(idx, _output, slice_out);
+        enqueue(queue, *this, slice_out);
+    }
+    while(window.slide_window_slice_2D(slice_in) && window.slide_window_slice_2D(slice_out));
+}
diff --git a/src/core/NEON/kernels/NEDeconvolutionLayerUpsampleKernel.cpp b/src/core/NEON/kernels/NEDeconvolutionLayerUpsampleKernel.cpp
deleted file mode 100644
index 71db2e9782..0000000000
--- a/src/core/NEON/kernels/NEDeconvolutionLayerUpsampleKernel.cpp
+++ /dev/null
@@ -1,165 +0,0 @@
-/*
- * Copyright (c) 2016, 2017 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/core/NEON/kernels/NEDeconvolutionLayerUpsampleKernel.h"
-
-#include "arm_compute/core/AccessWindowStatic.h"
-#include "arm_compute/core/Coordinates.h"
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/ITensor.h"
-#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/Window.h"
-
-#include <arm_neon.h>
-#include <cstddef>
-#include <cstdint>
-
-using namespace arm_compute;
-
-NEDeconvolutionLayerUpsampleKernel::NEDeconvolutionLayerUpsampleKernel()
-    : _offsets(nullptr), _input(nullptr), _output(nullptr)
-{
-}
-
-BorderSize NEDeconvolutionLayerUpsampleKernel::border_size() const
-{
-    return BorderSize(1);
-}
-
-void NEDeconvolutionLayerUpsampleKernel::configure(const ITensor *input, const ITensor *offsets, ITensor *output)
-{
-    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32);
-    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::F32);
-    ARM_COMPUTE_ERROR_ON(output->info()->dimension(0) == 0);
-    ARM_COMPUTE_ERROR_ON(output->info()->dimension(1) == 0);
-
-    for(size_t i = 2; i < Coordinates::num_max_dimensions; ++i)
-    {
-        ARM_COMPUTE_ERROR_ON(input->info()->dimension(i) != output->info()->dimension(i));
-    }
-
-    _input   = input;
-    _output  = output;
-    _offsets = offsets;
-
-    constexpr unsigned int num_elems_processed_per_iteration = 16;
-    const int              border_offset                     = border_size().left;
-
-    // Configure kernel window
-    Window win = calculate_max_window(*output->info(), Steps(num_elems_processed_per_iteration));
-
-    AccessWindowRectangle  input_access(input->info(), -border_offset, -border_offset, input->info()->dimension(0) + border_offset, input->info()->dimension(1) + border_offset);
-    AccessWindowHorizontal offsets_access(offsets->info(), 0, num_elems_processed_per_iteration);
-    AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration);
-
-    update_window_and_padding(win, input_access, offsets_access, output_access);
-
-    output_access.set_valid_region(win, ValidRegion(Coordinates(), output->info()->tensor_shape()));
-
-    INEKernel::configure(win);
-}
-
-void NEDeconvolutionLayerUpsampleKernel::scale_nearest(const Window &window)
-{
-    const size_t input_stride = _input->info()->strides_in_bytes()[1];
-
-    // Compute the ratio between source height and destination height
-    const auto hr = static_cast<float>(_input->info()->dimension(1)) / static_cast<float>(_output->info()->dimension(1));
-
-    // Don't increment in X and Y direction for the input tensor
-    // A pointer to the start of this plane is needed as base for the precomputed offsets
-    Window win_in(window);
-    win_in.set(Window::DimX, Window::Dimension(0, 0, 0));
-    win_in.set(Window::DimY, Window::Dimension(0, 0, 0));
-
-    Window win_off;
-    win_off.set(Window::DimX, window[Window::DimX]);
-    win_off.set(Window::DimY, window[Window::DimY]);
-
-    for(size_t d = Window::DimZ; d < _offsets->info()->num_dimensions(); ++d)
-    {
-        win_off.set(d, Window::Dimension(0, 0, 0));
-    }
-
-    Iterator in(_input, win_in);
-    Iterator out(_output, window);
-    Iterator offsets(_offsets, win_off);
-
-    switch(_input->info()->data_type())
-    {
-        case DataType::F32:
-        {
-            float32x4x4_t tmp =
-            {
-                {
-                    vdupq_n_f32(0),
-                    vdupq_n_f32(0)
-                }
-            };
-            execute_window_loop(window, [&](const Coordinates & id)
-            {
-                const auto offsets_ptr = reinterpret_cast<const int32_t *>(offsets.ptr());
-
-                const size_t in_yi      = (id.y() + 0.5f) * hr;
-                const size_t offset_row = in_yi * input_stride;
-
-                tmp.val[0] = vsetq_lane_f32(*reinterpret_cast<const float *>(in.ptr() + offsets_ptr[0] + offset_row), tmp.val[0], 0);
-                tmp.val[0] = vsetq_lane_f32(*reinterpret_cast<const float *>(in.ptr() + offsets_ptr[4] + offset_row), tmp.val[0], 1);
-                tmp.val[0] = vsetq_lane_f32(*reinterpret_cast<const float *>(in.ptr() + offsets_ptr[8] + offset_row), tmp.val[0], 2);
-                tmp.val[0] = vsetq_lane_f32(*reinterpret_cast<const float *>(in.ptr() + offsets_ptr[12] + offset_row), tmp.val[0], 3);
-
-                tmp.val[1] = vsetq_lane_f32(*reinterpret_cast<const float *>(in.ptr() + offsets_ptr[1] + offset_row), tmp.val[1], 0);
-                tmp.val[1] = vsetq_lane_f32(*reinterpret_cast<const float *>(in.ptr() + offsets_ptr[5] + offset_row), tmp.val[1], 1);
-                tmp.val[1] = vsetq_lane_f32(*reinterpret_cast<const float *>(in.ptr() + offsets_ptr[9] + offset_row), tmp.val[1], 2);
-                tmp.val[1] = vsetq_lane_f32(*reinterpret_cast<const float *>(in.ptr() + offsets_ptr[13] + offset_row), tmp.val[1], 3);
-
-                tmp.val[2] = vsetq_lane_f32(*reinterpret_cast<const float *>(in.ptr() + offsets_ptr[2] + offset_row), tmp.val[2], 0);
-                tmp.val[2] = vsetq_lane_f32(*reinterpret_cast<const float *>(in.ptr() + offsets_ptr[6] + offset_row), tmp.val[2], 1);
-                tmp.val[2] = vsetq_lane_f32(*reinterpret_cast<const float *>(in.ptr() + offsets_ptr[10] + offset_row), tmp.val[2], 2);
-                tmp.val[2] = vsetq_lane_f32(*reinterpret_cast<const float *>(in.ptr() + offsets_ptr[14] + offset_row), tmp.val[2], 3);
-
-                tmp.val[3] = vsetq_lane_f32(*reinterpret_cast<const float *>(in.ptr() + offsets_ptr[3] + offset_row), tmp.val[3], 0);
-                tmp.val[3] = vsetq_lane_f32(*reinterpret_cast<const float *>(in.ptr() + offsets_ptr[7] + offset_row), tmp.val[3], 1);
-                tmp.val[3] = vsetq_lane_f32(*reinterpret_cast<const float *>(in.ptr() + offsets_ptr[11] + offset_row), tmp.val[3], 2);
-                tmp.val[3] = vsetq_lane_f32(*reinterpret_cast<const float *>(in.ptr() + offsets_ptr[15] + offset_row), tmp.val[3], 3);
-
-                vst4q_f32(reinterpret_cast<float *>(out.ptr()), tmp);
-            },
-            in, offsets, out);
-            break;
-        }
-        default:
-            ARM_COMPUTE_ERROR("Not supported");
-            break;
-    }
-}
-
-void NEDeconvolutionLayerUpsampleKernel::run(const Window &window, const ThreadInfo &info)
-{
-    ARM_COMPUTE_UNUSED(info);
-    ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
-    ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);
-    scale_nearest(window);
-}
diff --git a/src/core/Utils.cpp b/src/core/Utils.cpp
index 76d0b0f059..a8249c4840 100644
--- a/src/core/Utils.cpp
+++ b/src/core/Utils.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016, 2017, 2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -261,29 +261,17 @@ TensorShape arm_compute::deconvolution_output_shape(const std::pair<unsigned int
 
 const std::pair<unsigned int, unsigned int> arm_compute::deconvolution_output_dimensions(
     unsigned int in_width, unsigned int in_height, unsigned int kernel_width, unsigned int kernel_height, unsigned int padx, unsigned int pady,
-    unsigned int ax, unsigned int ay, float upscalex, float upscaley, DimensionRoundingType round)
+    unsigned int inner_border_right, unsigned int inner_border_top, unsigned int stride_x, unsigned int stride_y)
 {
     ARM_COMPUTE_ERROR_ON(in_width < 1 || in_height < 1);
-    ARM_COMPUTE_ERROR_ON(((in_width - 1) * upscalex + kernel_width + ax) < 2.f * padx);
-    ARM_COMPUTE_ERROR_ON(((in_height - 1) * upscaley + kernel_height + ay) < 2.f * pady);
-    const float fw = (in_width - 1) * upscalex - 2.f * padx + kernel_width + ax;
-    const float fh = (in_height - 1) * upscaley - 2.f * pady + kernel_height + ay;
-    int         w  = 0;
-    int         h  = 0;
-    switch(round)
-    {
-        case DimensionRoundingType::FLOOR:
-            w = std::floor(fw);
-            h = std::floor(fh);
-            break;
-        case DimensionRoundingType::CEIL:
-            w = std::ceil(fw);
-            h = std::ceil(fh);
-            break;
-        default:
-            ARM_COMPUTE_ERROR("Not supported");
-            break;
-    }
+    ARM_COMPUTE_ERROR_ON(((in_width - 1) * stride_x + kernel_width + inner_border_right) < 2 * padx);
+    ARM_COMPUTE_ERROR_ON(((in_height - 1) * stride_y + kernel_height + inner_border_top) < 2 * pady);
+    const int padx_deconv = (kernel_width - padx - 1);
+    const int pady_deconv = (kernel_height - pady - 1);
+    ARM_COMPUTE_ERROR_ON(padx_deconv < 0);
+    ARM_COMPUTE_ERROR_ON(pady_deconv < 0);
+    const int w = stride_x * (in_width - 1) + kernel_width + inner_border_right - 2 * padx_deconv;
+    const int h = stride_y * (in_height - 1) + kernel_height + inner_border_top - 2 * pady_deconv;
     return std::make_pair<unsigned int, unsigned int>(w, h);
 }
 
diff --git a/src/runtime/CL/functions/CLDeconvolutionLayer.cpp b/src/runtime/CL/functions/CLDeconvolutionLayer.cpp
new file mode 100644
index 0000000000..1c55722344
--- /dev/null
+++ b/src/runtime/CL/functions/CLDeconvolutionLayer.cpp
@@ -0,0 +1,132 @@
+/*
+ * Copyright (c) 2017, 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/CL/functions/CLDeconvolutionLayer.h"
+
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/Utils.h"
+#include "arm_compute/core/Validate.h"
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+
+#include <memory>
+#include <tuple>
+
+using namespace arm_compute;
+using namespace arm_compute::misc::shape_calculator;
+
+CLDeconvolutionLayer::CLDeconvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager) // NOLINT
+    : _memory_group(std::move(memory_manager)),
+      _scale_f(),
+      _conv_f(),
+      _scaled_output()
+{
+}
+
+Status CLDeconvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *bias, ITensorInfo *output, const PadStrideInfo &info,
+                                      unsigned int inner_border_right, unsigned int inner_border_top)
+{
+    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
+    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32);
+    ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(0) != weights->dimension(1));
+    ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(0) < 1);
+
+    const unsigned int stride_x = info.stride().first;
+    const unsigned int stride_y = info.stride().second;
+
+    ARM_COMPUTE_RETURN_ERROR_ON_MSG(inner_border_right > stride_x - 1, "inner_border_right must be smaller than stride_x");
+    ARM_COMPUTE_RETURN_ERROR_ON_MSG(inner_border_top > stride_y - 1, "inner_border_top must be smaller than stride_y");
+
+    auto out_dims = deconvolution_output_dimensions(input->dimension(0), input->dimension(1), weights->dimension(0), weights->dimension(1),
+                                                    info.pad().first, info.pad().second, inner_border_right, inner_border_top, stride_x, stride_y);
+
+    const TensorShape output_shape = deconvolution_output_shape(out_dims, input->tensor_shape(), weights->tensor_shape());
+
+    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output, weights, bias);
+    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT(input, output, weights, bias);
+
+    if(bias != nullptr)
+    {
+        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, bias);
+        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT(input, bias);
+    }
+
+    ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->dimension(Window::DimX) != output_shape.x(), "Output's width is invalid.");
+    ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->dimension(Window::DimY) != output_shape.y(), "Output's height is invalid.");
+    ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->dimension(Window::DimZ) != output_shape.z(), "Output's depth is invalid.");
+
+    TensorInfo scale_out_info(input->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(compute_deconvolution_shape(*input, stride_x, stride_y, inner_border_right, inner_border_top,
+                                                                                                      info)));
+    const PadStrideInfo conv_info(1, 1, 0, 0, 0, 0, DimensionRoundingType::CEIL);
+
+    ARM_COMPUTE_RETURN_ON_ERROR(CLDeconvolutionLayerUpsample::validate(input, &scale_out_info, BorderSize(inner_border_right, inner_border_top), info));
+    ARM_COMPUTE_RETURN_ON_ERROR(CLDirectConvolutionLayer::validate(&scale_out_info, weights, bias, output, conv_info));
+
+    return Status{};
+}
+
+void CLDeconvolutionLayer::configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *bias, ICLTensor *output, const PadStrideInfo &info,
+                                     unsigned int inner_border_right, unsigned int inner_border_top)
+{
+    ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
+
+    const unsigned int stride_x = info.stride().first;
+    const unsigned int stride_y = info.stride().second;
+
+    auto out_dims = deconvolution_output_dimensions(input->info()->dimension(0), input->info()->dimension(1), weights->info()->dimension(0), weights->info()->dimension(1),
+                                                    info.pad().first, info.pad().second, inner_border_top, inner_border_right, stride_x, stride_y);
+
+    const TensorShape output_shape = deconvolution_output_shape(out_dims, input->info()->tensor_shape(), weights->info()->tensor_shape());
+
+    // Output auto initialization if not yet initialized
+    auto_init_if_empty(*output->info(), output_shape, 1, input->info()->data_type(), input->info()->fixed_point_position());
+
+    // Perform validation step
+    ARM_COMPUTE_ERROR_THROW_ON(CLDeconvolutionLayer::validate(input->info(), weights->info(), bias == nullptr ? nullptr : bias->info(), output->info(), info, inner_border_right, inner_border_top));
+
+    _memory_group.manage(&_scaled_output);
+
+    // configure scale function
+    // Init and allocate intermmidiate tensor for output, same size as input but the first two axis are the same as the output tensor
+    TensorShape        scale_out_shape(input->info()->tensor_shape());
+    const unsigned int out_x = input->info()->dimension(0) + (input->info()->dimension(0) - 1) * (stride_x - 1) + inner_border_right + 2 * info.pad().first;
+    const unsigned int out_y = input->info()->dimension(1) + (input->info()->dimension(1) - 1) * (stride_y - 1) + inner_border_top + 2 * info.pad().second;
+    scale_out_shape.set(0, out_x);
+    scale_out_shape.set(1, out_y);
+    TensorInfo scale_out_info(scale_out_shape, 1, input->info()->data_type(), input->info()->fixed_point_position());
+    _scaled_output.allocator()->init(scale_out_info);
+
+    _scale_f.configure(input, &_scaled_output, BorderSize(inner_border_top, inner_border_right), info);
+
+    // setup the function to convolve the upscaled output
+    const PadStrideInfo conv_info(1, 1, 0, 0, 0, 0, DimensionRoundingType::CEIL);
+    _conv_f.configure(&_scaled_output, weights, bias, output, conv_info);
+    _scaled_output.allocator()->allocate();
+}
+
+void CLDeconvolutionLayer::run()
+{
+    _memory_group.acquire();
+    _scale_f.run();
+    _conv_f.run();
+    _memory_group.release();
+}
diff --git a/src/runtime/CL/functions/CLDeconvolutionLayerUpsample.cpp b/src/runtime/CL/functions/CLDeconvolutionLayerUpsample.cpp
new file mode 100644
index 0000000000..13a24f8ba4
--- /dev/null
+++ b/src/runtime/CL/functions/CLDeconvolutionLayerUpsample.cpp
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2017, 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/CL/functions/CLDeconvolutionLayerUpsample.h"
+
+#include "arm_compute/core/CL/OpenCL.h"
+#include "arm_compute/core/Utils.h"
+#include "arm_compute/runtime/CL/CLScheduler.h"
+
+#include <cmath>
+#include <memory>
+#include <tuple>
+
+using namespace arm_compute;
+
+CLDeconvolutionLayerUpsample::CLDeconvolutionLayerUpsample() // NOLINT
+    : _upsample(),
+      _output(nullptr)
+{
+}
+
+Status CLDeconvolutionLayerUpsample::validate(const ITensorInfo *input, const ITensorInfo *output, const BorderSize &inner_border,
+                                              const PadStrideInfo &info)
+{
+    return CLDeconvolutionLayerUpsampleKernel::validate(input, output, inner_border, info);
+}
+
+void CLDeconvolutionLayerUpsample::configure(ICLTensor *input, ICLTensor *output, const BorderSize &inner_border,
+                                             const PadStrideInfo &info)
+{
+    ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
+
+    _output = output;
+    _upsample.configure(input, _output, inner_border, info);
+}
+
+void CLDeconvolutionLayerUpsample::run()
+{
+    _output->map(CLScheduler::get().queue(), true);
+    memset(_output->buffer(), 0, _output->info()->total_size());
+    _output->unmap(CLScheduler::get().queue());
+
+    CLScheduler::get().enqueue(_upsample, false);
+}
diff --git a/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp b/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp
index 7b4e77b296..c4bca11d14 100644
--- a/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp
+++ b/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017, 2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -24,38 +24,41 @@
 #include "arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h"
 
 #include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/PixelValue.h"
 #include "arm_compute/core/Utils.h"
 #include "arm_compute/core/Validate.h"
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
 
 using namespace arm_compute;
+using namespace arm_compute::misc::shape_calculator;
 
 NEDeconvolutionLayer::NEDeconvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager) // NOLINT
     : _memory_group(std::move(memory_manager)),
-      _scale_f(),
       _conv_f(),
-      _scaled_output()
+      _scaled_output(),
+      _input(nullptr),
+      _info(),
+      _inner_border()
 {
 }
 
 void NEDeconvolutionLayer::configure(ITensor *input, const ITensor *weights, const ITensor *bias, ITensor *output, const PadStrideInfo &info,
-                                     unsigned int ax, unsigned int ay, float upscalex, float upscaley)
+                                     unsigned int inner_border_right, unsigned int inner_border_top)
 {
     ARM_COMPUTE_ERROR_ON_NULLPTR(output);
     ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32);
     ARM_COMPUTE_ERROR_ON(weights->info()->dimension(0) != weights->info()->dimension(1));
-    ARM_COMPUTE_ERROR_ON(weights->info()->dimension(0) < 1);
+    ARM_COMPUTE_ERROR_ON(weights->info()->dimension(0) != 1 && weights->info()->dimension(0) != 3 && weights->info()->dimension(0) != 5);
 
-    auto out_dims = deconvolution_output_dimensions(input->info()->dimension(0), input->info()->dimension(1), weights->info()->dimension(0), weights->info()->dimension(1),
-                                                    info.pad().first, info.pad().second, ax, ay, upscalex, upscaley, info.round());
+    _input        = input;
+    _info         = info;
+    _inner_border = std::make_pair(inner_border_right, inner_border_top);
 
-    const TensorShape output_shape = deconvolution_output_shape(out_dims, input->info()->tensor_shape(), weights->info()->tensor_shape());
-
-    // Output auto initialization if not yet initialized
-    auto_init_if_empty(*output->info(), output_shape, 1, input->info()->data_type(), input->info()->fixed_point_position());
+    const unsigned int stride_x = info.stride().first;
+    const unsigned int stride_y = info.stride().second;
+    auto               out_dims = deconvolution_output_dimensions(input->info()->dimension(0), input->info()->dimension(1), weights->info()->dimension(0), weights->info()->dimension(1),
+                                                                  info.pad().first, info.pad().second, inner_border_right, inner_border_top, stride_x, stride_y);
 
-    ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output, weights, bias);
-    ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT(input, output, weights, bias);
+    const TensorShape output_shape = deconvolution_output_shape(out_dims, input->info()->tensor_shape(), weights->info()->tensor_shape());
 
     ARM_COMPUTE_ERROR_ON_MSG(output->info()->dimension(Window::DimX) != output_shape.x(), "Output's width is invalid.");
     ARM_COMPUTE_ERROR_ON_MSG(output->info()->dimension(Window::DimY) != output_shape.y(), "Output's height is invalid.");
@@ -64,51 +67,51 @@ void NEDeconvolutionLayer::configure(ITensor *input, const ITensor *weights, con
     _memory_group.manage(&_scaled_output);
 
     // configure scale function
-    //Init and allocate intermmidiate tensor for output, same size as input but the first two axis are the same as the output tensor
-    TensorShape scale_out_shape(input->info()->tensor_shape());
-    scale_out_shape.set(0, output->info()->dimension(0));
-    scale_out_shape.set(1, output->info()->dimension(1));
-    TensorInfo scale_out_info(scale_out_shape, 1, input->info()->data_type(), input->info()->fixed_point_position());
+    // Init and allocate intermmidiate tensor for output, same size as input but the first two axis are the same as the output tensor
+    const TensorInfo scale_out_info(compute_deconvolution_shape(*input->info(), stride_x, stride_y, inner_border_right, inner_border_top, info), 1, input->info()->data_type(),
+                                    input->info()->fixed_point_position());
     _scaled_output.allocator()->init(scale_out_info);
-    const unsigned int kernel_size = weights->info()->dimension(0);
-    // Padding for the upsampled image is calculated with the equiation: p' = k - p - 1, where k is kernel size and p is the input padding
-    ARM_COMPUTE_ERROR_ON(info.pad().first > (kernel_size - 1));
-    const unsigned int  tr_px     = kernel_size - info.pad().first - 1;
-    const unsigned int  tr_py     = kernel_size - info.pad().second - 1;
-    const unsigned int  tr_stride = 1;
-    const PadStrideInfo transposed_info(tr_stride, tr_stride, tr_px, tr_py);
-    _scale_f.configure(input, &_scaled_output, std::make_pair(ax, ay), std::make_pair(info.stride().first - 1u, info.stride().second - 1u), transposed_info);
+
     // setup the function to convolve the upscaled output
-    switch(kernel_size)
-    {
-        case 1:
-        {
-            _conv_f.configure(&_scaled_output, weights, bias, output, PadStrideInfo(1, 1, 0, 0, DimensionRoundingType::CEIL));
-            break;
-        }
-        case 3:
-        {
-            _conv_f.configure(&_scaled_output, weights, bias, output, PadStrideInfo(1, 1, 1, 1, DimensionRoundingType::CEIL));
-            break;
-        }
-        case 5:
-        {
-            _conv_f.configure(&_scaled_output, weights, bias, output, PadStrideInfo(1, 1, 2, 2, DimensionRoundingType::CEIL));
-            break;
-        }
-        default:
-        {
-            ARM_COMPUTE_ERROR("Not supported");
-            break;
-        }
-    }
+    const PadStrideInfo conv_info(1, 1, 0, 0, 0, 0, DimensionRoundingType::CEIL);
+    _conv_f.configure(&_scaled_output, weights, bias, output, conv_info);
     _scaled_output.allocator()->allocate();
 }
 
 void NEDeconvolutionLayer::run()
 {
     _memory_group.acquire();
-    _scale_f.run();
+
+    // Initialize _scaled_output buffer
+    const int width_in      = _input->info()->dimension(0);
+    const int height_in     = _input->info()->dimension(1);
+    const int width_scaled  = _scaled_output.info()->dimension(0);
+    const int height_scaled = _scaled_output.info()->dimension(1);
+    const int num_2d_slices = _input->info()->tensor_shape().total_size() / (width_in * height_in);
+    const int stride_x      = _info.stride().first;
+    const int stride_y      = _info.stride().second;
+
+    std::fill_n(reinterpret_cast<float *>(_scaled_output.buffer()), _scaled_output.info()->tensor_shape().total_size(), 0.f);
+
+    // scaled_output is the input for the forward convolution. We copy the input elements to scaled_output
+    // and insert rows and columns with zeroes depending on the stride values.
+    for(int slice = 0; slice < num_2d_slices; ++slice)
+    {
+        const int start_x = _info.pad().first;
+        const int start_y = _inner_border.second + _info.pad().second;
+        const int end_y   = height_scaled - _info.pad().second;
+        const int end_x   = width_scaled - _inner_border.first - _info.pad().first;
+
+        for(int yi = start_y, in_y = 0; yi < end_y; yi += stride_y, in_y++)
+        {
+            for(int xi = start_x, in_x = 0; xi < end_x; xi += stride_x, in_x++)
+            {
+                const auto in = *(reinterpret_cast<float *>(_input->buffer() + _input->info()->offset_element_in_bytes(Coordinates(in_x, in_y, slice))));
+                *(reinterpret_cast<float *>(_scaled_output.buffer() + _scaled_output.info()->offset_element_in_bytes(Coordinates(xi, yi, slice)))) = in;
+            }
+        }
+    }
+
     _conv_f.run();
     _memory_group.release();
 }
diff --git a/src/runtime/NEON/functions/NEDeconvolutionLayerUpsample.cpp b/src/runtime/NEON/functions/NEDeconvolutionLayerUpsample.cpp
deleted file mode 100644
index 63f17bcb5a..0000000000
--- a/src/runtime/NEON/functions/NEDeconvolutionLayerUpsample.cpp
+++ /dev/null
@@ -1,121 +0,0 @@
-/*
- * Copyright (c) 2016, 2017 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/runtime/NEON/functions/NEDeconvolutionLayerUpsample.h"
-
-#include "arm_compute/core/Coordinates.h"
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/ITensor.h"
-#include "arm_compute/core/NEON/kernels/NEDeconvolutionLayerUpsampleKernel.h"
-#include "arm_compute/core/PixelValue.h"
-#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Window.h"
-#include "arm_compute/runtime/NEON/NEScheduler.h"
-#include "arm_compute/runtime/TensorAllocator.h"
-#include "support/ToolchainSupport.h"
-
-#include <cmath>
-#include <cstddef>
-#include <utility>
-
-using namespace arm_compute;
-
-namespace
-{
-inline void precompute_offsets(ITensor *offsets, float wr, size_t input_element_size, const std::pair<unsigned int, unsigned int> &a,
-                               const std::pair<unsigned int, unsigned int> &iz, const PadStrideInfo &info)
-{
-    ARM_COMPUTE_ERROR_ON(nullptr == offsets);
-    Window    win;
-    const int padx          = info.pad().first;
-    const int pady          = info.pad().second;
-    const int ax            = a.first;
-    const int ay            = a.second;
-    const int offset_width  = offsets->info()->dimension(0);
-    const int offset_height = offsets->info()->dimension(1);
-    // The values of ax and ay denote the number of ZEROS to be added on the top and right inner border of the image.
-    // Step value along the XY axis will depend on the number of zeros to be inserted between samples (number of zeros + 1).
-    // Pre-compute the X offset, Y's stride is unknown at this point so we can't precompute Y's offsets
-    for(int yi = ay; yi < (offset_height - pady); yi += (1 + iz.second))
-    {
-        for(int xi = padx; xi < (offset_width - ax); xi += (1 + iz.first))
-        {
-            int         *ptr                  = reinterpret_cast<int *>(offsets->ptr_to_element(Coordinates(xi, yi)));
-            const size_t in_xi                = (xi + 0.5f) * wr;
-            *reinterpret_cast<int32_t *>(ptr) = in_xi * input_element_size;
-        }
-    }
-}
-} // namespace
-
-NEDeconvolutionLayerUpsample::NEDeconvolutionLayerUpsample(std::shared_ptr<IMemoryManager> memory_manager) // NOLINT
-    : _memory_group(std::move(memory_manager)),
-      _offsets(),
-      _border_handler(),
-      _upsample()
-{
-}
-
-void NEDeconvolutionLayerUpsample::configure(ITensor *input, ITensor *output, const std::pair<unsigned int, unsigned int> &a,
-                                             const std::pair<unsigned int, unsigned int> &iz, const PadStrideInfo &info)
-{
-    ARM_COMPUTE_ERROR_ON(nullptr == input);
-    ARM_COMPUTE_ERROR_ON(nullptr == output);
-
-    for(size_t i = 2; i < Coordinates::num_max_dimensions; ++i)
-    {
-        ARM_COMPUTE_ERROR_ON(input->info()->dimension(i) != output->info()->dimension(i));
-    }
-
-    // Get the tensor shape
-    const TensorShape shape(output->info()->dimension(0), output->info()->dimension(1));
-
-    // Compute the ratio between source width/height and destination width/height
-    const auto wr = static_cast<float>(input->info()->dimension(0)) / static_cast<float>(output->info()->dimension(0));
-    const auto hr = static_cast<float>(input->info()->dimension(1)) / static_cast<float>(output->info()->dimension(1));
-    ARM_COMPUTE_UNUSED(hr);
-    // Get the element size of the input image
-    const size_t input_element_size = input->info()->element_size();
-
-    TensorInfo tensor_info_offsets(shape, Format::S32);
-    _offsets.allocator()->init(tensor_info_offsets);
-
-    _upsample.configure(input, &_offsets, output);
-
-    // Allocate once the configure methods have been called
-    _offsets.allocator()->allocate();
-    // Pre-compute offsets for nearest interpolation
-    std::fill_n(reinterpret_cast<int32_t *>(_offsets.buffer()), _offsets.info()->total_size() / sizeof(int32_t), -1 * input_element_size);
-    precompute_offsets(&_offsets, wr, input_element_size, a, iz, info);
-
-    _border_handler.configure(input, _upsample.border_size(), BorderMode::CONSTANT, PixelValue(0));
-}
-
-void NEDeconvolutionLayerUpsample::run()
-{
-    NEScheduler::get().schedule(&_border_handler, Window::DimZ);
-    _memory_group.acquire();
-    NEScheduler::get().schedule(&_upsample, Window::DimY);
-    _memory_group.release();
-}
diff --git a/tests/datasets/ShapeDatasets.h b/tests/datasets/ShapeDatasets.h
index 58fba07bf8..a5e03c737f 100644
--- a/tests/datasets/ShapeDatasets.h
+++ b/tests/datasets/ShapeDatasets.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017, 2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -239,7 +239,7 @@ public:
     SmallDeconvolutionShapes()
         : ShapeDataset("InputShape",
     {
-        TensorShape{ 2U, 3U, 3U, 2U },
+        TensorShape{ 4U, 3U, 3U, 2U },
                      TensorShape{ 5U, 5U, 3U },
                      TensorShape{ 11U, 13U, 4U, 3U }
     })
diff --git a/tests/validation/CL/DeconvolutionLayer.cpp b/tests/validation/CL/DeconvolutionLayer.cpp
new file mode 100644
index 0000000000..59e85537e5
--- /dev/null
+++ b/tests/validation/CL/DeconvolutionLayer.cpp
@@ -0,0 +1,192 @@
+/*
+ * Copyright (c) 2017, 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/CL/CLTensorAllocator.h"
+#include "arm_compute/runtime/CL/functions/CLDeconvolutionLayer.h"
+#include "tests/CL/CLAccessor.h"
+#include "tests/PaddingCalculator.h"
+#include "tests/datasets/ShapeDatasets.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/fixtures/DeconvolutionLayerFixture.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace
+{
+constexpr AbsoluteTolerance<float> tolerance_fp32(0.001f); /**< Tolerance for floating point tests */
+
+const auto data3x3 = datasets::SmallDeconvolutionShapes() * framework::dataset::make("StrideX", 1, 4) * framework::dataset::make("StrideY", 1, 4) * framework::dataset::make("PadX", 0, 2)
+                     * framework::dataset::make("PadY", 0, 2) * framework::dataset::make("ax", 0) * framework::dataset::make("ay", 0) * framework::dataset::make("NumKernels", { 1, 3 });
+
+const auto data1x1 = datasets::SmallDeconvolutionShapes() * framework::dataset::make("StrideX", 1, 4) * framework::dataset::make("StrideY", 1, 4) * framework::dataset::make("PadX", 0, 1)
+                     * framework::dataset::make("PadY", 0, 1) * framework::dataset::make("ax", 0) * framework::dataset::make("ay", 0) * framework::dataset::make("NumKernels", { 1, 3 });
+
+} // namespace
+
+TEST_SUITE(CL)
+TEST_SUITE(DeconvolutionLayer)
+
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, (combine(datasets::SmallDeconvolutionShapes(), framework::dataset::make("DataType", DataType::F32))),
+               input_shape, data_type)
+{
+    // Create shapes
+    const unsigned int kernel_size_x = 3;
+    const unsigned int kernel_size_y = 3;
+    const unsigned int num_kernels   = 1;
+    const TensorShape  weights_shape(kernel_size_x, kernel_size_y, input_shape.z(), num_kernels);
+    const TensorShape  bias_shape(num_kernels);
+    auto               out_dim      = deconvolution_output_dimensions(input_shape.x(), input_shape.y(), kernel_size_x, kernel_size_y, 1, 1, 0, 0, 1, 1);
+    TensorShape        output_shape = deconvolution_output_shape(out_dim, input_shape, weights_shape);
+
+    // Create tensors
+    CLTensor src     = create_tensor<CLTensor>(input_shape, data_type, 1);
+    CLTensor weights = create_tensor<CLTensor>(weights_shape, data_type, 1);
+    CLTensor bias    = create_tensor<CLTensor>(bias_shape, data_type, 1);
+    CLTensor dst     = create_tensor<CLTensor>(output_shape, data_type, 1);
+
+    ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(weights.info()->is_resizable(), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(bias.info()->is_resizable(), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+    // Create and configure function
+    CLDeconvolutionLayer deconv;
+    deconv.configure(&src, &weights, &bias, &dst, PadStrideInfo(1, 1, 1, 1, DimensionRoundingType::CEIL), 0, 0);
+
+    // Validate valid region
+    const ValidRegion src_valid_region     = shape_to_valid_region(input_shape);
+    const ValidRegion weights_valid_region = shape_to_valid_region(weights_shape);
+    const ValidRegion bias_valid_region    = shape_to_valid_region(bias_shape);
+    const ValidRegion dst_valid_region     = shape_to_valid_region(output_shape);
+
+    validate(src.info()->valid_region(), src_valid_region);
+    validate(weights.info()->valid_region(), weights_valid_region);
+    validate(bias.info()->valid_region(), bias_valid_region);
+    validate(dst.info()->valid_region(), dst_valid_region);
+}
+
+// *INDENT-OFF*
+// clang-format off
+DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip(zip(
+    framework::dataset::make("InputInfo", { TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0),   // Mismatching data type
+                                            TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0),   // Invalid weights shape
+                                            TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::QS8, 4),   // Non supported data type
+                                            TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 11),  // Invalid bias shape
+                                            TensorInfo(TensorShape(13U, 11U, 4U, 3U), 1, DataType::F32, 0), // Window shrink
+                                            TensorInfo(TensorShape(32U, 16U, 2U), 1, DataType::F32, 0),
+                                          }),
+    framework::dataset::make("WeightsInfo", { TensorInfo(TensorShape(3U, 3U, 2U, 2U), 1, DataType::F16, 0),
+                                            TensorInfo(TensorShape(3U, 3U, 2U, 4U), 1, DataType::F32, 0),
+                                            TensorInfo(TensorShape(3U, 3U, 2U, 2U), 1, DataType::QS8, 5),
+                                            TensorInfo(TensorShape(3U, 2U, 2U, 2U), 1, DataType::F32, 11),
+                                            TensorInfo(TensorShape(3U, 3U, 4U), 1, DataType::F32, 0),
+                                              TensorInfo(TensorShape(1U, 1U, 2U, 4U), 1, DataType::F32, 0),
+                                          })),
+    framework::dataset::make("BiasInfo",  { TensorInfo(TensorShape(1U), 1, DataType::F16, 0),
+                                            TensorInfo(TensorShape(1U), 1, DataType::F32, 0),
+                                            TensorInfo(TensorShape(1U), 1, DataType::F32, 5),
+                                            TensorInfo(TensorShape(25U, 11U), 1, DataType::F32, 11),
+                                            TensorInfo(TensorShape(1U), 1, DataType::F32, 0),
+                                            TensorInfo(TensorShape(4U), 1, DataType::F32, 0),
+                                          })),
+    framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F16, 0),
+                                            TensorInfo(TensorShape(25U, 10U, 2U), 1, DataType::F32, 0),
+                                            TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F32, 5),
+                                            TensorInfo(TensorShape(13U, 13U, 2U), 1, DataType::F32, 0),
+                                            TensorInfo(TensorShape(11U, 9U, 1U, 3U), 1, DataType::F32, 0),
+                                            TensorInfo(TensorShape(32U, 16U, 4U), 1, DataType::F32, 0),
+                                          })),
+    framework::dataset::make("PadStrideInfo", { PadStrideInfo(1, 1, 0, 0),
+                                                PadStrideInfo(1, 1, 0, 0),
+                                                PadStrideInfo(1, 1, 0, 0),
+                                                PadStrideInfo(1, 1, 0, 0),
+                                                PadStrideInfo(1, 1, 1, 1),
+                                                PadStrideInfo(1, 1, 0, 0),
+                                           })),
+    framework::dataset::make("ax",          {   1U,
+                                                1U,
+                                                1U,
+                                                1U,
+                                                0U,
+                                                0U,
+                                            })),
+   framework::dataset::make("ay",           {   1U,
+                                                1U,
+                                                1U,
+                                                1U,
+                                                0U,
+                                                0U,
+                                            })),
+    framework::dataset::make("Expected", { false, false, false, false, false, true })),
+    input_info, weights_info, bias_info, output_info, pad_info, ax, ay, expected)
+{
+    bool is_valid = bool(CLDeconvolutionLayer::validate(&input_info.clone()->set_is_resizable(false), &weights_info.clone()->set_is_resizable(false), &bias_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), pad_info, ax, ay));
+    ARM_COMPUTE_EXPECT(is_valid == expected, framework::LogLevel::ERRORS);
+}
+// clang-format on
+// *INDENT-ON*
+
+template <typename T>
+using CLDeconvolutionLayerFixture3x3 = DeconvolutionValidationFixture<CLTensor, CLAccessor, CLDeconvolutionLayer, T, 3, 3>;
+
+template <typename T>
+using CLDeconvolutionLayerFixture1x1 = DeconvolutionValidationFixture<CLTensor, CLAccessor, CLDeconvolutionLayer, T, 1, 1>;
+
+TEST_SUITE(Float)
+
+TEST_SUITE(FP32)
+TEST_SUITE(W3x3)
+
+FIXTURE_DATA_TEST_CASE(Run, CLDeconvolutionLayerFixture3x3<float>, framework::DatasetMode::ALL, combine(data3x3, framework::dataset::make("DataType", DataType::F32)))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_fp32);
+}
+TEST_SUITE_END()
+
+TEST_SUITE(W1x1)
+FIXTURE_DATA_TEST_CASE(Run, CLDeconvolutionLayerFixture1x1<float>, framework::DatasetMode::ALL, combine(data1x1, framework::dataset::make("DataType", DataType::F32)))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_fp32);
+}
+TEST_SUITE_END()
+
+TEST_SUITE_END()
+TEST_SUITE_END()
+
+TEST_SUITE_END()
+TEST_SUITE_END()
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/NEON/DeconvolutionLayer.cpp b/tests/validation/NEON/DeconvolutionLayer.cpp
index 751a96558a..9573784d86 100644
--- a/tests/validation/NEON/DeconvolutionLayer.cpp
+++ b/tests/validation/NEON/DeconvolutionLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017, 2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -44,15 +44,11 @@ namespace
 {
 constexpr AbsoluteTolerance<float> tolerance_fp32(0.001f); /**< Tolerance for floating point tests */
 
-const auto data3x3 = datasets::SmallDeconvolutionShapes() * framework::dataset::make("StrideX", 1, 4) * framework::dataset::make("StrideY", 1, 4) * framework::dataset::make("PadX", 0,
-                     2)
-                     * framework::dataset::make("PadY", 0, 2) * framework::dataset::make("ax", 1, 3) * framework::dataset::make("ay", 1, 3) * framework::dataset::make("NumKernels", { 1, 3 })
-                     *framework::dataset::make("ux", 1, 4) *framework::dataset::make("uy", 1, 4);
+const auto data3x3 = datasets::SmallDeconvolutionShapes() * framework::dataset::make("StrideX", 1, 4) * framework::dataset::make("StrideY", 1, 4) * framework::dataset::make("PadX", 0, 2)
+                     * framework::dataset::make("PadY", 0, 2) * framework::dataset::make("ax", 0) * framework::dataset::make("ay", 0) * framework::dataset::make("NumKernels", { 1, 3 });
 
-const auto data1x1 = datasets::SmallDeconvolutionShapes() * framework::dataset::make("StrideX", 1, 4) * framework::dataset::make("StrideY", 1, 4) * framework::dataset::make("PadX", 0,
-                     1)
-                     * framework::dataset::make("PadY", 0, 1) * framework::dataset::make("ax", 1, 3) * framework::dataset::make("ay", 1, 3) * framework::dataset::make("NumKernels", { 1, 3 })
-                     *framework::dataset::make("ux", 1, 4) *framework::dataset::make("uy", 1, 4);
+const auto data1x1 = datasets::SmallDeconvolutionShapes() * framework::dataset::make("StrideX", 1, 4) * framework::dataset::make("StrideY", 1, 4) * framework::dataset::make("PadX", 0, 1)
+                     * framework::dataset::make("PadY", 0, 1) * framework::dataset::make("ax", 0) * framework::dataset::make("ay", 0) * framework::dataset::make("NumKernels", { 1, 3 });
 
 } // namespace
 
diff --git a/tests/validation/fixtures/DeconvolutionLayerFixture.h b/tests/validation/fixtures/DeconvolutionLayerFixture.h
index e98f5e93c0..f2455f31ac 100644
--- a/tests/validation/fixtures/DeconvolutionLayerFixture.h
+++ b/tests/validation/fixtures/DeconvolutionLayerFixture.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017, 2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -43,20 +43,15 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
 class DeconvolutionLayerFixtureBase : public framework::Fixture
 {
 public:
-    /*
-     *
-     * @param[in] a The number of zeros added to right and bottom edges of the input.
-     * @param[in] u How much to scale the X and Y axis.
-     */
     template <typename...>
     void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, PadStrideInfo info,
-               const std::pair<unsigned int, unsigned int> &a, const std::pair<unsigned int, unsigned int> &u, DataType data_type, int fractional_bits)
+               const std::pair<unsigned int, unsigned int> &inner_border, DataType data_type, int fractional_bits)
     {
         _fractional_bits = fractional_bits;
         _data_type       = data_type;
 
-        _target    = compute_target(input_shape, weights_shape, bias_shape, output_shape, info, a, u, data_type, fractional_bits);
-        _reference = compute_reference(input_shape, weights_shape, bias_shape, output_shape, info, a, data_type, fractional_bits);
+        _target    = compute_target(input_shape, weights_shape, bias_shape, output_shape, info, inner_border, data_type, fractional_bits);
+        _reference = compute_reference(input_shape, weights_shape, bias_shape, output_shape, info, inner_border, data_type, fractional_bits);
     }
 
 protected:
@@ -75,13 +70,9 @@ protected:
                 library->fill_tensor_uniform(tensor, i);
         }
     }
-    /*
-     *
-     * @param[in] a The number of zeros added to right and bottom edges of the input.
-     * @param[in] u How much to scale the X and Y axis.
-     */
+
     TensorType compute_target(const TensorShape &input_shape, const TensorShape &weights_shape, const TensorShape &bias_shape, const TensorShape &output_shape,
-                              const PadStrideInfo &info, const std::pair<unsigned int, unsigned int> &a, const std::pair<float, float> &u, DataType data_type, int fixed_point_position)
+                              const PadStrideInfo &info, const std::pair<unsigned int, unsigned int> &inner_border, DataType data_type, int fixed_point_position)
     {
         // Create tensors
         TensorType src     = create_tensor<TensorType>(input_shape, data_type, 1, fixed_point_position);
@@ -91,7 +82,7 @@ protected:
 
         // Create and configure function
         FunctionType conv;
-        conv.configure(&src, &weights, &bias, &dst, info, a.first, a.second, u.first, u.second);
+        conv.configure(&src, &weights, &bias, &dst, info, inner_border.first, inner_border.second);
 
         ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
         ARM_COMPUTE_EXPECT(weights.info()->is_resizable(), framework::LogLevel::ERRORS);
@@ -121,7 +112,7 @@ protected:
     }
 
     SimpleTensor<T> compute_reference(const TensorShape &input_shape, const TensorShape &weights_shape, const TensorShape &bias_shape, const TensorShape &output_shape,
-                                      const PadStrideInfo &info, const std::pair<unsigned int, unsigned int> a, DataType data_type, int fixed_point_position)
+                                      const PadStrideInfo &info, const std::pair<unsigned int, unsigned int> inner_border, DataType data_type, int fixed_point_position)
     {
         // Create reference
         SimpleTensor<T> src{ input_shape, data_type, 1, fixed_point_position };
@@ -133,7 +124,7 @@ protected:
         fill(weights, 1);
         fill(bias, 2);
 
-        return reference::deconvolution_layer<T>(src, weights, bias, output_shape, info, a);
+        return reference::deconvolution_layer<T>(src, weights, bias, output_shape, info, inner_border);
     }
 
     TensorType      _target{};
@@ -148,18 +139,16 @@ class DeconvolutionValidationFixture : public DeconvolutionLayerFixtureBase<Tens
 public:
     template <typename...>
     void setup(TensorShape input_shape, unsigned int sx, unsigned int sy, unsigned int padx, unsigned int pady,
-               unsigned int ax, unsigned int ay, unsigned int ux, unsigned int uy, unsigned int num_kernels, DataType data_type)
+               unsigned int inner_border_right, unsigned int inner_border_top, unsigned int num_kernels, DataType data_type)
     {
         ARM_COMPUTE_ERROR_ON_MSG(kernel_size_x != kernel_size_y, "Only square kernels supported");
         const TensorShape   weights_shape(kernel_size_x, kernel_size_y, input_shape.z(), num_kernels);
         const TensorShape   bias_shape(num_kernels);
         const PadStrideInfo info(sx, sy, padx, pady, DimensionRoundingType::CEIL);
-        const std::pair<unsigned int, unsigned int> a(ax, ay);
-        const std::pair<float, float>               u(ux, uy);
-        auto out_dim = deconvolution_output_dimensions(input_shape.x(), input_shape.y(), kernel_size_x, kernel_size_y, padx, pady, a.first, a.second, u.first, u.second,
-                                                       DimensionRoundingType::CEIL);
+        const std::pair<unsigned int, unsigned int> inner_border(inner_border_right, inner_border_top);
+        auto        out_dim      = deconvolution_output_dimensions(input_shape.x(), input_shape.y(), kernel_size_x, kernel_size_y, padx, pady, inner_border.first, inner_border.second, sx, sy);
         TensorShape output_shape = deconvolution_output_shape(out_dim, input_shape, weights_shape);
-        DeconvolutionLayerFixtureBase<TensorType, AccessorType, FunctionType, T>::setup(input_shape, weights_shape, bias_shape, output_shape, info, a, u, data_type, 0);
+        DeconvolutionLayerFixtureBase<TensorType, AccessorType, FunctionType, T>::setup(input_shape, weights_shape, bias_shape, output_shape, info, inner_border, data_type, 0);
     }
 };
 
diff --git a/tests/validation/reference/DeconvolutionLayer.cpp b/tests/validation/reference/DeconvolutionLayer.cpp
index 82c2188ade..0cf1087346 100644
--- a/tests/validation/reference/DeconvolutionLayer.cpp
+++ b/tests/validation/reference/DeconvolutionLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017, 2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -39,26 +39,27 @@ SimpleTensor<T> deconvolution_layer(const SimpleTensor<T> &src, const SimpleTens
                                     const PadStrideInfo &info, const std::pair<unsigned int, unsigned int> &a)
 {
     // Create reference
+    const int   stride_x     = info.stride().first;
+    const int   stride_y     = info.stride().second;
     TensorShape scaled_shape = src.shape();
-    scaled_shape.set(0, output_shape.x());
-    scaled_shape.set(1, output_shape.y());
+    int         out_x        = src.shape().x() + (src.shape().x() - 1) * (stride_x - 1) + a.first + 2 * info.pad().first;
+    int         out_y        = src.shape().y() + (src.shape().y() - 1) * (stride_y - 1) + a.second + 2 * info.pad().second;
+    scaled_shape.set(0, out_x);
+    scaled_shape.set(1, out_y);
     SimpleTensor<T> scaled{ scaled_shape, src.data_type(), 1, src.fixed_point_position() };
 
-    const int          width_in      = src.shape().x();
-    const int          height_in     = src.shape().y();
-    const int          width_scaled  = scaled.shape().x();
-    const int          height_scaled = scaled.shape().y();
-    const int          num_2d_slices = src.shape().total_size() / (width_in * height_in);
-    const float        width_ratio   = static_cast<float>(width_in) / static_cast<float>(width_scaled);
-    const float        height_ratio  = static_cast<float>(height_in) / static_cast<float>(height_scaled);
-    const int          ax            = a.first;  // The number of zeros added to right edge of the input.
-    const int          ay            = a.second; // The number of zeros added to bottom edge of the input.
-    const unsigned int kernel_size   = weights.shape().x();
-    ARM_COMPUTE_ERROR_ON(info.pad().first > (kernel_size - 1));
-    const int transposed_convolution_padx = kernel_size - info.pad().first - 1;
-    const int transposed_convolution_pady = kernel_size - info.pad().second - 1;
-    const int stridex                     = info.stride().first;
-    const int stridey                     = info.stride().second;
+    const int width_in      = src.shape().x();
+    const int height_in     = src.shape().y();
+    const int width_scaled  = scaled.shape().x();
+    const int height_scaled = scaled.shape().y();
+    const int num_2d_slices = src.shape().total_size() / (width_in * height_in);
+    const int ax            = a.first;  // The number of zeros added to right edge of the input.
+    const int ay            = a.second; // The number of zeros added to top edge of the input.
+    ARM_COMPUTE_ERROR_ON(info.pad().first > (weights.shape().x() - 1));
+
+    ARM_COMPUTE_ERROR_ON_MSG(ax > stride_x - 1, "ax must be smaller than stride_x");
+    ARM_COMPUTE_ERROR_ON_MSG(ay > stride_y - 1, "ay must be smaller than stride_y");
+
     for(int j = 0; j < scaled.num_elements(); ++j)
     {
         scaled[j] = T(0);
@@ -68,34 +69,23 @@ SimpleTensor<T> deconvolution_layer(const SimpleTensor<T> &src, const SimpleTens
     {
         const int offset_slice_in  = slice * width_in * height_in;
         const int offset_slice_out = slice * width_scaled * height_scaled;
-        for(int yi = ay; yi < height_scaled; yi += stridey)
+        const int start_x          = info.pad().first;
+        const int start_y          = ay + info.pad().second;
+        const int end_y            = height_scaled - info.pad().second;
+        const int end_x            = width_scaled - ax - info.pad().first;
+
+        for(int yi = start_y, in_y = 0; yi < end_y; yi += stride_y, in_y++)
         {
-            for(int xi = transposed_convolution_padx; xi < width_scaled; xi += stridex)
+            for(int xi = start_x, in_x = 0; xi < end_x; xi += stride_x, in_x++)
             {
-                const float x_src     = (xi + 0.5f) * width_ratio - 0.5f;
-                const float y_src     = (yi + 0.5f) * height_ratio - 0.5f;
-                T          *out       = scaled.data() + offset_slice_out + xi + yi * width_scaled;
-                const bool  in_bounds = x_src > -1 && y_src > -1 && x_src < width_in && y_src < height_in;
-                const bool  in_axy    = xi < transposed_convolution_padx || xi >= (width_scaled - ax)  // this is checking if the x coordinate is in the padded left/right area
-                                        || yi < ay || yi >= (height_scaled - transposed_convolution_pady); // like above but top and bottom padding in the upscaled XY plane
-                if(!in_axy)
-                {
-                    if(in_bounds)
-                    {
-                        const int in_scaled_x = (x_src < 0.f) ? static_cast<int>(x_src - 0.5f) : static_cast<int>(x_src + 0.5f);
-                        const int in_scaled_y = (y_src < 0.f) ? static_cast<int>(y_src - 0.5f) : static_cast<int>(y_src + 0.5f);
-                        const T *in          = src.data() + offset_slice_in + in_scaled_x + in_scaled_y * width_in;
-                        *out                  = *in;
-                    }
-                    else
-                    {
-                        *out = T(0);
-                    }
-                }
+                const T *in  = src.data() + offset_slice_in + in_y * width_in + in_x;
+                T       *out = scaled.data() + offset_slice_out + xi + yi * width_scaled;
+                *out         = *in;
             }
         }
     }
-    const PadStrideInfo conv_info(1, 1, 1, 1, DimensionRoundingType::CEIL);
+
+    const PadStrideInfo conv_info(1, 1, 0, 0, 0, 0, DimensionRoundingType::CEIL);
     return convolution_layer(scaled, weights, bias, output_shape, conv_info);
 }
 
diff --git a/tests/validation/reference/DeconvolutionLayer.h b/tests/validation/reference/DeconvolutionLayer.h
index 8222e32027..c0bc1fa928 100644
--- a/tests/validation/reference/DeconvolutionLayer.h
+++ b/tests/validation/reference/DeconvolutionLayer.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017, 2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -42,7 +42,7 @@ namespace reference
  * bias             Optional, ignored if NULL. The biases have one dimension. Data type supported: Same as @p input.
  * output_shape     Output tensor shape. The output has the same number of dimensions as the @p input.
  * info             Contains padding and policies to be used in the deconvolution, this is decribed in @ref PadStrideInfo.
- * a                The number of zeros added to right edge of the input.
+ * a                The number of zeros added to right and top edges of the input.
  *
  */
 template <typename T>
-- 
cgit v1.2.1