From 7da29b6b12ff319ed2b6e2c46588dfa1991556fb Mon Sep 17 00:00:00 2001 From: Alex Gilday Date: Fri, 23 Mar 2018 14:16:00 +0000 Subject: COMPMID-1017: Implement dilated convolution in NEON, OpenCL, and GC Change-Id: If4626ec9e215e14dffe22e80812da5bac84a52e2 Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/125734 Reviewed-by: Anthony Barbier Tested-by: Jenkins --- arm_compute/core/CL/kernels/CLIm2ColKernel.h | 6 +- .../core/GLES_COMPUTE/kernels/GCIm2ColKernel.h | 6 +- arm_compute/core/NEON/kernels/NEIm2ColKernel.h | 7 +- arm_compute/core/Size2D.h | 18 ++ arm_compute/core/Utils.h | 4 +- .../runtime/CL/functions/CLConvolutionLayer.h | 10 +- .../runtime/CL/functions/CLGEMMConvolutionLayer.h | 7 +- .../GLES_COMPUTE/functions/GCConvolutionLayer.h | 4 +- .../runtime/NEON/functions/NEConvolutionLayer.h | 10 +- .../NEON/functions/NEGEMMConvolutionLayer.h | 7 +- src/core/CL/cl_kernels/im2col.cl | 7 +- src/core/CL/kernels/CLIm2ColKernel.cpp | 136 +++++----- .../GLES_COMPUTE/cs_shaders/convolution_layer.cs | 5 +- src/core/GLES_COMPUTE/kernels/GCIm2ColKernel.cpp | 12 +- src/core/NEON/kernels/NEIm2ColKernel.cpp | 45 ++-- src/core/Utils.cpp | 11 +- src/runtime/CL/functions/CLConvolutionLayer.cpp | 18 +- .../CL/functions/CLGEMMConvolutionLayer.cpp | 16 +- .../GLES_COMPUTE/functions/GCConvolutionLayer.cpp | 7 +- src/runtime/NEON/functions/NEConvolutionLayer.cpp | 19 +- .../NEON/functions/NEGEMMConvolutionLayer.cpp | 17 +- tests/benchmark/CL/DirectConvolutionLayer.cpp | 4 +- .../GLES_COMPUTE/DirectConvolutionLayer.cpp | 4 +- tests/benchmark/NEON/ConvolutionLayer.cpp | 3 +- tests/benchmark/NEON/DirectConvolutionLayer.cpp | 4 +- tests/benchmark/fixtures/ConvolutionLayerFixture.h | 4 +- .../fixtures/DirectConvolutionLayerFixture.h | 101 +++++++ tests/benchmark/fixtures/WinogradLayerFixture.h | 100 +++++++ tests/datasets/ConvolutionLayerDataset.h | 25 +- tests/datasets/DilatedConvolutionLayerDataset.h | 81 ++++++ tests/validation/CL/ConvolutionLayer.cpp | 4 +- 
tests/validation/CL/DilatedConvolutionLayer.cpp | 289 +++++++++++++++++++++ tests/validation/CL/LocallyConnected.cpp | 4 +- tests/validation/GLES_COMPUTE/ConvolutionLayer.cpp | 4 +- tests/validation/NEON/ConvolutionLayer.cpp | 4 +- tests/validation/NEON/DilatedConvolutionLayer.cpp | 274 +++++++++++++++++++ tests/validation/NEON/LocallyConnected.cpp | 4 +- .../validation/fixtures/ConvolutionLayerFixture.h | 32 ++- .../fixtures/DirectConvolutionLayerFixture.h | 14 +- .../DirectConvolutionLayerTensorShiftFixture.h | 14 +- .../validation/fixtures/LocallyConnectedFixture.h | 5 +- tests/validation/fixtures/WinogradLayerFixture.h | 4 +- tests/validation/reference/Convolution3d.h | 18 +- tests/validation/reference/ConvolutionLayer.cpp | 21 +- tests/validation/reference/ConvolutionLayer.h | 5 +- 45 files changed, 1172 insertions(+), 222 deletions(-) create mode 100644 tests/benchmark/fixtures/DirectConvolutionLayerFixture.h create mode 100644 tests/benchmark/fixtures/WinogradLayerFixture.h create mode 100644 tests/datasets/DilatedConvolutionLayerDataset.h create mode 100644 tests/validation/CL/DilatedConvolutionLayer.cpp create mode 100644 tests/validation/NEON/DilatedConvolutionLayer.cpp diff --git a/arm_compute/core/CL/kernels/CLIm2ColKernel.h b/arm_compute/core/CL/kernels/CLIm2ColKernel.h index 1ad302eedb..43812e42a3 100644 --- a/arm_compute/core/CL/kernels/CLIm2ColKernel.h +++ b/arm_compute/core/CL/kernels/CLIm2ColKernel.h @@ -75,8 +75,9 @@ public: * @param[in] kernel_dims The kernel dimensions (width and height). * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. * @param[in] has_bias In case biases are provided expands the matrix with 1. + * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). 
*/ - void configure(const ICLTensor *input, ICLTensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias); + void configure(const ICLTensor *input, ICLTensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, const Size2D &dilation = Size2D(1U, 1U)); /** Static function to check if given info will lead to a valid configuration of @ref CLIm2ColKernel * * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM], @@ -86,10 +87,11 @@ public: * @param[in] kernel_dims The kernel dimensions (width and height). * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. * @param[in] has_bias In case biases are provided expands the matrix with 1. + * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). * * @return a status */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias); + static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, const Size2D &dilation = Size2D(1U, 1U)); // Inherited methods overridden: void run(const Window &window, cl::CommandQueue &queue) override; diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCIm2ColKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCIm2ColKernel.h index c376a3d17b..c2d763fd46 100644 --- a/arm_compute/core/GLES_COMPUTE/kernels/GCIm2ColKernel.h +++ b/arm_compute/core/GLES_COMPUTE/kernels/GCIm2ColKernel.h @@ -76,8 +76,9 @@ public: * @param[in] kernel_dims The kernel dimensions (width and height). * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. * @param[in] has_bias In case biases are provided expands the matrix with 1. + * @param[in] dilation (Optional) Dilation, in elements, across x and y. 
Defaults to (1, 1). */ - void configure(const IGCTensor *input, IGCTensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias); + void configure(const IGCTensor *input, IGCTensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, const Size2D &dilation = Size2D(1U, 1U)); // Inherited methods overridden: void run(const Window &window) override; @@ -91,10 +92,11 @@ public: * @param[in] kernel_dims The kernel dimensions (width and height). * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. * @param[in] has_bias In case biases are provided expands the matrix with 1. + * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). * * @return a status */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias); + static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, const Size2D &dilation = Size2D(1U, 1U)); private: /** Run the reshape kernel optimised for the special case (stride is 1, padding is 0 and kernel's low 3 dimensions are same as input) diff --git a/arm_compute/core/NEON/kernels/NEIm2ColKernel.h b/arm_compute/core/NEON/kernels/NEIm2ColKernel.h index 1659b725bb..ecfce2436d 100644 --- a/arm_compute/core/NEON/kernels/NEIm2ColKernel.h +++ b/arm_compute/core/NEON/kernels/NEIm2ColKernel.h @@ -85,9 +85,10 @@ public: * @param[in] has_bias In case biases are provided expands the matrix with 1. * @param[in] is_fully_connected Determines whether this kernel will be called by @ref NEFullyConnectedLayer in order to validate the arguments * @param[in] is_flatten (Optional) Determines whether this kernel will be called by @ref NEFlattenLayer in order to validate the arguments + * @param[in] dilation (Optional) Dilation, in elements, across x and y. 
Defaults to (1, 1). */ void configure(const ITensor *input, ITensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, - bool has_bias, bool is_fully_connected = false, bool is_flatten = false); + bool has_bias, bool is_fully_connected = false, bool is_flatten = false, const Size2D &dilation = Size2D(1U, 1U)); /** Static function to check if given info will lead to a valid configuration of @ref NEIm2ColKernel * * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM], @@ -99,11 +100,12 @@ public: * @param[in] has_bias In case biases are provided expands the matrix with 1. * @param[in] is_fully_connected Determines whether this kernel will be called by @ref NEFullyConnectedLayer in order to validate the arguments * @param[in] is_flatten (Optional) Determines whether this kernel will be called by @ref NEFlattenLayer in order to validate the arguments + * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). * * @return a status */ static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, - bool has_bias, bool is_fully_connected, bool is_flatten = false); + bool has_bias, bool is_fully_connected, bool is_flatten = false, const Size2D &dilation = Size2D(1U, 1U)); // Inherited methods overridden: void run(const Window &window, const ThreadInfo &info) override; @@ -135,6 +137,7 @@ private: unsigned int _kernel_width; unsigned int _kernel_height; bool _has_bias; + Size2D _dilation; }; } // namespace arm_compute #endif /*__ARM_COMPUTE_NEIM2COLKERNEL_H__ */ diff --git a/arm_compute/core/Size2D.h b/arm_compute/core/Size2D.h index 37c4ebd041..ff7466d56f 100644 --- a/arm_compute/core/Size2D.h +++ b/arm_compute/core/Size2D.h @@ -70,6 +70,24 @@ public: return support::cpp11::to_string(width) + std::string("x") + support::cpp11::to_string(height); } + /** Semantic accessor for width as x. 
+ * + * @return x. + */ + size_t x() const + { + return width; + } + + /** Semantic accessor for height as y. + * + * @return y. + */ + size_t y() const + { + return height; + } + public: size_t width = {}; /**< Width of the image region or rectangle */ size_t height = {}; /**< Height of the image region or rectangle */ diff --git a/arm_compute/core/Utils.h b/arm_compute/core/Utils.h index f1e357293f..23668e0169 100644 --- a/arm_compute/core/Utils.h +++ b/arm_compute/core/Utils.h @@ -893,12 +893,14 @@ const std::pair deconvolution_output_dimensions(unsi * @param[in] kernel_width Kernel width. * @param[in] kernel_height Kernel height. * @param[in] pad_stride_info Pad and stride information. + * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). * * @return A pair with the new width in the first position and the new height in the second. */ const std::pair scaled_dimensions(unsigned int width, unsigned int height, unsigned int kernel_width, unsigned int kernel_height, - const PadStrideInfo &pad_stride_info); + const PadStrideInfo &pad_stride_info, + const Size2D &dilation = Size2D(1U, 1U)); /** Convert a tensor format into a string. * diff --git a/arm_compute/runtime/CL/functions/CLConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLConvolutionLayer.h index 53d59c3176..5f383b28ab 100644 --- a/arm_compute/runtime/CL/functions/CLConvolutionLayer.h +++ b/arm_compute/runtime/CL/functions/CLConvolutionLayer.h @@ -56,8 +56,10 @@ public: * Data types supported: Same as @p input. * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. * @param[in] weights_info Specifies if the weights tensor has been reshaped with CLWeightsReshapeKernel. Data type supported: Same as @p input. + * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). 
*/ - void configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info = WeightsInfo()); + void configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info = WeightsInfo(), + const Size2D &dilation = Size2D(1U, 1U)); /** Static function to check if given info will lead to a valid configuration of @ref CLConvolutionLayer * * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM], @@ -69,11 +71,12 @@ public: * Data types supported: Same as @p input. * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. * @param[in] weights_info Specifies if the weights tensor has been reshaped with CLWeightsReshapeKernel. Data type supported: Same as @p input. + * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). * * @return a status */ static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, - const WeightsInfo &weights_info = WeightsInfo()); + const WeightsInfo &weights_info = WeightsInfo(), const Size2D &dilation = Size2D(1U, 1U)); /** Static function to check if given info will return the convolution called by @ref CLConvolutionLayer * * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM], @@ -86,11 +89,12 @@ public: * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. * @param[in] weights_info Specifies if the weights tensor has been reshaped with CLWeightsReshapeKernel. Data type supported: Same as @p input. * @param[in] gpu_target Specifies the @p GPUTarget. + * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). 
* * @return a status */ static ConvolutionMethod get_convolution_method(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, - const WeightsInfo &weights_info, const GPUTarget gpu_target); + const WeightsInfo &weights_info, const GPUTarget gpu_target, const Size2D &dilation = Size2D(1U, 1U)); // Inherited methods overridden: void run() override; diff --git a/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h index 0548ce7be7..dc1211a51a 100644 --- a/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h +++ b/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h @@ -116,8 +116,10 @@ public: * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. * @param[in] weights_info Specifies if the weights tensor has been reshaped with CLWeightsReshapeKernel. If this is not part of the fully connected layer the weights * tensor has also been transposed with CLGEMMTranspose1xWKernel. Data type supported: Same as @p input. + * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). */ - void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info = WeightsInfo()); + void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info = WeightsInfo(), + const Size2D &dilation = Size2D(1U, 1U)); /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMConvolutionLayer. * * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM], @@ -131,11 +133,12 @@ public: * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. 
* @param[in] weights_info Specifies if the weights tensor has been reshaped with CLWeightsReshapeKernel. If this is not part of the fully connected layer the weights * tensor has also been transposed with CLGEMMTranspose1xWKernel. Data type supported: Same as @p input. + * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). * * @return a status */ static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, - const WeightsInfo &weights_info = WeightsInfo()); + const WeightsInfo &weights_info = WeightsInfo(), const Size2D &dilation = Size2D(1U, 1U)); // Inherited methods overridden: void run() override; diff --git a/arm_compute/runtime/GLES_COMPUTE/functions/GCConvolutionLayer.h b/arm_compute/runtime/GLES_COMPUTE/functions/GCConvolutionLayer.h index 2bac982d0c..198fa7ba94 100644 --- a/arm_compute/runtime/GLES_COMPUTE/functions/GCConvolutionLayer.h +++ b/arm_compute/runtime/GLES_COMPUTE/functions/GCConvolutionLayer.h @@ -99,8 +99,10 @@ public: * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. * @param[in] weights_info Specifies if the weights tensor has been reshaped with GCWeightsReshapeKernel. If this is not part of the fully connected layer the weights * tensor has also been transposed with GCGEMMTranspose1xWKernel. Data type supported: Same as @p input. + * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). 
*/ - void configure(const IGCTensor *input, const IGCTensor *weights, const IGCTensor *biases, IGCTensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info = WeightsInfo()); + void configure(const IGCTensor *input, const IGCTensor *weights, const IGCTensor *biases, IGCTensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info = WeightsInfo(), + const Size2D &dilation = Size2D(1U, 1U)); // Inherited methods overridden: void run() override; diff --git a/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h index 6ab1350b25..3e6e5abd28 100644 --- a/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h +++ b/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h @@ -61,8 +61,10 @@ public: * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. * @param[in] weights_info Specifies if the weights tensor has been reshaped with NEWeightsReshapeKernel. If this is not part of the fully connected layer the weights * tensor has also been transposed with NEGEMMTranspose1xWKernel. Data type supported: Same as @p input. + * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). */ - void configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info = WeightsInfo()); + void configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info = WeightsInfo(), + const Size2D &dilation = Size2D(1U, 1U)); /** Static function to check if given info will lead to a valid configuration of @ref NEConvolutionLayer * * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM], @@ -76,11 +78,12 @@ public: * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. 
* @param[in] weights_info Specifies if the weights tensor has been reshaped with NEWeightsReshapeKernel. If this is not part of the fully connected layer the weights * tensor has also been transposed with NEGEMMTranspose1xWKernel. Data type supported: Same as @p input. + * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). * * @return a status */ static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, - const WeightsInfo &weights_info = WeightsInfo()); + const WeightsInfo &weights_info = WeightsInfo(), const Size2D &dilation = Size2D(1U, 1U)); /** Static function to check if given info will return the convolution called by @ref NEConvolutionLayer * * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM], @@ -94,11 +97,12 @@ public: * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. * @param[in] weights_info Specifies if the weights tensor has been reshaped with NEWeightsReshapeKernel. If this is not part of the fully connected layer the weights * tensor has also been transposed with NEGEMMTranspose1xWKernel. Data type supported: Same as @p input. + * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). 
* * @return the Convolution Method Hint */ static ConvolutionMethod get_convolution_method(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, - const WeightsInfo &weights_info = WeightsInfo()); + const WeightsInfo &weights_info = WeightsInfo(), const Size2D &dilation = Size2D(1U, 1U)); // Inherited methods overridden: void run() override; diff --git a/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h index 4ae8ee1fb3..43e9304414 100644 --- a/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h +++ b/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h @@ -115,8 +115,10 @@ public: * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. * @param[in] weights_info Specifies if the weights tensor has been reshaped with NEWeightsReshapeKernel. If this is not part of the fully connected layer the weights * tensor has also been transposed with NEGEMMTranspose1xWKernel. Data type supported: Same as @p input. + * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). */ - void configure(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info = WeightsInfo()); + void configure(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info = WeightsInfo(), + const Size2D &dilation = Size2D(1U, 1U)); /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMConvolutionLayer * * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM], @@ -130,11 +132,12 @@ public: * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. 
* @param[in] weights_info Specifies if the weights tensor has been reshaped with NEWeightsReshapeKernel. If this is not part of the fully connected layer the weights * tensor has also been transposed with NEGEMMTranspose1xWKernel. Data type supported: Same as @p input. + * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). * * @return a status */ static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, - const WeightsInfo &weights_info = WeightsInfo()); + const WeightsInfo &weights_info = WeightsInfo(), const Size2D &dilation = Size2D(1U, 1U)); // Inherited methods overridden: void run() override; diff --git a/src/core/CL/cl_kernels/im2col.cl b/src/core/CL/cl_kernels/im2col.cl index 75d99bda85..1e85e1b303 100644 --- a/src/core/CL/cl_kernels/im2col.cl +++ b/src/core/CL/cl_kernels/im2col.cl @@ -680,6 +680,7 @@ __kernel void im2col_generic_padx0_pady0_dchw( * @note The pad_left, pad_right, pad_top and pad_bottom must be passed at compile time using -DPAD_LEFT, -DPAD_RIGHT, -DPAD_TOP and -DPAD_BOTTOM: e.g. -DPAD_LEFT=1, -DPAD_RIGHT=2, -DPAD_TOP=3 and -DPAD_BOTTOM=2 * @note The zero value to store in case we load values out-of-bounds must be passed at compile time using -DPAD_VALUE: e.g. -DPAD_VALUE=0.0 * @note The stride along the X and Y directions must be passed at compile time using -DSTRIDE_X and -DSTRIDE_Y: e.g. -DSTRIDE_X=1 and -DSTRIDE_Y=1 + * @note The dilation_x and dilation_y must be passed at compile time using -DDILATION_X and -DDILATION_Y: e.g. -DDILATION_X=1, -DDILATION_Y=1 * @note In case biases will be added to the convolution -DHAS_BIAS has to be passed to append the final matrix with 1 in each row. * * @param[in] src_ptr Pointer to the source tensor. 
Supported data types: QS8/QASYMM8/QS16/F16/F32 @@ -722,10 +723,12 @@ __kernel void im2col_generic_dchw( __global DATA_TYPE *output_ptr = ((__global DATA_TYPE *)(dst_ptr + dst_offset_first_element_in_bytes + yo * dst_stride_y + batch * dst_stride_w)) + xo; // Linearize convolution elements - for(int y = yi, y_e = yi + KERNEL_HEIGHT; y < y_e; ++y) + for(int yk = 0; yk < KERNEL_HEIGHT; ++yk) { - for(int x = xi, x_e = xi + KERNEL_WIDTH; x < x_e; ++x, ++output_ptr) + int y = yi + yk * DILATION_Y; + for(int xk = 0; xk < KERNEL_WIDTH; ++xk, ++output_ptr) { + int x = xi + xk * DILATION_X; #if PAD_LEFT == 0 && PAD_TOP == 0 && PAD_RIGHT == 0 && PAD_BOTTOM == 0 *output_ptr = *((__global DATA_TYPE *)(input_ptr + x * src_stride_x + y * src_stride_y)); #else // PAD_LEFT == 0 && PAD_TOP == 0 && PAD_RIGHT == 0 && PAD_BOTTOM == 0 diff --git a/src/core/CL/kernels/CLIm2ColKernel.cpp b/src/core/CL/kernels/CLIm2ColKernel.cpp index 9bc4787384..cc19d3c263 100644 --- a/src/core/CL/kernels/CLIm2ColKernel.cpp +++ b/src/core/CL/kernels/CLIm2ColKernel.cpp @@ -41,11 +41,12 @@ using namespace arm_compute; namespace { -Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, bool has_bias) +Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, bool has_bias, const Size2D &dilation) { ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QASYMM8, DataType::QS16, DataType::F16, DataType::F32); ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::QASYMM8 && has_bias); ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(output); + ARM_COMPUTE_RETURN_ERROR_ON((dilation.x() < 1) || (dilation.y() < 1)); // Checks performed when output is configured if(output->total_size() != 0) @@ -63,12 +64,12 @@ CLIm2ColKernel::CLIm2ColKernel() { } -void CLIm2ColKernel::configure(const ICLTensor *input, ICLTensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias) +void CLIm2ColKernel::configure(const ICLTensor 
*input, ICLTensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, const Size2D &dilation) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); // Perform validation step - ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), has_bias)); + ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), has_bias, dilation)); _input = input; _output = output; @@ -107,7 +108,7 @@ void CLIm2ColKernel::configure(const ICLTensor *input, ICLTensor *output, const _convolved_dims = scaled_dimensions(input->info()->dimension(0), input->info()->dimension(1), kernel_dims.width, kernel_dims.height, - conv_info); + conv_info, dilation); build_opts.add_option("-DKERNEL_WIDTH=" + support::cpp11::to_string(kernel_dims.width)); build_opts.add_option("-DKERNEL_HEIGHT=" + support::cpp11::to_string(kernel_dims.height)); @@ -122,77 +123,82 @@ void CLIm2ColKernel::configure(const ICLTensor *input, ICLTensor *output, const build_opts.add_option("-DPAD_BOTTOM=" + support::cpp11::to_string(conv_info.pad_bottom())); build_opts.add_option("-DSRC_WIDTH=" + support::cpp11::to_string(input->info()->dimension(0))); build_opts.add_option("-DSRC_HEIGHT=" + support::cpp11::to_string(input->info()->dimension(1))); + build_opts.add_option("-DDILATION_X=" + support::cpp11::to_string(dilation.x())); + build_opts.add_option("-DDILATION_Y=" + support::cpp11::to_string(dilation.y())); build_opts.add_option_if_else(is_data_type_quantized(data_type), "-DPAD_VALUE=" + support::cpp11::to_string(input->info()->quantization_info().offset), "-DPAD_VALUE=0"); const bool squared_im2col = kernel_dims.width == kernel_dims.height; - if(squared_im2col && !is_data_type_fixed_point(data_type)) + if(dilation == Size2D(1U, 1U)) { - // Check if we can run an optimized im2col - switch(kernel_dims.width) + if(squared_im2col && !is_data_type_fixed_point(data_type)) { - case 1: - // Optimized im2col1x1 if stride_x = 1 and conv_info.has_padding() = false - 
if(conv_info.stride().first == 1 && !conv_info.has_padding()) - { - // Set hint for LWS + // Check if we can run an optimized im2col + switch(kernel_dims.width) + { + case 1: + // Optimized im2col1x1 if stride_x = 1 and conv_info.has_padding() = false + if(conv_info.stride().first == 1 && !conv_info.has_padding()) + { + // Set hint for LWS + _lws_hint = cl::NDRange(1, 1, 8); + _num_elems_processed_per_iteration = 4; + is_optimized_path = true; + kernel_name = "im2col1x1_stridex1_dchw"; + } + break; + case 3: _lws_hint = cl::NDRange(1, 1, 8); - _num_elems_processed_per_iteration = 4; + _num_elems_processed_per_iteration = 1; is_optimized_path = true; - kernel_name = "im2col1x1_stridex1_dchw"; - } - break; - case 3: - _lws_hint = cl::NDRange(1, 1, 8); - _num_elems_processed_per_iteration = 1; - is_optimized_path = true; - kernel_name = "im2col3x3_dchw"; - break; - case 5: - _num_elems_processed_per_iteration = 1; - is_optimized_path = true; - kernel_name = "im2col5x5_dchw"; - break; - case 11: - // Optimized im2col11x11 if pad_x = pad_y = 0 - if(!conv_info.has_padding()) - { + kernel_name = "im2col3x3_dchw"; + break; + case 5: _num_elems_processed_per_iteration = 1; is_optimized_path = true; - kernel_name = "im2col11x11_padx0_pady0_dchw"; - } - break; - default: - is_optimized_path = false; - break; - } - } - else if(kernel_dims.width > 1 && !conv_info.has_padding()) - { - _num_elems_processed_per_iteration = 1; - kernel_name = "im2col_generic_padx0_pady0_dchw"; - - // Optimized im2col is performed using one or more vector operations with the specified vector size - // and a remainder. For example, for 5x5 convolutions, im2col is performed using vectors of size 4 - // and scalars; for 7x7 convolutions, using vectors of size 4 and vectors of size 3. - // Using the vector size of 4 is always safe since OpenCL supports vectors of size 2 and 3. - // Using the vector size of 8, however, may be faster. 
- size_t vector_size = 4; - // For 2x2 convolutions, use vectors of size 2. (For 3x3 convolutions, im2col_kernel3x3_padx0_pady0 - // is used instead.) - if(kernel_dims.width < vector_size) - { - vector_size = kernel_dims.width; + kernel_name = "im2col5x5_dchw"; + break; + case 11: + // Optimized im2col11x11 if pad_x = pad_y = 0 + if(!conv_info.has_padding()) + { + _num_elems_processed_per_iteration = 1; + is_optimized_path = true; + kernel_name = "im2col11x11_padx0_pady0_dchw"; + } + break; + default: + is_optimized_path = false; + break; + } } - // Local work size and vector size optimized for the 11x11 AlexNet convolution on Bifrost. - if(gpu_target_is_in(gpu_target, GPUTarget::G71, GPUTarget::G72) && kernel_dims.width == 11) + else if(kernel_dims.width > 1 && !conv_info.has_padding()) { - _lws_hint = cl::NDRange(1, 1, 1); - vector_size = 8; + _num_elems_processed_per_iteration = 1; + kernel_name = "im2col_generic_padx0_pady0_dchw"; + + // Optimized im2col is performed using one or more vector operations with the specified vector size + // and a remainder. For example, for 5x5 convolutions, im2col is performed using vectors of size 4 + // and scalars; for 7x7 convolutions, using vectors of size 4 and vectors of size 3. + // Using the vector size of 4 is always safe since OpenCL supports vectors of size 2 and 3. + // Using the vector size of 8, however, may be faster. + size_t vector_size = 4; + // For 2x2 convolutions, use vectors of size 2. (For 3x3 convolutions, im2col_kernel3x3_padx0_pady0 + // is used instead.) + if(kernel_dims.width < vector_size) + { + vector_size = kernel_dims.width; + } + // Local work size and vector size optimized for the 11x11 AlexNet convolution on Bifrost. 
+ if(gpu_target_is_in(gpu_target, GPUTarget::G71, GPUTarget::G72) && kernel_dims.width == 11) + { + _lws_hint = cl::NDRange(1, 1, 1); + vector_size = 8; + } + const size_t width_mod_vector_size = kernel_dims.width % vector_size; + build_opts.add_option("-DVECTOR_SIZE=" + support::cpp11::to_string(vector_size)); + build_opts.add_option("-DWIDTH_MOD_VECTOR_SIZE=" + support::cpp11::to_string(width_mod_vector_size)); } - const size_t width_mod_vector_size = kernel_dims.width % vector_size; - build_opts.add_option("-DVECTOR_SIZE=" + support::cpp11::to_string(vector_size)); - build_opts.add_option("-DWIDTH_MOD_VECTOR_SIZE=" + support::cpp11::to_string(width_mod_vector_size)); } _run_func = &CLIm2ColKernel::run_generic; } @@ -206,7 +212,7 @@ void CLIm2ColKernel::configure(const ICLTensor *input, ICLTensor *output, const // Create kernel _kernel = static_cast(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options())); - // Configure kernel window + // Configure kernel window Window win; if(is_optimized_path) { @@ -250,12 +256,12 @@ void CLIm2ColKernel::configure(const ICLTensor *input, ICLTensor *output, const _config_id += support::cpp11::to_string(output->info()->dimension(1)); } -Status CLIm2ColKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias) +Status CLIm2ColKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, const Size2D &dilation) { ARM_COMPUTE_UNUSED(kernel_dims); ARM_COMPUTE_UNUSED(conv_info); ARM_COMPUTE_UNUSED(has_bias); - ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, has_bias)); + ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, has_bias, dilation)); return Status{}; } diff --git a/src/core/GLES_COMPUTE/cs_shaders/convolution_layer.cs b/src/core/GLES_COMPUTE/cs_shaders/convolution_layer.cs index 2701f5b262..ad3f14d442 100644 --- 
a/src/core/GLES_COMPUTE/cs_shaders/convolution_layer.cs +++ b/src/core/GLES_COMPUTE/cs_shaders/convolution_layer.cs @@ -164,6 +164,7 @@ void main() * @note STRIDE_X/STRIDE_Y must be passed for stride info, e.g. "#define STRIDE_X xxx" * @note CONVOLVED_WIDTH/CONVOLVED_HEIGHT must be passed for convolved dimension, e.g. "#define CONVOLVED_WIDTH xxx" * @note SRC_WIDTH/SRC_HEIGHT must be passed for input dimension, e.g. "#define SRC_WIDTH xxx" + * @note DILATION_X/DILATION_Y must be passed for dilation sizes, e.g. "#define DILATION_X xxx" * @note In case biases will be added to the convolution "#define HAS_BIAS" has to be passed to append the final matrix with 1 in each row. * * @param[in] src_ptr Pointer to the source tensor. Supported data types: F16/F32 @@ -210,9 +211,9 @@ void main(void) uint src_pos = 0u; // Linearize convolution elements - for(uint y = yi, y_e = yi + uint(KERNEL_HEIGHT); y < y_e; ++y) + for(uint y = yi, y_e = yi + uint(KERNEL_HEIGHT) * uint(DILATION_Y); y < y_e; y += uint(DILATION_Y)) { - for(uint x = xi, x_e = xi + uint(KERNEL_WIDTH); x < x_e; ++x, TENSOR_OFFSET_ADVANCE(dst_iter, 1u)) + for(uint x = xi, x_e = xi + uint(KERNEL_WIDTH) * uint(DILATION_X); x < x_e; x += uint(DILATION_X), TENSOR_OFFSET_ADVANCE(dst_iter, 1u)) { #if PAD_LEFT == 0 && PAD_TOP == 0 && PAD_RIGHT == 0 && PAD_BOTTOM == 0 src_pos = TENSOR_OFFSET_ADVANCE_IN_BYTES(src_iter, x * src_attrs.stride_x + y * src_attrs.stride_y); diff --git a/src/core/GLES_COMPUTE/kernels/GCIm2ColKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCIm2ColKernel.cpp index 47bfebcc09..eb790471fb 100644 --- a/src/core/GLES_COMPUTE/kernels/GCIm2ColKernel.cpp +++ b/src/core/GLES_COMPUTE/kernels/GCIm2ColKernel.cpp @@ -65,7 +65,7 @@ GCIm2ColKernel::GCIm2ColKernel() { } -void GCIm2ColKernel::configure(const IGCTensor *input, IGCTensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias) +void GCIm2ColKernel::configure(const IGCTensor *input, IGCTensor *output, const Size2D &kernel_dims, 
const PadStrideInfo &conv_info, bool has_bias, const Size2D &dilation) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); @@ -98,7 +98,8 @@ void GCIm2ColKernel::configure(const IGCTensor *input, IGCTensor *output, const && (std::equal(input->info()->tensor_shape().cbegin() + 3, input->info()->tensor_shape().cend(), output->info()->tensor_shape().cbegin() + 1)) - && ((stride_x == 1) && (stride_y == 1) && !conv_info.has_padding()); + && ((stride_x == 1) && (stride_y == 1) && !conv_info.has_padding()) + && (dilation == Size2D(1U, 1U)); std::string kernel_name = "im2col_generic"; if(!run_img2col_reduced) @@ -111,7 +112,7 @@ void GCIm2ColKernel::configure(const IGCTensor *input, IGCTensor *output, const build_opts.emplace("#define IM2COL_GENERIC"); _convolved_dims = scaled_dimensions(input->info()->dimension(0), input->info()->dimension(1), kernel_dims.width, kernel_dims.height, - conv_info); + conv_info, dilation); _num_elems_processed_per_iteration = 2; build_opts.emplace("#define KERNEL_WIDTH " + support::cpp11::to_string(kernel_dims.width)); @@ -127,6 +128,8 @@ void GCIm2ColKernel::configure(const IGCTensor *input, IGCTensor *output, const build_opts.emplace("#define PAD_BOTTOM " + support::cpp11::to_string(conv_info.pad_bottom())); build_opts.emplace("#define SRC_WIDTH " + support::cpp11::to_string(input->info()->dimension(0))); build_opts.emplace("#define SRC_HEIGHT " + support::cpp11::to_string(input->info()->dimension(1))); + build_opts.emplace("#define DILATION_X " + support::cpp11::to_string(dilation.x())); + build_opts.emplace("#define DILATION_Y " + support::cpp11::to_string(dilation.y())); _run_func = &GCIm2ColKernel::run_generic; } @@ -206,11 +209,12 @@ void GCIm2ColKernel::configure(const IGCTensor *input, IGCTensor *output, const IGCKernel::configure(win); } -Status GCIm2ColKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias) +Status GCIm2ColKernel::validate(const 
ITensorInfo *input, const ITensorInfo *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, const Size2D &dilation) { ARM_COMPUTE_UNUSED(kernel_dims); ARM_COMPUTE_UNUSED(conv_info); ARM_COMPUTE_UNUSED(has_bias); + ARM_COMPUTE_UNUSED(dilation); ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output)); return Status{}; } diff --git a/src/core/NEON/kernels/NEIm2ColKernel.cpp b/src/core/NEON/kernels/NEIm2ColKernel.cpp index dee1608c43..348722c55d 100644 --- a/src/core/NEON/kernels/NEIm2ColKernel.cpp +++ b/src/core/NEON/kernels/NEIm2ColKernel.cpp @@ -45,12 +45,13 @@ using namespace arm_compute; namespace { Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, - bool has_bias, bool is_fully_connected, bool is_flatten) + bool has_bias, bool is_fully_connected, bool is_flatten, const Size2D &dilation) { ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QASYMM8, DataType::QS16, DataType::F16, DataType::F32); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT_POSITION(input, output); ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::QASYMM8 && has_bias); + ARM_COMPUTE_RETURN_ERROR_ON((dilation.x() < 1) || (dilation.y() < 1)); if(is_flatten) /* Called by FlattenLayer */ { @@ -59,7 +60,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, c } else if(!is_fully_connected) /* Called by ConvolutionLayer */ { - std::pair out_dims = scaled_dimensions(input->dimension(0), input->dimension(1), kernel_dims.width, kernel_dims.height, conv_info); + std::pair out_dims = scaled_dimensions(input->dimension(0), input->dimension(1), kernel_dims.width, kernel_dims.height, conv_info, dilation); ARM_COMPUTE_RETURN_ERROR_ON(output->dimension(0) != (input->dimension(2) * kernel_dims.area() + (has_bias ? 
1 : 0))); ARM_COMPUTE_RETURN_ERROR_ON(output->dimension(1) != (out_dims.first * out_dims.second)); ARM_COMPUTE_RETURN_ERROR_ON(output->dimension(2) != 1); @@ -91,11 +92,13 @@ inline void linearize_volume(const uint8_t *const in_ptr, int input_stride_y, int input_stride_z, int fixed_point_position, - int pad_value) + int pad_value, + int dilation_x, + int dilation_y) { const int kernel_size2 = kernel_width * kernel_height; - const int x_e = top_left_x + kernel_width; - const int y_e = top_left_y + kernel_height; + const int x_e = top_left_x + kernel_width * dilation_x; + const int y_e = top_left_y + kernel_height * dilation_y; // Linearize volume int d = 0; @@ -104,12 +107,12 @@ inline void linearize_volume(const uint8_t *const in_ptr, // 2) to have an optimized im2col for the first convolution layer where usually we have 3 IFMs for(; d <= (kernel_depth - 3); d += 3) { - for(int y = top_left_y; y < y_e; ++y) + for(int y = top_left_y; y < y_e; y += dilation_y) { if((y < 0 || y >= input_h) && has_pads) { // All the values will be the offset (will be zeros when not quantized) - for(int x = top_left_x; x < x_e; ++x, ++out_ptr) + for(int x = top_left_x; x < x_e; x += dilation_x, ++out_ptr) { *(out_ptr + 0 * kernel_size2) = pad_value; *(out_ptr + 1 * kernel_size2) = pad_value; @@ -118,7 +121,7 @@ inline void linearize_volume(const uint8_t *const in_ptr, } else { - for(int x = top_left_x; x < x_e; ++x, ++out_ptr) + for(int x = top_left_x; x < x_e; x += dilation_x, ++out_ptr) { if((x < 0 || x >= input_w) && has_pads) { @@ -141,7 +144,7 @@ inline void linearize_volume(const uint8_t *const in_ptr, // Left over for(; d < kernel_depth; d++) { - for(int y = top_left_y; y < y_e; ++y) + for(int y = top_left_y; y < y_e; y += dilation_y) { if((y < 0 || y >= input_h) && has_pads) { @@ -151,7 +154,7 @@ inline void linearize_volume(const uint8_t *const in_ptr, } else { - for(int x = top_left_x; x < x_e; ++x, ++out_ptr) + for(int x = top_left_x; x < x_e; x += dilation_x, ++out_ptr) { 
if((x < 0 || x >= input_w) && has_pads) { @@ -251,7 +254,9 @@ void NEIm2ColKernel::run_generic(const Window &window) input_stride_y, input_stride_z, _input->info()->fixed_point_position(), - offset); + offset, + _dilation.x(), + _dilation.y()); }, in, out); } @@ -309,27 +314,28 @@ void NEIm2ColKernel::run_reduced(const Window &window) } NEIm2ColKernel::NEIm2ColKernel() - : _func(), _input(nullptr), _output(nullptr), _convolved_dims(), _conv_info(), _kernel_width(0), _kernel_height(0), _has_bias(false) + : _func(), _input(nullptr), _output(nullptr), _convolved_dims(), _conv_info(), _kernel_width(0), _kernel_height(0), _has_bias(false), _dilation(1U, 1U) { } void NEIm2ColKernel::configure(const ITensor *input, ITensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, - bool has_bias, bool is_fully_connected, bool is_flatten) + bool has_bias, bool is_fully_connected, bool is_flatten, const Size2D &dilation) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); // Perform validation step ARM_COMPUTE_UNUSED(is_fully_connected, is_flatten); - ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), kernel_dims, conv_info, has_bias, is_fully_connected, is_flatten)); + ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), kernel_dims, conv_info, has_bias, is_fully_connected, is_flatten, dilation)); _input = input; _output = output; _conv_info = conv_info; _kernel_width = kernel_dims.width; - _kernel_height = kernel_dims.height, + _kernel_height = kernel_dims.height; + _dilation = dilation; _convolved_dims = scaled_dimensions(input->info()->dimension(0), input->info()->dimension(1), _kernel_width, _kernel_height, - _conv_info); + _conv_info, _dilation); _has_bias = has_bias; unsigned int stride_x = 0; @@ -340,7 +346,8 @@ void NEIm2ColKernel::configure(const ITensor *input, ITensor *output, const Size && (std::equal(input->info()->tensor_shape().cbegin() + 3, input->info()->tensor_shape().cend(), 
output->info()->tensor_shape().cbegin() + 1)) - && ((stride_x == 1) && (stride_y == 1) && !conv_info.has_padding()); + && ((stride_x == 1) && (stride_y == 1) && !conv_info.has_padding()) + && ((dilation.x() == 1) && (dilation.y() == 1)); Window window = calculate_max_window(*input->info(), Steps()); @@ -407,9 +414,9 @@ void NEIm2ColKernel::configure(const ITensor *input, ITensor *output, const Size } Status NEIm2ColKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, - bool has_bias, bool is_fully_connected, bool is_flatten) + bool has_bias, bool is_fully_connected, bool is_flatten, const Size2D &dilation) { - ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, kernel_dims, conv_info, has_bias, is_fully_connected, is_flatten)); + ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, kernel_dims, conv_info, has_bias, is_fully_connected, is_flatten, dilation)); return Status{}; } diff --git a/src/core/Utils.cpp b/src/core/Utils.cpp index f4b45532cf..4a237f9daa 100644 --- a/src/core/Utils.cpp +++ b/src/core/Utils.cpp @@ -292,7 +292,8 @@ const std::pair arm_compute::deconvolution_output_di const std::pair arm_compute::scaled_dimensions(unsigned int width, unsigned int height, unsigned int kernel_width, unsigned int kernel_height, - const PadStrideInfo &pad_stride_info) + const PadStrideInfo &pad_stride_info, + const Size2D &dilation) { const unsigned int pad_left = pad_stride_info.pad_left(); const unsigned int pad_top = pad_stride_info.pad_top(); @@ -305,12 +306,12 @@ const std::pair arm_compute::scaled_dimensions(unsig switch(pad_stride_info.round()) { case DimensionRoundingType::FLOOR: - w = static_cast(std::floor((static_cast(width + pad_left + pad_right - kernel_width) / stride_x) + 1)); - h = static_cast(std::floor((static_cast(height + pad_top + pad_bottom - kernel_height) / stride_y) + 1)); + w = static_cast(std::floor((static_cast(width + pad_left + pad_right - 
(dilation.x() * (kernel_width - 1) + 1)) / stride_x) + 1)); + h = static_cast(std::floor((static_cast(height + pad_top + pad_bottom - (dilation.y() * (kernel_height - 1) + 1)) / stride_y) + 1)); break; case DimensionRoundingType::CEIL: - w = static_cast(std::ceil((static_cast(width + pad_left + pad_right - kernel_width) / stride_x) + 1)); - h = static_cast(std::ceil((static_cast(height + pad_top + pad_bottom - kernel_height) / stride_y) + 1)); + w = static_cast(std::ceil((static_cast(width + pad_left + pad_right - (dilation.x() * (kernel_width - 1) + 1)) / stride_x) + 1)); + h = static_cast(std::ceil((static_cast(height + pad_top + pad_bottom - (dilation.y() * (kernel_height - 1) + 1)) / stride_y) + 1)); break; default: ARM_COMPUTE_ERROR("Unsupported rounding type"); diff --git a/src/runtime/CL/functions/CLConvolutionLayer.cpp b/src/runtime/CL/functions/CLConvolutionLayer.cpp index 1a486ce5c7..64bda93ff0 100644 --- a/src/runtime/CL/functions/CLConvolutionLayer.cpp +++ b/src/runtime/CL/functions/CLConvolutionLayer.cpp @@ -42,13 +42,14 @@ CLConvolutionLayer::CLConvolutionLayer(std::shared_ptr memory_ma { } -void CLConvolutionLayer::configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info) +void CLConvolutionLayer::configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info, + const Size2D &dilation) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output); - ARM_COMPUTE_ERROR_THROW_ON(CLConvolutionLayer::validate(input->info(), weights->info(), ((biases != nullptr) ? biases->info() : nullptr), output->info(), conv_info, weights_info)); + ARM_COMPUTE_ERROR_THROW_ON(CLConvolutionLayer::validate(input->info(), weights->info(), ((biases != nullptr) ? 
biases->info() : nullptr), output->info(), conv_info, weights_info, dilation)); switch(CLConvolutionLayer::get_convolution_method(input->info(), weights->info(), ((biases != nullptr) ? biases->info() : nullptr), output->info(), conv_info, - weights_info, CLScheduler::get().target())) + weights_info, CLScheduler::get().target(), dilation)) { case ConvolutionMethod::DIRECT: { @@ -60,7 +61,7 @@ void CLConvolutionLayer::configure(ICLTensor *input, const ICLTensor *weights, c case ConvolutionMethod::GEMM: { auto f = arm_compute::support::cpp14::make_unique(_memory_manager); - f->configure(input, weights, biases, output, conv_info, weights_info); + f->configure(input, weights, biases, output, conv_info, weights_info, dilation); _function = std::move(f); break; } @@ -71,14 +72,14 @@ void CLConvolutionLayer::configure(ICLTensor *input, const ICLTensor *weights, c } Status CLConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, - const WeightsInfo &weights_info) + const WeightsInfo &weights_info, const Size2D &dilation) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output); //Configure if the parameters match the direct convolution or the gemm-based const GPUTarget gpu_target = CLScheduler::get().target(); - switch(CLConvolutionLayer::get_convolution_method(input, weights, biases, output, conv_info, weights_info, gpu_target)) + switch(CLConvolutionLayer::get_convolution_method(input, weights, biases, output, conv_info, weights_info, gpu_target, dilation)) { case ConvolutionMethod::DIRECT: { @@ -89,7 +90,7 @@ Status CLConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo case ConvolutionMethod::GEMM: { // Validate gemm-based convolution layer - CLGEMMConvolutionLayer::validate(input, weights, biases, output, conv_info, weights_info); + CLGEMMConvolutionLayer::validate(input, weights, biases, output, conv_info, weights_info, dilation); 
break; } default: @@ -101,7 +102,7 @@ Status CLConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo } ConvolutionMethod CLConvolutionLayer::get_convolution_method(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, - const WeightsInfo &weights_info, const GPUTarget gpu_target) + const WeightsInfo &weights_info, const GPUTarget gpu_target, const Size2D &dilation) { ARM_COMPUTE_UNUSED(input); ARM_COMPUTE_UNUSED(weights); @@ -110,6 +111,7 @@ ConvolutionMethod CLConvolutionLayer::get_convolution_method(const ITensorInfo * ARM_COMPUTE_UNUSED(conv_info); ARM_COMPUTE_UNUSED(weights_info); ARM_COMPUTE_UNUSED(gpu_target); + ARM_COMPUTE_UNUSED(dilation); return ConvolutionMethod::GEMM; } diff --git a/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp b/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp index bc339f176f..e7ad62f5ff 100644 --- a/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp +++ b/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp @@ -151,7 +151,8 @@ Status CLGEMMConvolutionLayer::validate_mm(const ITensorInfo *input, const ITens return Status{}; } -void CLGEMMConvolutionLayer::configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info) +void CLGEMMConvolutionLayer::configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info, + const Size2D &dilation) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output); @@ -160,7 +161,8 @@ void CLGEMMConvolutionLayer::configure(const ICLTensor *input, const ICLTensor * biases != nullptr ? 
biases->info() : nullptr, output->info(), conv_info, - weights_info)); + weights_info, + dilation)); _is_quantized = is_data_type_quantized_asymmetric(input->info()->data_type()); @@ -187,7 +189,7 @@ void CLGEMMConvolutionLayer::configure(const ICLTensor *input, const ICLTensor * const unsigned int kernel_width = weights->info()->dimension(0); const unsigned int kernel_height = weights->info()->dimension(1); std::tie(conv_w, conv_h) = scaled_dimensions(input->info()->dimension(0), input->info()->dimension(1), kernel_width, kernel_height, - conv_info); + conv_info, dilation); unsigned int mat_weights_cols = weights->info()->dimension(3); unsigned int mat_weights_rows = weights->info()->dimension(0) * weights->info()->dimension(1) * weights->info()->dimension(2) + bias_element; @@ -224,7 +226,7 @@ void CLGEMMConvolutionLayer::configure(const ICLTensor *input, const ICLTensor * _memory_group.manage(&_gemm_output); // Configure im2col - _im2col_kernel.configure(input, &_im2col_output, Size2D(kernel_width, kernel_height), conv_info, append_bias); + _im2col_kernel.configure(input, &_im2col_output, Size2D(kernel_width, kernel_height), conv_info, append_bias, dilation); // Configure GEMM configure_mm(&_im2col_output, weights, &_gemm_output); @@ -260,7 +262,7 @@ void CLGEMMConvolutionLayer::configure(const ICLTensor *input, const ICLTensor * } Status CLGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, - const WeightsInfo &weights_info) + const WeightsInfo &weights_info, const Size2D &dilation) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output); ARM_COMPUTE_RETURN_ERROR_ON_MSG(weights_info.are_reshaped(), "Weights already reshaped are not supported!"); @@ -282,7 +284,7 @@ Status CLGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorI const unsigned int kernel_width = weights->dimension(0); const unsigned int kernel_height = 
weights->dimension(1); - std::tie(conv_w, conv_h) = scaled_dimensions(input->dimension(0), input->dimension(1), kernel_width, kernel_height, conv_info); + std::tie(conv_w, conv_h) = scaled_dimensions(input->dimension(0), input->dimension(1), kernel_width, kernel_height, conv_info, dilation); unsigned int mat_weights_cols = weights->dimension(3); unsigned int mat_weights_rows = weights->dimension(0) * weights->dimension(1) * weights->dimension(2) + bias_element; @@ -298,7 +300,7 @@ Status CLGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorI shape_im2col.set(2, 1); TensorInfo im2col_reshaped_info(shape_im2col, 1, dt, input->fixed_point_position()); im2col_reshaped_info.set_quantization_info(input->quantization_info()); - CLIm2ColKernel::validate(input, &im2col_reshaped_info, Size2D(kernel_width, kernel_height), conv_info, append_bias); + CLIm2ColKernel::validate(input, &im2col_reshaped_info, Size2D(kernel_width, kernel_height), conv_info, append_bias, dilation); // Create GEMM output tensor TensorShape shape_gemm = im2col_reshaped_info.tensor_shape(); diff --git a/src/runtime/GLES_COMPUTE/functions/GCConvolutionLayer.cpp b/src/runtime/GLES_COMPUTE/functions/GCConvolutionLayer.cpp index f4c073668a..c2b7e02284 100644 --- a/src/runtime/GLES_COMPUTE/functions/GCConvolutionLayer.cpp +++ b/src/runtime/GLES_COMPUTE/functions/GCConvolutionLayer.cpp @@ -102,7 +102,8 @@ void GCConvolutionLayer::configure_mm(const IGCTensor *input, const IGCTensor *w _mm_kernel.configure(input, weights, output, 1.f, is_interleaved_transposed); } -void GCConvolutionLayer::configure(const IGCTensor *input, const IGCTensor *weights, const IGCTensor *biases, IGCTensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info) +void GCConvolutionLayer::configure(const IGCTensor *input, const IGCTensor *weights, const IGCTensor *biases, IGCTensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info, + const Size2D &dilation) { 
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32); ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights); @@ -136,7 +137,7 @@ void GCConvolutionLayer::configure(const IGCTensor *input, const IGCTensor *weig const unsigned int kernel_width = (_are_weights_reshaped) ? weights_info.kernel_size().first : weights->info()->dimension(0); const unsigned int kernel_height = (_are_weights_reshaped) ? weights_info.kernel_size().second : weights->info()->dimension(1); std::tie(conv_w, conv_h) = scaled_dimensions(input->info()->dimension(0), input->info()->dimension(1), kernel_width, kernel_height, - conv_info); + conv_info, dilation); // Check if its a "fully connected" convolution _is_fully_connected_convolution = ((conv_w == 1) && (conv_h == 1)); @@ -229,7 +230,7 @@ void GCConvolutionLayer::configure(const IGCTensor *input, const IGCTensor *weig input->info()->extend_padding(border_size); _fill_border.configure(input, border_size, BorderMode::CONSTANT, PixelValue(0)); // for PAD of im2col fp16: consider it as border } - _input_im2col_kernel.configure(input, &_input_im2col_reshaped, Size2D(kernel_width, kernel_height), conv_info, _append_bias); + _input_im2col_kernel.configure(input, &_input_im2col_reshaped, Size2D(kernel_width, kernel_height), conv_info, _append_bias, dilation); // Configure matrix multiply if(run_interleaved) diff --git a/src/runtime/NEON/functions/NEConvolutionLayer.cpp b/src/runtime/NEON/functions/NEConvolutionLayer.cpp index d4421e8429..e659495b7c 100644 --- a/src/runtime/NEON/functions/NEConvolutionLayer.cpp +++ b/src/runtime/NEON/functions/NEConvolutionLayer.cpp @@ -40,14 +40,15 @@ NEConvolutionLayer::NEConvolutionLayer(std::shared_ptr memory_ma { } -void NEConvolutionLayer::configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info) +void NEConvolutionLayer::configure(ITensor *input, const ITensor *weights, const 
ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info, + const Size2D &dilation) { // Perform validate step ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output); - ARM_COMPUTE_ERROR_THROW_ON(NEConvolutionLayer::validate(input->info(), weights->info(), ((biases != nullptr) ? biases->info() : nullptr), output->info(), conv_info, weights_info)); + ARM_COMPUTE_ERROR_THROW_ON(NEConvolutionLayer::validate(input->info(), weights->info(), ((biases != nullptr) ? biases->info() : nullptr), output->info(), conv_info, weights_info, dilation)); switch(NEConvolutionLayer::get_convolution_method(input->info(), weights->info(), ((biases != nullptr) ? biases->info() : nullptr), output->info(), conv_info, - weights_info)) + weights_info, dilation)) { case ConvolutionMethod::WINOGRAD: { @@ -59,7 +60,7 @@ void NEConvolutionLayer::configure(ITensor *input, const ITensor *weights, const case ConvolutionMethod::GEMM: { auto f = arm_compute::support::cpp14::make_unique(_memory_manager); - f->configure(input, weights, biases, output, conv_info, weights_info); + f->configure(input, weights, biases, output, conv_info, weights_info, dilation); _function = std::move(f); break; } @@ -77,9 +78,9 @@ void NEConvolutionLayer::configure(ITensor *input, const ITensor *weights, const } Status NEConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, - const WeightsInfo &weights_info) + const WeightsInfo &weights_info, const Size2D &dilation) { - switch(NEConvolutionLayer::get_convolution_method(input, weights, biases, output, conv_info, weights_info)) + switch(NEConvolutionLayer::get_convolution_method(input, weights, biases, output, conv_info, weights_info, dilation)) { case ConvolutionMethod::WINOGRAD: //Validate Winograd @@ -87,7 +88,7 @@ Status NEConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo break; case 
ConvolutionMethod::GEMM: //Validate Gemm-based Convolution - NEGEMMConvolutionLayer::validate(input, weights, biases, output, conv_info, weights_info); + NEGEMMConvolutionLayer::validate(input, weights, biases, output, conv_info, weights_info, dilation); break; case ConvolutionMethod::DIRECT: //Validate Gemm-based Convolution @@ -101,12 +102,12 @@ Status NEConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo } ConvolutionMethod NEConvolutionLayer::get_convolution_method(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, - const WeightsInfo &weights_info) + const WeightsInfo &weights_info, const Size2D &dilation) { ARM_COMPUTE_UNUSED(output); ARM_COMPUTE_UNUSED(weights_info); if((input->data_type() == DataType::F32) && (weights->dimension(0) == 3) && (weights->dimension(1) == 3) && (weights->num_dimensions() <= 4) && (conv_info.stride().first == 1) - && (conv_info.stride().second == 1) && (biases != nullptr)) + && (conv_info.stride().second == 1) && (biases != nullptr) && (dilation == Size2D(1U, 1U))) { return ConvolutionMethod::WINOGRAD; } diff --git a/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp b/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp index 3b8b4243e5..d9707d95e0 100644 --- a/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp +++ b/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp @@ -170,7 +170,7 @@ Status validate_and_initialize_values(const ITensorInfo *input, const ITensorInf bool &are_weights_reshaped, unsigned int &kernel_width, unsigned int &kernel_height, bool &is_fully_connected_convolution, bool &is_interleaved, bool &is_quantized, unsigned int &mat_weights_cols, unsigned int &mat_weights_rows, - unsigned int &conv_w, unsigned int &conv_h) + unsigned int &conv_w, unsigned int &conv_h, const Size2D &dilation) { ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QASYMM8, 
DataType::QS16, DataType::F16, DataType::F32); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights); @@ -205,7 +205,7 @@ Status validate_and_initialize_values(const ITensorInfo *input, const ITensorInf mat_weights_rows = weights->dimension(0) * weights->dimension(1) * weights->dimension(2) + (append_bias ? 1 : 0); std::tie(conv_w, conv_h) = scaled_dimensions(input->dimension(0), input->dimension(1), kernel_width, kernel_height, - conv_info); + conv_info, dilation); // Check if its a "fully connected" convolution is_fully_connected_convolution = ((conv_w == 1) && (conv_h == 1)); @@ -246,7 +246,8 @@ void NEGEMMConvolutionLayer::configure_mm(const ITensor *input, const ITensor *w } } -void NEGEMMConvolutionLayer::configure(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info) +void NEGEMMConvolutionLayer::configure(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info, + const Size2D &dilation) { // Perform validate step ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output); @@ -262,7 +263,7 @@ void NEGEMMConvolutionLayer::configure(const ITensor *input, const ITensor *weig Status status = validate_and_initialize_values(input->info(), weights->info(), (biases == nullptr) ? 
nullptr : biases->info(), conv_info, weights_info, dt, _append_bias, _are_weights_reshaped, kernel_width, kernel_height, _is_fully_connected_convolution, _is_interleaved, _is_quantized, - mat_weights_cols, mat_weights_rows, conv_w, conv_h); + mat_weights_cols, mat_weights_rows, conv_w, conv_h, dilation); ARM_COMPUTE_ERROR_THROW_ON(status); @@ -362,7 +363,7 @@ void NEGEMMConvolutionLayer::configure(const ITensor *input, const ITensor *weig // Configure kernels // Configure im2col - _input_im2col_kernel.configure(input, &_input_im2col_reshaped, Size2D(kernel_width, kernel_height), conv_info, _append_bias); + _input_im2col_kernel.configure(input, &_input_im2col_reshaped, Size2D(kernel_width, kernel_height), conv_info, _append_bias, false, false, dilation); // Configure matrix multiply if(run_optimised) @@ -420,7 +421,7 @@ void NEGEMMConvolutionLayer::configure(const ITensor *input, const ITensor *weig } Status NEGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, - const WeightsInfo &weights_info) + const WeightsInfo &weights_info, const Size2D &dilation) { ARM_COMPUTE_UNUSED(output); @@ -439,7 +440,7 @@ Status NEGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorI Status status = validate_and_initialize_values(input, weights, biases, conv_info, weights_info, dt, append_bias, are_weights_reshaped, kernel_width, kernel_height, is_fully_connected_convolution, is_interleaved, is_quantized, mat_weights_cols, mat_weights_rows, - conv_w, conv_h); + conv_w, conv_h, dilation); const Size2D kernel_weights = Size2D(kernel_width, kernel_height); @@ -517,7 +518,7 @@ Status NEGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorI shape_im2col.set(1, mat_input_rows); shape_im2col.set(2, 1); TensorInfo im2_col_info = input->clone()->set_tensor_shape(shape_im2col); - ARM_COMPUTE_RETURN_ON_ERROR(NEIm2ColKernel::validate(input, 
&im2_col_info, kernel_weights, conv_info, append_bias, false)); + ARM_COMPUTE_RETURN_ON_ERROR(NEIm2ColKernel::validate(input, &im2_col_info, kernel_weights, conv_info, append_bias, false, false, dilation)); // Create GEMM output tensor TensorShape shape_gemm(im2_col_info.tensor_shape()); diff --git a/tests/benchmark/CL/DirectConvolutionLayer.cpp b/tests/benchmark/CL/DirectConvolutionLayer.cpp index 27994b4458..c7b07807e9 100644 --- a/tests/benchmark/CL/DirectConvolutionLayer.cpp +++ b/tests/benchmark/CL/DirectConvolutionLayer.cpp @@ -27,7 +27,7 @@ #include "arm_compute/runtime/CL/CLTensorAllocator.h" #include "arm_compute/runtime/CL/functions/CLDirectConvolutionLayer.h" #include "tests/CL/CLAccessor.h" -#include "tests/benchmark/fixtures/ConvolutionLayerFixture.h" +#include "tests/benchmark/fixtures/DirectConvolutionLayerFixture.h" #include "tests/datasets/system_tests/alexnet/AlexNetConvolutionLayerDataset.h" #include "tests/datasets/system_tests/googlenet/inceptionv1/GoogLeNetInceptionV1ConvolutionLayerDataset.h" #include "tests/datasets/system_tests/googlenet/inceptionv4/GoogLeNetInceptionV4ConvolutionLayerDataset.h" @@ -49,7 +49,7 @@ namespace const auto data_types = framework::dataset::make("DataType", { DataType::F16, DataType::F32 }); } // namespace -using CLConvolutionLayerFixture = ConvolutionLayerFixture; +using CLConvolutionLayerFixture = DirectConvolutionLayerFixture; TEST_SUITE(CL) diff --git a/tests/benchmark/GLES_COMPUTE/DirectConvolutionLayer.cpp b/tests/benchmark/GLES_COMPUTE/DirectConvolutionLayer.cpp index 784f8e85ef..d319c41088 100644 --- a/tests/benchmark/GLES_COMPUTE/DirectConvolutionLayer.cpp +++ b/tests/benchmark/GLES_COMPUTE/DirectConvolutionLayer.cpp @@ -27,7 +27,7 @@ #include "arm_compute/runtime/GLES_COMPUTE/GCTensorAllocator.h" #include "arm_compute/runtime/GLES_COMPUTE/functions/GCDirectConvolutionLayer.h" #include "tests/GLES_COMPUTE/GCAccessor.h" -#include "tests/benchmark/fixtures/ConvolutionLayerFixture.h" +#include 
"tests/benchmark/fixtures/DirectConvolutionLayerFixture.h" #include "tests/datasets/system_tests/alexnet/AlexNetConvolutionLayerDataset.h" #include "tests/datasets/system_tests/googlenet/inceptionv1/GoogLeNetInceptionV1ConvolutionLayerDataset.h" #include "tests/datasets/system_tests/googlenet/inceptionv4/GoogLeNetInceptionV4ConvolutionLayerDataset.h" @@ -49,7 +49,7 @@ namespace const auto data_types = framework::dataset::make("DataType", { DataType::F32, DataType::F16 }); } // namespace -using GCConvolutionLayerFixture = ConvolutionLayerFixture; +using GCConvolutionLayerFixture = DirectConvolutionLayerFixture; TEST_SUITE(GC) diff --git a/tests/benchmark/NEON/ConvolutionLayer.cpp b/tests/benchmark/NEON/ConvolutionLayer.cpp index 9914d08c72..a425d95a6e 100644 --- a/tests/benchmark/NEON/ConvolutionLayer.cpp +++ b/tests/benchmark/NEON/ConvolutionLayer.cpp @@ -29,6 +29,7 @@ #include "arm_compute/runtime/TensorAllocator.h" #include "tests/NEON/Accessor.h" #include "tests/benchmark/fixtures/ConvolutionLayerFixture.h" +#include "tests/benchmark/fixtures/WinogradLayerFixture.h" #include "tests/datasets/system_tests/alexnet/AlexNetConvolutionLayerDataset.h" #include "tests/datasets/system_tests/googlenet/inceptionv1/GoogLeNetInceptionV1ConvolutionLayerDataset.h" #include "tests/datasets/system_tests/googlenet/inceptionv4/GoogLeNetInceptionV4ConvolutionLayerDataset.h" @@ -61,7 +62,7 @@ using NEGEMMConvolutionLayerFixture = ConvolutionLayerFixture; +using NEWinogradLayerFixture = WinogradLayerFixture; REGISTER_FIXTURE_DATA_TEST_CASE(AlexNetWinogradLayer, NEWinogradLayerFixture, framework::DatasetMode::ALL, framework::dataset::combine(framework::dataset::combine(datasets::AlexNetWinogradLayerDataset(), framework::dataset::make("DataType", DataType::F32)), diff --git a/tests/benchmark/NEON/DirectConvolutionLayer.cpp b/tests/benchmark/NEON/DirectConvolutionLayer.cpp index 67b948568f..8a17f3c03c 100644 --- a/tests/benchmark/NEON/DirectConvolutionLayer.cpp +++ 
b/tests/benchmark/NEON/DirectConvolutionLayer.cpp @@ -27,7 +27,7 @@ #include "arm_compute/runtime/Tensor.h" #include "arm_compute/runtime/TensorAllocator.h" #include "tests/NEON/Accessor.h" -#include "tests/benchmark/fixtures/ConvolutionLayerFixture.h" +#include "tests/benchmark/fixtures/DirectConvolutionLayerFixture.h" #include "tests/datasets/DirectConvolutionLayerDataset.h" #include "tests/datasets/system_tests/alexnet/AlexNetConvolutionLayerDataset.h" #include "tests/datasets/system_tests/googlenet/inceptionv1/GoogLeNetInceptionV1ConvolutionLayerDataset.h" @@ -55,7 +55,7 @@ const auto data_types = framework::dataset::make("DataType", { DataType::F32 }); #endif /* ARM_COMPUTE_ENABLE_F16 */ } // namespace -using NEConvolutionLayerFixture = ConvolutionLayerFixture; +using NEConvolutionLayerFixture = DirectConvolutionLayerFixture; TEST_SUITE(NEON) diff --git a/tests/benchmark/fixtures/ConvolutionLayerFixture.h b/tests/benchmark/fixtures/ConvolutionLayerFixture.h index 9815040d42..7558b4c9a8 100644 --- a/tests/benchmark/fixtures/ConvolutionLayerFixture.h +++ b/tests/benchmark/fixtures/ConvolutionLayerFixture.h @@ -42,7 +42,7 @@ class ConvolutionLayerFixture : public framework::Fixture { public: template - void setup(TensorShape src_shape, TensorShape weights_shape, TensorShape biases_shape, TensorShape dst_shape, PadStrideInfo info, DataType data_type, int batches) + void setup(TensorShape src_shape, TensorShape weights_shape, TensorShape biases_shape, TensorShape dst_shape, PadStrideInfo info, Size2D dilation, DataType data_type, int batches) { // Set batched in source and destination shapes const unsigned int fixed_point_position = 4; @@ -57,7 +57,7 @@ public: dst = create_tensor(dst_shape, data_type, 1, fixed_point_position); // Create and configure function - conv_layer.configure(&src, &weights, &biases, &dst, info); + conv_layer.configure(&src, &weights, &biases, &dst, info, WeightsInfo(), dilation); // Allocate tensors src.allocator()->allocate(); diff --git 
a/tests/benchmark/fixtures/DirectConvolutionLayerFixture.h b/tests/benchmark/fixtures/DirectConvolutionLayerFixture.h new file mode 100644 index 0000000000..e3289b7fbc --- /dev/null +++ b/tests/benchmark/fixtures/DirectConvolutionLayerFixture.h @@ -0,0 +1,101 @@ +/* + * Copyright (c) 2017-2018 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_TEST_DIRECTCONVOLUTIONLAYERFIXTURE +#define ARM_COMPUTE_TEST_DIRECTCONVOLUTIONLAYERFIXTURE + +#include "arm_compute/core/Error.h" +#include "arm_compute/core/TensorShape.h" +#include "arm_compute/core/Types.h" +#include "tests/Globals.h" +#include "tests/Utils.h" +#include "tests/framework/Fixture.h" + +namespace arm_compute +{ +namespace test +{ +namespace benchmark +{ +/** Fixture that can be used for NEON and CL */ +template +class DirectConvolutionLayerFixture : public framework::Fixture +{ +public: + template + void setup(TensorShape src_shape, TensorShape weights_shape, TensorShape biases_shape, TensorShape dst_shape, PadStrideInfo info, Size2D dilation, DataType data_type, int batches) + { + ARM_COMPUTE_UNUSED(dilation); + + // Set batched in source and destination shapes + const unsigned int fixed_point_position = 4; + src_shape.set(3 /* batch */, batches); + dst_shape.set(3 /* batch */, batches); + DataType bias_data_type = is_data_type_quantized_asymmetric(data_type) ? 
DataType::S32 : data_type; + + // Create tensors + src = create_tensor(src_shape, data_type, 1, fixed_point_position); + weights = create_tensor(weights_shape, data_type, 1, fixed_point_position); + biases = create_tensor(biases_shape, bias_data_type, 1, fixed_point_position); + dst = create_tensor(dst_shape, data_type, 1, fixed_point_position); + + // Create and configure function + conv_layer.configure(&src, &weights, &biases, &dst, info); + + // Allocate tensors + src.allocator()->allocate(); + weights.allocator()->allocate(); + biases.allocator()->allocate(); + dst.allocator()->allocate(); + } + + void run() + { + conv_layer.run(); + } + + void sync() + { + sync_if_necessary(); + sync_tensor_if_necessary(dst); + } + + void teardown() + { + src.allocator()->free(); + weights.allocator()->free(); + biases.allocator()->free(); + dst.allocator()->free(); + } + +private: + TensorType src{}; + TensorType weights{}; + TensorType biases{}; + TensorType dst{}; + Function conv_layer{}; +}; +} // namespace benchmark +} // namespace test +} // namespace arm_compute +#endif /* ARM_COMPUTE_TEST_DIRECTCONVOLUTIONLAYERFIXTURE */ diff --git a/tests/benchmark/fixtures/WinogradLayerFixture.h b/tests/benchmark/fixtures/WinogradLayerFixture.h new file mode 100644 index 0000000000..31a1eb8e1c --- /dev/null +++ b/tests/benchmark/fixtures/WinogradLayerFixture.h @@ -0,0 +1,100 @@ +/* + * Copyright (c) 2018 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_TEST_WINOGRADLAYERFIXTURE +#define ARM_COMPUTE_TEST_WINOGRADLAYERFIXTURE + +#include "arm_compute/core/TensorShape.h" +#include "arm_compute/core/Types.h" +#include "tests/Globals.h" +#include "tests/Utils.h" +#include "tests/framework/Fixture.h" + +namespace arm_compute +{ +namespace test +{ +namespace benchmark +{ +/** Fixture that can be used for NEON and CL */ +template +class WinogradLayerFixture : public framework::Fixture +{ +public: + template + void setup(TensorShape src_shape, TensorShape weights_shape, TensorShape biases_shape, TensorShape dst_shape, PadStrideInfo info, Size2D dilation, DataType data_type, int batches) + { + ARM_COMPUTE_UNUSED(dilation); + + // Set batched in source and destination shapes + const unsigned int fixed_point_position = 4; + src_shape.set(3 /* batch */, batches); + dst_shape.set(3 /* batch */, batches); + DataType bias_data_type = is_data_type_quantized_asymmetric(data_type) ? DataType::S32 : data_type; + + // Create tensors + src = create_tensor(src_shape, data_type, 1, fixed_point_position); + weights = create_tensor(weights_shape, data_type, 1, fixed_point_position); + biases = create_tensor(biases_shape, bias_data_type, 1, fixed_point_position); + dst = create_tensor(dst_shape, data_type, 1, fixed_point_position); + + // Create and configure function + conv_layer.configure(&src, &weights, &biases, &dst, info); + + // Allocate tensors + src.allocator()->allocate(); + weights.allocator()->allocate(); + biases.allocator()->allocate(); + dst.allocator()->allocate(); + } + + void run() + { + conv_layer.run(); + } + + void sync() + { + sync_if_necessary(); + sync_tensor_if_necessary(dst); + } + + void teardown() + { + src.allocator()->free(); + weights.allocator()->free(); + biases.allocator()->free(); + dst.allocator()->free(); + } + +private: + TensorType src{}; + TensorType weights{}; + TensorType biases{}; + TensorType dst{}; + Function conv_layer{}; +}; +} // namespace benchmark +} // namespace 
test +} // namespace arm_compute +#endif /* ARM_COMPUTE_TEST_WINOGRADLAYERFIXTURE */ diff --git a/tests/datasets/ConvolutionLayerDataset.h b/tests/datasets/ConvolutionLayerDataset.h index 6e2d2a142e..2981994555 100644 --- a/tests/datasets/ConvolutionLayerDataset.h +++ b/tests/datasets/ConvolutionLayerDataset.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -38,7 +38,7 @@ namespace datasets class ConvolutionLayerDataset { public: - using type = std::tuple; + using type = std::tuple; struct iterator { @@ -46,12 +46,14 @@ public: std::vector::const_iterator weights_it, std::vector::const_iterator biases_it, std::vector::const_iterator dst_it, - std::vector::const_iterator infos_it) + std::vector::const_iterator infos_it, + std::vector::const_iterator dilation_it) : _src_it{ std::move(src_it) }, _weights_it{ std::move(weights_it) }, _biases_it{ std::move(biases_it) }, _dst_it{ std::move(dst_it) }, - _infos_it{ std::move(infos_it) } + _infos_it{ std::move(infos_it) }, + _dilation_it{ std::move(dilation_it) } { } @@ -62,13 +64,14 @@ public: description << "Weights=" << *_weights_it << ":"; description << "Biases=" << *_biases_it << ":"; description << "Out=" << *_dst_it << ":"; - description << "Info=" << *_infos_it; + description << "Info=" << *_infos_it << ":"; + description << "Dilation=" << *_dilation_it; return description.str(); } ConvolutionLayerDataset::type operator*() const { - return std::make_tuple(*_src_it, *_weights_it, *_biases_it, *_dst_it, *_infos_it); + return std::make_tuple(*_src_it, *_weights_it, *_biases_it, *_dst_it, *_infos_it, *_dilation_it); } iterator &operator++() @@ -78,6 +81,7 @@ public: ++_biases_it; ++_dst_it; ++_infos_it; + ++_dilation_it; return *this; } @@ -88,25 +92,27 @@ public: std::vector::const_iterator _biases_it; std::vector::const_iterator _dst_it; std::vector::const_iterator _infos_it; + std::vector::const_iterator _dilation_it; }; iterator 
begin() const { - return iterator(_src_shapes.begin(), _weight_shapes.begin(), _bias_shapes.begin(), _dst_shapes.begin(), _infos.begin()); + return iterator(_src_shapes.begin(), _weight_shapes.begin(), _bias_shapes.begin(), _dst_shapes.begin(), _infos.begin(), _dilations.begin()); } int size() const { - return std::min(_src_shapes.size(), std::min(_weight_shapes.size(), std::min(_bias_shapes.size(), std::min(_dst_shapes.size(), _infos.size())))); + return std::min(_src_shapes.size(), std::min(_weight_shapes.size(), std::min(_bias_shapes.size(), std::min(_dst_shapes.size(), std::min(_infos.size(), _dilations.size()))))); } - void add_config(TensorShape src, TensorShape weights, TensorShape biases, TensorShape dst, PadStrideInfo info) + void add_config(TensorShape src, TensorShape weights, TensorShape biases, TensorShape dst, PadStrideInfo info, Size2D dilation = Size2D(1U, 1U)) { _src_shapes.emplace_back(std::move(src)); _weight_shapes.emplace_back(std::move(weights)); _bias_shapes.emplace_back(std::move(biases)); _dst_shapes.emplace_back(std::move(dst)); _infos.emplace_back(std::move(info)); + _dilations.emplace_back(std::move(dilation)); } protected: @@ -119,6 +125,7 @@ private: std::vector _bias_shapes{}; std::vector _dst_shapes{}; std::vector _infos{}; + std::vector _dilations{}; }; } // namespace datasets } // namespace test diff --git a/tests/datasets/DilatedConvolutionLayerDataset.h b/tests/datasets/DilatedConvolutionLayerDataset.h new file mode 100644 index 0000000000..3bbf282a31 --- /dev/null +++ b/tests/datasets/DilatedConvolutionLayerDataset.h @@ -0,0 +1,81 @@ +/* + * Copyright (c) 2018 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_TEST_DILATED_CONVOLUTION_LAYER_DATASET +#define ARM_COMPUTE_TEST_DILATED_CONVOLUTION_LAYER_DATASET + +#include "utils/TypePrinter.h" + +#include "arm_compute/core/TensorShape.h" +#include "arm_compute/core/Types.h" +#include "tests/datasets/ConvolutionLayerDataset.h" + +namespace arm_compute +{ +namespace test +{ +namespace datasets +{ +class TinyDilatedConvolutionLayerDataset final : public ConvolutionLayerDataset +{ +public: + TinyDilatedConvolutionLayerDataset() + { + // Batch size 1 + add_config(TensorShape(23U, 27U, 5U), TensorShape(3U, 3U, 5U, 21U), TensorShape(21U), TensorShape(10U, 23U, 21U), PadStrideInfo(2, 1, 0, 0), Size2D(2U, 2U)); + add_config(TensorShape(33U, 27U, 7U), TensorShape(5U, 5U, 7U, 16U), TensorShape(16U), TensorShape(11U, 10U, 16U), PadStrideInfo(3, 2, 1, 0), Size2D(1U, 2U)); + // Batch size 4 + add_config(TensorShape(17U, 31U, 2U, 4U), TensorShape(5U, 5U, 2U, 19U), TensorShape(19U), TensorShape(11U, 13U, 19U, 4U), PadStrideInfo(1, 2, 1, 1), Size2D(2U, 2U)); + } +}; + +class SmallDilatedConvolutionLayerDataset final : public ConvolutionLayerDataset +{ +public: + SmallDilatedConvolutionLayerDataset() + { + add_config(TensorShape(23U, 27U, 5U), TensorShape(3U, 3U, 5U, 21U), TensorShape(21U), TensorShape(10U, 23U, 21U), PadStrideInfo(2, 1, 0, 0), Size2D(2U, 2U)); + add_config(TensorShape(33U, 27U, 7U), TensorShape(5U, 5U, 7U, 16U), TensorShape(16U), TensorShape(11U, 10U, 16U), PadStrideInfo(3, 2, 1, 0), Size2D(1U, 2U)); + add_config(TensorShape(17U, 31U, 2U), TensorShape(5U, 5U, 2U, 19U), TensorShape(19U), TensorShape(11U, 15U, 19U), PadStrideInfo(1, 2, 1, 1), Size2D(2U, 1U)); + add_config(TensorShape(23U, 27U, 5U), TensorShape(3U, 1U, 5U, 21U), TensorShape(21U), TensorShape(9U, 27U, 21U), PadStrideInfo(2, 1, 0, 0), Size2D(3U, 1U)); + add_config(TensorShape(17U, 31U, 2U), TensorShape(5U, 3U, 2U, 19U), TensorShape(19U), TensorShape(11U, 15U, 19U), PadStrideInfo(1, 2, 1, 1), Size2D(2U, 2U)); + } +}; + +class 
LargeDilatedConvolutionLayerDataset final : public ConvolutionLayerDataset +{ +public: + LargeDilatedConvolutionLayerDataset() + { + // Batch size 1 + add_config(TensorShape(27U, 27U, 96U), TensorShape(5U, 5U, 96U, 256U), TensorShape(256U), TensorShape(15U, 15U, 256U), PadStrideInfo(1, 1, 2, 2), Size2D(4U, 4U)); + add_config(TensorShape(13U, 13U, 256U), TensorShape(3U, 3U, 256U, 384U), TensorShape(384U), TensorShape(11U, 9U, 384U), PadStrideInfo(1, 1, 1, 1), Size2D(2U, 3U)); + add_config(TensorShape(13U, 13U, 384U), TensorShape(3U, 3U, 384U, 384U), TensorShape(384U), TensorShape(9U, 11U, 384U), PadStrideInfo(1, 1, 1, 1), Size2D(3U, 2U)); + add_config(TensorShape(13U, 13U, 384U), TensorShape(3U, 3U, 384U, 256U), TensorShape(256U), TensorShape(7U, 7U, 256U), PadStrideInfo(1, 1, 1, 1), Size2D(4U, 4U)); + add_config(TensorShape(224U, 224U, 3U), TensorShape(7U, 7U, 3U, 64U), TensorShape(64U), TensorShape(109U, 112U, 64U), PadStrideInfo(2, 2, 3, 3), Size2D(2U, 1U)); + } +}; +} // namespace datasets +} // namespace test +} // namespace arm_compute +#endif /* ARM_COMPUTE_TEST_DILATED_CONVOLUTION_LAYER_DATASET */ diff --git a/tests/validation/CL/ConvolutionLayer.cpp b/tests/validation/CL/ConvolutionLayer.cpp index 98d00ac47a..c50519b6ac 100644 --- a/tests/validation/CL/ConvolutionLayer.cpp +++ b/tests/validation/CL/ConvolutionLayer.cpp @@ -117,8 +117,10 @@ TEST_SUITE_END() TEST_SUITE(GEMMConvolutionLayer) DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(framework::dataset::concat(datasets::SmallConvolutionLayerDataset(), datasets::LargeConvolutionLayerDataset()), CNNDataTypes), - input_shape, weights_shape, bias_shape, output_shape, info, data_type) + input_shape, weights_shape, bias_shape, output_shape, info, dilation, data_type) { + ARM_COMPUTE_UNUSED(dilation); + // Set fixed point position data type allowed int fixed_point_position = is_data_type_fixed_point(data_type) ? 
3 : 0; diff --git a/tests/validation/CL/DilatedConvolutionLayer.cpp b/tests/validation/CL/DilatedConvolutionLayer.cpp new file mode 100644 index 0000000000..dadae2227b --- /dev/null +++ b/tests/validation/CL/DilatedConvolutionLayer.cpp @@ -0,0 +1,289 @@ +/* + * Copyright (c) 2018 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/CLTensor.h" +#include "arm_compute/runtime/CL/CLTensorAllocator.h" +#include "arm_compute/runtime/CL/functions/CLConvolutionLayer.h" +#include "arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h" +#include "tests/CL/CLAccessor.h" +#include "tests/PaddingCalculator.h" +#include "tests/datasets/DilatedConvolutionLayerDataset.h" +#include "tests/framework/Asserts.h" +#include "tests/framework/Macros.h" +#include "tests/framework/datasets/Datasets.h" +#include "tests/validation/Validation.h" +#include "tests/validation/fixtures/ConvolutionLayerFixture.h" + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +namespace +{ +RelativeTolerance tolerance_f32(0.05f); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F32 */ +RelativeTolerance tolerance_f16(half_float::half(0.2)); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F16 */ +constexpr AbsoluteTolerance tolerance_fixed(1.0f); /**< Tolerance value for comparing reference's output against implementation's output for fixed point data types */ +constexpr AbsoluteTolerance tolerance_qasymm8(0.0); /**< Tolerance value for comparing reference's output against implementation's output for quantized data types */ +constexpr float tolerance_num = 0.07f; /**< Tolerance number */ + +/** CNN data types */ +const auto CNNDataTypes = framework::dataset::make("DataType", +{ + DataType::F16, + DataType::F32, + DataType::QS8, + DataType::QS16, + DataType::QASYMM8, +}); +} // namespace + +TEST_SUITE(CL) +TEST_SUITE(DilatedConvolutionLayer) + +DATA_TEST_CASE(ValidateConvolutionMethod, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip(zip( + framework::dataset::make("InputInfo", { TensorInfo(TensorShape(17U, 31U, 2U), 1, DataType::F32, 0), + TensorInfo(TensorShape(17U, 31U, 2U), 1, DataType::F32, 0), + TensorInfo(TensorShape(23U, 27U, 
5U, 4U), 1, DataType::F32, 0), + TensorInfo(TensorShape(3U, 3U, 2U, 1U), 1, DataType::F32, 0), + TensorInfo(TensorShape(33U, 27U, 7U, 4U), 1, DataType::F32, 0) + }), + framework::dataset::make("WeightsInfo", { TensorInfo(TensorShape(5U, 5U, 2U, 19U), 1, DataType::F32, 0), + TensorInfo(TensorShape(5U, 5U, 2U, 19U), 1, DataType::F32, 0), + TensorInfo(TensorShape(3U, 3U, 5U, 21U), 1, DataType::F32, 0), + TensorInfo(TensorShape(3U, 3U, 5U, 21U), 1, DataType::F32, 0), + TensorInfo(TensorShape(5U, 5U, 7U, 16U), 1, DataType::F16, 0) + })), + framework::dataset::make("BiasesInfo", { TensorInfo(TensorShape(19U), 1, DataType::F32, 0), + TensorInfo(TensorShape(19U), 1, DataType::F32, 0), + TensorInfo(TensorShape(21U), 1, DataType::F32, 0), + TensorInfo(TensorShape(21U), 1, DataType::F32, 0), + TensorInfo(TensorShape(16U), 1, DataType::F32, 0) + })), + framework::dataset::make("OutputInfo", { TensorInfo(TensorShape(15U, 15U, 19U), 1, DataType::F32, 0), + TensorInfo(TensorShape(15U, 15U, 19U), 1, DataType::F32, 0), + TensorInfo(TensorShape(21U, 25U, 21U, 4U), 1, DataType::F32, 0), + TensorInfo(TensorShape(11U, 25U, 21U), 1, DataType::F32, 0), + TensorInfo(TensorShape(11U, 12U, 16U, 4U), 1, DataType::F32, 0) + })), + framework::dataset::make("ConvInfo", { PadStrideInfo(1, 2, 1, 1), + PadStrideInfo(1, 2, 1, 1), + PadStrideInfo(1, 1, 0, 0), + PadStrideInfo(2, 1, 0, 0), + PadStrideInfo(3, 2, 1, 0) + })), + framework::dataset::make("GpuTarget", { GPUTarget::BIFROST, + GPUTarget::MIDGARD, + GPUTarget::G71, + GPUTarget::MIDGARD, + GPUTarget::BIFROST + })), + framework::dataset::make("Dilation", { Size2D(1U, 1U), + Size2D(1U, 1U), + Size2D(1U, 1U), + Size2D(2U, 2U), + Size2D(3U, 3U) + })), + + framework::dataset::make("Expected", { ConvolutionMethod::GEMM, ConvolutionMethod::GEMM, ConvolutionMethod::GEMM, ConvolutionMethod::GEMM, ConvolutionMethod::GEMM })), + input_info, weights_info, biases_info, output_info, conv_info, gpu_target, dilation, expected) +{ + ConvolutionMethod is_valid 
= CLConvolutionLayer::get_convolution_method(&input_info.clone()->set_is_resizable(false), + &weights_info.clone()->set_is_resizable(false), + &biases_info.clone()->set_is_resizable(false), + &output_info.clone()->set_is_resizable(false), conv_info, WeightsInfo(), gpu_target, dilation); + ARM_COMPUTE_EXPECT(is_valid == expected, framework::LogLevel::ERRORS); +} +TEST_SUITE_END() + +TEST_SUITE(GEMMDilatedConvolutionLayer) + +DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(framework::dataset::concat(datasets::SmallDilatedConvolutionLayerDataset(), datasets::LargeDilatedConvolutionLayerDataset()), + CNNDataTypes), + input_shape, weights_shape, bias_shape, output_shape, info, dilation, data_type) +{ + // Set fixed point position data type allowed + int fixed_point_position = is_data_type_fixed_point(data_type) ? 3 : 0; + + auto bias_data_type = is_data_type_quantized_asymmetric(data_type) ? DataType::S32 : data_type; + + // Create tensors + CLTensor src = create_tensor(input_shape, data_type, 1, fixed_point_position, QuantizationInfo(2.f / 255.f, 127)); + CLTensor weights = create_tensor(weights_shape, data_type, 1, fixed_point_position, QuantizationInfo(2.f / 255.f, 127)); + CLTensor bias = create_tensor(bias_shape, bias_data_type, 1, fixed_point_position, QuantizationInfo(2.f / 255.f, 127)); + CLTensor dst = create_tensor(output_shape, data_type, 1, fixed_point_position, QuantizationInfo(2.f / 255.f, 127)); + + ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(weights.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(bias.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS); + + const QuantizationInfo src_quantization_info = src.info()->quantization_info(); + const QuantizationInfo weights_quantization_info = weights.info()->quantization_info(); + + // Create and configure function + 
CLGEMMConvolutionLayer conv; + conv.configure(&src, &weights, &bias, &dst, info, WeightsInfo(), dilation); + + // Validate valid region + const ValidRegion src_valid_region = shape_to_valid_region(input_shape); + const ValidRegion weights_valid_region = shape_to_valid_region(weights_shape); + const ValidRegion bias_valid_region = shape_to_valid_region(bias_shape); + const ValidRegion dst_valid_region = shape_to_valid_region(output_shape); + + validate(src.info()->valid_region(), src_valid_region); + validate(weights.info()->valid_region(), weights_valid_region); + validate(bias.info()->valid_region(), bias_valid_region); + validate(dst.info()->valid_region(), dst_valid_region); + + // Validate QuantizationInfo + ARM_COMPUTE_EXPECT(src.info()->quantization_info() == src_quantization_info, framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(weights.info()->quantization_info() == weights_quantization_info, framework::LogLevel::ERRORS); + + // Validate padding + //TODO(COMPMID-415) Need to validate padding? 
+} + +template +using CLGEMMDilatedConvolutionLayerFixture = ConvolutionValidationFixture; + +TEST_SUITE(Float) +TEST_SUITE(FP16) +FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMDilatedConvolutionLayerFixture, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallDilatedConvolutionLayerDataset(), + framework::dataset::make("ReshapeWeights", { true })), + framework::dataset::make("DataType", + DataType::F16))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num); +} +FIXTURE_DATA_TEST_CASE(RunLarge, CLGEMMDilatedConvolutionLayerFixture, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeDilatedConvolutionLayerDataset(), + framework::dataset::make("ReshapeWeights", { true })), + framework::dataset::make("DataType", + DataType::F16))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num); +} +TEST_SUITE_END() + +TEST_SUITE(FP32) +FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMDilatedConvolutionLayerFixture, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallDilatedConvolutionLayerDataset(), + framework::dataset::make("ReshapeWeights", { true })), + framework::dataset::make("DataType", + DataType::F32))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f32); +} +FIXTURE_DATA_TEST_CASE(RunLarge, CLGEMMDilatedConvolutionLayerFixture, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeDilatedConvolutionLayerDataset(), + framework::dataset::make("ReshapeWeights", { true })), + framework::dataset::make("DataType", + DataType::F32))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f32); +} +TEST_SUITE_END() +TEST_SUITE_END() + +template +using CLGEMMDilatedConvolutionLayerFixedPointFixture = ConvolutionValidationFixedPointFixture; + +TEST_SUITE(FixedPoint) +TEST_SUITE(QS8) +// We test for fixed point precision [4,6] +FIXTURE_DATA_TEST_CASE(RunTiny, 
CLGEMMDilatedConvolutionLayerFixedPointFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::TinyDilatedConvolutionLayerDataset(), + framework::dataset::make("ReshapeWeights", { true })), + framework::dataset::make("DataType", + DataType::QS8)), + framework::dataset::make("FractionalBits", 4, 7))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_fixed); +} +FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMDilatedConvolutionLayerFixedPointFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::SmallDilatedConvolutionLayerDataset(), + framework::dataset::make("ReshapeWeights", { true })), + framework::dataset::make("DataType", + DataType::QS8)), + framework::dataset::make("FractionalBits", 4, 7))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_fixed); +} +TEST_SUITE_END() + +TEST_SUITE(QS16) +// Testing for fixed point position [1,14) +FIXTURE_DATA_TEST_CASE(RunTiny, CLGEMMDilatedConvolutionLayerFixedPointFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::TinyDilatedConvolutionLayerDataset(), + framework::dataset::make("ReshapeWeights", { true })), + framework::dataset::make("DataType", + DataType::QS16)), + framework::dataset::make("FractionalBits", 1, 14))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_fixed); +} +FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMDilatedConvolutionLayerFixedPointFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::SmallDilatedConvolutionLayerDataset(), + framework::dataset::make("ReshapeWeights", { true })), + framework::dataset::make("DataType", + DataType::QS16)), + framework::dataset::make("FractionalBits", 1, 14))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_fixed); +} +TEST_SUITE_END() +TEST_SUITE_END() + +template +using CLGEMMDilatedConvolutionLayerQuantizedFixture = ConvolutionValidationQuantizedFixture; + +TEST_SUITE(Quantized) 
+TEST_SUITE(QASYMM8) +FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMDilatedConvolutionLayerQuantizedFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallDilatedConvolutionLayerDataset(), + framework::dataset::make("ReshapeWeights", { true })), + framework::dataset::make("DataType", DataType::QASYMM8)), + framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255.f, 10) }))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_qasymm8); +} +FIXTURE_DATA_TEST_CASE(RunLarge, CLGEMMDilatedConvolutionLayerQuantizedFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeDilatedConvolutionLayerDataset(), + framework::dataset::make("ReshapeWeights", { true })), + framework::dataset::make("DataType", DataType::QASYMM8)), + framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255.f, 0) }))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_qasymm8); +} +TEST_SUITE_END() +TEST_SUITE_END() + +TEST_SUITE_END() +TEST_SUITE_END() +} // namespace validation +} // namespace test +} // namespace arm_compute diff --git a/tests/validation/CL/LocallyConnected.cpp b/tests/validation/CL/LocallyConnected.cpp index de79d60e7f..6387fcc64a 100644 --- a/tests/validation/CL/LocallyConnected.cpp +++ b/tests/validation/CL/LocallyConnected.cpp @@ -110,8 +110,10 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip( DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(framework::dataset::concat(datasets::SmallLocallyConnectedDataset(), datasets::LargeLocallyConnectedDataset()), framework::dataset::make("DataType", DataType::F32)), - src_shape, weights_shape, bias_shape, dst_shape, info, data_type) + src_shape, weights_shape, bias_shape, dst_shape, info, dilation, data_type) { + ARM_COMPUTE_UNUSED(dilation); + // Create tensors CLTensor src = create_tensor(src_shape, data_type); CLTensor weights = create_tensor(weights_shape, 
data_type); diff --git a/tests/validation/GLES_COMPUTE/ConvolutionLayer.cpp b/tests/validation/GLES_COMPUTE/ConvolutionLayer.cpp index c66a1001e5..aaa0d159be 100644 --- a/tests/validation/GLES_COMPUTE/ConvolutionLayer.cpp +++ b/tests/validation/GLES_COMPUTE/ConvolutionLayer.cpp @@ -59,7 +59,7 @@ TEST_SUITE(GC) TEST_SUITE(ConvolutionLayer) DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(framework::dataset::concat(datasets::SmallConvolutionLayerDataset(), datasets::LargeConvolutionLayerDataset()), CNNDataTypes), - input_shape, weights_shape, bias_shape, output_shape, info, data_type) + input_shape, weights_shape, bias_shape, output_shape, info, dilation, data_type) { // Set fixed point position data type allowed int fixed_point_position = is_data_type_fixed_point(data_type) ? 3 : 0; @@ -82,7 +82,7 @@ DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(framework::da // Create and configure function GCConvolutionLayer conv; - conv.configure(&src, &weights, &bias, &dst, info); + conv.configure(&src, &weights, &bias, &dst, info, WeightsInfo(), dilation); // Validate valid region const ValidRegion src_valid_region = shape_to_valid_region(input_shape); diff --git a/tests/validation/NEON/ConvolutionLayer.cpp b/tests/validation/NEON/ConvolutionLayer.cpp index 34306b381c..313e4bc4d6 100644 --- a/tests/validation/NEON/ConvolutionLayer.cpp +++ b/tests/validation/NEON/ConvolutionLayer.cpp @@ -126,8 +126,10 @@ TEST_SUITE_END() TEST_SUITE(GEMMConvolutionLayer) DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(framework::dataset::concat(datasets::SmallConvolutionLayerDataset(), datasets::LargeConvolutionLayerDataset()), CNNDataTypes), - input_shape, weights_shape, bias_shape, output_shape, info, data_type) + input_shape, weights_shape, bias_shape, output_shape, info, dilation, data_type) { + ARM_COMPUTE_UNUSED(dilation); + // Set fixed point position data type allowed int fixed_point_position = is_data_type_fixed_point(data_type) ? 
3 : 0; diff --git a/tests/validation/NEON/DilatedConvolutionLayer.cpp b/tests/validation/NEON/DilatedConvolutionLayer.cpp new file mode 100644 index 0000000000..c0a72ec8cb --- /dev/null +++ b/tests/validation/NEON/DilatedConvolutionLayer.cpp @@ -0,0 +1,274 @@ +/* + * Copyright (c) 2018 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/NEON/functions/NEConvolutionLayer.h" +#include "arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h" +#include "arm_compute/runtime/Tensor.h" +#include "arm_compute/runtime/TensorAllocator.h" +#include "tests/NEON/Accessor.h" +#include "tests/PaddingCalculator.h" +#include "tests/datasets/DilatedConvolutionLayerDataset.h" +#include "tests/framework/Asserts.h" +#include "tests/framework/Macros.h" +#include "tests/framework/datasets/Datasets.h" +#include "tests/validation/Validation.h" +#include "tests/validation/fixtures/ConvolutionLayerFixture.h" + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +namespace +{ +const AbsoluteTolerance tolerance_f32(0.001f); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F32 */ +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +const AbsoluteTolerance tolerance_f16(0.01f); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F16 */ +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +const AbsoluteTolerance tolerance_q(1.0f); /**< Tolerance value for comparing reference's output against implementation's output for fixed point data types */ +constexpr AbsoluteTolerance tolerance_qasymm8(0.0); /**< Tolerance value for comparing reference's output against implementation's output for quantized data types */ + +/** CNN data types */ +const auto CNNDataTypes = framework::dataset::make("DataType", +{ +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC + DataType::F16, +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ + DataType::F32, + DataType::QS8, + DataType::QS16, + DataType::QASYMM8, +}); +} // namespace + +TEST_SUITE(NEON) + +TEST_SUITE(DilatedConvolutionLayer) +DATA_TEST_CASE(ValidateConvolutionMethod, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip( + framework::dataset::make("InputInfo", { TensorInfo(TensorShape(8U, 8U, 2U), 1, 
DataType::F32, 0), + TensorInfo(TensorShape(23U, 27U, 5U, 4U), 1, DataType::F32, 0), + TensorInfo(TensorShape(3U, 3U, 2U, 1U), 1, DataType::F32, 0), + TensorInfo(TensorShape(33U, 27U, 7U, 4U), 1, DataType::F32, 0) + }), + framework::dataset::make("WeightsInfo", { TensorInfo(TensorShape(3U, 3U, 5U, 21U), 1, DataType::F32, 0), + TensorInfo(TensorShape(3U, 3U, 5U, 21U), 1, DataType::F32, 0), + TensorInfo(TensorShape(3U, 3U, 5U, 21U), 1, DataType::F32, 0), + TensorInfo(TensorShape(5U, 5U, 7U, 16U), 1, DataType::F16, 0) + })), + framework::dataset::make("BiasesInfo", { TensorInfo(TensorShape(1U), 1, DataType::F32, 0), + TensorInfo(TensorShape(21U), 1, DataType::F32, 0), + TensorInfo(TensorShape(21U), 1, DataType::F32, 0), + TensorInfo(TensorShape(16U), 1, DataType::F32, 0) + })), + framework::dataset::make("OutputInfo", { TensorInfo(TensorShape(6U, 6U, 1U), 1, DataType::F32, 0), + TensorInfo(TensorShape(21U, 25U, 21U, 4U), 1, DataType::F32, 0), + TensorInfo(TensorShape(11U, 25U, 21U), 1, DataType::F32, 0), + TensorInfo(TensorShape(11U, 12U, 16U, 4U), 1, DataType::F32, 0) + })), + framework::dataset::make("ConvInfo", { PadStrideInfo(1, 1, 0, 0), + PadStrideInfo(1, 1, 0, 0), + PadStrideInfo(2, 1, 0, 0), + PadStrideInfo(3, 2, 1, 0) + })), + framework::dataset::make("Dilation", { Size2D(1U, 2U), + Size2D(2U, 1U), + Size2D(2U, 2U), + Size2D(3U, 3U) + })), + framework::dataset::make("Expected", { ConvolutionMethod::GEMM, ConvolutionMethod::GEMM, ConvolutionMethod::GEMM, ConvolutionMethod::GEMM })), + input_info, weights_info, biases_info, output_info, conv_info, dilation, expected) +{ + ConvolutionMethod is_valid = NEConvolutionLayer::get_convolution_method(&input_info.clone()->set_is_resizable(false), + &weights_info.clone()->set_is_resizable(false), + &biases_info.clone()->set_is_resizable(false), + &output_info.clone()->set_is_resizable(false), + conv_info, WeightsInfo(), dilation); + ARM_COMPUTE_EXPECT(is_valid == expected, framework::LogLevel::ERRORS); +} 
+TEST_SUITE_END() + +TEST_SUITE(GEMMDilatedConvolutionLayer) + +DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(framework::dataset::concat(datasets::SmallDilatedConvolutionLayerDataset(), datasets::LargeDilatedConvolutionLayerDataset()), + CNNDataTypes), + input_shape, weights_shape, bias_shape, output_shape, info, dilation, data_type) +{ + // Set fixed point position data type allowed + int fixed_point_position = is_data_type_fixed_point(data_type) ? 3 : 0; + + auto bias_data_type = is_data_type_quantized_asymmetric(data_type) ? DataType::S32 : data_type; + + // Create tensors + Tensor src = create_tensor(input_shape, data_type, 1, fixed_point_position, QuantizationInfo(2.f / 255.f, 127)); + Tensor weights = create_tensor(weights_shape, data_type, 1, fixed_point_position, QuantizationInfo(2.f / 255.f, 127)); + Tensor bias = create_tensor(bias_shape, bias_data_type, 1, fixed_point_position, QuantizationInfo(2.f / 255.f, 127)); + Tensor dst = create_tensor(output_shape, data_type, 1, fixed_point_position, QuantizationInfo(2.f / 255.f, 127)); + + ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(weights.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(bias.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS); + + const QuantizationInfo src_quantization_info = src.info()->quantization_info(); + const QuantizationInfo weights_quantization_info = weights.info()->quantization_info(); + + // Create and configure function + NEGEMMConvolutionLayer conv; + conv.configure(&src, &weights, &bias, &dst, info, WeightsInfo(), dilation); + + // Validate valid region + const ValidRegion src_valid_region = shape_to_valid_region(input_shape); + const ValidRegion weights_valid_region = shape_to_valid_region(weights_shape); + const ValidRegion bias_valid_region = shape_to_valid_region(bias_shape); + const ValidRegion 
dst_valid_region = shape_to_valid_region(output_shape); + + validate(src.info()->valid_region(), src_valid_region); + validate(weights.info()->valid_region(), weights_valid_region); + validate(bias.info()->valid_region(), bias_valid_region); + validate(dst.info()->valid_region(), dst_valid_region); + + // Validate QuantizationInfo + ARM_COMPUTE_EXPECT(src.info()->quantization_info() == src_quantization_info, framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(weights.info()->quantization_info() == weights_quantization_info, framework::LogLevel::ERRORS); + + // Validate padding + //TODO(COMPMID-415) Need to validate padding? +} + +template +using NEGEMMDilatedConvolutionLayerFixture = ConvolutionValidationFixture; + +TEST_SUITE(Float) +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +TEST_SUITE(FP16) +FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMDilatedConvolutionLayerFixture, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallDilatedConvolutionLayerDataset(), + framework::dataset::make("ReshapeWeights", { true, false })), + framework::dataset::make("DataType", DataType::F16))) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_f16); +} +FIXTURE_DATA_TEST_CASE(RunLarge, NEGEMMDilatedConvolutionLayerFixture, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeDilatedConvolutionLayerDataset(), + framework::dataset::make("ReshapeWeights", { true, false })), + framework::dataset::make("DataType", DataType::F16))) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_f16); +} +TEST_SUITE_END() +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ + +TEST_SUITE(FP32) +FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMDilatedConvolutionLayerFixture, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallDilatedConvolutionLayerDataset(), + framework::dataset::make("ReshapeWeights", { true, false })), + framework::dataset::make("DataType", DataType::F32))) +{ + // Validate output + validate(Accessor(_target), 
_reference, tolerance_f32); +} +FIXTURE_DATA_TEST_CASE(RunLarge, NEGEMMDilatedConvolutionLayerFixture, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeDilatedConvolutionLayerDataset(), + framework::dataset::make("ReshapeWeights", { true, false })), + framework::dataset::make("DataType", DataType::F32))) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_f32); +} +TEST_SUITE_END() +TEST_SUITE_END() + +template +using NEGEMMDilatedConvolutionLayerFixedPointFixture = ConvolutionValidationFixedPointFixture; + +TEST_SUITE(FixedPoint) +TEST_SUITE(QS8) +// We test for fixed point precision [4,6] +FIXTURE_DATA_TEST_CASE(RunTiny, NEGEMMDilatedConvolutionLayerFixedPointFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::TinyDilatedConvolutionLayerDataset(), + framework::dataset::make("ReshapeWeights", { true, false })), + framework::dataset::make("DataType", DataType::QS8)), + framework::dataset::make("FractionalBits", 4, 7))) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_q); +} +FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMDilatedConvolutionLayerFixedPointFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::SmallDilatedConvolutionLayerDataset(), + framework::dataset::make("ReshapeWeights", { true, false })), + framework::dataset::make("DataType", DataType::QS8)), + framework::dataset::make("FractionalBits", 4, 7))) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_q); +} +TEST_SUITE_END() + +TEST_SUITE(QS16) +// Testing for fixed point position [1,14) +FIXTURE_DATA_TEST_CASE(RunTiny, NEGEMMDilatedConvolutionLayerFixedPointFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::TinyDilatedConvolutionLayerDataset(), + framework::dataset::make("ReshapeWeights", { true, false })), + framework::dataset::make("DataType", DataType::QS16)), + framework::dataset::make("FractionalBits", 1, 14))) +{ + // Validate output + 
validate(Accessor(_target), _reference, tolerance_q); +} +FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMDilatedConvolutionLayerFixedPointFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::SmallDilatedConvolutionLayerDataset(), + framework::dataset::make("ReshapeWeights", { true, false })), + framework::dataset::make("DataType", DataType::QS16)), + framework::dataset::make("FractionalBits", 1, 14))) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_q); +} +TEST_SUITE_END() +TEST_SUITE_END() + +template +using NEGEMMDilatedConvolutionLayerQuantizedFixture = ConvolutionValidationQuantizedFixture; + +TEST_SUITE(Quantized) +TEST_SUITE(QASYMM8) +FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMDilatedConvolutionLayerQuantizedFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallDilatedConvolutionLayerDataset(), + framework::dataset::make("ReshapeWeights", { true })), + framework::dataset::make("DataType", DataType::QASYMM8)), + framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255.f, 10) }))) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_qasymm8); +} +FIXTURE_DATA_TEST_CASE(RunLarge, NEGEMMDilatedConvolutionLayerQuantizedFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeDilatedConvolutionLayerDataset(), + framework::dataset::make("ReshapeWeights", { true })), + framework::dataset::make("DataType", DataType::QASYMM8)), + framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255.f, 10) }))) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_qasymm8); +} +TEST_SUITE_END() +TEST_SUITE_END() + +TEST_SUITE_END() +TEST_SUITE_END() +} // namespace validation +} // namespace test +} // namespace arm_compute diff --git a/tests/validation/NEON/LocallyConnected.cpp b/tests/validation/NEON/LocallyConnected.cpp index b00f27445c..0c36ff6c85 100644 --- a/tests/validation/NEON/LocallyConnected.cpp +++ 
b/tests/validation/NEON/LocallyConnected.cpp @@ -111,8 +111,10 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip( DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(framework::dataset::concat(datasets::SmallLocallyConnectedDataset(), datasets::LargeLocallyConnectedDataset()), framework::dataset::make("DataType", DataType::F32)), - src_shape, weights_shape, bias_shape, dst_shape, info, data_type) + src_shape, weights_shape, bias_shape, dst_shape, info, dilation, data_type) { + ARM_COMPUTE_UNUSED(dilation); + // Create tensors Tensor src = create_tensor(src_shape, data_type); Tensor weights = create_tensor(weights_shape, data_type); diff --git a/tests/validation/fixtures/ConvolutionLayerFixture.h b/tests/validation/fixtures/ConvolutionLayerFixture.h index 48b4665fe7..6a100acef3 100644 --- a/tests/validation/fixtures/ConvolutionLayerFixture.h +++ b/tests/validation/fixtures/ConvolutionLayerFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. 
* * SPDX-License-Identifier: MIT * @@ -54,7 +54,7 @@ public: public: template - void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, PadStrideInfo info, bool reshape_weights, + void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, PadStrideInfo info, Size2D dilation, bool reshape_weights, DataType data_type, int fractional_bits, QuantizationInfo quantization_info) { _data_type = data_type; @@ -63,8 +63,8 @@ public: _fractional_bits = fractional_bits; _quantization_info = quantization_info; - _target = compute_target(input_shape, weights_shape, bias_shape, output_shape, info, reshape_weights); - _reference = compute_reference(input_shape, weights_shape, bias_shape, output_shape, info); + _target = compute_target(input_shape, weights_shape, bias_shape, output_shape, info, reshape_weights, dilation); + _reference = compute_reference(input_shape, weights_shape, bias_shape, output_shape, info, dilation); } protected: @@ -98,7 +98,7 @@ protected: } TensorType compute_target(const TensorShape &input_shape, const TensorShape &weights_shape, const TensorShape &bias_shape, const TensorShape &output_shape, const PadStrideInfo &info, - bool reshape_weights) + bool reshape_weights, const Size2D &dilation) { WeightsInfo weights_info(!reshape_weights, weights_shape.x(), weights_shape.y(), weights_shape[3]); TensorShape reshaped_weights_shape(weights_shape); @@ -144,7 +144,7 @@ protected: // Create and configure function FunctionType conv; - conv.configure(&src, &weights, &bias, &dst, info, weights_info); + conv.configure(&src, &weights, &bias, &dst, info, weights_info, dilation); ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS); ARM_COMPUTE_EXPECT(weights.info()->is_resizable(), framework::LogLevel::ERRORS); @@ -220,7 +220,8 @@ protected: return dst; } - SimpleTensor compute_reference(const TensorShape &input_shape, const TensorShape 
&weights_shape, const TensorShape &bias_shape, const TensorShape &output_shape, const PadStrideInfo &info) + SimpleTensor compute_reference(const TensorShape &input_shape, const TensorShape &weights_shape, const TensorShape &bias_shape, const TensorShape &output_shape, const PadStrideInfo &info, + const Size2D &dilation) { // Create reference SimpleTensor src{ input_shape, _data_type, 1, _fractional_bits, _quantization_info }; @@ -232,7 +233,7 @@ protected: fill(weights, 1); fill(bias, 2); - return reference::convolution_layer(src, weights, bias, output_shape, info); + return reference::convolution_layer(src, weights, bias, output_shape, info, dilation); } TensorType _target{}; @@ -293,9 +294,10 @@ class ConvolutionValidationFixture : public ConvolutionValidationGenericFixture< { public: template - void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, PadStrideInfo info, bool reshape_weights, DataType data_type) + void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, PadStrideInfo info, Size2D dilation, bool reshape_weights, DataType data_type) { - ConvolutionValidationGenericFixture::setup(input_shape, weights_shape, bias_shape, output_shape, info, reshape_weights, data_type, 0, QuantizationInfo()); + ConvolutionValidationGenericFixture::setup(input_shape, weights_shape, bias_shape, output_shape, info, dilation, reshape_weights, data_type, 0, + QuantizationInfo()); } }; @@ -304,9 +306,10 @@ class ConvolutionValidationFixedPointFixture : public ConvolutionValidationGener { public: template - void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, PadStrideInfo info, bool reshape_weights, DataType data_type, int fractional_bits) + void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, PadStrideInfo info, Size2D dilation, bool 
reshape_weights, DataType data_type, + int fractional_bits) { - ConvolutionValidationGenericFixture::setup(input_shape, weights_shape, bias_shape, output_shape, info, reshape_weights, data_type, fractional_bits, + ConvolutionValidationGenericFixture::setup(input_shape, weights_shape, bias_shape, output_shape, info, dilation, reshape_weights, data_type, fractional_bits, QuantizationInfo()); } }; @@ -316,10 +319,11 @@ class ConvolutionValidationQuantizedFixture : public ConvolutionValidationGeneri { public: template - void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, PadStrideInfo info, bool reshape_weights, DataType data_type, + void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, PadStrideInfo info, Size2D dilation, bool reshape_weights, DataType data_type, QuantizationInfo quantization_info) { - ConvolutionValidationGenericFixture::setup(input_shape, weights_shape, bias_shape, output_shape, info, reshape_weights, data_type, 0, quantization_info); + ConvolutionValidationGenericFixture::setup(input_shape, weights_shape, bias_shape, output_shape, info, dilation, reshape_weights, data_type, 0, + quantization_info); } }; } // namespace validation diff --git a/tests/validation/fixtures/DirectConvolutionLayerFixture.h b/tests/validation/fixtures/DirectConvolutionLayerFixture.h index d63a5bcdba..fef9d2dc6e 100644 --- a/tests/validation/fixtures/DirectConvolutionLayerFixture.h +++ b/tests/validation/fixtures/DirectConvolutionLayerFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. 
* * SPDX-License-Identifier: MIT * @@ -66,9 +66,11 @@ public: } template - void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, PadStrideInfo info, + void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, PadStrideInfo info, Size2D dilation, DataType data_type, int fractional_bits, QuantizationInfo quantization_info) { + ARM_COMPUTE_UNUSED(dilation); + _fractional_bits = fractional_bits; _quantization_info = quantization_info; _data_type = data_type; @@ -226,10 +228,10 @@ class DirectConvolutionValidationWithTensorShapesQuantizedFixture : public Direc { public: template - void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, PadStrideInfo info, + void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, PadStrideInfo info, Size2D dilation, DataType data_type, QuantizationInfo quantization_info) { - DirectConvolutionValidationGenericFixture::setup(input_shape, weights_shape, bias_shape, output_shape, info, data_type, 0, quantization_info); + DirectConvolutionValidationGenericFixture::setup(input_shape, weights_shape, bias_shape, output_shape, info, dilation, data_type, 0, quantization_info); } }; @@ -238,10 +240,10 @@ class DirectConvolutionValidationWithTensorShapesFixture : public DirectConvolut { public: template - void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, PadStrideInfo info, + void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, PadStrideInfo info, Size2D dilation, DataType data_type) { - DirectConvolutionValidationGenericFixture::setup(input_shape, weights_shape, bias_shape, output_shape, info, data_type, 0, QuantizationInfo()); + DirectConvolutionValidationGenericFixture::setup(input_shape, 
weights_shape, bias_shape, output_shape, info, dilation, data_type, 0, QuantizationInfo()); } }; diff --git a/tests/validation/fixtures/DirectConvolutionLayerTensorShiftFixture.h b/tests/validation/fixtures/DirectConvolutionLayerTensorShiftFixture.h index d810a765cb..09b6d830b4 100644 --- a/tests/validation/fixtures/DirectConvolutionLayerTensorShiftFixture.h +++ b/tests/validation/fixtures/DirectConvolutionLayerTensorShiftFixture.h @@ -67,9 +67,11 @@ public: } template - void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, PadStrideInfo info, + void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, PadStrideInfo info, unsigned int dilation_x, unsigned int dilation_y, DataType data_type, int fractional_bits, QuantizationInfo quantization_info) { + ARM_COMPUTE_UNUSED(dilation_x, dilation_y); + _fractional_bits = fractional_bits; _quantization_info = quantization_info; _data_type = data_type; @@ -245,10 +247,11 @@ class DirectConvolutionValidationWithTensorShapesQuantizedTensorShiftFixture : p { public: template - void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, PadStrideInfo info, + void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, PadStrideInfo info, unsigned int dilation_x, unsigned int dilation_y, DataType data_type, QuantizationInfo quantization_info) { - DirectConvolutionValidationGenericTensorShiftFixture::setup(input_shape, weights_shape, bias_shape, output_shape, info, data_type, 0, quantization_info); + DirectConvolutionValidationGenericTensorShiftFixture::setup(input_shape, weights_shape, bias_shape, output_shape, info, dilation_x, dilation_y, data_type, 0, + quantization_info); } }; @@ -257,10 +260,11 @@ class DirectConvolutionValidationWithTensorShapesTensorShiftFixture : public Dir { public: template - void 
setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, PadStrideInfo info, + void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, PadStrideInfo info, unsigned int dilation_x, unsigned int dilation_y, DataType data_type) { - DirectConvolutionValidationGenericTensorShiftFixture::setup(input_shape, weights_shape, bias_shape, output_shape, info, data_type, 0, QuantizationInfo()); + DirectConvolutionValidationGenericTensorShiftFixture::setup(input_shape, weights_shape, bias_shape, output_shape, info, dilation_x, dilation_y, data_type, 0, + QuantizationInfo()); } }; diff --git a/tests/validation/fixtures/LocallyConnectedFixture.h b/tests/validation/fixtures/LocallyConnectedFixture.h index ab9819e56f..f87e6e470c 100644 --- a/tests/validation/fixtures/LocallyConnectedFixture.h +++ b/tests/validation/fixtures/LocallyConnectedFixture.h @@ -24,6 +24,7 @@ #ifndef ARM_COMPUTE_TEST_LOCALLY_CONNECTED_FIXTURE #define ARM_COMPUTE_TEST_LOCALLY_CONNECTED_FIXTURE +#include "arm_compute/core/Error.h" #include "arm_compute/core/TensorShape.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/NEON/NEScheduler.h" @@ -54,8 +55,10 @@ public: public: template - void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, PadStrideInfo info, DataType data_type) + void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, PadStrideInfo info, Size2D dilation, DataType data_type) { + ARM_COMPUTE_UNUSED(dilation); + _data_type = data_type; _bias_data_type = data_type; diff --git a/tests/validation/fixtures/WinogradLayerFixture.h b/tests/validation/fixtures/WinogradLayerFixture.h index c427f8d20e..a86f24f35e 100644 --- a/tests/validation/fixtures/WinogradLayerFixture.h +++ b/tests/validation/fixtures/WinogradLayerFixture.h @@ -52,8 +52,10 @@ class 
WinogradConvolutionLayerValidationFixture : public framework::Fixture { public: template - void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, PadStrideInfo info, DataType data_type) + void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, PadStrideInfo info, Size2D dilation, DataType data_type) { + ARM_COMPUTE_UNUSED(dilation); + _target = compute_target(input_shape, weights_shape, bias_shape, output_shape, info, data_type); _reference = compute_reference(input_shape, weights_shape, bias_shape, output_shape, info, data_type); } diff --git a/tests/validation/reference/Convolution3d.h b/tests/validation/reference/Convolution3d.h index b99d534635..700175880b 100644 --- a/tests/validation/reference/Convolution3d.h +++ b/tests/validation/reference/Convolution3d.h @@ -46,7 +46,7 @@ inline bool is_valid_pixel(int i, int min, int max) template < typename T, typename TB, typename std::enable_if < validation::is_floating_point::value &&validation::is_floating_point::value, int >::type = 0 > inline void convolution3d(const SimpleTensor &in, const SimpleTensor &weights, const SimpleTensor &bias, SimpleTensor &out, int i_offset, int w_offset, int b_offset, int o_offset, - int xi, int yi, int width_in, int height_in, int depth_in, int width_weights, int height_weights) + int xi, int yi, int width_in, int height_in, int depth_in, int width_weights, int height_weights, int dilation_x = 1, int dilation_y = 1) { const T *in_ptr = in.data() + i_offset; const T *w_ptr = weights.data() + w_offset; @@ -73,12 +73,12 @@ inline void convolution3d(const SimpleTensor &in, const SimpleTensor &weig for(int xk = -half_width_weights_start; xk <= half_width_weights_end; ++xk) { // Check if the pixel is out-of-bound - if(is_valid_pixel(xi + xk, 0, width_in) && is_valid_pixel(yi + yk, 0, height_in)) + if(is_valid_pixel(xi + xk * dilation_x, 0, width_in) && is_valid_pixel(yi + yk * 
dilation_y, 0, height_in)) { const int idx = xk + half_width_weights_start; const int idy = yk + half_height_weights_start; - const T i_value = in_ptr[offset_slice_in + xk + yk * width_in]; + const T i_value = in_ptr[offset_slice_in + xk * dilation_x + yk * dilation_y * width_in]; const T w_value = w_ptr[idx + idy * width_weights + ifm * width_weights * height_weights]; acc += i_value * w_value; @@ -95,7 +95,7 @@ inline void convolution3d(const SimpleTensor &in, const SimpleTensor &weig template < typename T, typename TB, typename std::enable_if < std::is_integral::value &&std::is_integral::value, int >::type = 0 > inline void convolution3d(const SimpleTensor &in, const SimpleTensor &weights, const SimpleTensor &bias, SimpleTensor &out, int i_offset, int w_offset, int b_offset, int o_offset, - int xi, int yi, int width_in, int height_in, int depth_in, int width_weights, int height_weights) + int xi, int yi, int width_in, int height_in, int depth_in, int width_weights, int height_weights, int dilation_x = 1, int dilation_y = 1) { const T *in_ptr = in.data() + i_offset; const T *w_ptr = weights.data() + w_offset; @@ -126,12 +126,12 @@ inline void convolution3d(const SimpleTensor &in, const SimpleTensor &weig for(int xk = -half_width_weights_start; xk <= half_width_weights_end; ++xk) { // Check if the pixel is out-of-bound - if(is_valid_pixel(xi + xk, 0, width_in) && is_valid_pixel(yi + yk, 0, height_in)) + if(is_valid_pixel(xi + xk * dilation_x, 0, width_in) && is_valid_pixel(yi + yk * dilation_y, 0, height_in)) { const int idx = xk + half_width_weights_start; const int idy = yk + half_height_weights_start; - const fixed_point i_value(in_ptr[offset_slice_in + xk + yk * width_in], fixed_point_position, true); + const fixed_point i_value(in_ptr[offset_slice_in + xk * dilation_x + yk * dilation_y * width_in], fixed_point_position, true); const fixed_point w_value(w_ptr[idx + idy * width_weights + ifm * width_weights * height_weights], fixed_point_position, true); const 
fixed_point iw = i_value * w_value; acc = iw + acc; @@ -153,7 +153,7 @@ inline void convolution3d(const SimpleTensor &in, const SimpleTensor &weig template <> inline void convolution3d(const SimpleTensor &in, const SimpleTensor &weights, const SimpleTensor &bias, SimpleTensor &out, int i_offset, int w_offset, int b_offset, int o_offset, - int xi, int yi, int width_in, int height_in, int depth_in, int width_weights, int height_weights) + int xi, int yi, int width_in, int height_in, int depth_in, int width_weights, int height_weights, int dilation_x, int dilation_y) { const uint8_t *in_ptr = in.data() + i_offset; const uint8_t *w_ptr = weights.data() + w_offset; @@ -192,12 +192,12 @@ inline void convolution3d(const SimpleTensor &in, const SimpleTensor -SimpleTensor convolution_layer(const SimpleTensor &src, const SimpleTensor &weights, const SimpleTensor &bias, const TensorShape &output_shape, const PadStrideInfo &info) +SimpleTensor convolution_layer(const SimpleTensor &src, const SimpleTensor &weights, const SimpleTensor &bias, const TensorShape &output_shape, const PadStrideInfo &info, + const Size2D &dilation) { // Create reference SimpleTensor dst{ output_shape, src.data_type(), 1, src.fixed_point_position(), src.quantization_info() }; @@ -66,10 +67,10 @@ SimpleTensor convolution_layer(const SimpleTensor &src, const SimpleTensor const int stride_xi = info.stride().first; const int stride_yi = info.stride().second; - auto output_wh = scaled_dimensions(width_in, height_in, width_weights, height_weights, info); + auto output_wh = scaled_dimensions(width_in, height_in, width_weights, height_weights, info, dilation); - const int start_xi = width_weights / 2 - pad_left; - const int start_yi = height_weights / 2 - pad_top; + const int start_xi = (dilation.x() * (width_weights - 1) + 1) / 2 - pad_left; + const int start_yi = (dilation.y() * (height_weights - 1) + 1) / 2 - pad_top; const int end_xi = output_wh.first * stride_xi; const int end_yi = output_wh.second * 
stride_yi; const int num_batches = src.shape().total_size() / (width_in * height_in * depth_in); @@ -96,7 +97,7 @@ SimpleTensor convolution_layer(const SimpleTensor &src, const SimpleTensor offset_in, ofm * width_weights * height_weights * depth_weights, ofm, offset_out, xi, yi, width_in, height_in, depth_in, - width_weights, height_weights); + width_weights, height_weights, dilation.x(), dilation.y()); } } } @@ -106,15 +107,15 @@ SimpleTensor convolution_layer(const SimpleTensor &src, const SimpleTensor } template SimpleTensor convolution_layer(const SimpleTensor &src, const SimpleTensor &weights, const SimpleTensor &bias, const TensorShape &output_shape, - const PadStrideInfo &info); + const PadStrideInfo &info, const Size2D &dilation); template SimpleTensor convolution_layer(const SimpleTensor &src, const SimpleTensor &weights, const SimpleTensor &bias, const TensorShape &output_shape, - const PadStrideInfo &info); + const PadStrideInfo &info, const Size2D &dilation); template SimpleTensor convolution_layer(const SimpleTensor &src, const SimpleTensor &weights, const SimpleTensor &bias, const TensorShape &output_shape, - const PadStrideInfo &info); + const PadStrideInfo &info, const Size2D &dilation); template SimpleTensor convolution_layer(const SimpleTensor &src, const SimpleTensor &weights, const SimpleTensor &bias, const TensorShape &output_shape, - const PadStrideInfo &info); + const PadStrideInfo &info, const Size2D &dilation); template SimpleTensor convolution_layer(const SimpleTensor &src, const SimpleTensor &weights, const SimpleTensor &bias, const TensorShape &output_shape, - const PadStrideInfo &info); + const PadStrideInfo &info, const Size2D &dilation); } // namespace reference } // namespace validation } // namespace test diff --git a/tests/validation/reference/ConvolutionLayer.h b/tests/validation/reference/ConvolutionLayer.h index 57455ba401..ff3b1531f4 100644 --- a/tests/validation/reference/ConvolutionLayer.h +++ 
b/tests/validation/reference/ConvolutionLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -36,7 +36,8 @@ namespace validation namespace reference { template -SimpleTensor convolution_layer(const SimpleTensor &src, const SimpleTensor &weights, const SimpleTensor &bias, const TensorShape &output_shape, const PadStrideInfo &info); +SimpleTensor convolution_layer(const SimpleTensor &src, const SimpleTensor &weights, const SimpleTensor &bias, const TensorShape &output_shape, const PadStrideInfo &info, + const Size2D &dilation = Size2D(1U, 1U)); } // namespace reference } // namespace validation } // namespace test -- cgit v1.2.1