From c357c47be8a3f210f9eee9a05cc13f1051b036d3 Mon Sep 17 00:00:00 2001
From: Alex Gilday <alexander.gilday@arm.com>
Date: Wed, 21 Mar 2018 13:54:09 +0000
Subject: COMPMID-1008: Fix Doxygen issues

Change-Id: Ie73d8771f85d1f5b059f3a56f1bbd73c98e94a38
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/124723
Reviewed-by: Michalis Spyrou <michalis.spyrou@arm.com>
Tested-by: Jenkins <bsgcomp@arm.com>
---
 arm_compute/core/AccessWindowAutoPadding.h         |  15 +-
 arm_compute/core/AccessWindowStatic.h              |  13 +-
 arm_compute/core/CL/CLHelpers.h                    |   4 +-
 arm_compute/core/CL/CLKernelLibrary.h              |  26 ++-
 arm_compute/core/CL/CLTypes.h                      |   2 +-
 arm_compute/core/CL/ICLArray.h                     |  48 +++--
 arm_compute/core/CL/ICLDistribution1D.h            |   6 +-
 arm_compute/core/CL/ICLHOG.h                       |   6 +-
 arm_compute/core/CL/ICLKernel.h                    |   8 +
 arm_compute/core/CL/ICLMultiImage.h                |   7 +-
 arm_compute/core/CL/ICLSimpleKernel.h              |  10 +-
 arm_compute/core/CL/ICLTensor.h                    |  12 +-
 arm_compute/core/CL/OpenCL.h                       |  19 ++
 .../core/CL/kernels/CLAbsoluteDifferenceKernel.h   |  10 +-
 .../CL/kernels/CLBatchNormalizationLayerKernel.h   |   2 +-
 arm_compute/core/CL/kernels/CLBitwiseAndKernel.h   |   6 +-
 arm_compute/core/CL/kernels/CLBitwiseOrKernel.h    |   6 +-
 arm_compute/core/CL/kernels/CLBitwiseXorKernel.h   |   6 +-
 arm_compute/core/CL/kernels/CLCannyEdgeKernel.h    |   6 +-
 .../kernels/CLDeconvolutionLayerUpsampleKernel.h   |   4 +-
 .../kernels/CLDepthwiseConvolutionLayer3x3Kernel.h |   2 +-
 .../core/CL/kernels/CLDequantizationLayerKernel.h  |   2 +-
 arm_compute/core/CL/kernels/CLDerivativeKernel.h   |   6 +-
 .../core/CL/kernels/CLIntegralImageKernel.h        |   6 +-
 arm_compute/core/CL/kernels/CLLKTrackerKernel.h    |   9 +-
 .../core/CL/kernels/CLMagnitudePhaseKernel.h       |   6 +-
 .../core/CL/kernels/CLNormalizationLayerKernel.h   |   4 +-
 .../CL/kernels/CLPixelWiseMultiplicationKernel.h   |   4 +-
 .../core/CL/kernels/CLQuantizationLayerKernel.h    |   2 +-
 arm_compute/core/CL/kernels/CLRemapKernel.h        |   6 +-
 arm_compute/core/CL/kernels/CLScharr3x3Kernel.h    |   6 +-
 arm_compute/core/CL/kernels/CLSobel3x3Kernel.h     |   6 +-
 arm_compute/core/CL/kernels/CLSobel5x5Kernel.h     |  10 +-
 arm_compute/core/CL/kernels/CLSobel7x7Kernel.h     |  10 +-
 arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h |   7 +-
 .../core/CL/kernels/CLWeightsReshapeKernel.h       |  23 +++
 arm_compute/core/CPP/CPPTypes.h                    |  16 +-
 arm_compute/core/CPP/ICPPSimpleKernel.h            |   4 +-
 .../core/CPP/kernels/CPPCornerCandidatesKernel.h   |   1 +
 .../CPPDetectionWindowNonMaximaSuppressionKernel.h |   2 +-
 arm_compute/core/Dimensions.h                      |  46 ++++-
 arm_compute/core/Error.h                           |   2 +
 arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h    |  19 +-
 arm_compute/core/GLES_COMPUTE/IGCSimpleKernel.h    |  10 +-
 arm_compute/core/GLES_COMPUTE/IGCTensor.h          |   5 +-
 .../kernels/GCAbsoluteDifferenceKernel.h           |  10 +-
 .../kernels/GCBatchNormalizationLayerKernel.h      |   2 +-
 .../kernels/GCDepthwiseConvolutionLayer3x3Kernel.h |   4 +-
 .../kernels/GCDirectConvolutionLayerKernel.h       |   5 +-
 .../kernels/GCNormalizationLayerKernel.h           |   4 +-
 .../kernels/GCNormalizePlanarYUVLayerKernel.h      |   4 +-
 .../kernels/GCPixelWiseMultiplicationKernel.h      |   6 +-
 .../GLES_COMPUTE/kernels/GCWeightsReshapeKernel.h  |  23 +++
 arm_compute/core/Helpers.h                         |  25 +--
 arm_compute/core/IAccessWindow.h                   |  30 ++-
 arm_compute/core/IArray.h                          |  33 ++--
 arm_compute/core/NEON/INEKernel.h                  |   3 +-
 arm_compute/core/NEON/INESimpleKernel.h            |   3 +-
 arm_compute/core/NEON/NEColorConvertHelper.inl     | 102 +++++++++-
 arm_compute/core/NEON/NEFixedPoint.inl             |   4 +-
 arm_compute/core/NEON/NEMath.inl                   |  15 +-
 arm_compute/core/NEON/kernels/NEAccumulateKernel.h |   1 +
 .../NEON/kernels/NEBatchNormalizationLayerKernel.h |   2 +-
 arm_compute/core/NEON/kernels/NEBox3x3Kernel.h     |   3 +-
 arm_compute/core/NEON/kernels/NECannyEdgeKernel.h  |   3 +-
 .../kernels/NEDepthwiseConvolutionLayer3x3Kernel.h |   2 +-
 .../NEON/kernels/NEDequantizationLayerKernel.h     |   2 +-
 .../kernels/NEGEMMMatrixVectorMultiplyKernel.h     |   1 +
 .../core/NEON/kernels/NEHarrisCornersKernel.h      |   1 +
 arm_compute/core/NEON/kernels/NELKTrackerKernel.h  |   1 +
 .../core/NEON/kernels/NEMagnitudePhaseKernel.h     |   1 +
 .../NEON/kernels/NENonMaximaSuppression3x3Kernel.h |   3 +-
 .../core/NEON/kernels/NENormalizationLayerKernel.h |   2 +-
 .../core/NEON/kernels/NEQuantizationLayerKernel.h  |   2 +-
 .../core/NEON/kernels/NEWinogradLayerKernel.h      |  37 +++-
 .../kernels/detail/NEActivationFunctionDetail.h    |  51 ++++-
 arm_compute/core/NEON/wrapper/traits.h             |   4 +-
 arm_compute/core/QAsymm8.inl                       |   4 +-
 arm_compute/core/TensorInfo.h                      |   2 +
 arm_compute/core/Types.h                           | 218 ++++++++++++++++-----
 arm_compute/core/Utils.h                           |  25 ++-
 arm_compute/core/Validate.h                        |  21 +-
 arm_compute/core/Window.h                          |  10 +-
 arm_compute/core/utils/logging/LoggerRegistry.h    |   6 +-
 arm_compute/core/utils/logging/Types.h             |  13 +-
 arm_compute/core/utils/misc/CRTP.h                 |   1 +
 arm_compute/core/utils/misc/Iterable.h             |  16 ++
 .../core/utils/strong_type/StrongTypeAttributes.h  |   2 +
 arm_compute/graph/INode.h                          |   2 +
 arm_compute/graph/IOperation.h                     |   4 +-
 arm_compute/graph/ITensorObject.h                  |   8 +-
 arm_compute/graph2/GraphContext.h                  |   4 +-
 arm_compute/graph2/INodeVisitor.h                  | 100 ++++++++--
 arm_compute/graph2/TensorDescriptor.h              |   6 +-
 arm_compute/graph2/Types.h                         |   8 +-
 .../graph2/backends/NEON/NEFunctionFactory.h       |   1 +
 arm_compute/graph2/frontend/ILayer.h               |  11 +-
 arm_compute/graph2/frontend/Layers.h               |  66 ++++++-
 arm_compute/graph2/nodes/ActivationLayerNode.h     |   1 +
 .../graph2/nodes/BatchNormalizationLayerNode.h     |   1 +
 arm_compute/graph2/nodes/ConstNode.h               |   1 +
 arm_compute/graph2/nodes/ConvolutionLayerNode.h    |   1 +
 .../graph2/nodes/DepthConcatenateLayerNode.h       |   1 +
 .../graph2/nodes/DepthwiseConvolutionLayerNode.h   |   1 +
 arm_compute/graph2/nodes/EltwiseLayerNode.h        |   1 +
 arm_compute/graph2/nodes/FlattenLayerNode.h        |   1 +
 arm_compute/graph2/nodes/FullyConnectedLayerNode.h |   1 +
 arm_compute/graph2/nodes/InputNode.h               |   1 +
 arm_compute/graph2/nodes/NormalizationLayerNode.h  |   1 +
 arm_compute/graph2/nodes/OutputNode.h              |   1 +
 arm_compute/graph2/nodes/PoolingLayerNode.h        |   1 +
 arm_compute/graph2/nodes/ReshapeLayerNode.h        |   1 +
 arm_compute/graph2/nodes/SoftmaxLayerNode.h        |   1 +
 arm_compute/graph2/printers/DotGraphPrinter.h      |   1 +
 arm_compute/runtime/Array.h                        |  32 ++-
 arm_compute/runtime/CL/CLArray.h                   |  37 ++--
 arm_compute/runtime/CL/CLDistribution1D.h          |   6 +-
 arm_compute/runtime/CL/CLLutAllocator.h            |  16 +-
 arm_compute/runtime/CL/CLMemoryGroup.h             |   3 +-
 arm_compute/runtime/CL/CLTensor.h                  |   3 +-
 arm_compute/runtime/CL/CLTensorAllocator.h         |  22 ++-
 arm_compute/runtime/CL/CLTuner.h                   |   3 +-
 arm_compute/runtime/CL/functions/CLGEMM.h          |   5 +-
 .../runtime/CL/functions/CLGEMMConvolutionLayer.h  |   7 +-
 arm_compute/runtime/CL/functions/CLGaussian5x5.h   |   7 +-
 arm_compute/runtime/CL/functions/CLHistogram.h     |   3 +-
 arm_compute/runtime/CL/functions/CLOpticalFlow.h   |   9 +-
 .../runtime/CL/functions/CLReductionOperation.h    |   7 +-
 arm_compute/runtime/CL/functions/CLSobel5x5.h      |   7 +-
 arm_compute/runtime/CL/functions/CLSobel7x7.h      |   7 +-
 arm_compute/runtime/GLES_COMPUTE/GCTensor.h        |   5 +-
 .../runtime/GLES_COMPUTE/GCTensorAllocator.h       |   9 +-
 arm_compute/runtime/ILifetimeManager.h             |   4 +-
 arm_compute/runtime/LutAllocator.h                 |   7 +-
 arm_compute/runtime/MemoryGroup.h                  |   3 +-
 arm_compute/runtime/MemoryGroupBase.h              |   6 +-
 arm_compute/runtime/NEON/AssemblyHelper.h          |  60 +++++-
 arm_compute/runtime/NEON/NEScheduler.h             |   3 +-
 arm_compute/runtime/NEON/functions/NECannyEdge.h   |   4 +-
 .../runtime/NEON/functions/NEHarrisCorners.h       |   4 +-
 arm_compute/runtime/NEON/functions/NEOpticalFlow.h |   8 +-
 arm_compute/runtime/Scheduler.h                    |  11 +-
 arm_compute/runtime/Tensor.h                       |   3 +-
 arm_compute/runtime/TensorAllocator.h              |  16 +-
 144 files changed, 1288 insertions(+), 392 deletions(-)

(limited to 'arm_compute')

diff --git a/arm_compute/core/AccessWindowAutoPadding.h b/arm_compute/core/AccessWindowAutoPadding.h
index 0003bb26cd..78114d46a9 100644
--- a/arm_compute/core/AccessWindowAutoPadding.h
+++ b/arm_compute/core/AccessWindowAutoPadding.h
@@ -52,16 +52,25 @@ public:
      * @param[in,out] info Tensor info of the accessed kernel.
      */
     AccessWindowAutoPadding(ITensorInfo *info);
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
     AccessWindowAutoPadding(const AccessWindowAutoPadding &) = delete;
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
     AccessWindowAutoPadding &operator=(const AccessWindowAutoPadding &) = delete;
-    AccessWindowAutoPadding(AccessWindowAutoPadding &&)                 = default;
+    /** Allow instances of this class to be move constructed */
+    AccessWindowAutoPadding(AccessWindowAutoPadding &&) = default;
+    /** Allow instances of this class to be moved */
     AccessWindowAutoPadding &operator=(AccessWindowAutoPadding &&) = default;
-    ~AccessWindowAutoPadding()                                     = default;
+    /** Default destructor */
+    ~AccessWindowAutoPadding() = default;
 
     /** Set the valid region to match the entire tensor. */
     void set_valid_region();
 
-    /** Return a valid region that spans across the entire tensor. */
+    /** Return a valid region that spans across the entire tensor.
+     *
+     * @return a valid region.
+     *
+     */
     ValidRegion compute_valid_region() const;
 
     // Inherited methods overridden:
diff --git a/arm_compute/core/AccessWindowStatic.h b/arm_compute/core/AccessWindowStatic.h
index a0ceeda273..92129ac27e 100644
--- a/arm_compute/core/AccessWindowStatic.h
+++ b/arm_compute/core/AccessWindowStatic.h
@@ -56,11 +56,16 @@ public:
      */
     AccessWindowStatic(ITensorInfo *info, int start_x, int start_y, int end_x, int end_y);
 
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
     AccessWindowStatic(const AccessWindowStatic &) = delete;
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
     AccessWindowStatic &operator=(const AccessWindowStatic &) = delete;
-    AccessWindowStatic(AccessWindowStatic &&)                 = default;
+    /** Allow instances of this class to be move constructed */
+    AccessWindowStatic(AccessWindowStatic &&) = default;
+    /** Allow instances of this class to be moved */
     AccessWindowStatic &operator=(AccessWindowStatic &&) = default;
-    ~AccessWindowStatic()                                = default;
+    /** Default destructor */
+    ~AccessWindowStatic() = default;
 
     /** Set the valid region based on the static access pattern and valid
      *  region of the inputs.
@@ -74,6 +79,9 @@ public:
      *
      * @param[in] window             Execution window of the kernel.
      * @param[in] input_valid_region Combined valid region of all inputs.
+     *
+     * @return a valid region.
+     *
      */
     ValidRegion compute_valid_region(const Window &window, ValidRegion input_valid_region) const;
 
@@ -82,6 +90,7 @@ public:
     bool update_padding_if_needed(const Window &window) override;
     ValidRegion compute_valid_region(const Window &window, ValidRegion input_valid_region, bool border_undefined, BorderSize border_size) const override;
 
+private:
     ITensorInfo *_info;
     int          _start_x;
     int          _start_y;
diff --git a/arm_compute/core/CL/CLHelpers.h b/arm_compute/core/CL/CLHelpers.h
index 66423d648a..d239a6159b 100644
--- a/arm_compute/core/CL/CLHelpers.h
+++ b/arm_compute/core/CL/CLHelpers.h
@@ -35,11 +35,11 @@ namespace arm_compute
 enum class DataType;
 enum class GPUTarget;
 
-/** Enable operation operations on GPUTarget enumerations */
+/** Enable bitwise operations on GPUTarget enumerations */
 template <>
 struct enable_bitwise_ops<arm_compute::GPUTarget>
 {
-    static constexpr bool value = true;
+    static constexpr bool value = true; /**< Enabled. */
 };
 
 /** Max vector width of an OpenCL vector */
diff --git a/arm_compute/core/CL/CLKernelLibrary.h b/arm_compute/core/CL/CLKernelLibrary.h
index 953b43c038..12e424fce9 100644
--- a/arm_compute/core/CL/CLKernelLibrary.h
+++ b/arm_compute/core/CL/CLKernelLibrary.h
@@ -104,11 +104,11 @@ public:
     Program(const Program &) = default;
     /** Default Move Constructor. */
     Program(Program &&) = default;
-    /** Default copy assignment operator. */
+    /** Default copy assignment operator */
     Program &operator=(const Program &) = default;
-    /** Default move assignment operator. */
+    /** Default move assignment operator */
     Program &operator=(Program &&) = default;
-    /**Returns program name.
+    /** Returns program name.
      *
      * @return Program's name.
      */
@@ -121,7 +121,13 @@ public:
      * @return The CL program object.
      */
     explicit operator cl::Program() const;
-
+    /** Build the given CL program.
+     *
+     * @param[in] program       The CL program to build.
+     * @param[in] build_options Options to build the CL program.
+     *
+     * @return True if the CL program builds successfully.
+     */
     static bool build(const cl::Program &program, const std::string &build_options = "");
     /** Build the underlying CL program.
      *
@@ -150,9 +156,9 @@ public:
     Kernel(const Kernel &) = default;
     /** Default Move Constructor. */
     Kernel(Kernel &&) = default;
-    /** Default copy assignment operator. */
+    /** Default copy assignment operator */
     Kernel &operator=(const Kernel &) = default;
-    /** Default move assignment operator. */
+    /** Default move assignment operator */
     Kernel &operator=(Kernel &&) = default;
     /** Constructor.
      *
@@ -192,9 +198,9 @@ private:
     CLKernelLibrary();
 
 public:
-    /** Prevent instances of this class from being copied. */
+    /** Prevent instances of this class from being copied */
     CLKernelLibrary(const CLKernelLibrary &) = delete;
-    /** Prevent instances of this class from being copied. */
+    /** Prevent instances of this class from being copied */
     const CLKernelLibrary &operator=(const CLKernelLibrary &) = delete;
     /** Access the KernelLibrary singleton.
      * @return The KernelLibrary instance.
@@ -226,9 +232,11 @@ public:
     {
         return _kernel_path;
     };
-    /** Gets the source of the selected program
+    /** Gets the source of the selected program.
      *
      * @param[in] program_name Program name.
+     *
+     * @return Source of the selected program.
      */
     std::string get_program_source(const std::string &program_name);
     /** Sets the CL context used to create programs.
diff --git a/arm_compute/core/CL/CLTypes.h b/arm_compute/core/CL/CLTypes.h
index c207ec7611..a9d5fdd063 100644
--- a/arm_compute/core/CL/CLTypes.h
+++ b/arm_compute/core/CL/CLTypes.h
@@ -51,7 +51,7 @@ enum class GPUTarget
     TBOX          = 0x260
 };
 
-/* Available OpenCL Version */
+/** Available OpenCL Version */
 enum class CLVersion
 {
     CL10,   /* the OpenCL 1.0 */
diff --git a/arm_compute/core/CL/ICLArray.h b/arm_compute/core/CL/ICLArray.h
index 6c3dbcd170..22fc7cf32e 100644
--- a/arm_compute/core/CL/ICLArray.h
+++ b/arm_compute/core/CL/ICLArray.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -35,17 +35,26 @@ template <class T>
 class ICLArray : public IArray<T>
 {
 public:
-    /* Constructor */
+    /** Constructor
+     *
+     * @param[in] max_num_values Maximum size of the array.
+     *
+     */
     explicit ICLArray(size_t max_num_values)
         : IArray<T>(max_num_values), _mapping(nullptr)
     {
     }
 
+    /** Prevent instances of this class from being copy constructed */
     ICLArray(const ICLArray &) = delete;
+    /** Prevent instances of this class from being copied */
     ICLArray &operator=(const ICLArray &) = delete;
-    ICLArray(ICLArray &&)                 = default;
+    /** Allow instances of this class to be move constructed */
+    ICLArray(ICLArray &&) = default;
+    /** Allow instances of this class to be moved */
     ICLArray &operator=(ICLArray &&) = default;
-    virtual ~ICLArray()              = default;
+    /** Default virtual destructor. */
+    virtual ~ICLArray() = default;
     /** Interface to be implemented by the child class to return a reference to the OpenCL buffer containing the array's data.
      *
      * @return A reference to an OpenCL buffer containing the array's data.
@@ -106,16 +115,27 @@ private:
     uint8_t *_mapping;
 };
 
-using ICLKeyPointArray        = ICLArray<KeyPoint>;
-using ICLCoordinates2DArray   = ICLArray<Coordinates2D>;
+/** Interface for OpenCL Array of Key Points. */
+using ICLKeyPointArray = ICLArray<KeyPoint>;
+/** Interface for OpenCL Array of 2D Coordinates. */
+using ICLCoordinates2DArray = ICLArray<Coordinates2D>;
+/** Interface for OpenCL Array of Detection Windows. */
 using ICLDetectionWindowArray = ICLArray<DetectionWindow>;
-using ICLROIArray             = ICLArray<ROI>;
-using ICLSize2DArray          = ICLArray<Size2D>;
-using ICLUInt8Array           = ICLArray<cl_uchar>;
-using ICLUInt16Array          = ICLArray<cl_ushort>;
-using ICLUInt32Array          = ICLArray<cl_uint>;
-using ICLInt16Array           = ICLArray<cl_short>;
-using ICLInt32Array           = ICLArray<cl_int>;
-using ICLFloatArray           = ICLArray<cl_float>;
+/** Interface for OpenCL Array of ROIs. */
+using ICLROIArray = ICLArray<ROI>;
+/** Interface for OpenCL Array of 2D Sizes. */
+using ICLSize2DArray = ICLArray<Size2D>;
+/** Interface for OpenCL Array of uint8s. */
+using ICLUInt8Array = ICLArray<cl_uchar>;
+/** Interface for OpenCL Array of uint16s. */
+using ICLUInt16Array = ICLArray<cl_ushort>;
+/** Interface for OpenCL Array of uint32s. */
+using ICLUInt32Array = ICLArray<cl_uint>;
+/** Interface for OpenCL Array of int16s. */
+using ICLInt16Array = ICLArray<cl_short>;
+/** Interface for OpenCL Array of int32s. */
+using ICLInt32Array = ICLArray<cl_int>;
+/** Interface for OpenCL Array of floats. */
+using ICLFloatArray = ICLArray<cl_float>;
 }
 #endif /*__ARM_COMPUTE_ICLARRAY_H__*/
diff --git a/arm_compute/core/CL/ICLDistribution1D.h b/arm_compute/core/CL/ICLDistribution1D.h
index 8fbbbbf548..9816029172 100644
--- a/arm_compute/core/CL/ICLDistribution1D.h
+++ b/arm_compute/core/CL/ICLDistribution1D.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -49,9 +49,9 @@ public:
      * @param[in] range    The total number of the consecutive values of the distribution interval.
      */
     ICLDistribution1D(size_t num_bins, int32_t offset, uint32_t range);
-    /** Prevent instances of this class from being copied (As this class contains pointers). */
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
     ICLDistribution1D(const ICLDistribution1D &) = delete;
-    /** Prevent instances of this class from being copied (As this class contains pointers). */
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
     const ICLDistribution1D &operator=(const ICLDistribution1D &) = delete;
     /** Enqueue a map operation of the allocated buffer on the given queue.
      *
diff --git a/arm_compute/core/CL/ICLHOG.h b/arm_compute/core/CL/ICLHOG.h
index a3d2fb4a57..4d2d7a3316 100644
--- a/arm_compute/core/CL/ICLHOG.h
+++ b/arm_compute/core/CL/ICLHOG.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -42,9 +42,9 @@ class ICLHOG : public IHOG
 public:
     /** Default constructor */
     ICLHOG();
-    /** Prevent instances of this class from being copied (As this class contains pointers). */
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
     ICLHOG(const ICLHOG &) = delete;
-    /** Prevent instances of this class from being copied (As this class contains pointers). */
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
     ICLHOG &operator=(const ICLHOG &) = delete;
     /** Allow instances of this class to be moved */
     ICLHOG(ICLHOG &&) = default;
diff --git a/arm_compute/core/CL/ICLKernel.h b/arm_compute/core/CL/ICLKernel.h
index f331df2996..c7d0c2156b 100644
--- a/arm_compute/core/CL/ICLKernel.h
+++ b/arm_compute/core/CL/ICLKernel.h
@@ -293,6 +293,14 @@ protected:
  */
 void enqueue(cl::CommandQueue &queue, ICLKernel &kernel, const Window &window, const cl::NDRange &lws_hint = CLKernelLibrary::get().default_ndrange());
 
+/** Add the passed array's parameters to the object's kernel's arguments starting from the index idx.
+ *
+ * @param[in,out] idx            Index at which to start adding the array's arguments. Will be incremented by the number of kernel arguments set.
+ * @param[in]     array          Array to set as an argument of the object's kernel.
+ * @param[in]     strides        @ref Strides object containing stride of each dimension in bytes.
+ * @param[in]     num_dimensions Number of dimensions of the @p array.
+ * @param[in]     window         Window the kernel will be executed on.
+ */
 template <typename T, unsigned int dimension_size>
 void ICLKernel::add_array_argument(unsigned &idx, const ICLArray<T> *array, const Strides &strides, unsigned int num_dimensions, const Window &window)
 {
diff --git a/arm_compute/core/CL/ICLMultiImage.h b/arm_compute/core/CL/ICLMultiImage.h
index 774175607b..c06ddc826a 100644
--- a/arm_compute/core/CL/ICLMultiImage.h
+++ b/arm_compute/core/CL/ICLMultiImage.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -29,6 +29,7 @@
 namespace arm_compute
 {
 class ICLTensor;
+/** Interface for OpenCL images */
 using ICLImage = ICLTensor;
 
 /** Interface for OpenCL multi-planar images */
@@ -39,14 +40,14 @@ public:
      *
      * @param[in] index The index of the wanted planed.
      *
-     *  @return A pointer pointed to the OpenCL plane
+     * @return A pointer to the OpenCL plane
      */
     virtual ICLImage *cl_plane(unsigned int index) = 0;
     /** Return a constant pointer to the requested OpenCL plane of the image.
      *
      * @param[in] index The index of the wanted planed.
      *
-     *  @return A constant pointer pointed to the OpenCL plane
+     * @return A constant pointer to the OpenCL plane
      */
     virtual const ICLImage *cl_plane(unsigned int index) const = 0;
 
diff --git a/arm_compute/core/CL/ICLSimpleKernel.h b/arm_compute/core/CL/ICLSimpleKernel.h
index e9fdb7fb8b..c4a1f8b396 100644
--- a/arm_compute/core/CL/ICLSimpleKernel.h
+++ b/arm_compute/core/CL/ICLSimpleKernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -36,13 +36,13 @@ class ICLSimpleKernel : public ICLKernel
 public:
     /** Constructor. */
     ICLSimpleKernel();
-    /** Prevent instances of this class from being copied (As this class contains pointers). */
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
     ICLSimpleKernel(const ICLSimpleKernel &) = delete;
-    /** Prevent instances of this class from being copied (As this class contains pointers). */
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
     ICLSimpleKernel &operator=(const ICLSimpleKernel &) = delete;
-    /** Allow instances of this class to be moved. */
+    /** Allow instances of this class to be moved */
     ICLSimpleKernel(ICLSimpleKernel &&) = default;
-    /** Allow instances of this class to be moved. */
+    /** Allow instances of this class to be moved */
     ICLSimpleKernel &operator=(ICLSimpleKernel &&) = default;
     /** Default destructor */
     ~ICLSimpleKernel() = default;
diff --git a/arm_compute/core/CL/ICLTensor.h b/arm_compute/core/CL/ICLTensor.h
index abc0131379..0f5dba923b 100644
--- a/arm_compute/core/CL/ICLTensor.h
+++ b/arm_compute/core/CL/ICLTensor.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -40,12 +40,18 @@ namespace arm_compute
 class ICLTensor : public ITensor
 {
 public:
+    /** Default constructor. */
     ICLTensor();
+    /** Prevent instances of this class from being copy constructed */
     ICLTensor(const ICLTensor &) = delete;
+    /** Prevent instances of this class from being copied */
     ICLTensor &operator=(const ICLTensor &) = delete;
-    ICLTensor(ICLTensor &&)                 = default;
+    /** Allow instances of this class to be move constructed */
+    ICLTensor(ICLTensor &&) = default;
+    /** Allow instances of this class to be moved */
     ICLTensor &operator=(ICLTensor &&) = default;
-    virtual ~ICLTensor()               = default;
+    /** Default virtual destructor. */
+    virtual ~ICLTensor() = default;
 
     /** Interface to be implemented by the child class to return a reference to the OpenCL buffer containing the image's data.
      *
diff --git a/arm_compute/core/CL/OpenCL.h b/arm_compute/core/CL/OpenCL.h
index 405d5cebd7..4b4a8b807d 100644
--- a/arm_compute/core/CL/OpenCL.h
+++ b/arm_compute/core/CL/OpenCL.h
@@ -43,8 +43,13 @@ static const NDRange Range_128_1 = NDRange(128, 1);
 
 namespace arm_compute
 {
+/** Check if OpenCL is available.
+ *
+ * @return True if OpenCL is available.
+ */
 bool opencl_is_available();
 
+/** Class for loading OpenCL symbols. */
 class CLSymbols final
 {
 private:
@@ -52,8 +57,22 @@ private:
     void load_symbols(void *handle);
 
 public:
+    /** Get the static instance of CLSymbols.
+     *
+     * @return The static instance of CLSymbols.
+     */
     static CLSymbols &get();
+    /** Load symbols from the given OpenCL library path.
+     *
+     * @param[in] library Path to the OpenCL library.
+     *
+     * @return True if loading the library is successful.
+     */
     bool load(const std::string &library);
+    /** Load symbols from any of the default OpenCL library names.
+     *
+     * @return True if loading any library is successful.
+     */
     bool load_default();
 
 #define DECLARE_FUNCTION_PTR(func_name) \
diff --git a/arm_compute/core/CL/kernels/CLAbsoluteDifferenceKernel.h b/arm_compute/core/CL/kernels/CLAbsoluteDifferenceKernel.h
index 3a7bb40b2a..0858ef7376 100644
--- a/arm_compute/core/CL/kernels/CLAbsoluteDifferenceKernel.h
+++ b/arm_compute/core/CL/kernels/CLAbsoluteDifferenceKernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -40,13 +40,13 @@ class CLAbsoluteDifferenceKernel : public ICLKernel
 public:
     /** Default constructor. */
     CLAbsoluteDifferenceKernel();
-    /** Prevent instances of this class from being copied (As this class contains pointers). */
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
     CLAbsoluteDifferenceKernel(const CLAbsoluteDifferenceKernel &) = delete;
-    /** Prevent instances of this class from being copied (As this class contains pointers). */
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
     CLAbsoluteDifferenceKernel &operator=(const CLAbsoluteDifferenceKernel &) = delete;
-    /** Allow instances of this class to be moved. */
+    /** Allow instances of this class to be moved */
     CLAbsoluteDifferenceKernel(CLAbsoluteDifferenceKernel &&) = default;
-    /** Allow instances of this class to be moved. */
+    /** Allow instances of this class to be moved */
     CLAbsoluteDifferenceKernel &operator=(CLAbsoluteDifferenceKernel &&) = default;
     /** Default destructor */
     ~CLAbsoluteDifferenceKernel() = default;
diff --git a/arm_compute/core/CL/kernels/CLBatchNormalizationLayerKernel.h b/arm_compute/core/CL/kernels/CLBatchNormalizationLayerKernel.h
index e9fd564fbd..dbb25dd7c7 100644
--- a/arm_compute/core/CL/kernels/CLBatchNormalizationLayerKernel.h
+++ b/arm_compute/core/CL/kernels/CLBatchNormalizationLayerKernel.h
@@ -43,7 +43,7 @@ public:
     CLBatchNormalizationLayerKernel &operator=(const CLBatchNormalizationLayerKernel &) = delete;
     /** Default Move Constructor. */
     CLBatchNormalizationLayerKernel(CLBatchNormalizationLayerKernel &&) = default;
-    /** Default move assignment operator. */
+    /** Default move assignment operator */
     CLBatchNormalizationLayerKernel &operator=(CLBatchNormalizationLayerKernel &&) = default;
     /** Default destructor */
     ~CLBatchNormalizationLayerKernel() = default;
diff --git a/arm_compute/core/CL/kernels/CLBitwiseAndKernel.h b/arm_compute/core/CL/kernels/CLBitwiseAndKernel.h
index 2f0d30af83..16ba53ebeb 100644
--- a/arm_compute/core/CL/kernels/CLBitwiseAndKernel.h
+++ b/arm_compute/core/CL/kernels/CLBitwiseAndKernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -40,9 +40,9 @@ class CLBitwiseAndKernel : public ICLKernel
 public:
     /** Default constructor. */
     CLBitwiseAndKernel();
-    /** Prevent instances of this class from being copied (As this class contains pointers). */
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
     CLBitwiseAndKernel(const CLBitwiseAndKernel &) = delete;
-    /** Prevent instances of this class from being copied (As this class contains pointers). */
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
     CLBitwiseAndKernel &operator=(const CLBitwiseAndKernel &) = delete;
     /** Allow instances of this class to be moved */
     CLBitwiseAndKernel(CLBitwiseAndKernel &&) = default;
diff --git a/arm_compute/core/CL/kernels/CLBitwiseOrKernel.h b/arm_compute/core/CL/kernels/CLBitwiseOrKernel.h
index 053804413e..0caaf33ca6 100644
--- a/arm_compute/core/CL/kernels/CLBitwiseOrKernel.h
+++ b/arm_compute/core/CL/kernels/CLBitwiseOrKernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -40,9 +40,9 @@ class CLBitwiseOrKernel : public ICLKernel
 public:
     /** Default constructor. */
     CLBitwiseOrKernel();
-    /** Prevent instances of this class from being copied (As this class contains pointers). */
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
     CLBitwiseOrKernel(const CLBitwiseOrKernel &) = delete;
-    /** Prevent instances of this class from being copied (As this class contains pointers). */
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
     CLBitwiseOrKernel &operator=(const CLBitwiseOrKernel &) = delete;
     /** Allow instances of this class to be moved */
     CLBitwiseOrKernel(CLBitwiseOrKernel &&) = default;
diff --git a/arm_compute/core/CL/kernels/CLBitwiseXorKernel.h b/arm_compute/core/CL/kernels/CLBitwiseXorKernel.h
index 8443e2e871..905fb69998 100644
--- a/arm_compute/core/CL/kernels/CLBitwiseXorKernel.h
+++ b/arm_compute/core/CL/kernels/CLBitwiseXorKernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -40,9 +40,9 @@ class CLBitwiseXorKernel : public ICLKernel
 public:
     /** Default constructor. */
     CLBitwiseXorKernel();
-    /** Prevent instances of this class from being copied (As this class contains pointers). */
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
     CLBitwiseXorKernel(const CLBitwiseXorKernel &) = delete;
-    /** Prevent instances of this class from being copied (As this class contains pointers). */
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
     CLBitwiseXorKernel &operator=(const CLBitwiseXorKernel &) = delete;
     /** Allow instances of this class to be moved */
     CLBitwiseXorKernel(CLBitwiseXorKernel &&) = default;
diff --git a/arm_compute/core/CL/kernels/CLCannyEdgeKernel.h b/arm_compute/core/CL/kernels/CLCannyEdgeKernel.h
index 2715449365..76ab10ccb3 100644
--- a/arm_compute/core/CL/kernels/CLCannyEdgeKernel.h
+++ b/arm_compute/core/CL/kernels/CLCannyEdgeKernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -39,9 +39,9 @@ class CLGradientKernel : public ICLKernel
 public:
     /** Constructor */
     CLGradientKernel();
-    /** Prevent instances of this class from being copied (As this class contains pointers). */
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
     CLGradientKernel(const CLGradientKernel &) = delete;
-    /** Prevent instances of this class from being copied (As this class contains pointers). */
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
     CLGradientKernel &operator=(const CLGradientKernel &) = delete;
     /** Initialise the kernel's sources, destinations and border mode.
      *
diff --git a/arm_compute/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.h b/arm_compute/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.h
index 8867ca1c37..f31560cb86 100644
--- a/arm_compute/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.h
+++ b/arm_compute/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017, 2018 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -43,7 +43,7 @@ public:
     CLDeconvolutionLayerUpsampleKernel &operator=(const CLDeconvolutionLayerUpsampleKernel &) = delete;
     /** Default Move Constructor. */
     CLDeconvolutionLayerUpsampleKernel(CLDeconvolutionLayerUpsampleKernel &&) = default;
-    /** Default move assignment operator. */
+    /** Default move assignment operator */
     CLDeconvolutionLayerUpsampleKernel &operator=(CLDeconvolutionLayerUpsampleKernel &&) = default;
     /** Default destructor */
     ~CLDeconvolutionLayerUpsampleKernel() = default;
diff --git a/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3Kernel.h b/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3Kernel.h
index 84bc09d1fb..5f72cf70ed 100644
--- a/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3Kernel.h
+++ b/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3Kernel.h
@@ -43,7 +43,7 @@ public:
     CLDepthwiseConvolutionLayer3x3Kernel &operator=(const CLDepthwiseConvolutionLayer3x3Kernel &) = delete;
     /** Default Move Constructor. */
     CLDepthwiseConvolutionLayer3x3Kernel(CLDepthwiseConvolutionLayer3x3Kernel &&) = default;
-    /** Default move assignment operator. */
+    /** Default move assignment operator */
     CLDepthwiseConvolutionLayer3x3Kernel &operator=(CLDepthwiseConvolutionLayer3x3Kernel &&) = default;
     /** Initialize the function's source, destination, conv and border_size.
      *
diff --git a/arm_compute/core/CL/kernels/CLDequantizationLayerKernel.h b/arm_compute/core/CL/kernels/CLDequantizationLayerKernel.h
index 38aa63e98f..25fd3378cb 100644
--- a/arm_compute/core/CL/kernels/CLDequantizationLayerKernel.h
+++ b/arm_compute/core/CL/kernels/CLDequantizationLayerKernel.h
@@ -46,7 +46,7 @@ public:
     CLDequantizationLayerKernel &operator=(const CLDequantizationLayerKernel &) = delete;
     /** Default Move Constructor. */
     CLDequantizationLayerKernel(CLDequantizationLayerKernel &&) = default;
-    /** Default move assignment operator. */
+    /** Default move assignment operator */
     CLDequantizationLayerKernel &operator=(CLDequantizationLayerKernel &&) = default;
     /** Default destructor */
     ~CLDequantizationLayerKernel() = default;
diff --git a/arm_compute/core/CL/kernels/CLDerivativeKernel.h b/arm_compute/core/CL/kernels/CLDerivativeKernel.h
index 58b0de3950..8debe2c4b9 100644
--- a/arm_compute/core/CL/kernels/CLDerivativeKernel.h
+++ b/arm_compute/core/CL/kernels/CLDerivativeKernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -36,9 +36,9 @@ class CLDerivativeKernel : public ICLKernel
 public:
     /** Default constructor */
     CLDerivativeKernel();
-    /** Prevent instances of this class from being copied (As this class contains pointers). */
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
     CLDerivativeKernel(const CLDerivativeKernel &) = delete;
-    /** Prevent instances of this class from being copied (As this class contains pointers). */
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
     CLDerivativeKernel &operator=(const CLDerivativeKernel &) = delete;
     /** Allow instances of this class to be moved */
     CLDerivativeKernel(CLDerivativeKernel &&) = default;
diff --git a/arm_compute/core/CL/kernels/CLIntegralImageKernel.h b/arm_compute/core/CL/kernels/CLIntegralImageKernel.h
index f5146608e1..08cf4c2553 100644
--- a/arm_compute/core/CL/kernels/CLIntegralImageKernel.h
+++ b/arm_compute/core/CL/kernels/CLIntegralImageKernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -49,9 +49,9 @@ class CLIntegralImageVertKernel : public ICLKernel
 public:
     /** Default constructor */
     CLIntegralImageVertKernel();
-    /** Prevent instances of this class from being copied (As this class contains pointers). */
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
     CLIntegralImageVertKernel(const CLIntegralImageVertKernel &) = delete;
-    /** Prevent instances of this class from being copied (As this class contains pointers). */
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
     CLIntegralImageVertKernel &operator=(const CLIntegralImageVertKernel &) = delete;
     /** Allow instances of this class to be moved */
     CLIntegralImageVertKernel(CLIntegralImageVertKernel &&) = default;
diff --git a/arm_compute/core/CL/kernels/CLLKTrackerKernel.h b/arm_compute/core/CL/kernels/CLLKTrackerKernel.h
index 5b995c1151..3f7125160f 100644
--- a/arm_compute/core/CL/kernels/CLLKTrackerKernel.h
+++ b/arm_compute/core/CL/kernels/CLLKTrackerKernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -62,9 +62,12 @@ struct CLOldValue
     int16_t dummy; /**< Dummy field, to make sure the data structure 128-bit align, so that GPU can use vload4 */
 };
 
+/** Interface for OpenCL Array of Internal Key Points. */
 using ICLLKInternalKeypointArray = ICLArray<CLLKInternalKeypoint>;
-using ICLCoefficientTableArray   = ICLArray<CLCoefficientTable>;
-using ICLOldValArray             = ICLArray<CLOldValue>;
+/** Interface for OpenCL Array of Coefficient Tables. */
+using ICLCoefficientTableArray = ICLArray<CLCoefficientTable>;
+/** Interface for OpenCL Array of Old Values. */
+using ICLOldValArray = ICLArray<CLOldValue>;
 
 /** Interface to run the initialization step of LKTracker */
 class CLLKTrackerInitKernel : public ICLKernel
diff --git a/arm_compute/core/CL/kernels/CLMagnitudePhaseKernel.h b/arm_compute/core/CL/kernels/CLMagnitudePhaseKernel.h
index fba48a53b6..28fbe4fcfd 100644
--- a/arm_compute/core/CL/kernels/CLMagnitudePhaseKernel.h
+++ b/arm_compute/core/CL/kernels/CLMagnitudePhaseKernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -39,9 +39,9 @@ class CLMagnitudePhaseKernel : public ICLKernel
 public:
     /** Default constructor. */
     CLMagnitudePhaseKernel();
-    /** Prevent instances of this class from being copied (As this class contains pointers). */
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
     CLMagnitudePhaseKernel(const CLMagnitudePhaseKernel &) = delete;
-    /** Prevent instances of this class from being copied (As this class contains pointers). */
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
     CLMagnitudePhaseKernel &operator=(const CLMagnitudePhaseKernel &) = delete;
     /** Allow instances of this class to be moved */
     CLMagnitudePhaseKernel(CLMagnitudePhaseKernel &&) = default;
diff --git a/arm_compute/core/CL/kernels/CLNormalizationLayerKernel.h b/arm_compute/core/CL/kernels/CLNormalizationLayerKernel.h
index d931152cb9..ef00e59e5c 100644
--- a/arm_compute/core/CL/kernels/CLNormalizationLayerKernel.h
+++ b/arm_compute/core/CL/kernels/CLNormalizationLayerKernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -43,7 +43,7 @@ public:
     CLNormalizationLayerKernel &operator=(const CLNormalizationLayerKernel &) = delete;
     /** Default Move Constructor. */
     CLNormalizationLayerKernel(CLNormalizationLayerKernel &&) = default;
-    /** Default move assignment operator. */
+    /** Default move assignment operator */
     CLNormalizationLayerKernel &operator=(CLNormalizationLayerKernel &&) = default;
     /** Set the input and output tensors.
      *
diff --git a/arm_compute/core/CL/kernels/CLPixelWiseMultiplicationKernel.h b/arm_compute/core/CL/kernels/CLPixelWiseMultiplicationKernel.h
index 1ecd9be8cd..fcabb614df 100644
--- a/arm_compute/core/CL/kernels/CLPixelWiseMultiplicationKernel.h
+++ b/arm_compute/core/CL/kernels/CLPixelWiseMultiplicationKernel.h
@@ -39,9 +39,9 @@ class CLPixelWiseMultiplicationKernel : public ICLKernel
 public:
     /** Default constructor.*/
     CLPixelWiseMultiplicationKernel();
-    /** Prevent instances of this class from being copied (As this class contains pointers). */
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
     CLPixelWiseMultiplicationKernel(const CLPixelWiseMultiplicationKernel &) = delete;
-    /** Prevent instances of this class from being copied (As this class contains pointers). */
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
     CLPixelWiseMultiplicationKernel &operator=(const CLPixelWiseMultiplicationKernel &) = delete;
     /** Allow instances of this class to be moved */
     CLPixelWiseMultiplicationKernel(CLPixelWiseMultiplicationKernel &&) = default;
diff --git a/arm_compute/core/CL/kernels/CLQuantizationLayerKernel.h b/arm_compute/core/CL/kernels/CLQuantizationLayerKernel.h
index 49d76087b5..5d78dce1c2 100644
--- a/arm_compute/core/CL/kernels/CLQuantizationLayerKernel.h
+++ b/arm_compute/core/CL/kernels/CLQuantizationLayerKernel.h
@@ -45,7 +45,7 @@ public:
     CLQuantizationLayerKernel &operator=(const CLQuantizationLayerKernel &) = delete;
     /** Default Move Constructor. */
     CLQuantizationLayerKernel(CLQuantizationLayerKernel &&) = default;
-    /** Default move assignment operator. */
+    /** Default move assignment operator */
     CLQuantizationLayerKernel &operator=(CLQuantizationLayerKernel &&) = default;
     /** Default destructor */
     ~CLQuantizationLayerKernel() = default;
diff --git a/arm_compute/core/CL/kernels/CLRemapKernel.h b/arm_compute/core/CL/kernels/CLRemapKernel.h
index 47692e7a00..1cabe1f0af 100644
--- a/arm_compute/core/CL/kernels/CLRemapKernel.h
+++ b/arm_compute/core/CL/kernels/CLRemapKernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -37,9 +37,9 @@ class CLRemapKernel : public ICLKernel
 public:
     /** Default constructor */
     CLRemapKernel();
-    /** Prevent instances of this class from being copied (As this class contains pointers). */
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
     CLRemapKernel(const CLRemapKernel &) = delete;
-    /** Prevent instances of this class from being copied (As this class contains pointers). */
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
     CLRemapKernel &operator=(const CLRemapKernel &) = delete;
     /** Allow instances of this class to be moved */
     CLRemapKernel(CLRemapKernel &&) = default;
diff --git a/arm_compute/core/CL/kernels/CLScharr3x3Kernel.h b/arm_compute/core/CL/kernels/CLScharr3x3Kernel.h
index 63515fd237..99f712f466 100644
--- a/arm_compute/core/CL/kernels/CLScharr3x3Kernel.h
+++ b/arm_compute/core/CL/kernels/CLScharr3x3Kernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -52,9 +52,9 @@ class CLScharr3x3Kernel : public ICLKernel
 public:
     /** Default constructor: initialize all the pointers to nullptr and parameters to zero. */
     CLScharr3x3Kernel();
-    /** Prevent instances of this class from being copied (As this class contains pointers). */
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
     CLScharr3x3Kernel(const CLScharr3x3Kernel &) = delete;
-    /** Prevent instances of this class from being copied (As this class contains pointers). */
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
     CLScharr3x3Kernel &operator=(const CLScharr3x3Kernel &) = delete;
     /** Allow instances of this class to be moved */
     CLScharr3x3Kernel(CLScharr3x3Kernel &&) = default;
diff --git a/arm_compute/core/CL/kernels/CLSobel3x3Kernel.h b/arm_compute/core/CL/kernels/CLSobel3x3Kernel.h
index fd596d96d4..21f70a84ea 100644
--- a/arm_compute/core/CL/kernels/CLSobel3x3Kernel.h
+++ b/arm_compute/core/CL/kernels/CLSobel3x3Kernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -36,9 +36,9 @@ class CLSobel3x3Kernel : public ICLKernel
 public:
     /** Default constructor: initialize all the pointers to nullptr and parameters to zero. */
     CLSobel3x3Kernel();
-    /** Prevent instances of this class from being copied (As this class contains pointers). */
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
     CLSobel3x3Kernel(const CLSobel3x3Kernel &) = delete;
-    /** Prevent instances of this class from being copied (As this class contains pointers). */
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
     CLSobel3x3Kernel &operator=(const CLSobel3x3Kernel &) = delete;
     /** Allow instances of this class to be moved */
     CLSobel3x3Kernel(CLSobel3x3Kernel &&) = default;
diff --git a/arm_compute/core/CL/kernels/CLSobel5x5Kernel.h b/arm_compute/core/CL/kernels/CLSobel5x5Kernel.h
index 53542ac705..730a73f9a4 100644
--- a/arm_compute/core/CL/kernels/CLSobel5x5Kernel.h
+++ b/arm_compute/core/CL/kernels/CLSobel5x5Kernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -36,9 +36,9 @@ class CLSobel5x5HorKernel : public ICLKernel
 public:
     /** Default constructor: initialize all the pointers to nullptr and parameters to zero. */
     CLSobel5x5HorKernel();
-    /** Prevent instances of this class from being copied (As this class contains pointers). */
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
     CLSobel5x5HorKernel(const CLSobel5x5HorKernel &) = delete;
-    /** Prevent instances of this class from being copied (As this class contains pointers). */
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
     CLSobel5x5HorKernel &operator=(const CLSobel5x5HorKernel &) = delete;
     /** Allow instances of this class to be moved */
     CLSobel5x5HorKernel(CLSobel5x5HorKernel &&) = default;
@@ -77,9 +77,9 @@ class CLSobel5x5VertKernel : public ICLKernel
 public:
     /** Default constructor: initialize all the pointers to nullptr and parameters to zero. */
     CLSobel5x5VertKernel();
-    /** Prevent instances of this class from being copied (As this class contains pointers). */
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
     CLSobel5x5VertKernel(const CLSobel5x5VertKernel &) = delete;
-    /** Prevent instances of this class from being copied (As this class contains pointers). */
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
     CLSobel5x5VertKernel &operator=(const CLSobel5x5VertKernel &) = delete;
     /** Allow instances of this class to be moved */
     CLSobel5x5VertKernel(CLSobel5x5VertKernel &&) = default;
diff --git a/arm_compute/core/CL/kernels/CLSobel7x7Kernel.h b/arm_compute/core/CL/kernels/CLSobel7x7Kernel.h
index 4fa1c931a6..c675ebb92c 100644
--- a/arm_compute/core/CL/kernels/CLSobel7x7Kernel.h
+++ b/arm_compute/core/CL/kernels/CLSobel7x7Kernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -36,9 +36,9 @@ class CLSobel7x7HorKernel : public ICLKernel
 public:
     /** Default constructor: initialize all the pointers to nullptr and parameters to zero. */
     CLSobel7x7HorKernel();
-    /** Prevent instances of this class from being copied (As this class contains pointers). */
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
     CLSobel7x7HorKernel(const CLSobel7x7HorKernel &) = delete;
-    /** Prevent instances of this class from being copied (As this class contains pointers). */
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
     CLSobel7x7HorKernel &operator=(const CLSobel7x7HorKernel &) = delete;
     /** Allow instances of this class to be moved */
     CLSobel7x7HorKernel(CLSobel7x7HorKernel &&) = default;
@@ -77,9 +77,9 @@ class CLSobel7x7VertKernel : public ICLKernel
 public:
     /** Default constructor: initialize all the pointers to nullptr and parameters to zero. */
     CLSobel7x7VertKernel();
-    /** Prevent instances of this class from being copied (As this class contains pointers). */
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
     CLSobel7x7VertKernel(const CLSobel7x7VertKernel &) = delete;
-    /** Prevent instances of this class from being copied (As this class contains pointers). */
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
     CLSobel7x7VertKernel &operator=(const CLSobel7x7VertKernel &) = delete;
     /** Allow instances of this class to be moved */
     CLSobel7x7VertKernel(CLSobel7x7VertKernel &&) = default;
diff --git a/arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h b/arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h
index c072d2a6de..c562565175 100644
--- a/arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h
+++ b/arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -100,6 +100,7 @@ private:
 class CLLogits1DMaxShiftExpSumKernel : public ICLKernel
 {
 public:
+    /** Info for whether a parallel reduction will be run and the vector size of the execution. */
     using ParallelReductionInfo = std::tuple<bool, unsigned int>;
 
 public:
@@ -139,8 +140,8 @@ public:
      *
      * @param[in] size Size to check
      *
-     * @return A two-element tuple where the first element is a boolean specifying is a parallel reduction will be run,
-     *         while the second elements is the vector size of the execution.
+     * @return A two-element tuple where the first element is a boolean specifying if a parallel reduction will be run,
+     *         while the second element is the vector size of the execution.
      */
     static ParallelReductionInfo is_parallel_reduction(size_t size);
 
diff --git a/arm_compute/core/CL/kernels/CLWeightsReshapeKernel.h b/arm_compute/core/CL/kernels/CLWeightsReshapeKernel.h
index b9ede12e3d..7a54284199 100644
--- a/arm_compute/core/CL/kernels/CLWeightsReshapeKernel.h
+++ b/arm_compute/core/CL/kernels/CLWeightsReshapeKernel.h
@@ -28,6 +28,29 @@
 
 namespace arm_compute
 {
+/** OpenCL kernel to perform reshaping on the weights used by convolution and locally connected layers
+ *
+ * Rearranges each 3-dimensional kernel to a single row leading to a matrix with linearized kernels.
+ * In combination with the @ref CLIm2ColKernel, this can transform a convolution into a matrix multiplication.
+ *
+ * For example assuming a 3D weight kernel of 3x3 dimensions and depth of 2 we have:
+ * @f[
+ * \left( \begin{array}{ccc}
+ * a000 & a001 & a002 \\
+ * a010 & a011 & a012 \\
+ * a020 & a021 & a022 \\
+ * \end{array} \right)
+ * \left( \begin{array}{ccc}
+ * a100 & a101 & a102 \\
+ * a110 & a111 & a112 \\
+ * a120 & a121 & a122 \\
+ * \end{array} \right)
+ * \rightarrow
+ * \left( \begin{array}{cccccccccccccccccc}
+ * a000 & a001 & a002 & a010 & a011 & a012 & a020 & a021 & a022 & a100 & a101 & a102 & a110 & a111 & a112 & a120 & a121 & a122 \\
+ * \end{array} \right)
+ * @f]
+ */
 class CLWeightsReshapeKernel : public ICLKernel
 {
 public:
diff --git a/arm_compute/core/CPP/CPPTypes.h b/arm_compute/core/CPP/CPPTypes.h
index cff49db0ac..3abc0a2e88 100644
--- a/arm_compute/core/CPP/CPPTypes.h
+++ b/arm_compute/core/CPP/CPPTypes.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -48,18 +48,20 @@ enum class CPUTarget
     A75_DOT = (A75 | DOT),
 };
 
+/** Information about a CPU. */
 struct CPUInfo
 {
-    CPUTarget CPU{ CPUTarget::INTRINSICS };
-    int       L1_size{ 0 };
-    int       L2_size{ 0 };
+    CPUTarget CPU{ CPUTarget::INTRINSICS }; /**< CPU target. */
+    int       L1_size{ 0 };                 /**< Size of L1 cache. */
+    int       L2_size{ 0 };                 /**< Size of L2 cache. */
 };
 
+/** Information about executing thread and CPU. */
 struct ThreadInfo
 {
-    int     thread_id{ 0 };
-    int     num_threads{ 1 };
-    CPUInfo cpu_info{};
+    int     thread_id{ 0 };   /**< Executing thread id. */
+    int     num_threads{ 1 }; /**< Number of CPU threads. */
+    CPUInfo cpu_info{};       /**< CPU information. */
 };
 } // namespace arm_compute
 #endif /* __ARM_COMPUTE_CPP_TYPES_H__ */
diff --git a/arm_compute/core/CPP/ICPPSimpleKernel.h b/arm_compute/core/CPP/ICPPSimpleKernel.h
index 0580b19ba1..d8cdc794ef 100644
--- a/arm_compute/core/CPP/ICPPSimpleKernel.h
+++ b/arm_compute/core/CPP/ICPPSimpleKernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -30,7 +30,7 @@ namespace arm_compute
 {
 class ITensor;
 
-/** Interface for simple NEON kernels having 1 tensor input and 1 tensor output */
+/** Interface for simple C++ kernels having 1 tensor input and 1 tensor output */
 class ICPPSimpleKernel : public ICPPKernel
 {
 public:
diff --git a/arm_compute/core/CPP/kernels/CPPCornerCandidatesKernel.h b/arm_compute/core/CPP/kernels/CPPCornerCandidatesKernel.h
index b89816ef15..27ac7f03cf 100644
--- a/arm_compute/core/CPP/kernels/CPPCornerCandidatesKernel.h
+++ b/arm_compute/core/CPP/kernels/CPPCornerCandidatesKernel.h
@@ -33,6 +33,7 @@
 namespace arm_compute
 {
 class ITensor;
+/** Interface for CPP Images. */
 using IImage = ITensor;
 
 /** CPP kernel to perform corner candidates
diff --git a/arm_compute/core/CPP/kernels/CPPDetectionWindowNonMaximaSuppressionKernel.h b/arm_compute/core/CPP/kernels/CPPDetectionWindowNonMaximaSuppressionKernel.h
index 87766a7a56..512d2d8327 100644
--- a/arm_compute/core/CPP/kernels/CPPDetectionWindowNonMaximaSuppressionKernel.h
+++ b/arm_compute/core/CPP/kernels/CPPDetectionWindowNonMaximaSuppressionKernel.h
@@ -55,7 +55,7 @@ public:
     CPPDetectionWindowNonMaximaSuppressionKernel &operator=(CPPDetectionWindowNonMaximaSuppressionKernel &&) = default;
     /** Initialise the kernel's input, output and the euclidean minimum distance
      *
-     * @attention: If @ref CLDetectionWindowArray is passed to the kernel, the map() and unmap() methods @ref CLDetectionWindowArray must be called respectively before and after
+     * @attention: If @ref IDetectionWindowArray is passed to the kernel, the map() and unmap() methods of @ref IDetectionWindowArray must be called respectively before and after
      *             the run() method of @ref CPPDetectionWindowNonMaximaSuppressionKernel
      *
      * @param[in, out] input_output Input/Output array of @ref DetectionWindow
diff --git a/arm_compute/core/Dimensions.h b/arm_compute/core/Dimensions.h
index 5319346866..0a9264f6b0 100644
--- a/arm_compute/core/Dimensions.h
+++ b/arm_compute/core/Dimensions.h
@@ -33,7 +33,7 @@
 
 namespace arm_compute
 {
-/* Constant value used to indicate maximum dimensions of a Window, TensorShape and Coordinates */
+/** Constant value used to indicate maximum dimensions of a Window, TensorShape and Coordinates */
 constexpr size_t MAX_DIMS = 6;
 
 /** Dimensions with dimensionality */
@@ -166,32 +166,50 @@ public:
         collapse(num_dimensions() - start, start);
     }
 
-    /** Returns a read/write iterator that points to the first element in the dimension array. */
+    /** Returns a read/write iterator that points to the first element in the dimension array.
+     *
+     * @return an iterator.
+     */
     typename std::array<T, num_max_dimensions>::iterator begin()
     {
         return _id.begin();
     }
-    /** Returns a read-only (constant) iterator that points to the first element in the dimension array. */
+    /** Returns a read-only (constant) iterator that points to the first element in the dimension array.
+     *
+     * @return an iterator.
+     */
     typename std::array<T, num_max_dimensions>::const_iterator begin() const
     {
         return _id.begin();
     }
-    /** Returns a read-only (constant) iterator that points to the first element in the dimension array. */
+    /** Returns a read-only (constant) iterator that points to the first element in the dimension array.
+     *
+     * @return an iterator.
+     */
     typename std::array<T, num_max_dimensions>::const_iterator cbegin() const
     {
         return begin();
     }
-    /** Returns a read/write iterator that points one past the last element in the dimension array. */
+    /** Returns a read/write iterator that points one past the last element in the dimension array.
+     *
+     * @return an iterator.
+     */
     typename std::array<T, num_max_dimensions>::iterator end()
     {
         return _id.end();
     }
-    /** Returns a read-only (constant) iterator that points one past the last element in the dimension array. */
+    /** Returns a read-only (constant) iterator that points one past the last element in the dimension array.
+     *
+     * @return an iterator.
+     */
     typename std::array<T, num_max_dimensions>::const_iterator end() const
     {
         return _id.end();
     }
-    /** Returns a read-only (constant) iterator that points one past the last element in the dimension array. */
+    /** Returns a read-only (constant) iterator that points one past the last element in the dimension array.
+     *
+     * @return an iterator.
+     */
     typename std::array<T, num_max_dimensions>::const_iterator cend() const
     {
         return end();
@@ -205,11 +223,25 @@ protected:
     size_t _num_dimensions{ 0 };
 };
 
+/** Check that given dimensions are equal.
+ *
+ * @param[in] lhs Left-hand side Dimensions.
+ * @param[in] rhs Right-hand side Dimensions.
+ *
+ * @return True if the given dimensions are equal.
+ */
 template <typename T>
 inline bool operator==(const Dimensions<T> &lhs, const Dimensions<T> &rhs)
 {
     return ((lhs.num_dimensions() == rhs.num_dimensions()) && std::equal(lhs.cbegin(), lhs.cend(), rhs.cbegin()));
 }
+/** Check that given dimensions are not equal.
+ *
+ * @param[in] lhs Left-hand side Dimensions.
+ * @param[in] rhs Right-hand side Dimensions.
+ *
+ * @return True if the given dimensions are not equal.
+ */
 template <typename T>
 inline bool operator!=(const Dimensions<T> &lhs, const Dimensions<T> &rhs)
 {
diff --git a/arm_compute/core/Error.h b/arm_compute/core/Error.h
index f178936ede..ce395ccfae 100644
--- a/arm_compute/core/Error.h
+++ b/arm_compute/core/Error.h
@@ -32,6 +32,8 @@ namespace arm_compute
 /** Ignores unused arguments
  *
  * @tparam T Argument types
+ *
+ * @param[in] ... Ignored arguments
  */
 template <typename... T>
 inline void ignore_unused(T &&...)
diff --git a/arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h b/arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h
index 34bd5673b8..c0d64e2edb 100644
--- a/arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h
+++ b/arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -51,9 +51,9 @@ public:
     GCProgram(const GCProgram &) = default;
     /** Default Move Constructor. */
     GCProgram(GCProgram &&) = default;
-    /** Default copy assignment operator. */
+    /** Default copy assignment operator */
     GCProgram &operator=(const GCProgram &) = default;
-    /** Default move assignment operator. */
+    /** Default move assignment operator */
     GCProgram &operator=(GCProgram &&) = default;
     /** Returns program name.
      *
@@ -93,9 +93,9 @@ public:
     GCKernel(const GCKernel &) = default;
     /** Default Move Constructor. */
     GCKernel(GCKernel &&) = default;
-    /** Default copy assignment operator. */
+    /** Default copy assignment operator */
     GCKernel &operator=(const GCKernel &) = default;
-    /** Default move assignment operator. */
+    /** Default move assignment operator */
     GCKernel &operator=(GCKernel &&) = default;
     /** Constructor.
      *
@@ -194,11 +194,14 @@ private:
     ~GCKernelLibrary();
 
 public:
-    /** Prevent instances of this class from being copied. */
+    /** Prevent instances of this class from being copied */
     GCKernelLibrary(const GCKernelLibrary &) = delete;
-    /** Prevent instances of this class from being copied. */
+    /** Prevent instances of this class from being copied */
     const GCKernelLibrary &operator=(const GCKernelLibrary &) = delete;
-
+    /** Get the static instance of @ref GCKernelLibrary.
+     *
+     * @return The static instance of GCKernelLibrary.
+     */
     static GCKernelLibrary &get();
     /** Initialises the kernel library.
      *
diff --git a/arm_compute/core/GLES_COMPUTE/IGCSimpleKernel.h b/arm_compute/core/GLES_COMPUTE/IGCSimpleKernel.h
index a23c4e774e..b3d841d2fc 100644
--- a/arm_compute/core/GLES_COMPUTE/IGCSimpleKernel.h
+++ b/arm_compute/core/GLES_COMPUTE/IGCSimpleKernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -36,13 +36,13 @@ class IGCSimpleKernel : public IGCKernel
 public:
     /** Constructor. */
     IGCSimpleKernel();
-    /** Prevent instances of this class from being copied (As this class contains pointers). */
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
     IGCSimpleKernel(const IGCSimpleKernel &) = delete;
-    /** Prevent instances of this class from being copied (As this class contains pointers). */
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
     IGCSimpleKernel &operator=(const IGCSimpleKernel &) = delete;
-    /** Allow instances of this class to be moved. */
+    /** Allow instances of this class to be moved */
     IGCSimpleKernel(IGCSimpleKernel &&) = default;
-    /** Allow instances of this class to be moved. */
+    /** Allow instances of this class to be moved */
     IGCSimpleKernel &operator=(IGCSimpleKernel &&) = default;
     /** Default destructor */
     ~IGCSimpleKernel() = default;
diff --git a/arm_compute/core/GLES_COMPUTE/IGCTensor.h b/arm_compute/core/GLES_COMPUTE/IGCTensor.h
index 7329864b85..fcfcf690a9 100644
--- a/arm_compute/core/GLES_COMPUTE/IGCTensor.h
+++ b/arm_compute/core/GLES_COMPUTE/IGCTensor.h
@@ -38,10 +38,10 @@ public:
     /** Default constructor. */
     IGCTensor();
 
-    /** Prevent instances of this class from being copied (As this class contains pointers). */
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
     IGCTensor(const IGCTensor &) = delete;
 
-    /** Prevent instances of this class from being copy assigned (As this class contains pointers). */
+    /** Prevent instances of this class from being copy assigned (As this class contains pointers) */
     IGCTensor &operator=(const IGCTensor &) = delete;
 
     /** Allow instances of this class to be moved */
@@ -107,6 +107,7 @@ private:
     bool     _needs_shifting;
 };
 
+/** Interface for GLES Compute image */
 using IGCImage = IGCTensor;
 }
 #endif /*__ARM_COMPUTE_IGCTENSOR_H__ */
diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCAbsoluteDifferenceKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCAbsoluteDifferenceKernel.h
index 71f7b37700..a441d24353 100644
--- a/arm_compute/core/GLES_COMPUTE/kernels/GCAbsoluteDifferenceKernel.h
+++ b/arm_compute/core/GLES_COMPUTE/kernels/GCAbsoluteDifferenceKernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -40,13 +40,13 @@ class GCAbsoluteDifferenceKernel : public IGCKernel
 public:
     /** Default constructor. */
     GCAbsoluteDifferenceKernel();
-    /** Prevent instances of this class from being copied (As this class contains pointers). */
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
     GCAbsoluteDifferenceKernel(const GCAbsoluteDifferenceKernel &) = delete;
-    /** Prevent instances of this class from being copied (As this class contains pointers). */
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
     GCAbsoluteDifferenceKernel &operator=(const GCAbsoluteDifferenceKernel &) = delete;
-    /** Allow instances of this class to be moved. */
+    /** Allow instances of this class to be moved */
     GCAbsoluteDifferenceKernel(GCAbsoluteDifferenceKernel &&) = default;
-    /** Allow instances of this class to be moved. */
+    /** Allow instances of this class to be moved */
     GCAbsoluteDifferenceKernel &operator=(GCAbsoluteDifferenceKernel &&) = default;
     /** Default destructor */
     ~GCAbsoluteDifferenceKernel() = default;
diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCBatchNormalizationLayerKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCBatchNormalizationLayerKernel.h
index 15d7f79afb..754268a348 100644
--- a/arm_compute/core/GLES_COMPUTE/kernels/GCBatchNormalizationLayerKernel.h
+++ b/arm_compute/core/GLES_COMPUTE/kernels/GCBatchNormalizationLayerKernel.h
@@ -43,7 +43,7 @@ public:
     GCBatchNormalizationLayerKernel &operator=(const GCBatchNormalizationLayerKernel &) = delete;
     /** Default Move Constructor. */
     GCBatchNormalizationLayerKernel(GCBatchNormalizationLayerKernel &&) = default;
-    /** Default move assignment operator. */
+    /** Default move assignment operator */
     GCBatchNormalizationLayerKernel &operator=(GCBatchNormalizationLayerKernel &&) = default;
     /** Default destructor */
     ~GCBatchNormalizationLayerKernel() = default;
diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCDepthwiseConvolutionLayer3x3Kernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCDepthwiseConvolutionLayer3x3Kernel.h
index e10769db5e..46b9e897bb 100644
--- a/arm_compute/core/GLES_COMPUTE/kernels/GCDepthwiseConvolutionLayer3x3Kernel.h
+++ b/arm_compute/core/GLES_COMPUTE/kernels/GCDepthwiseConvolutionLayer3x3Kernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -43,7 +43,7 @@ public:
     GCDepthwiseConvolutionLayer3x3Kernel &operator=(const GCDepthwiseConvolutionLayer3x3Kernel &) = delete;
     /** Default Move Constructor. */
     GCDepthwiseConvolutionLayer3x3Kernel(GCDepthwiseConvolutionLayer3x3Kernel &&) = default;
-    /** Default move assignment operator. */
+    /** Default move assignment operator */
     GCDepthwiseConvolutionLayer3x3Kernel &operator=(GCDepthwiseConvolutionLayer3x3Kernel &&) = default;
     /** Initialize the function's source, destination, conv and border_size.
      *
diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.h
index 415b781bc6..f1c9c1933d 100644
--- a/arm_compute/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.h
+++ b/arm_compute/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -80,8 +80,11 @@ private:
     gles::NDRange    _lws;
 };
 
+/** Interface for the 1x1 direct convolution kernel */
 using GCDirectConvolutionLayer1x1Kernel = GCDirectConvolutionLayerKernel<1>;
+/** Interface for the 3x3 direct convolution kernel */
 using GCDirectConvolutionLayer3x3Kernel = GCDirectConvolutionLayerKernel<3>;
+/** Interface for the 5x5 direct convolution kernel */
 using GCDirectConvolutionLayer5x5Kernel = GCDirectConvolutionLayerKernel<5>;
 }
 #endif /*__ARM_COMPUTE_GCDIRECTCONVOLUTIONLAYERKERNEL_H__ */
diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCNormalizationLayerKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCNormalizationLayerKernel.h
index e8bc7ad2b2..96a04d1651 100644
--- a/arm_compute/core/GLES_COMPUTE/kernels/GCNormalizationLayerKernel.h
+++ b/arm_compute/core/GLES_COMPUTE/kernels/GCNormalizationLayerKernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -43,7 +43,7 @@ public:
     GCNormalizationLayerKernel &operator=(const GCNormalizationLayerKernel &) = delete;
     /** Default Move Constructor. */
     GCNormalizationLayerKernel(GCNormalizationLayerKernel &&) = default;
-    /** Default move assignment operator. */
+    /** Default move assignment operator */
     GCNormalizationLayerKernel &operator=(GCNormalizationLayerKernel &&) = default;
     /** Default destrutor */
     ~GCNormalizationLayerKernel() = default;
diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCNormalizePlanarYUVLayerKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCNormalizePlanarYUVLayerKernel.h
index ecf5f44f71..0d785ca0d4 100644
--- a/arm_compute/core/GLES_COMPUTE/kernels/GCNormalizePlanarYUVLayerKernel.h
+++ b/arm_compute/core/GLES_COMPUTE/kernels/GCNormalizePlanarYUVLayerKernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -43,7 +43,7 @@ public:
     GCNormalizePlanarYUVLayerKernel &operator=(const GCNormalizePlanarYUVLayerKernel &) = delete;
     /** Default Move Constructor. */
     GCNormalizePlanarYUVLayerKernel(GCNormalizePlanarYUVLayerKernel &&) = default;
-    /** Default move assignment operator. */
+    /** Default move assignment operator */
     GCNormalizePlanarYUVLayerKernel &operator=(GCNormalizePlanarYUVLayerKernel &&) = default;
     /** Default destructor */
     ~GCNormalizePlanarYUVLayerKernel() = default;
diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCPixelWiseMultiplicationKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCPixelWiseMultiplicationKernel.h
index 3b01b4ad4d..3fb24825a4 100644
--- a/arm_compute/core/GLES_COMPUTE/kernels/GCPixelWiseMultiplicationKernel.h
+++ b/arm_compute/core/GLES_COMPUTE/kernels/GCPixelWiseMultiplicationKernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -39,9 +39,9 @@ class GCPixelWiseMultiplicationKernel : public IGCKernel
 public:
     /** Default constructor.*/
     GCPixelWiseMultiplicationKernel();
-    /** Prevent instances of this class from being copied (As this class contains pointers). */
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
     GCPixelWiseMultiplicationKernel(const GCPixelWiseMultiplicationKernel &) = delete;
-    /** Prevent instances of this class from being copied (As this class contains pointers). */
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
     GCPixelWiseMultiplicationKernel &operator=(const GCPixelWiseMultiplicationKernel &) = delete;
     /** Allow instances of this class to be moved */
     GCPixelWiseMultiplicationKernel(GCPixelWiseMultiplicationKernel &&) = default;
diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCWeightsReshapeKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCWeightsReshapeKernel.h
index bf315a2f15..bf535229ff 100644
--- a/arm_compute/core/GLES_COMPUTE/kernels/GCWeightsReshapeKernel.h
+++ b/arm_compute/core/GLES_COMPUTE/kernels/GCWeightsReshapeKernel.h
@@ -28,6 +28,29 @@
 
 namespace arm_compute
 {
+/** GLES Compute kernel to perform reshaping on the weights used by convolution and locally connected layer
+ *
+ * Rearranges each 3-dimensional kernel into a single row, leading to a matrix of linearized kernels.
+ * In combination with the @ref GCIm2ColKernel, this can transform a convolution into a matrix multiplication.
+ *
+ * For example assuming a 3D weight kernel of 3x3 dimensions and depth of 2 we have:
+ * @f[
+ * \left( \begin{array}{ccc}
+ * a000 & a001 & a002 \\
+ * a010 & a011 & a012 \\
+ * a020 & a021 & a022 \\
+ * \end{array} \right)
+ * \left( \begin{array}{ccc}
+ * a100 & a101 & a102 \\
+ * a110 & a111 & a112 \\
+ * a120 & a121 & a122 \\
+ * \end{array} \right)
+ * \rightarrow
+ * \left( \begin{array}{cccccccccccccccccc}
+ * a000 & a001 & a002 & a010 & a011 & a012 & a020 & a021 & a022 & a100 & a101 & a102 & a110 & a111 & a112 & a120 & a121 & a122 \\
+ * \end{array} \right)
+ * @f]
+ */
 class GCWeightsReshapeKernel : public IGCKernel
 {
 public:
diff --git a/arm_compute/core/Helpers.h b/arm_compute/core/Helpers.h
index 1554f63904..6f3aba7077 100644
--- a/arm_compute/core/Helpers.h
+++ b/arm_compute/core/Helpers.h
@@ -48,18 +48,21 @@ class IKernel;
 class ITensor;
 class ITensorInfo;
 
+/** Disable bitwise operations by default */
 template <typename T>
 struct enable_bitwise_ops
 {
-    static constexpr bool value = false;
+    static constexpr bool value = false; /**< Disabled */
 };
 
+#ifndef DOXYGEN_SKIP_THIS
 template <typename T>
 typename std::enable_if<enable_bitwise_ops<T>::value, T>::type operator&(T lhs, T rhs)
 {
     using underlying_type = typename std::underlying_type<T>::type;
     return static_cast<T>(static_cast<underlying_type>(lhs) & static_cast<underlying_type>(rhs));
 }
+#endif /* DOXYGEN_SKIP_THIS */
 
 namespace traits
 {
@@ -532,7 +535,7 @@ inline void permute(TensorShape &shape, const PermutationVector &perm)
     }
 }
 
-/* Auto initialize the tensor info (shape, number of channels, data type and fixed point position) if the current assignment is empty.
+/** Auto initialize the tensor info (shape, number of channels, data type and fixed point position) if the current assignment is empty.
  *
  * @param[in,out] info                 Tensor info used to check and assign.
  * @param[in]     shape                New shape.
@@ -558,7 +561,7 @@ bool auto_init_if_empty(ITensorInfo       &info,
  */
 bool auto_init_if_empty(ITensorInfo &info_sink, const ITensorInfo &info_source);
 
-/* Set the shape to the specified value if the current assignment is empty.
+/** Set the shape to the specified value if the current assignment is empty.
  *
  * @param[in,out] info  Tensor info used to check and assign.
  * @param[in]     shape New shape.
@@ -567,7 +570,7 @@ bool auto_init_if_empty(ITensorInfo &info_sink, const ITensorInfo &info_source);
  */
 bool set_shape_if_empty(ITensorInfo &info, const TensorShape &shape);
 
-/* Set the format, data type and number of channels to the specified value if
+/** Set the format, data type and number of channels to the specified value if
  * the current data type is unknown.
  *
  * @param[in,out] info   Tensor info used to check and assign.
@@ -577,7 +580,7 @@ bool set_shape_if_empty(ITensorInfo &info, const TensorShape &shape);
  */
 bool set_format_if_unknown(ITensorInfo &info, Format format);
 
-/* Set the data type and number of channels to the specified value if
+/** Set the data type and number of channels to the specified value if
  * the current data type is unknown.
  *
  * @param[in,out] info      Tensor info used to check and assign.
@@ -587,7 +590,7 @@ bool set_format_if_unknown(ITensorInfo &info, Format format);
  */
 bool set_data_type_if_unknown(ITensorInfo &info, DataType data_type);
 
-/* Set the data layout to the specified value if
+/** Set the data layout to the specified value if
  * the current data layout is unknown.
  *
  * @param[in,out] info        Tensor info used to check and assign.
@@ -597,7 +600,7 @@ bool set_data_type_if_unknown(ITensorInfo &info, DataType data_type);
  */
 bool set_data_layout_if_unknown(ITensorInfo &info, DataLayout data_layout);
 
-/* Set the fixed point position to the specified value if
+/** Set the fixed point position to the specified value if
  * the current fixed point position is 0 and the data type is QS8 or QS16
  *
  * @param[in,out] info                 Tensor info used to check and assign.
@@ -607,7 +610,7 @@ bool set_data_layout_if_unknown(ITensorInfo &info, DataLayout data_layout);
  */
 bool set_fixed_point_position_if_zero(ITensorInfo &info, int fixed_point_position);
 
-/* Set the quantization info to the specified value if
+/** Set the quantization info to the specified value if
  * the current quantization info is empty and the data type of asymmetric quantized type
  *
  * @param[in,out] info              Tensor info used to check and assign.
@@ -647,10 +650,10 @@ inline Coordinates index2coords(const TensorShape &shape, int index);
  */
 inline int coords2index(const TensorShape &shape, const Coordinates &coord);
 
-/* Get the index of the given dimension.
+/** Get the index of the given dimension.
  *
- * @param[in] info        Tensor info used to check and assign.
- * @param[in] data_layout New data layout.
+ * @param[in] data_layout           The data layout.
+ * @param[in] data_layout_dimension The dimension which this index is requested for.
  *
  * @return The int conversion of the requested data layout index.
  */
diff --git a/arm_compute/core/IAccessWindow.h b/arm_compute/core/IAccessWindow.h
index 4bbcbb3a40..ab05fbf5e4 100644
--- a/arm_compute/core/IAccessWindow.h
+++ b/arm_compute/core/IAccessWindow.h
@@ -71,12 +71,14 @@ inline int adjust_up(int required, int available, int step)
 class IAccessWindow
 {
 public:
+    /** Default virtual destructor */
     virtual ~IAccessWindow() = default;
     /** Shrink the window if padding is not large enough.
      *
      * @param[in] window Window used by the kernel.
      *
      * @return True if the window has been changed.
+     *
      */
     virtual bool update_window_if_needed(Window &window) const = 0;
     /** Increase the padding to be large enough for the window.
@@ -94,6 +96,9 @@ public:
      * @param[in] input_valid_region Combined valid region of all inputs.
      * @param[in] border_undefined   Undefined borders are excluded from the valid region.
      * @param[in] border_size        Size of the border around the XY-plane of the tensor.
+     *
+     * @return The computed valid region.
+     *
      */
     virtual ValidRegion compute_valid_region(const Window &window, ValidRegion input_valid_region, bool border_undefined, BorderSize border_size) const = 0;
 };
@@ -138,11 +143,16 @@ public:
         ARM_COMPUTE_ERROR_ON(scale_y < 0);
     }
 
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
     AccessWindowRectangle(const AccessWindowRectangle &) = delete;
-    AccessWindowRectangle(AccessWindowRectangle &&)      = delete;
+    /** Allow instances of this class to be move constructed */
+    AccessWindowRectangle(AccessWindowRectangle &&) = default;
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
     AccessWindowRectangle &operator=(const AccessWindowRectangle &) = delete;
+    /** Allow instances of this class to be moved */
     AccessWindowRectangle &operator=(AccessWindowRectangle &&) = default;
-    ~AccessWindowRectangle()                                   = default;
+    /** Default destructor */
+    ~AccessWindowRectangle() = default;
 
     /** Set the valid region based on access pattern, valid region of the inputs and border mode.
      *
@@ -159,12 +169,26 @@ public:
      *
      * @param[in] window             Execution window of the kernel.
      * @param[in] input_valid_region Combined valid region of all inputs.
+     *
+     * @return The computed valid region.
+     *
      */
     ValidRegion compute_valid_region(const Window &window, const ValidRegion &input_valid_region) const;
 
     // Inherited methods overridden:
 
-    /** @note This method assumes that all elements written by the kernel are valid. */
+    /** Compute the valid region based on access pattern and valid region of the inputs.
+     *
+     * @note This method assumes that all elements written by the kernel are valid.
+     *
+     * @param[in] window             Execution window of the kernel.
+     * @param[in] input_valid_region Combined valid region of all inputs.
+     * @param[in] border_undefined   Undefined borders are excluded from the valid region.
+     * @param[in] border_size        Size of the border around the XY-plane of the tensor.
+     *
+     * @return The computed valid region.
+     *
+     */
     ValidRegion compute_valid_region(const Window &window, ValidRegion input_valid_region, bool border_undefined, BorderSize border_size) const override;
 
     bool update_window_if_needed(Window &window) const override;
diff --git a/arm_compute/core/IArray.h b/arm_compute/core/IArray.h
index bc01df9817..f9e09a308b 100644
--- a/arm_compute/core/IArray.h
+++ b/arm_compute/core/IArray.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -136,16 +136,27 @@ private:
     size_t _num_values;
     size_t _max_size;
 };
-using IKeyPointArray        = IArray<KeyPoint>;
-using ICoordinates2DArray   = IArray<Coordinates2D>;
-using IROIArray             = IArray<ROI>;
+/** Interface for Array of Key Points. */
+using IKeyPointArray = IArray<KeyPoint>;
+/** Interface for Array of 2D Coordinates. */
+using ICoordinates2DArray = IArray<Coordinates2D>;
+/** Interface for Array of Detection Windows. */
 using IDetectionWindowArray = IArray<DetectionWindow>;
-using ISize2DArray          = IArray<Size2D>;
-using IUInt8Array           = IArray<uint8_t>;
-using IUInt16Array          = IArray<uint16_t>;
-using IUInt32Array          = IArray<uint32_t>;
-using IInt16Array           = IArray<int16_t>;
-using IInt32Array           = IArray<int32_t>;
-using IFloatArray           = IArray<float>;
+/** Interface for Array of ROIs. */
+using IROIArray = IArray<ROI>;
+/** Interface for Array of 2D Sizes. */
+using ISize2DArray = IArray<Size2D>;
+/** Interface for Array of uint8s. */
+using IUInt8Array = IArray<uint8_t>;
+/** Interface for Array of uint16s. */
+using IUInt16Array = IArray<uint16_t>;
+/** Interface for Array of uint32s. */
+using IUInt32Array = IArray<uint32_t>;
+/** Interface for Array of int16s. */
+using IInt16Array = IArray<int16_t>;
+/** Interface for Array of int32s. */
+using IInt32Array = IArray<int32_t>;
+/** Interface for Array of floats. */
+using IFloatArray = IArray<float>;
 }
 #endif /* __ARM_COMPUTE_IARRAY_H__ */
diff --git a/arm_compute/core/NEON/INEKernel.h b/arm_compute/core/NEON/INEKernel.h
index 529606a709..32d7ab6338 100644
--- a/arm_compute/core/NEON/INEKernel.h
+++ b/arm_compute/core/NEON/INEKernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -28,6 +28,7 @@
 
 namespace arm_compute
 {
+/** Common interface for all kernels implemented in NEON. */
 using INEKernel = ICPPKernel;
 } // namespace arm_compute
 #endif /*__ARM_COMPUTE_INEKERNEL_H__ */
diff --git a/arm_compute/core/NEON/INESimpleKernel.h b/arm_compute/core/NEON/INESimpleKernel.h
index 0d2211ac32..15fc3be5ed 100644
--- a/arm_compute/core/NEON/INESimpleKernel.h
+++ b/arm_compute/core/NEON/INESimpleKernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -28,6 +28,7 @@
 
 namespace arm_compute
 {
+/** Interface for simple NEON kernels having 1 tensor input and 1 tensor output */
 using INESimpleKernel = ICPPSimpleKernel;
 } // namespace arm_compute
 #endif /*__ARM_COMPUTE_INESIMPLEKERNEL_H__ */
diff --git a/arm_compute/core/NEON/NEColorConvertHelper.inl b/arm_compute/core/NEON/NEColorConvertHelper.inl
index 9a9caefaab..0da5affe18 100644
--- a/arm_compute/core/NEON/NEColorConvertHelper.inl
+++ b/arm_compute/core/NEON/NEColorConvertHelper.inl
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -30,6 +30,7 @@
 
 namespace
 {
+#ifndef DOXYGEN_SKIP_THIS
 constexpr float red_coef_bt709    = 1.5748F;
 constexpr float green_coef_bt709  = -0.1873f;
 constexpr float green_coef2_bt709 = -0.4681f;
@@ -296,10 +297,18 @@ inline void store_rgb_to_yuv4(const uint8x16_t &rvec, const uint8x16_t &gvec, co
     vst1q_u8(out_u, uvec);
     vst1q_u8(out_v, vvec);
 }
+#endif /* DOXYGEN_SKIP_THIS */
 }
 
 namespace arm_compute
 {
+/** Convert RGB to RGBX.
+ *
+ * @param[in]  input  Input RGB data buffer.
+ * @param[out] output Output RGBX buffer.
+ * @param[in]  win    Window for iterating the buffers.
+ *
+ */
 void colorconvert_rgb_to_rgbx(const void *__restrict input, void *__restrict output, const Window &win)
 {
     ARM_COMPUTE_ERROR_ON(nullptr == input);
@@ -324,6 +333,13 @@ void colorconvert_rgb_to_rgbx(const void *__restrict input, void *__restrict out
     in, out);
 }
 
+/** Convert RGBX to RGB.
+ *
+ * @param[in]  input  Input RGBX data buffer.
+ * @param[out] output Output RGB buffer.
+ * @param[in]  win    Window for iterating the buffers.
+ *
+ */
 void colorconvert_rgbx_to_rgb(const void *input, void *output, const Window &win)
 {
     ARM_COMPUTE_ERROR_ON(nullptr == input);
@@ -347,6 +363,13 @@ void colorconvert_rgbx_to_rgb(const void *input, void *output, const Window &win
     in, out);
 }
 
+/** Convert YUYV to RGB.
+ *
+ * @param[in]  input  Input YUYV data buffer.
+ * @param[out] output Output RGB buffer.
+ * @param[in]  win    Window for iterating the buffers.
+ *
+ */
 template <bool yuyv, bool alpha>
 void colorconvert_yuyv_to_rgb(const void *__restrict input, void *__restrict output, const Window &win)
 {
@@ -385,6 +408,13 @@ void colorconvert_yuyv_to_rgb(const void *__restrict input, void *__restrict out
     in, out);
 }
 
+/** Convert NV12 to RGB.
+ *
+ * @param[in]  input  Input NV12 data buffer.
+ * @param[out] output Output RGB buffer.
+ * @param[in]  win    Window for iterating the buffers.
+ *
+ */
 template <bool uv, bool alpha>
 void colorconvert_nv12_to_rgb(const void *__restrict input, void *__restrict output, const Window &win)
 {
@@ -441,6 +471,13 @@ void colorconvert_nv12_to_rgb(const void *__restrict input, void *__restrict out
     in_y, in_uv, out);
 }
 
+/** Convert IYUV to RGB.
+ *
+ * @param[in]  input  Input IYUV data buffer.
+ * @param[out] output Output RGB buffer.
+ * @param[in]  win    Window for iterating the buffers.
+ *
+ */
 template <bool alpha>
 void colorconvert_iyuv_to_rgb(const void *__restrict input, void *__restrict output, const Window &win)
 {
@@ -498,6 +535,13 @@ void colorconvert_iyuv_to_rgb(const void *__restrict input, void *__restrict out
     in_y, in_u, in_v, out);
 }
 
+/** Convert YUYV to NV12.
+ *
+ * @param[in]  input  Input YUYV data buffer.
+ * @param[out] output Output NV12 buffer.
+ * @param[in]  win    Window for iterating the buffers.
+ *
+ */
 template <bool yuyv>
 void colorconvert_yuyv_to_nv12(const void *__restrict input, void *__restrict output, const Window &win)
 {
@@ -547,6 +591,13 @@ void colorconvert_yuyv_to_nv12(const void *__restrict input, void *__restrict ou
     in, out_y, out_uv);
 }
 
+/** Convert IYUV to NV12.
+ *
+ * @param[in]  input  Input IYUV data buffer.
+ * @param[out] output Output NV12 buffer.
+ * @param[in]  win    Window for iterating the buffers.
+ *
+ */
 void colorconvert_iyuv_to_nv12(const void *__restrict input, void *__restrict output, const Window &win)
 {
     ARM_COMPUTE_ERROR_ON(nullptr == input);
@@ -587,6 +638,13 @@ void colorconvert_iyuv_to_nv12(const void *__restrict input, void *__restrict ou
     in_y, in_u, in_v, out_y, out_uv);
 }
 
+/** Convert NV12 to IYUV.
+ *
+ * @param[in]  input  Input NV12 data buffer.
+ * @param[out] output Output IYUV buffer.
+ * @param[in]  win    Window for iterating the buffers.
+ *
+ */
 template <bool uv>
 void colorconvert_nv12_to_iyuv(const void *__restrict input, void *__restrict output, const Window &win)
 {
@@ -629,6 +687,13 @@ void colorconvert_nv12_to_iyuv(const void *__restrict input, void *__restrict ou
     in_y, in_uv, out_y, out_u, out_v);
 }
 
+/** Convert YUYV to IYUV.
+ *
+ * @param[in]  input  Input YUYV data buffer.
+ * @param[out] output Output IYUV buffer.
+ * @param[in]  win    Window for iterating the buffers.
+ *
+ */
 template <bool yuyv>
 void colorconvert_yuyv_to_iyuv(const void *__restrict input, void *__restrict output, const Window &win)
 {
@@ -682,6 +747,13 @@ void colorconvert_yuyv_to_iyuv(const void *__restrict input, void *__restrict ou
     in, out_y, out_u, out_v);
 }
 
+/** Convert NV12 to YUV4.
+ *
+ * @param[in]  input  Input NV12 data buffer.
+ * @param[out] output Output YUV4 buffer.
+ * @param[in]  win    Window for iterating the buffers.
+ *
+ */
 template <bool uv>
 void colorconvert_nv12_to_yuv4(const void *__restrict input, void *__restrict output, const Window &win)
 {
@@ -734,6 +806,13 @@ void colorconvert_nv12_to_yuv4(const void *__restrict input, void *__restrict ou
     in_y, in_uv, out_y, out_u, out_v);
 }
 
+/** Convert IYUV to YUV4.
+ *
+ * @param[in]  input  Input IYUV data buffer.
+ * @param[out] output Output YUV4 buffer.
+ * @param[in]  win    Window for iterating the buffers.
+ *
+ */
 void colorconvert_iyuv_to_yuv4(const void *__restrict input, void *__restrict output, const Window &win)
 {
     ARM_COMPUTE_ERROR_ON(nullptr == input);
@@ -785,6 +864,13 @@ void colorconvert_iyuv_to_yuv4(const void *__restrict input, void *__restrict ou
     in_y, in_u, in_v, out_y, out_u, out_v);
 }
 
+/** Convert RGB to NV12.
+ *
+ * @param[in]  input  Input RGB data buffer.
+ * @param[out] output Output NV12 buffer.
+ * @param[in]  win    Window for iterating the buffers.
+ *
+ */
 template <bool alpha>
 void colorconvert_rgb_to_nv12(const void *__restrict input, void *__restrict output, const Window &win)
 {
@@ -821,6 +907,13 @@ void colorconvert_rgb_to_nv12(const void *__restrict input, void *__restrict out
     in, out_y, out_uv);
 }
 
+/** Convert RGB to IYUV.
+ *
+ * @param[in]  input  Input RGB data buffer.
+ * @param[out] output Output IYUV buffer.
+ * @param[in]  win    Window for iterating the buffers.
+ *
+ */
 template <bool alpha>
 void colorconvert_rgb_to_iyuv(const void *__restrict input, void *__restrict output, const Window &win)
 {
@@ -858,6 +951,13 @@ void colorconvert_rgb_to_iyuv(const void *__restrict input, void *__restrict out
     in, out_y, out_u, out_v);
 }
 
+/** Convert RGB to YUV4.
+ *
+ * @param[in]  input  Input RGB data buffer.
+ * @param[out] output Output YUV4 buffer.
+ * @param[in]  win    Window for iterating the buffers.
+ *
+ */
 template <bool alpha>
 void colorconvert_rgb_to_yuv4(const void *__restrict input, void *__restrict output, const Window &win)
 {
diff --git a/arm_compute/core/NEON/NEFixedPoint.inl b/arm_compute/core/NEON/NEFixedPoint.inl
index 966313d58b..b86c3cbec3 100644
--- a/arm_compute/core/NEON/NEFixedPoint.inl
+++ b/arm_compute/core/NEON/NEFixedPoint.inl
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -130,6 +130,7 @@ static const std::array<qint16x8_t, 4> log_tabq_qs16 =
     }
 };
 
+#ifndef DOXYGEN_SKIP_THIS
 inline qint8x8_t vget_low_qs8(qint8x16_t a)
 {
     return vget_low_s8(a);
@@ -1996,4 +1997,5 @@ inline float32x4x2_t vmax2q_f32(float32x4x2_t a, float32x4x2_t b)
     };
     return res;
 }
+#endif /* DOXYGEN_SKIP_THIS */
 } // namespace arm_compute
diff --git a/arm_compute/core/NEON/NEMath.inl b/arm_compute/core/NEON/NEMath.inl
index 50f217c1f1..84154020a5 100644
--- a/arm_compute/core/NEON/NEMath.inl
+++ b/arm_compute/core/NEON/NEMath.inl
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -24,7 +24,7 @@
 
 namespace arm_compute
 {
-/* Exponent polynomial coefficients */
+/** Exponent polynomial coefficients */
 const std::array<float32x4_t, 8> exp_tab =
 {
     {
@@ -39,7 +39,7 @@ const std::array<float32x4_t, 8> exp_tab =
     }
 };
 
-/* Logarithm polynomial coefficients */
+/** Logarithm polynomial coefficients */
 const std::array<float32x4_t, 8> log_tab =
 {
     {
@@ -54,6 +54,7 @@ const std::array<float32x4_t, 8> log_tab =
     }
 };
 
+#ifndef DOXYGEN_SKIP_THIS
 inline float32x4_t vfloorq_f32(float32x4_t val)
 {
     static const float32x4_t CONST_1 = vdupq_n_f32(1.f);
@@ -168,8 +169,10 @@ inline float32x4_t vpowq_f32(float32x4_t val, float32x4_t n)
 {
     return vexpq_f32(vmulq_f32(n, vlogq_f32(val)));
 }
+#endif /* DOXYGEN_SKIP_THIS */
+
 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-/* Exponent polynomial coefficients */
+/** Exponent polynomial coefficients */
 const std::array<float16x8_t, 8> exp_tab_f16 =
 {
     {
@@ -184,7 +187,7 @@ const std::array<float16x8_t, 8> exp_tab_f16 =
     }
 };
 
-/* Logarithm polynomial coefficients */
+/** Logarithm polynomial coefficients */
 const std::array<float16x8_t, 8> log_tab_f16 =
 {
     {
@@ -199,6 +202,7 @@ const std::array<float16x8_t, 8> log_tab_f16 =
     }
 };
 
+#ifndef DOXYGEN_SKIP_THIS
 inline float16x4_t vinvsqrt_f16(float16x4_t x)
 {
     float16x4_t sqrt_reciprocal = vrsqrte_f16(x);
@@ -301,5 +305,6 @@ inline float16x8_t vpowq_f16(float16x8_t val, float16x8_t n)
 {
     return vexpq_f16(vmulq_f16(n, vlogq_f16(val)));
 }
+#endif /* DOXYGEN_SKIP_THIS */
 #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
 } // namespace arm_compute
diff --git a/arm_compute/core/NEON/kernels/NEAccumulateKernel.h b/arm_compute/core/NEON/kernels/NEAccumulateKernel.h
index ad5a16c9f3..82a4199761 100644
--- a/arm_compute/core/NEON/kernels/NEAccumulateKernel.h
+++ b/arm_compute/core/NEON/kernels/NEAccumulateKernel.h
@@ -101,6 +101,7 @@ public:
     void run(const Window &window, const ThreadInfo &info) override;
 };
 #else  /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
+/** Interface for the accumulate weighted kernel using F16 */
 using NEAccumulateWeightedFP16Kernel = NEAccumulateWeightedKernel;
 #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
 
diff --git a/arm_compute/core/NEON/kernels/NEBatchNormalizationLayerKernel.h b/arm_compute/core/NEON/kernels/NEBatchNormalizationLayerKernel.h
index 63eb739487..2408a665e4 100644
--- a/arm_compute/core/NEON/kernels/NEBatchNormalizationLayerKernel.h
+++ b/arm_compute/core/NEON/kernels/NEBatchNormalizationLayerKernel.h
@@ -47,7 +47,7 @@ public:
     NEBatchNormalizationLayerKernel &operator=(const NEBatchNormalizationLayerKernel &) = delete;
     /** Default Move Constructor. */
     NEBatchNormalizationLayerKernel(NEBatchNormalizationLayerKernel &&) = default;
-    /** Default move assignment operator. */
+    /** Default move assignment operator */
     NEBatchNormalizationLayerKernel &operator=(NEBatchNormalizationLayerKernel &&) = default;
     /** Default destructor */
     ~NEBatchNormalizationLayerKernel() = default;
diff --git a/arm_compute/core/NEON/kernels/NEBox3x3Kernel.h b/arm_compute/core/NEON/kernels/NEBox3x3Kernel.h
index 9c139551cb..2f93fd2480 100644
--- a/arm_compute/core/NEON/kernels/NEBox3x3Kernel.h
+++ b/arm_compute/core/NEON/kernels/NEBox3x3Kernel.h
@@ -51,7 +51,7 @@ public:
 };
 
 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-/** NEON kernel to perform a Box 3x3 filter using F16 simd
+/** NEON kernel to perform a Box 3x3 filter for FP16 datatype
  */
 class NEBox3x3FP16Kernel : public NEBox3x3Kernel
 {
@@ -64,6 +64,7 @@ public:
     void run(const Window &window, const ThreadInfo &info) override;
 };
 #else  /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
+/** NEON kernel to perform a Box 3x3 filter for FP16 datatype */
 using NEBox3x3FP16Kernel = NEBox3x3Kernel;
 #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
 } // namespace arm_compute
diff --git a/arm_compute/core/NEON/kernels/NECannyEdgeKernel.h b/arm_compute/core/NEON/kernels/NECannyEdgeKernel.h
index 401b9e47af..58ef1757fe 100644
--- a/arm_compute/core/NEON/kernels/NECannyEdgeKernel.h
+++ b/arm_compute/core/NEON/kernels/NECannyEdgeKernel.h
@@ -86,7 +86,7 @@ protected:
 };
 
 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-/** NEON kernel to perform Gradient computation
+/** NEON kernel to perform Gradient computation for FP16 datatype
  */
 class NEGradientFP16Kernel : public NEGradientKernel
 {
@@ -99,6 +99,7 @@ public:
     void configure(const ITensor *gx, const ITensor *gy, ITensor *magnitude, ITensor *phase, int32_t norm_type) override;
 };
 #else  /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
+/** NEON kernel to perform Gradient computation for FP16 datatype */
 using NEGradientFP16Kernel = NEGradientKernel;
 #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
 
diff --git a/arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.h b/arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.h
index 5871cc5dcb..0c2f30a98c 100644
--- a/arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.h
+++ b/arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.h
@@ -49,7 +49,7 @@ public:
     NEDepthwiseConvolutionLayer3x3Kernel &operator=(const NEDepthwiseConvolutionLayer3x3Kernel &) = delete;
     /** Default Move Constructor. */
     NEDepthwiseConvolutionLayer3x3Kernel(NEDepthwiseConvolutionLayer3x3Kernel &&) = default;
-    /** Default move assignment operator. */
+    /** Default move assignment operator */
     NEDepthwiseConvolutionLayer3x3Kernel &operator=(NEDepthwiseConvolutionLayer3x3Kernel &&) = default;
     /** Initialize the function's source, destination, conv and border_size.
      *
diff --git a/arm_compute/core/NEON/kernels/NEDequantizationLayerKernel.h b/arm_compute/core/NEON/kernels/NEDequantizationLayerKernel.h
index 7ee2078e9e..f48e76f340 100644
--- a/arm_compute/core/NEON/kernels/NEDequantizationLayerKernel.h
+++ b/arm_compute/core/NEON/kernels/NEDequantizationLayerKernel.h
@@ -50,7 +50,7 @@ public:
     NEDequantizationLayerKernel &operator=(const NEDequantizationLayerKernel &) = delete;
     /** Default Move Constructor. */
     NEDequantizationLayerKernel(NEDequantizationLayerKernel &&) = default;
-    /** Default move assignment operator. */
+    /** Default move assignment operator */
     NEDequantizationLayerKernel &operator=(NEDequantizationLayerKernel &&) = default;
     /** Default destructor */
     ~NEDequantizationLayerKernel() = default;
diff --git a/arm_compute/core/NEON/kernels/NEGEMMMatrixVectorMultiplyKernel.h b/arm_compute/core/NEON/kernels/NEGEMMMatrixVectorMultiplyKernel.h
index 286be1acc9..a05d591850 100644
--- a/arm_compute/core/NEON/kernels/NEGEMMMatrixVectorMultiplyKernel.h
+++ b/arm_compute/core/NEON/kernels/NEGEMMMatrixVectorMultiplyKernel.h
@@ -30,6 +30,7 @@ namespace arm_compute
 {
 class ITensor;
 
+/** Interface for the GEMM matrix vector multiply kernel. */
 class NEGEMMMatrixVectorMultiplyKernel : public INESimpleKernel
 {
 public:
diff --git a/arm_compute/core/NEON/kernels/NEHarrisCornersKernel.h b/arm_compute/core/NEON/kernels/NEHarrisCornersKernel.h
index 8037e41695..aabf8b312b 100644
--- a/arm_compute/core/NEON/kernels/NEHarrisCornersKernel.h
+++ b/arm_compute/core/NEON/kernels/NEHarrisCornersKernel.h
@@ -127,6 +127,7 @@ private:
     HarrisScoreFunction *_func;
 };
 #else  /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
+/** Interface for the Harris score kernel using FP16 */
 template <int32_t block_size>
 using NEHarrisScoreFP16Kernel = NEHarrisScoreKernel<block_size>;
 #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
diff --git a/arm_compute/core/NEON/kernels/NELKTrackerKernel.h b/arm_compute/core/NEON/kernels/NELKTrackerKernel.h
index f2105582eb..9a8947f9a0 100644
--- a/arm_compute/core/NEON/kernels/NELKTrackerKernel.h
+++ b/arm_compute/core/NEON/kernels/NELKTrackerKernel.h
@@ -45,6 +45,7 @@ struct NELKInternalKeypoint
     bool  tracking_status{ false }; /**< the tracking status of the keypoint */
 };
 
+/** Interface for NEON Array of Internal Key Points. */
 using INELKInternalKeypointArray = IArray<NELKInternalKeypoint>;
 
 /** Interface for the Lucas-Kanade tracker kernel */
diff --git a/arm_compute/core/NEON/kernels/NEMagnitudePhaseKernel.h b/arm_compute/core/NEON/kernels/NEMagnitudePhaseKernel.h
index 522ed54f95..696721673d 100644
--- a/arm_compute/core/NEON/kernels/NEMagnitudePhaseKernel.h
+++ b/arm_compute/core/NEON/kernels/NEMagnitudePhaseKernel.h
@@ -165,6 +165,7 @@ private:
     ITensor                  *_phase;     /**< Output - Phase */
 };
 #else  /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
+/** Template interface for the kernel to compute magnitude and phase */
 template <MagnitudeType mag_type, PhaseType phase_type>
 using NEMagnitudePhaseFP16Kernel = NEMagnitudePhaseKernel<mag_type, phase_type>;
 #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
diff --git a/arm_compute/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.h b/arm_compute/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.h
index f122ed15fd..588de49316 100644
--- a/arm_compute/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.h
+++ b/arm_compute/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.h
@@ -83,7 +83,7 @@ protected:
 };
 
 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-/** NEON kernel to perform Non-Maxima suppression 3x3 with intermediate results in F16 if the input data type is F32
+/** NEON kernel to perform Non-Maxima suppression 3x3 with intermediate results in FP16 if the input data type is FP32
  */
 class NENonMaximaSuppression3x3FP16Kernel : public NENonMaximaSuppression3x3Kernel
 {
@@ -101,6 +101,7 @@ public:
     void configure(const ITensor *input, ITensor *output, bool border_undefined);
 };
 #else  /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
+/** NEON kernel to perform Non-Maxima suppression 3x3 with intermediate results in FP16 if the input data type is FP32 */
 using NENonMaximaSuppression3x3FP16Kernel = NENonMaximaSuppression3x3Kernel;
 #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
 } // namespace arm_compute
diff --git a/arm_compute/core/NEON/kernels/NENormalizationLayerKernel.h b/arm_compute/core/NEON/kernels/NENormalizationLayerKernel.h
index b835ca7c53..6ae7b73423 100644
--- a/arm_compute/core/NEON/kernels/NENormalizationLayerKernel.h
+++ b/arm_compute/core/NEON/kernels/NENormalizationLayerKernel.h
@@ -47,7 +47,7 @@ public:
     NENormalizationLayerKernel &operator=(const NENormalizationLayerKernel &) = delete;
     /** Default Move Constructor. */
     NENormalizationLayerKernel(NENormalizationLayerKernel &&) = default;
-    /** Default move assignment operator. */
+    /** Default move assignment operator */
     NENormalizationLayerKernel &operator=(NENormalizationLayerKernel &&) = default;
     /** Default destructor */
     ~NENormalizationLayerKernel() = default;
diff --git a/arm_compute/core/NEON/kernels/NEQuantizationLayerKernel.h b/arm_compute/core/NEON/kernels/NEQuantizationLayerKernel.h
index e7cf0a8ca4..ca7658bb7e 100644
--- a/arm_compute/core/NEON/kernels/NEQuantizationLayerKernel.h
+++ b/arm_compute/core/NEON/kernels/NEQuantizationLayerKernel.h
@@ -50,7 +50,7 @@ public:
     NEQuantizationLayerKernel &operator=(const NEQuantizationLayerKernel &) = delete;
     /** Default Move Constructor. */
     NEQuantizationLayerKernel(NEQuantizationLayerKernel &&) = default;
-    /** Default move assignment operator. */
+    /** Default move assignment operator */
     NEQuantizationLayerKernel &operator=(NEQuantizationLayerKernel &&) = default;
     /** Default destructor */
     ~NEQuantizationLayerKernel() = default;
diff --git a/arm_compute/core/NEON/kernels/NEWinogradLayerKernel.h b/arm_compute/core/NEON/kernels/NEWinogradLayerKernel.h
index 9169b75d19..2f44d19b4f 100644
--- a/arm_compute/core/NEON/kernels/NEWinogradLayerKernel.h
+++ b/arm_compute/core/NEON/kernels/NEWinogradLayerKernel.h
@@ -34,6 +34,7 @@ namespace arm_compute
 {
 class ITensor;
 
+/** Interface for the NEON kernel to perform Winograd input transform. */
 template <typename T>
 class INEWinogradLayerTransformInputKernel : public INEKernel
 {
@@ -46,6 +47,8 @@ public:
      * @param[in] n_rows       Number of rows in each feature map.
      * @param[in] n_cols       Number of columns in each feature map.
      * @param[in] same_padding Use "SAME" padding, otherwise use "VALID".
+     *
+     * @return Storage size (in units of T) required.
      */
     virtual unsigned int get_input_storage_size(int n_batches, int n_channels, int n_rows, int n_cols, bool same_padding) const = 0;
 
@@ -72,11 +75,13 @@ public:
      */
     virtual void configure(const T *const input, const int n_batches, const int n_rows, const int n_cols, const int n_channels, const PaddingType padding, T *const output, const int matrix_stride) = 0;
 
+    /** Destructor */
     virtual ~INEWinogradLayerTransformInputKernel()
     {
     }
 };
 
+/** NEON kernel to perform Winograd input transform. */
 template <typename T, int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols>
 class NEWinogradLayerTransformInputKernel : public INEWinogradLayerTransformInputKernel<T>
 {
@@ -89,6 +94,8 @@ public:
      * @param[in] n_rows       Number of rows in each feature map.
      * @param[in] n_cols       Number of columns in each feature map.
      * @param[in] same_padding Use "SAME" padding, otherwise use "VALID".
+     *
+     * @return Storage size (in units of T) required.
      */
     unsigned int get_input_storage_size(
         int  n_batches,
@@ -107,6 +114,7 @@ public:
      */
     int get_matrix_stride(const KernelShape &kernel_shape, const Tensor4DShape &input_shape, const PaddingType padding_type) const override;
 
+    /** Default constructor */
     NEWinogradLayerTransformInputKernel();
 
     const char *name() const override
@@ -139,7 +147,9 @@ public:
     void run(const Window &window, const ThreadInfo &info) override;
     bool is_parallelisable() const override;
 
+    /** Winograd base kernel */
     using WinogradBase = winograd::WinogradGEMM<OutputTileRows, OutputTileCols, KernelCols, KernelCols>;
+    /** Winograd convolution kernel */
     using WinogradConv = typename WinogradBase::template Convolution<T, T>;
 
 private:
@@ -147,6 +157,7 @@ private:
     std::unique_ptr<InputTransform> _transform;
 };
 
+/** Interface for the NEON kernel to perform Winograd output transform. */
 template <typename T>
 class INEWinogradLayerTransformOutputKernel : public INEKernel
 {
@@ -159,6 +170,8 @@ public:
      * @param[in] n_cols            Number of columns in each feature map of the input tensor.
      * @param[in] n_output_channels Number of feature maps in the output tensor.
      * @param[in] same_padding      Use "SAME" padding, otherwise use "VALID".
+     *
+     * @return Storage size (in units of T) required.
      */
     virtual unsigned int get_output_storage_size(int n_batches, int n_rows, int n_cols, int n_output_channels, bool same_padding) const = 0;
 
@@ -208,6 +221,7 @@ public:
     }
 };
 
+/** NEON kernel to perform Winograd output transform. */
 template <typename T, int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols>
 class NEWinogradLayerTransformOutputKernel : public INEWinogradLayerTransformOutputKernel<T>
 {
@@ -227,7 +241,7 @@ public:
     NEWinogradLayerTransformOutputKernel(NEWinogradLayerTransformOutputKernel &&) = default;
     /** Allow instances of this class to be moved */
     NEWinogradLayerTransformOutputKernel &operator=(NEWinogradLayerTransformOutputKernel &&) = default;
-
+    /** Default destructor */
     ~NEWinogradLayerTransformOutputKernel() = default;
 
     // Inherited methods overridden:
@@ -239,6 +253,8 @@ public:
      * @param[in] n_cols            Number of columns in each feature map of the input tensor.
      * @param[in] n_output_channels Number of feature maps in the output tensor.
      * @param[in] same_padding      Use "SAME" padding, otherwise use "VALID".
+     *
+     * @return Storage size (in units of T) required.
      */
     unsigned int get_output_storage_size(int n_batches, int n_rows, int n_cols, int n_output_channels, bool same_padding) const override;
 
@@ -301,6 +317,7 @@ private:
     int            _n_channels;
 };
 
+/** Interface for the NEON kernel to perform Winograd weights transform. */
 template <typename T>
 class INEWinogradLayerTransformWeightsKernel : public INEKernel
 {
@@ -310,6 +327,8 @@ public:
      *
      * @param[in] n_output_channels Number of output feature maps.
      * @param[in] n_input_channels  Number of input feature maps.
+     *
+     * @return Storage size (in units of T) required.
      */
     virtual unsigned int get_weight_storage_size(int n_output_channels, int n_input_channels) const = 0;
     /** Gets the stride between matrices in the kernel worspace
@@ -335,10 +354,12 @@ public:
     }
 };
 
+/** NEON kernel to perform Winograd weights transform. */
 template <typename T, int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols>
 class NEWinogradLayerTransformWeightsKernel final : public INEWinogradLayerTransformWeightsKernel<T>
 {
 public:
+    /** Default constructor. */
     NEWinogradLayerTransformWeightsKernel();
     const char *name() const override
     {
@@ -359,6 +380,7 @@ private:
     std::unique_ptr<WeightsTransform> _transform;
 };
 
+/** Interface for the NEON kernel to perform the Winograd batched GEMM. */
 template <typename TIn, typename TOut>
 class INEWinogradLayerBatchedGEMMKernel : public INEKernel
 {
@@ -406,16 +428,17 @@ public:
     virtual int get_number_blocks() const = 0;
 };
 
+/** NEON kernel to perform the Winograd batched GEMM. */
 template <typename TIn, typename TOut, int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols>
 class NEWinogradLayerBatchedGEMMKernel : public INEWinogradLayerBatchedGEMMKernel<TIn, TOut>
 {
 public:
+    /** Winograd base kernel */
     using WinogradBase = winograd::WinogradGEMM<OutputTileRows, OutputTileCols, KernelRows, KernelCols>;
+    /** Winograd convolution kernel */
     using WinogradConv = typename WinogradBase::template Convolution<TIn, TOut>;
-    using MultiGEMM    = winograd::BatchedBlockedGemm<WinogradConv::M_BLOCK, WinogradConv::N_BLOCK, TIn, TOut>;
-
-    static const int _output_tile_rows = OutputTileRows;
-    static const int _output_tile_cols = OutputTileCols;
+    /** Winograd batched blocked GEMM operator */
+    using MultiGEMM = winograd::BatchedBlockedGemm<WinogradConv::M_BLOCK, WinogradConv::N_BLOCK, TIn, TOut>;
 
     const char *name() const override
     {
@@ -432,7 +455,7 @@ public:
     NEWinogradLayerBatchedGEMMKernel(NEWinogradLayerBatchedGEMMKernel &&) = default;
     /** Allow instances of this class to be moved */
     NEWinogradLayerBatchedGEMMKernel &operator=(NEWinogradLayerBatchedGEMMKernel &&) = default;
-
+    /** Default destructor. */
     ~NEWinogradLayerBatchedGEMMKernel() = default;
 
     // Inherited methods overridden:
@@ -474,6 +497,8 @@ public:
     void run(const Window &window, const ThreadInfo &info) override;
 
 private:
+    static const int           _output_tile_rows = OutputTileRows;
+    static const int           _output_tile_cols = OutputTileCols;
     std::unique_ptr<MultiGEMM> _gemms;
 };
 
diff --git a/arm_compute/core/NEON/kernels/detail/NEActivationFunctionDetail.h b/arm_compute/core/NEON/kernels/detail/NEActivationFunctionDetail.h
index e4d3f54943..71d5a9eef7 100644
--- a/arm_compute/core/NEON/kernels/detail/NEActivationFunctionDetail.h
+++ b/arm_compute/core/NEON/kernels/detail/NEActivationFunctionDetail.h
@@ -30,17 +30,25 @@ namespace arm_compute
 {
 namespace detail
 {
-// Dummy activation object
 /** Dummy activation object */
 template <typename T, int S>
 struct dummy
 {
+    /** NEON vector type. */
     using ExactType = typename wrapper::traits::neon_vector<T, S>::type;
 
+    /** Construct a dummy activation object.
+     *
+     * @param[in] act_info Activation layer information.
+     */
     explicit dummy(ActivationLayerInfo act_info)
     {
         ARM_COMPUTE_UNUSED(act_info);
     }
+    /** Run activation function.
+     *
+     * @param[in] vval Vector of values.
+     */
     void operator()(ExactType &vval)
     {
         ARM_COMPUTE_UNUSED(vval);
@@ -50,62 +58,97 @@ struct dummy
 template <typename T, int S>
 struct relu
 {
-    using ExactType    = typename wrapper::traits::neon_vector<T, S>::type;
+    /** NEON vector type. */
+    using ExactType = typename wrapper::traits::neon_vector<T, S>::type;
+    /** NEON vector tag type. */
     using ExactTagType = typename wrapper::traits::neon_vector<T, S>::tag_type;
 
+    /** Construct a RELU activation object.
+     *
+     * @param[in] act_info Activation layer information.
+     */
     explicit relu(ActivationLayerInfo act_info)
         : vzero(wrapper::vdup_n(static_cast<T>(0.f), ExactTagType{}))
     {
         ARM_COMPUTE_UNUSED(act_info);
     }
 
+    /** Run activation function.
+     *
+     * @param[in] vval Vector of values.
+     */
     void operator()(ExactType &vval)
     {
         vval = wrapper::vmax(vzero, vval);
     }
 
+    /** Vector of zeroes. */
     const ExactType vzero;
 };
 /** Bounded RELU activation object */
 template <typename T, int S>
 struct brelu
 {
-    using ExactType    = typename wrapper::traits::neon_vector<T, S>::type;
+    /** NEON vector type. */
+    using ExactType = typename wrapper::traits::neon_vector<T, S>::type;
+    /** NEON vector tag type. */
     using ExactTagType = typename wrapper::traits::neon_vector<T, S>::tag_type;
 
+    /** Construct a bounded RELU activation object.
+     *
+     * @param[in] act_info Activation layer information.
+     */
     explicit brelu(ActivationLayerInfo act_info)
         : vzero(wrapper::vdup_n(static_cast<T>(0.f), ExactTagType{})),
           valpha(wrapper::vdup_n(static_cast<T>(act_info.a()), ExactTagType{}))
     {
     }
 
+    /** Run activation function.
+     *
+     * @param[in] vval Vector of values.
+     */
     void operator()(ExactType &vval)
     {
         vval = wrapper::vmin(valpha, wrapper::vmax(vzero, vval));
     }
 
+    /** Vector of zeroes. */
     const ExactType vzero;
+    /** Vector of alphas. */
     const ExactType valpha;
 };
 /** Lower-Upper Bounded RELU activation object */
 template <typename T, int S>
 struct lubrelu
 {
-    using ExactType    = typename wrapper::traits::neon_vector<T, S>::type;
+    /** NEON vector type. */
+    using ExactType = typename wrapper::traits::neon_vector<T, S>::type;
+    /** NEON vector tag type. */
     using ExactTagType = typename wrapper::traits::neon_vector<T, S>::tag_type;
 
+    /** Construct a lower-upper bounded RELU activation object.
+     *
+     * @param[in] act_info Activation layer information.
+     */
     explicit lubrelu(ActivationLayerInfo act_info)
         : valpha(wrapper::vdup_n(static_cast<T>(act_info.a()), ExactTagType{})),
           vbeta(wrapper::vdup_n(static_cast<T>(act_info.b()), ExactTagType{}))
     {
     }
 
+    /** Run activation function.
+     *
+     * @param[in] vval Vector of values.
+     */
     void operator()(ExactType &vval)
     {
         vval = wrapper::vmin(valpha, wrapper::vmax(vbeta, vval));
     }
 
+    /** Vector of alphas. */
     const ExactType valpha;
+    /** Vector of betas. */
     const ExactType vbeta;
 };
 } // namespace detail
diff --git a/arm_compute/core/NEON/wrapper/traits.h b/arm_compute/core/NEON/wrapper/traits.h
index 08b2c9b48f..495ddbb1af 100644
--- a/arm_compute/core/NEON/wrapper/traits.h
+++ b/arm_compute/core/NEON/wrapper/traits.h
@@ -42,7 +42,8 @@ struct vector_128_tag {};
 
 /** Create the appropriate NEON vector given its type and size */
 template <typename T, int S> struct neon_vector;
-/** Specializations */
+// Specializations
+#ifndef DOXYGEN_SKIP_THIS
 template <> struct neon_vector<uint8_t, 8>{ using type = uint8x8_t; using tag_type = vector_64_tag; };
 template <> struct neon_vector<int8_t, 8>{ using type = int8x8_t; using tag_type = vector_64_tag; };
 template <> struct neon_vector<uint8_t, 16>{ using type = uint8x16_t; using tag_type = vector_128_tag; };
@@ -61,6 +62,7 @@ template <> struct neon_vector<uint64_t, 2>{ using type = uint64x2_t; using tag_
 template <> struct neon_vector<int64_t, 2>{ using type = int64x2_t; using tag_type = vector_128_tag; };
 template <> struct neon_vector<float_t, 2>{ using type = float32x2_t; using tag_type = vector_64_tag; };
 template <> struct neon_vector<float_t, 4>{ using type = float32x4_t; using tag_type = vector_128_tag; };
+#endif /* DOXYGEN_SKIP_THIS */
 
 /**  Helper type template to get the type of a neon vector */
 template <typename T, int S> using neon_vector_t = typename neon_vector<T, S>::type;
diff --git a/arm_compute/core/QAsymm8.inl b/arm_compute/core/QAsymm8.inl
index 611d68eb23..77109c4010 100644
--- a/arm_compute/core/QAsymm8.inl
+++ b/arm_compute/core/QAsymm8.inl
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -26,6 +26,7 @@
 
 namespace arm_compute
 {
+#ifndef DOXYGEN_SKIP_THIS
 inline qasymm8_t sqcvt_qasymm8_f32(float value, float scale, int offset, RoundingPolicy rounding_policy = RoundingPolicy::TO_NEAREST_UP)
 {
     int quantized = arm_compute::round(value / scale, rounding_policy) + offset;
@@ -38,4 +39,5 @@ inline float scvt_f32_qasymm8(qasymm8_t value, float scale, int offset)
     float dequantized = (static_cast<int>(value) - offset) * scale;
     return dequantized;
 }
+#endif /* DOXYGEN_SKIP_THIS */
 }
diff --git a/arm_compute/core/TensorInfo.h b/arm_compute/core/TensorInfo.h
index 97f9d03dc7..f8cfb35357 100644
--- a/arm_compute/core/TensorInfo.h
+++ b/arm_compute/core/TensorInfo.h
@@ -210,6 +210,8 @@ public:
      * @param[in] hog_info HOG's metadata used to allocate normalized HOG space
      * @param[in] width    Width of the 2D tensor where the HOG descriptor will be computed on
      * @param[in] height   Height of the 2D tensor where the HOG descriptor will be computed on
+     *
+     * @return Total allocation size including padding in bytes.
      */
     size_t init_auto_padding(const HOGInfo &hog_info, unsigned int width, unsigned int height);
 
diff --git a/arm_compute/core/Types.h b/arm_compute/core/Types.h
index ae88e607c3..73baf78918 100644
--- a/arm_compute/core/Types.h
+++ b/arm_compute/core/Types.h
@@ -71,23 +71,23 @@ enum class Format
 /** Available data types */
 enum class DataType
 {
-    UNKNOWN,
-    U8,
-    S8,
-    QS8,
-    QASYMM8,
-    U16,
-    S16,
-    QS16,
-    U32,
-    S32,
-    QS32,
-    U64,
-    S64,
-    F16,
-    F32,
-    F64,
-    SIZET
+    UNKNOWN, /**< Unknown data type */
+    U8,      /**< unsigned 8-bit number */
+    S8,      /**< signed 8-bit number */
+    QS8,     /**< quantized, symmetric fixed-point 8-bit number */
+    QASYMM8, /**< quantized, asymmetric fixed-point 8-bit number */
+    U16,     /**< unsigned 16-bit number */
+    S16,     /**< signed 16-bit number */
+    QS16,    /**< quantized, symmetric fixed-point 16-bit number */
+    U32,     /**< unsigned 32-bit number */
+    S32,     /**< signed 32-bit number */
+    QS32,    /**< quantized, symmetric fixed-point 32-bit number */
+    U64,     /**< unsigned 64-bit number */
+    S64,     /**< signed 64-bit number */
+    F16,     /**< 16-bit floating-point number */
+    F32,     /**< 32-bit floating-point number */
+    F64,     /**< 64-bit floating-point number */
+    SIZET    /**< size_t */
 };
 
 /** Available Sampling Policies */
@@ -100,47 +100,65 @@ enum class SamplingPolicy
 /** Constant value of the border pixels when using BorderMode::CONSTANT */
 constexpr uint8_t CONSTANT_BORDER_VALUE = 199;
 
-/* Constant value used to indicate a half-scale pyramid */
+/** Constant value used to indicate a half-scale pyramid */
 constexpr float SCALE_PYRAMID_HALF = 0.5f;
 
-/* Constant value used to indicate a ORB scaled pyramid */
+/** Constant value used to indicate a ORB scaled pyramid */
 constexpr float SCALE_PYRAMID_ORB = 8.408964152537146130583778358414e-01;
 
 /** Supported tensor data layouts */
 enum class DataLayout
 {
-    UNKNOWN,
-    NCHW,
-    NHWC
+    UNKNOWN, /**< Unknown data layout */
+    NCHW,    /**< Num samples, channels, height, width */
+    NHWC     /**< Num samples, height, width, channels */
 };
 
 /** Supported tensor data layout dimensions */
 enum class DataLayoutDimension
 {
-    CHANNEL,
-    HEIGHT,
-    WIDTH,
-    BATCHES
+    CHANNEL, /**< channel */
+    HEIGHT,  /**< height */
+    WIDTH,   /**< width */
+    BATCHES  /**< batches */
 };
 
 /** Quantization settings (used for QASYMM8 data type) */
 struct QuantizationInfo
 {
+    /** Default constructor */
     QuantizationInfo()
         : scale(0.0f), offset(0)
     {
     }
 
+    /** Construct quantization info.
+     *
+     * @param[in] scale  Scale.
+     * @param[in] offset Offset.
+     */
     QuantizationInfo(float scale, int offset)
         : scale(scale), offset(offset)
     {
     }
 
+    /** Check whether equal to a given quantization info.
+     *
+     * @param[in] other Other quantization info.
+     *
+     * @return True if the given quantization info is the same.
+     */
     bool operator==(const QuantizationInfo &other)
     {
         return scale == other.scale && offset == other.offset;
     }
 
+    /** Check whether not equal to a given quantization info.
+     *
+     * @param[in] other Other quantization info.
+     *
+     * @return True if the given quantization info is not the same.
+     */
     bool operator!=(const QuantizationInfo &other)
     {
         return !(*this == other);
@@ -149,46 +167,80 @@ struct QuantizationInfo
     float scale;  /**< scale */
     int   offset; /**< offset */
 
-    /** Quantizes a value using the scale/offset in this QuantizationInfo */
+    /** Quantizes a value using the scale/offset in this QuantizationInfo
+     *
+     * @param[in] value           Value to quantize.
+     * @param[in] rounding_policy Policy to use when rounding.
+     *
+     * @return the quantized value.
+     */
     qasymm8_t quantize(float value, RoundingPolicy rounding_policy) const
     {
         ARM_COMPUTE_ERROR_ON_MSG(scale == 0, "QuantizationInfo::quantize: scale == 0");
         return sqcvt_qasymm8_f32(value, scale, offset, rounding_policy);
     }
 
-    /** Dequantizes a value using the scale/offset in this QuantizationInfo */
+    /** Dequantizes a value using the scale/offset in this QuantizationInfo
+     *
+     * @param[in] value Value to dequantize.
+     *
+     * @return the original value before quantization.
+     */
     float dequantize(qasymm8_t value) const
     {
         ARM_COMPUTE_ERROR_ON_MSG(scale == 0, "QuantizationInfo::dequantize: scale == 0");
         return scvt_f32_qasymm8(value, scale, offset);
     }
 
-    /** Indicates whether this QuantizationInfo has valid settings or not */
+    /** Indicates whether this QuantizationInfo has valid settings or not
+     *
+     * @return True if this QuantizationInfo has invalid settings (i.e. scale == 0).
+     */
     bool empty() const
     {
         return scale == 0;
     }
 };
 
+/** Container for valid region of a window */
 struct ValidRegion
 {
+    /** Default constructor */
     ValidRegion()
         : anchor{}, shape{}
     {
     }
 
+    /** Allow instances of this class to be copy constructed */
     ValidRegion(const ValidRegion &) = default;
-    ValidRegion(ValidRegion &&)      = default;
+    /** Allow instances of this class to be move constructed */
+    ValidRegion(ValidRegion &&) = default;
+    /** Allow instances of this class to be copied */
     ValidRegion &operator=(const ValidRegion &) = default;
+    /** Allow instances of this class to be moved */
     ValidRegion &operator=(ValidRegion &&) = default;
-    ~ValidRegion()                         = default;
+    /** Default destructor */
+    ~ValidRegion() = default;
 
+    /** Constructor for a valid region with default number of dimensions
+     *
+     * @param[in] an_anchor Anchor for the start of the valid region.
+     * @param[in] a_shape   Shape of the valid region.
+     *
+     */
     ValidRegion(const Coordinates &an_anchor, const TensorShape &a_shape)
         : anchor{ an_anchor }, shape{ a_shape }
     {
         anchor.set_num_dimensions(std::max(anchor.num_dimensions(), shape.num_dimensions()));
     }
 
+    /** Constructor for a valid region with specified number of dimensions
+     *
+     * @param[in] an_anchor      Anchor for the start of the valid region.
+     * @param[in] a_shape        Shape of the valid region.
+     * @param[in] num_dimensions Number of dimensions (must be >= number of dimensions of anchor and shape).
+     *
+     */
     ValidRegion(const Coordinates &an_anchor, const TensorShape &a_shape, size_t num_dimensions)
         : anchor{ an_anchor }, shape{ a_shape }
     {
@@ -223,8 +275,8 @@ struct ValidRegion
         return *this;
     }
 
-    Coordinates anchor;
-    TensorShape shape;
+    Coordinates anchor; /**< Anchor for the start of the valid region. */
+    TensorShape shape;  /**< Shape of the valid region. */
 };
 
 /** Methods available to handle borders */
@@ -274,6 +326,12 @@ struct BorderSize
         return top == right && top == bottom && top == left;
     }
 
+    /** Scale this border size.
+     *
+     * @param[in] scale Scale to multiply border size by.
+     *
+     * @return *this.
+     */
     BorderSize &operator*=(float scale)
     {
         top *= scale;
@@ -284,6 +342,12 @@ struct BorderSize
         return *this;
     }
 
+    /** Scale a copy of this border size.
+     *
+     * @param[in] scale Scale to multiply border size by.
+     *
+     * @return a scaled copy of this.
+     */
     BorderSize operator*(float scale)
     {
         BorderSize size = *this;
@@ -292,6 +356,10 @@ struct BorderSize
         return size;
     }
 
+    /** Limit this border size.
+     *
+     * @param[in] limit Border size to limit this border size to.
+     */
     void limit(const BorderSize &limit)
     {
         top    = std::min(top, limit.top);
@@ -300,12 +368,13 @@ struct BorderSize
         left   = std::min(left, limit.left);
     }
 
-    unsigned int top;
-    unsigned int right;
-    unsigned int bottom;
-    unsigned int left;
+    unsigned int top;    /**< top of the border */
+    unsigned int right;  /**< right of the border */
+    unsigned int bottom; /**< bottom of the border */
+    unsigned int left;   /**< left of the border */
 };
 
+/** Container for 2D padding size */
 using PaddingSize = BorderSize;
 
 /** Policy to handle overflow */
@@ -326,8 +395,8 @@ enum class InterpolationPolicy
 /** Bilinear Interpolation method used by LKTracker */
 enum class BilinearInterpolation
 {
-    BILINEAR_OLD_NEW,
-    BILINEAR_SCHARR
+    BILINEAR_OLD_NEW, /**< Old-new method */
+    BILINEAR_SCHARR   /**< Scharr method */
 };
 
 /** Threshold mode */
@@ -340,9 +409,9 @@ enum class ThresholdType
 /** Termination criteria */
 enum class Termination
 {
-    TERM_CRITERIA_EPSILON,
-    TERM_CRITERIA_ITERATIONS,
-    TERM_CRITERIA_BOTH
+    TERM_CRITERIA_EPSILON,    /**< Terminate when within epsilon of a threshold */
+    TERM_CRITERIA_ITERATIONS, /**< Terminate after a maximum number of iterations */
+    TERM_CRITERIA_BOTH        /**< Terminate on whichever of the other conditions occurs first */
 };
 
 /** Magnitude calculation type. */
@@ -374,6 +443,7 @@ struct KeyPoint
     float   error{ 0.f };         /**< Tracking error initialized to 0 by the corner detector */
 };
 
+/** Internal key point */
 using InternalKeypoint = std::tuple<float, float, float>; /* x,y,strength */
 
 /** Rectangle type */
@@ -542,14 +612,28 @@ public:
           _round_type(round)
     {
     }
+    /** Get the stride.
+     *
+     * @return a pair: stride x, stride y.
+     */
     std::pair<unsigned int, unsigned int> stride() const
     {
         return _stride;
     }
+    /** Check whether the padding is symmetric.
+     *
+     * @return True if the padding is symmetric.
+     */
     bool padding_is_symmetric() const
     {
         return (_pad_left == _pad_right) && (_pad_top == _pad_bottom);
     }
+    /** Get the padding.
+     *
+     * @note This should only be used when the padding is symmetric.
+     *
+     * @return a pair: padding left/right, padding top/bottom
+     */
     std::pair<unsigned int, unsigned int> pad() const
     {
         //this accessor should be used only when padding is symmetric
@@ -557,28 +641,34 @@ public:
         return std::make_pair(_pad_left, _pad_top);
     }
 
+    /** Get the left padding */
     unsigned int pad_left() const
     {
         return _pad_left;
     }
+    /** Get the right padding */
     unsigned int pad_right() const
     {
         return _pad_right;
     }
+    /** Get the top padding */
     unsigned int pad_top() const
     {
         return _pad_top;
     }
+    /** Get the bottom padding */
     unsigned int pad_bottom() const
     {
         return _pad_bottom;
     }
 
+    /** Get the rounding type */
     DimensionRoundingType round() const
     {
         return _round_type;
     }
 
+    /** Check whether this has any padding */
     bool has_padding() const
     {
         return (_pad_left != 0 || _pad_top != 0 || _pad_right != 0 || _pad_bottom != 0);
@@ -645,22 +735,27 @@ public:
         : _pool_type(pool_type), _pool_size(Size2D()), _pad_stride_info(PadStrideInfo(1, 1, 0, 0)), _exclude_padding(false), _is_global_pooling(true)
     {
     }
+    /** Get the pooling type */
     PoolingType pool_type() const
     {
         return _pool_type;
     }
+    /** Get the pooling size */
     const Size2D &pool_size() const
     {
         return _pool_size;
     }
+    /** Get the padding and stride */
     PadStrideInfo pad_stride_info() const
     {
         return _pad_stride_info;
     }
+    /** Check if padding is excluded in calculations */
     bool exclude_padding() const
     {
         return _exclude_padding;
     }
+    /** Check if this is global pooling */
     bool is_global_pooling() const
     {
         return _is_global_pooling;
@@ -688,14 +783,17 @@ public:
         : _pooled_width(pooled_width), _pooled_height(pooled_height), _spatial_scale(spatial_scale)
     {
     }
+    /** Get the pooled width of the layer */
     unsigned int pooled_width() const
     {
         return _pooled_width;
     }
+    /** Get the pooled height of the layer */
     unsigned int pooled_height() const
     {
         return _pooled_height;
     }
+    /** Get the spatial scale */
     float spatial_scale() const
     {
         return _spatial_scale;
@@ -739,18 +837,22 @@ public:
         : _act(f), _a(a), _b(b), _enabled(true)
     {
     }
+    /** Get the type of activation function */
     ActivationFunction activation() const
     {
         return _act;
     }
+    /** Get the alpha value */
     float a() const
     {
         return _a;
     }
+    /** Get the beta value */
     float b() const
     {
         return _b;
     }
+    /** Check if initialised */
     bool enabled() const
     {
         return _enabled;
@@ -781,30 +883,37 @@ public:
         : _type(type), _norm_size(norm_size), _alpha(alpha), _beta(beta), _kappa(kappa), _is_scaled(is_scaled)
     {
     }
+    /** Get the normalization type */
     NormType type() const
     {
         return _type;
     }
+    /** Get the normalization size */
     uint32_t norm_size() const
     {
         return _norm_size;
     }
+    /** Get the alpha value */
     float alpha() const
     {
         return _alpha;
     }
+    /** Get the beta value */
     float beta() const
     {
         return _beta;
     }
+    /** Get the kappa value */
     float kappa() const
     {
         return _kappa;
     }
+    /** Check if normalization is cross map */
     bool is_cross_map() const
     {
         return _type == NormType::CROSS_MAP;
     }
+    /** Check if normalization is not cross map */
     bool is_in_map() const
     {
         return !is_cross_map();
@@ -1046,6 +1155,15 @@ struct IOFormatInfo
         Full         /**< Print the tensor object including padding */
     };
 
+    /** Construct a set of IO formatting information.
+     *
+     * @param[in] print_region   Area to be printed. Used by Tensor objects. Default: ValidRegion.
+     * @param[in] precision_type Precision type for floating point numbers. Default: stream default.
+     * @param[in] precision      Precision value for floating point numbers. Default: 10.
+     * @param[in] align_columns  Whether to align columns when printed. Default: true.
+     * @param[in] element_delim  Delimiter between elements. Default: " ".
+     * @param[in] row_delim      Delimiter between rows. Default: "\n".
+     */
     IOFormatInfo(PrintRegion   print_region   = PrintRegion::ValidRegion,
                  PrecisionType precision_type = PrecisionType::Default,
                  unsigned int  precision      = 10,
@@ -1061,12 +1179,18 @@ struct IOFormatInfo
     {
     }
 
-    PrintRegion   print_region;
+    /** Area to be printed by Tensor objects */
+    PrintRegion print_region;
+    /** Floating point precision type */
     PrecisionType precision_type;
-    unsigned int  precision;
-    std::string   element_delim;
-    std::string   row_delim;
-    bool          align_columns;
+    /** Floating point precision */
+    unsigned int precision;
+    /** Element delimiter */
+    std::string element_delim;
+    /** Row delimiter */
+    std::string row_delim;
+    /** Align columns */
+    bool align_columns;
 };
 
 /** Available ConvolutionMethod*/
diff --git a/arm_compute/core/Utils.h b/arm_compute/core/Utils.h
index b3ebf5e25b..f1e357293f 100644
--- a/arm_compute/core/Utils.h
+++ b/arm_compute/core/Utils.h
@@ -40,14 +40,26 @@
 
 namespace arm_compute
 {
-/** Calculate the rounded up quotient of val / m. */
+/** Calculate the rounded up quotient of val / m.
+ *
+ * @param[in] val Value to divide and round up.
+ * @param[in] m   Value to divide by.
+ *
+ * @return the result.
+ */
 template <typename S, typename T>
 constexpr auto DIV_CEIL(S val, T m) -> decltype((val + m - 1) / m)
 {
     return (val + m - 1) / m;
 }
 
-/** Computes the smallest number larger or equal to value that is a multiple of divisor. */
+/** Computes the smallest number larger or equal to value that is a multiple of divisor.
+ *
+ * @param[in] value   Lower bound value
+ * @param[in] divisor Value to compute multiple of.
+ *
+ * @return the result.
+ */
 template <typename S, typename T>
 inline auto ceil_to_multiple(S value, T divisor) -> decltype(((value + divisor - 1) / divisor) * divisor)
 {
@@ -55,7 +67,13 @@ inline auto ceil_to_multiple(S value, T divisor) -> decltype(((value + divisor -
     return DIV_CEIL(value, divisor) * divisor;
 }
 
-/** Computes the largest number smaller or equal to value that is a multiple of divisor. */
+/** Computes the largest number smaller or equal to value that is a multiple of divisor.
+ *
+ * @param[in] value   Upper bound value
+ * @param[in] divisor Value to compute multiple of.
+ *
+ * @return the result.
+ */
 template <typename S, typename T>
 inline auto floor_to_multiple(S value, T divisor) -> decltype((value / divisor) * divisor)
 {
@@ -665,6 +683,7 @@ TensorShape calculate_depth_concatenate_shape(const std::vector<T *> &inputs_vec
  * @param[in, out] shape  Tensor shape of 2D size
  * @param[in]      format Format of the tensor
  *
+ * @return The adjusted tensor shape.
  */
 inline TensorShape adjust_odd_shape(const TensorShape &shape, Format format)
 {
diff --git a/arm_compute/core/Validate.h b/arm_compute/core/Validate.h
index 08e872fd90..8a0257407c 100644
--- a/arm_compute/core/Validate.h
+++ b/arm_compute/core/Validate.h
@@ -61,7 +61,7 @@ inline bool have_different_dimensions(const Dimensions<T> &dim1, const Dimension
     return false;
 }
 
-/** Functor to compare two @ref Dimensions objects and throw an error on mismatch.
+/** Function to compare two @ref Dimensions objects and throw an error on mismatch.
  *
  * @param[in] dim      Object to compare against.
  * @param[in] function Function in which the error occurred.
@@ -72,6 +72,13 @@ template <typename T>
 class compare_dimension
 {
 public:
+    /** Construct a comparison function.
+     *
+     * @param[in] dim      Dimensions to compare.
+     * @param[in] function Source function. Used for error reporting.
+     * @param[in] file     Source code file. Used for error reporting.
+     * @param[in] line     Source code line. Used for error reporting.
+     */
     compare_dimension(const Dimensions<T> &dim, const char *function, const char *file, int line)
         : _dim{ dim }, _function{ function }, _file{ file }, _line{ line }
     {
@@ -80,6 +87,8 @@ public:
     /** Compare the given object against the stored one.
      *
      * @param[in] dim To be compared object.
+     *
+     * @return a status.
      */
     arm_compute::Status operator()(const Dimensions<T> &dim)
     {
@@ -109,11 +118,19 @@ inline arm_compute::Status for_each_error(F &&func, T &&arg, Ts &&... args)
     return arm_compute::Status{};
 }
 
+/** Get the info for a tensor, dummy struct */
 template <typename T>
 struct get_tensor_info_t;
+/** Get the info for a tensor */
 template <>
 struct get_tensor_info_t<ITensorInfo *>
 {
+    /** Get the info for a tensor.
+     *
+     * @param[in] tensor Tensor.
+     *
+     * @return tensor info.
+     */
     ITensorInfo *operator()(const ITensor *tensor)
     {
         return tensor->info();
@@ -845,6 +862,8 @@ arm_compute::Status error_on_invalid_multi_hog(const char *function, const char
  * @param[in] file     Name of the file where the error occurred.
  * @param[in] line     Line on which the error occurred.
  * @param[in] kernel   Kernel to validate.
+ *
+ * @return Status
  */
 arm_compute::Status error_on_unconfigured_kernel(const char *function, const char *file, const int line,
                                                  const IKernel *kernel);
diff --git a/arm_compute/core/Window.h b/arm_compute/core/Window.h
index 5ca210a112..6f172ecebf 100644
--- a/arm_compute/core/Window.h
+++ b/arm_compute/core/Window.h
@@ -320,7 +320,7 @@ public:
         return slide_window_slice<4>(slice);
     }
 
-    /* Collapse the dimensions between @p first and @p last if possible.
+    /** Collapse the dimensions between @p first and @p last if possible.
      *
      * A dimension is collapsable if it starts from 0 and matches the corresponding dimension in the full_window
      *
@@ -333,7 +333,7 @@ public:
      */
     Window collapse_if_possible(const Window &full_window, size_t first, size_t last, bool *has_collapsed = nullptr) const;
 
-    /* Collapse the dimensions higher than @p first if possible.
+    /** Collapse the dimensions higher than @p first if possible.
      *
      * A dimension is collapsable if it starts from 0 and matches the corresponding dimension in the full_window
      *
@@ -348,7 +348,7 @@ public:
         return collapse_if_possible(full_window, first, Coordinates::num_max_dimensions, has_collapsed);
     }
 
-    /* Collapse the dimensions between @p first and @p last.
+    /** Collapse the dimensions between @p first and @p last.
      *
      * A dimension is collapsable if it starts from 0 and matches the corresponding dimension in the full_window
      *
@@ -360,7 +360,7 @@ public:
      */
     Window collapse(const Window &full_window, size_t first, size_t last = Coordinates::num_max_dimensions) const;
 
-    /* Don't advance in the dimension where @p shape is less equal to 1.
+    /** Don't advance in the dimension where @p shape is less than or equal to 1.
      *
      * @param[in] shape A TensorShape.
      *
@@ -368,7 +368,7 @@ public:
      */
     Window broadcast_if_dimension_le_one(const TensorShape &shape) const;
 
-    /* Don't advance in the dimension where shape of @p info is less equal to 1.
+    /** Don't advance in the dimension where shape of @p info is less than or equal to 1.
      *
      * @param[in] info An ITensorInfo.
      *
diff --git a/arm_compute/core/utils/logging/LoggerRegistry.h b/arm_compute/core/utils/logging/LoggerRegistry.h
index d3c691139e..066a42ff4f 100644
--- a/arm_compute/core/utils/logging/LoggerRegistry.h
+++ b/arm_compute/core/utils/logging/LoggerRegistry.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -51,7 +51,7 @@ public:
      * @note Some names are reserved e.g. [CORE, RUNTIME, GRAPH]
      *
      * @param[in] name      Logger's name
-     * @param[in] log_level Logger's log level. Defaults to @ref LogLevel::INFO
+     * @param[in] log_level Logger's log level. Defaults to INFO
      * @param[in] printers  Printers to attach to the system loggers. Defaults with a @ref StdPrinter.
      */
     void create_logger(const std::string &name, LogLevel log_level = LogLevel::INFO,
@@ -70,7 +70,7 @@ public:
     std::shared_ptr<Logger> logger(const std::string &name);
     /** Creates reserved library loggers
      *
-     * @param[in] log_level (Optional) Logger's log level. Defaults to @ref LogLevel::INFO
+     * @param[in] log_level (Optional) Logger's log level. Defaults to INFO
      * @param[in] printers  (Optional) Printers to attach to the system loggers. Defaults with a @ref StdPrinter.
      */
     void create_reserved_loggers(LogLevel                              log_level = LogLevel::INFO,
diff --git a/arm_compute/core/utils/logging/Types.h b/arm_compute/core/utils/logging/Types.h
index 0b40e3d7a1..d56784369c 100644
--- a/arm_compute/core/utils/logging/Types.h
+++ b/arm_compute/core/utils/logging/Types.h
@@ -31,7 +31,7 @@ namespace arm_compute
 namespace logging
 {
 /** Logging level enumeration */
-enum class LogLevel : unsigned int
+enum class LogLevel
 {
     VERBOSE, /**< All logging messages */
     INFO,    /**< Information log level */
@@ -40,19 +40,28 @@ enum class LogLevel : unsigned int
     OFF      /**< No logging */
 };
 
+/** Log message */
 struct LogMsg
 {
+    /** Default constructor */
     LogMsg()
         : raw_(), log_level_(LogLevel::OFF)
     {
     }
+    /** Construct a log message
+     *
+     * @param[in] msg       Message to log.
+     * @param[in] log_level Logging level. Default: OFF
+     */
     LogMsg(std::string msg, LogLevel log_level = LogLevel::OFF)
         : raw_(msg), log_level_(log_level)
     {
     }
 
+    /** Log message */
     std::string raw_;
-    LogLevel    log_level_;
+    /** Logging level */
+    LogLevel log_level_;
 };
 } // namespace logging
 } // namespace arm_compute
diff --git a/arm_compute/core/utils/misc/CRTP.h b/arm_compute/core/utils/misc/CRTP.h
index 9947312f95..8c39af14b1 100644
--- a/arm_compute/core/utils/misc/CRTP.h
+++ b/arm_compute/core/utils/misc/CRTP.h
@@ -33,6 +33,7 @@ template <typename T, template <typename> class Type>
 struct CRTP
 {
 public:
+    /** Exact type */
     using ExactType = T;
 
 protected:
diff --git a/arm_compute/core/utils/misc/Iterable.h b/arm_compute/core/utils/misc/Iterable.h
index 96a650af35..d150111a56 100644
--- a/arm_compute/core/utils/misc/Iterable.h
+++ b/arm_compute/core/utils/misc/Iterable.h
@@ -49,21 +49,37 @@ public:
     {
     }
 
+    /** Get beginning of iterator.
+     *
+     * @return beginning of iterator.
+     */
     typename T::reverse_iterator begin()
     {
         return _it.rbegin();
     }
 
+    /** Get end of iterator.
+     *
+     * @return end of iterator.
+     */
     typename T::reverse_iterator end()
     {
         return _it.rend();
     }
 
+    /** Get beginning of const iterator.
+     *
+     * @return beginning of const iterator.
+     */
     typename T::const_reverse_iterator cbegin()
     {
         return _it.rbegin();
     }
 
+    /** Get end of const iterator.
+     *
+     * @return end of const iterator.
+     */
     typename T::const_reverse_iterator cend()
     {
         return _it.rend();
diff --git a/arm_compute/core/utils/strong_type/StrongTypeAttributes.h b/arm_compute/core/utils/strong_type/StrongTypeAttributes.h
index b5ed48f5ce..78a4032da7 100644
--- a/arm_compute/core/utils/strong_type/StrongTypeAttributes.h
+++ b/arm_compute/core/utils/strong_type/StrongTypeAttributes.h
@@ -34,6 +34,7 @@ namespace strong_type
 template <typename T>
 struct Comparable : misc::CRTP<T, Comparable>
 {
+#ifndef DOXYGEN_SKIP_THIS
     bool operator==(T const &other) const
     {
         return this->impl().get() == other.get();
@@ -58,6 +59,7 @@ struct Comparable : misc::CRTP<T, Comparable>
     {
         return !(*this > other);
     }
+#endif /* DOXYGEN_SKIP_THIS */
 };
 } // namespace strong_type
 } // namespace arm_compute
diff --git a/arm_compute/graph/INode.h b/arm_compute/graph/INode.h
index 1969423074..be4575de91 100644
--- a/arm_compute/graph/INode.h
+++ b/arm_compute/graph/INode.h
@@ -46,6 +46,8 @@ public:
      * @param[in] ctx    Graph context to be used
      * @param[in] input  Input tensor of the node
      * @param[in] output Output tensor of the node
+     *
+     * @return a pointer to the function which implements the node.
      */
     virtual std::unique_ptr<arm_compute::IFunction> instantiate_node(GraphContext &ctx, ITensorObject *input, ITensorObject *output) = 0;
     /** Override the existing target hint
diff --git a/arm_compute/graph/IOperation.h b/arm_compute/graph/IOperation.h
index a9fa4f83c7..b1afb21db7 100644
--- a/arm_compute/graph/IOperation.h
+++ b/arm_compute/graph/IOperation.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -43,6 +43,8 @@ public:
     /** Interface to be implemented that configures an operation
      *
      * @param[in] ctx Node parameters to be used by the operation
+     *
+     * @return a pointer to the function which implements the operation.
      */
     virtual std::unique_ptr<arm_compute::IFunction> configure(NodeContext &ctx) = 0;
     /** Interface to be implemented that returns the target of the operation
diff --git a/arm_compute/graph/ITensorObject.h b/arm_compute/graph/ITensorObject.h
index a922dd53fe..1b6f929305 100644
--- a/arm_compute/graph/ITensorObject.h
+++ b/arm_compute/graph/ITensorObject.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -61,7 +61,11 @@ public:
      *
      * @return Tensor
      */
-    virtual ITensor       *tensor()       = 0;
+    virtual ITensor *tensor() = 0;
+    /** Returns a pointer to the internal tensor
+     *
+     * @return const Tensor
+     */
     virtual const ITensor *tensor() const = 0;
     /** Return the target that this tensor is pinned on
      *
diff --git a/arm_compute/graph2/GraphContext.h b/arm_compute/graph2/GraphContext.h
index bd579eaca2..72ed96e7a0 100644
--- a/arm_compute/graph2/GraphContext.h
+++ b/arm_compute/graph2/GraphContext.h
@@ -38,8 +38,8 @@ namespace graph2
 /** Contains structs required for memory management */
 struct MemoryManagerContext
 {
-    Target                                       target = { Target::UNSPECIFIED };
-    std::shared_ptr<arm_compute::IMemoryManager> mm     = { nullptr };
+    Target                                       target = { Target::UNSPECIFIED }; /**< Target */
+    std::shared_ptr<arm_compute::IMemoryManager> mm     = { nullptr };             /**< Memory manager */
 };
 
 /** Graph context **/
diff --git a/arm_compute/graph2/INodeVisitor.h b/arm_compute/graph2/INodeVisitor.h
index 429a2584bb..a7b8aeb45d 100644
--- a/arm_compute/graph2/INodeVisitor.h
+++ b/arm_compute/graph2/INodeVisitor.h
@@ -34,23 +34,88 @@ namespace graph2
 class INodeVisitor
 {
 public:
-    virtual ~INodeVisitor()                              = default;
-    virtual void visit(INode &n)                         = 0;
-    virtual void visit(ActivationLayerNode &n)           = 0;
-    virtual void visit(BatchNormalizationLayerNode &n)   = 0;
-    virtual void visit(ConstNode &n)                     = 0;
-    virtual void visit(ConvolutionLayerNode &n)          = 0;
-    virtual void visit(DepthConcatenateLayerNode &n)     = 0;
+    /** Default destructor. */
+    virtual ~INodeVisitor() = default;
+    /** Visit INode.
+     *
+     * @param[in] n Node to visit.
+     */
+    virtual void visit(INode &n) = 0;
+    /** Visit ActivationLayerNode.
+     *
+     * @param[in] n Node to visit.
+     */
+    virtual void visit(ActivationLayerNode &n) = 0;
+    /** Visit BatchNormalizationLayerNode.
+     *
+     * @param[in] n Node to visit.
+     */
+    virtual void visit(BatchNormalizationLayerNode &n) = 0;
+    /** Visit ConstNode.
+     *
+     * @param[in] n Node to visit.
+     */
+    virtual void visit(ConstNode &n) = 0;
+    /** Visit ConvolutionLayerNode.
+     *
+     * @param[in] n Node to visit.
+     */
+    virtual void visit(ConvolutionLayerNode &n) = 0;
+    /** Visit DepthConcatenateLayerNode.
+     *
+     * @param[in] n Node to visit.
+     */
+    virtual void visit(DepthConcatenateLayerNode &n) = 0;
+    /** Visit DepthwiseConvolutionLayerNode.
+     *
+     * @param[in] n Node to visit.
+     */
     virtual void visit(DepthwiseConvolutionLayerNode &n) = 0;
-    virtual void visit(EltwiseLayerNode &n)              = 0;
-    virtual void visit(FlattenLayerNode &n)              = 0;
-    virtual void visit(FullyConnectedLayerNode &n)       = 0;
-    virtual void visit(InputNode &n)                     = 0;
-    virtual void visit(NormalizationLayerNode &n)        = 0;
-    virtual void visit(OutputNode &n)                    = 0;
-    virtual void visit(PoolingLayerNode &n)              = 0;
-    virtual void visit(ReshapeLayerNode &n)              = 0;
-    virtual void visit(SoftmaxLayerNode &n)              = 0;
+    /** Visit EltwiseLayerNode.
+     *
+     * @param[in] n Node to visit.
+     */
+    virtual void visit(EltwiseLayerNode &n) = 0;
+    /** Visit FlattenLayerNode.
+     *
+     * @param[in] n Node to visit.
+     */
+    virtual void visit(FlattenLayerNode &n) = 0;
+    /** Visit FullyConnectedLayerNode.
+     *
+     * @param[in] n Node to visit.
+     */
+    virtual void visit(FullyConnectedLayerNode &n) = 0;
+    /** Visit InputNode.
+     *
+     * @param[in] n Node to visit.
+     */
+    virtual void visit(InputNode &n) = 0;
+    /** Visit NormalizationLayerNode.
+     *
+     * @param[in] n Node to visit.
+     */
+    virtual void visit(NormalizationLayerNode &n) = 0;
+    /** Visit OutputNode.
+     *
+     * @param[in] n Node to visit.
+     */
+    virtual void visit(OutputNode &n) = 0;
+    /** Visit PoolingLayerNode.
+     *
+     * @param[in] n Node to visit.
+     */
+    virtual void visit(PoolingLayerNode &n) = 0;
+    /** Visit ReshapeLayerNode.
+     *
+     * @param[in] n Node to visit.
+     */
+    virtual void visit(ReshapeLayerNode &n) = 0;
+    /** Visit SoftmaxLayerNode.
+     *
+     * @param[in] n Node to visit.
+     */
+    virtual void visit(SoftmaxLayerNode &n) = 0;
 };
 
 /** Default visitor implementation
@@ -61,8 +126,10 @@ public:
 class DefaultNodeVisitor : public INodeVisitor
 {
 public:
+    /** Default destructor */
     virtual ~DefaultNodeVisitor() = default;
 
+#ifndef DOXYGEN_SKIP_THIS
     // Inherited methods overridden
     virtual void visit(INode &n) override
     {
@@ -128,6 +195,7 @@ public:
     {
         default_visit();
     }
+#endif /* DOXYGEN_SKIP_THIS */
 
     /** Function to be overloaded by the client and implement default behavior for the
      *  non-overloaded visitors
diff --git a/arm_compute/graph2/TensorDescriptor.h b/arm_compute/graph2/TensorDescriptor.h
index ff23f71471..1a69dc10e8 100644
--- a/arm_compute/graph2/TensorDescriptor.h
+++ b/arm_compute/graph2/TensorDescriptor.h
@@ -46,9 +46,9 @@ struct TensorDescriptor final
     {
     }
 
-    TensorShape shape{};
-    DataType    data_type{ DataType::UNKNOWN };
-    Target      target{ Target::UNSPECIFIED };
+    TensorShape shape{};                        /**< Tensor shape */
+    DataType    data_type{ DataType::UNKNOWN }; /**< Data type */
+    Target      target{ Target::UNSPECIFIED };  /**< Target */
 };
 } // namespace graph2
 } // namespace arm_compute
diff --git a/arm_compute/graph2/Types.h b/arm_compute/graph2/Types.h
index 05c15f4daf..2e9fe38380 100644
--- a/arm_compute/graph2/Types.h
+++ b/arm_compute/graph2/Types.h
@@ -144,15 +144,15 @@ enum class MemoryManagerAffinity
  */
 struct NodeIdxPair
 {
-    NodeID node_id;
-    size_t index;
+    NodeID node_id; /**< Node ID */
+    size_t index;   /**< Index */
 };
 
 /** Common node parameters */
 struct NodeParams
 {
-    std::string name;
-    Target      target;
+    std::string name;   /**< Node name */
+    Target      target; /**< Node target */
 };
 } // namespace graph2
 } // namespace arm_compute
diff --git a/arm_compute/graph2/backends/NEON/NEFunctionFactory.h b/arm_compute/graph2/backends/NEON/NEFunctionFactory.h
index a065340ad6..09ca49ae6c 100644
--- a/arm_compute/graph2/backends/NEON/NEFunctionFactory.h
+++ b/arm_compute/graph2/backends/NEON/NEFunctionFactory.h
@@ -38,6 +38,7 @@ class GraphContext;
 
 namespace backends
 {
+/** Factory for generating NEON backend functions */
 class NEFunctionFactory final
 {
 public:
diff --git a/arm_compute/graph2/frontend/ILayer.h b/arm_compute/graph2/frontend/ILayer.h
index fee0b37e64..f8f6a5d47e 100644
--- a/arm_compute/graph2/frontend/ILayer.h
+++ b/arm_compute/graph2/frontend/ILayer.h
@@ -33,11 +33,18 @@ namespace frontend
 // Forward declarations
 class IStream;
 
-/** ILayer interface **/
+/** ILayer interface */
 class ILayer
 {
 public:
-    virtual ~ILayer()                       = default;
+    /** Default destructor */
+    virtual ~ILayer() = default;
+    /** Create layer and add to the given stream.
+     *
+     * @param[in] s Stream to add layer to.
+     *
+     * @return ID of the created node.
+     */
     virtual NodeID create_layer(IStream &s) = 0;
 };
 } // namespace frontend
diff --git a/arm_compute/graph2/frontend/Layers.h b/arm_compute/graph2/frontend/Layers.h
index 40274a4769..7ea23e0684 100644
--- a/arm_compute/graph2/frontend/Layers.h
+++ b/arm_compute/graph2/frontend/Layers.h
@@ -45,6 +45,11 @@ namespace frontend
 class InputLayer final : public ILayer
 {
 public:
+    /** Construct an input layer.
+     *
+     * @param[in] desc     Description of input tensor.
+     * @param[in] accessor Accessor to get input tensor data from.
+     */
     InputLayer(TensorDescriptor desc, ITensorAccessorUPtr accessor)
         : _desc(desc), _accessor(std::move(accessor))
     {
@@ -65,6 +70,10 @@ private:
 class OutputLayer final : public ILayer
 {
 public:
+    /** Construct an output layer.
+     *
+     * @param[in] accessor Accessor to give output tensor data to.
+     */
     OutputLayer(ITensorAccessorUPtr accessor)
         : _accessor(std::move(accessor))
     {
@@ -85,6 +94,10 @@ private:
 class ActivationLayer final : public ILayer
 {
 public:
+    /** Construct an activation layer.
+     *
+     * @param[in] act_info Activation information
+     */
     ActivationLayer(ActivationLayerInfo act_info)
         : _act_info(act_info)
     {
@@ -105,6 +118,14 @@ private:
 class BatchNormalizationLayer final : public ILayer
 {
 public:
+    /** Construct a batch normalization layer.
+     *
+     * @param[in] mean    Accessor to get mean tensor data from.
+     * @param[in] var     Accessor to get var tensor data from.
+     * @param[in] gamma   (Optional) Accessor to get gamma tensor data from. Default: nullptr.
+     * @param[in] beta    (Optional) Accessor to get beta tensor data from. Default: nullptr.
+     * @param[in] epsilon (Optional) Epsilon value. Default: 0.001.
+     */
     BatchNormalizationLayer(ITensorAccessorUPtr mean,
                             ITensorAccessorUPtr var,
                             ITensorAccessorUPtr gamma   = nullptr,
@@ -137,6 +158,16 @@ private:
 class ConvolutionLayer final : public ILayer
 {
 public:
+    /** Construct a convolution layer.
+     *
+     * @param[in] conv_width  Convolution width.
+     * @param[in] conv_height Convolution height.
+     * @param[in] ofm         Output feature map.
+     * @param[in] weights     Accessor to get kernel weights from.
+     * @param[in] bias        Accessor to get kernel bias from.
+     * @param[in] conv_info   Padding and stride information.
+     * @param[in] num_groups  (Optional) Number of groups. Default: 1.
+     */
     ConvolutionLayer(unsigned int        conv_width,
                      unsigned int        conv_height,
                      unsigned int        ofm,
@@ -179,6 +210,14 @@ private:
 class DepthwiseConvolutionLayer final : public ILayer
 {
 public:
+    /** Construct a depthwise convolution layer.
+     *
+     * @param[in] conv_width  Convolution width.
+     * @param[in] conv_height Convolution height.
+     * @param[in] weights     Accessor to get kernel weights from.
+     * @param[in] bias        Accessor to get kernel bias from.
+     * @param[in] conv_info   Padding and stride information.
+     */
     DepthwiseConvolutionLayer(unsigned int        conv_width,
                               unsigned int        conv_height,
                               ITensorAccessorUPtr weights,
@@ -214,6 +253,7 @@ private:
 class FlattenLayer final : public ILayer
 {
 public:
+    /** Construct a flatten layer. */
     FlattenLayer()
     {
     }
@@ -230,6 +270,12 @@ public:
 class FullyConnectedLayer final : public ILayer
 {
 public:
+    /** Construct a fully connected layer.
+     *
+     * @param[in] num_outputs Number of outputs.
+     * @param[in] weights     Accessor to get weights from.
+     * @param[in] bias        Accessor to get bias from.
+     */
     FullyConnectedLayer(unsigned int        num_outputs,
                         ITensorAccessorUPtr weights,
                         ITensorAccessorUPtr bias)
@@ -255,6 +301,10 @@ private:
 class NormalizationLayer final : public ILayer
 {
 public:
+    /** Construct a normalization layer.
+     *
+     * @param[in] norm_info Normalization information.
+     */
     NormalizationLayer(NormalizationLayerInfo norm_info)
         : _norm_info(norm_info)
     {
@@ -275,6 +325,10 @@ private:
 class PoolingLayer final : public ILayer
 {
 public:
+    /** Construct a pooling layer.
+     *
+     * @param[in] pool_info Pooling information.
+     */
     PoolingLayer(PoolingLayerInfo pool_info)
         : _pool_info(pool_info)
     {
@@ -295,6 +349,10 @@ private:
 class ReshapeLayer final : public ILayer
 {
 public:
+    /** Construct a reshape layer.
+     *
+     * @param[in] shape Target shape.
+     */
     ReshapeLayer(TensorShape shape)
         : _shape(shape)
     {
@@ -315,6 +373,10 @@ private:
 class SoftmaxLayer final : public ILayer
 {
 public:
+    /** Construct a softmax layer.
+     *
+     * @param[in] beta (Optional) Beta value. Default: 1.0.
+     */
     SoftmaxLayer(float beta = 1.0f)
         : _beta(beta)
     {
@@ -335,7 +397,7 @@ private:
 class BranchLayer final : public ILayer
 {
 public:
-    /** Default Constructor
+    /** Construct a branch layer
      *
      * @param[in] merge_method     Branch merging method
      * @param[in] sub_stream1      First graph branch
@@ -355,7 +417,7 @@ public:
         },
         std::move(rest_sub_streams)...);
     }
-    /** Default Constructor
+    /** Construct a branch layer
      *
      * @param[in] sub_stream Sub-stream
      */
diff --git a/arm_compute/graph2/nodes/ActivationLayerNode.h b/arm_compute/graph2/nodes/ActivationLayerNode.h
index c3775231a4..cb19c818c5 100644
--- a/arm_compute/graph2/nodes/ActivationLayerNode.h
+++ b/arm_compute/graph2/nodes/ActivationLayerNode.h
@@ -30,6 +30,7 @@ namespace arm_compute
 {
 namespace graph2
 {
+/** Activation Layer node */
 class ActivationLayerNode final : public INode
 {
 public:
diff --git a/arm_compute/graph2/nodes/BatchNormalizationLayerNode.h b/arm_compute/graph2/nodes/BatchNormalizationLayerNode.h
index a521938414..a6e8e2b98e 100644
--- a/arm_compute/graph2/nodes/BatchNormalizationLayerNode.h
+++ b/arm_compute/graph2/nodes/BatchNormalizationLayerNode.h
@@ -30,6 +30,7 @@ namespace arm_compute
 {
 namespace graph2
 {
+/** Batch Normalization Layer node */
 class BatchNormalizationLayerNode final : public INode
 {
 public:
diff --git a/arm_compute/graph2/nodes/ConstNode.h b/arm_compute/graph2/nodes/ConstNode.h
index 73a2246498..e1c66176f0 100644
--- a/arm_compute/graph2/nodes/ConstNode.h
+++ b/arm_compute/graph2/nodes/ConstNode.h
@@ -30,6 +30,7 @@ namespace arm_compute
 {
 namespace graph2
 {
+/** Const node */
 class ConstNode final : public INode
 {
 public:
diff --git a/arm_compute/graph2/nodes/ConvolutionLayerNode.h b/arm_compute/graph2/nodes/ConvolutionLayerNode.h
index 1af344ea13..6e3c9bef32 100644
--- a/arm_compute/graph2/nodes/ConvolutionLayerNode.h
+++ b/arm_compute/graph2/nodes/ConvolutionLayerNode.h
@@ -30,6 +30,7 @@ namespace arm_compute
 {
 namespace graph2
 {
+/** Convolution Layer node */
 class ConvolutionLayerNode final : public INode
 {
 public:
diff --git a/arm_compute/graph2/nodes/DepthConcatenateLayerNode.h b/arm_compute/graph2/nodes/DepthConcatenateLayerNode.h
index 617b9842fb..23c31048e9 100644
--- a/arm_compute/graph2/nodes/DepthConcatenateLayerNode.h
+++ b/arm_compute/graph2/nodes/DepthConcatenateLayerNode.h
@@ -30,6 +30,7 @@ namespace arm_compute
 {
 namespace graph2
 {
+/** Depth Concatenation Layer node */
 class DepthConcatenateLayerNode final : public INode
 {
 public:
diff --git a/arm_compute/graph2/nodes/DepthwiseConvolutionLayerNode.h b/arm_compute/graph2/nodes/DepthwiseConvolutionLayerNode.h
index 1b05edf4dc..d5b8e34554 100644
--- a/arm_compute/graph2/nodes/DepthwiseConvolutionLayerNode.h
+++ b/arm_compute/graph2/nodes/DepthwiseConvolutionLayerNode.h
@@ -30,6 +30,7 @@ namespace arm_compute
 {
 namespace graph2
 {
+/** Depthwise Convolution Layer node */
 class DepthwiseConvolutionLayerNode final : public INode
 {
 public:
diff --git a/arm_compute/graph2/nodes/EltwiseLayerNode.h b/arm_compute/graph2/nodes/EltwiseLayerNode.h
index 2b217decff..48df2b715d 100644
--- a/arm_compute/graph2/nodes/EltwiseLayerNode.h
+++ b/arm_compute/graph2/nodes/EltwiseLayerNode.h
@@ -30,6 +30,7 @@ namespace arm_compute
 {
 namespace graph2
 {
+/** Eltwise Layer node */
 class EltwiseLayerNode final : public INode
 {
 public:
diff --git a/arm_compute/graph2/nodes/FlattenLayerNode.h b/arm_compute/graph2/nodes/FlattenLayerNode.h
index de601f5f4e..41f6f85045 100644
--- a/arm_compute/graph2/nodes/FlattenLayerNode.h
+++ b/arm_compute/graph2/nodes/FlattenLayerNode.h
@@ -30,6 +30,7 @@ namespace arm_compute
 {
 namespace graph2
 {
+/** Flatten Layer node */
 class FlattenLayerNode final : public INode
 {
 public:
diff --git a/arm_compute/graph2/nodes/FullyConnectedLayerNode.h b/arm_compute/graph2/nodes/FullyConnectedLayerNode.h
index 836f20fdb3..5c71f4ca69 100644
--- a/arm_compute/graph2/nodes/FullyConnectedLayerNode.h
+++ b/arm_compute/graph2/nodes/FullyConnectedLayerNode.h
@@ -30,6 +30,7 @@ namespace arm_compute
 {
 namespace graph2
 {
+/** Fully Connected Layer node */
 class FullyConnectedLayerNode final : public INode
 {
 public:
diff --git a/arm_compute/graph2/nodes/InputNode.h b/arm_compute/graph2/nodes/InputNode.h
index 2cad6f8fc6..667dcfacf0 100644
--- a/arm_compute/graph2/nodes/InputNode.h
+++ b/arm_compute/graph2/nodes/InputNode.h
@@ -30,6 +30,7 @@ namespace arm_compute
 {
 namespace graph2
 {
+/** Input Layer node */
 class InputNode final : public INode
 {
 public:
diff --git a/arm_compute/graph2/nodes/NormalizationLayerNode.h b/arm_compute/graph2/nodes/NormalizationLayerNode.h
index e2816e9352..78a843a1e7 100644
--- a/arm_compute/graph2/nodes/NormalizationLayerNode.h
+++ b/arm_compute/graph2/nodes/NormalizationLayerNode.h
@@ -30,6 +30,7 @@ namespace arm_compute
 {
 namespace graph2
 {
+/** Normalization Layer node */
 class NormalizationLayerNode final : public INode
 {
 public:
diff --git a/arm_compute/graph2/nodes/OutputNode.h b/arm_compute/graph2/nodes/OutputNode.h
index 94df382d22..0c28c84214 100644
--- a/arm_compute/graph2/nodes/OutputNode.h
+++ b/arm_compute/graph2/nodes/OutputNode.h
@@ -30,6 +30,7 @@ namespace arm_compute
 {
 namespace graph2
 {
+/** Output Layer node */
 class OutputNode final : public INode
 {
 public:
diff --git a/arm_compute/graph2/nodes/PoolingLayerNode.h b/arm_compute/graph2/nodes/PoolingLayerNode.h
index b0c6270999..09332a9367 100644
--- a/arm_compute/graph2/nodes/PoolingLayerNode.h
+++ b/arm_compute/graph2/nodes/PoolingLayerNode.h
@@ -30,6 +30,7 @@ namespace arm_compute
 {
 namespace graph2
 {
+/** Pooling Layer node */
 class PoolingLayerNode final : public INode
 {
 public:
diff --git a/arm_compute/graph2/nodes/ReshapeLayerNode.h b/arm_compute/graph2/nodes/ReshapeLayerNode.h
index 89ee46c8e1..27d52601da 100644
--- a/arm_compute/graph2/nodes/ReshapeLayerNode.h
+++ b/arm_compute/graph2/nodes/ReshapeLayerNode.h
@@ -30,6 +30,7 @@ namespace arm_compute
 {
 namespace graph2
 {
+/** Reshape Layer node */
 class ReshapeLayerNode final : public INode
 {
 public:
diff --git a/arm_compute/graph2/nodes/SoftmaxLayerNode.h b/arm_compute/graph2/nodes/SoftmaxLayerNode.h
index 86decb80d9..b1091e28fc 100644
--- a/arm_compute/graph2/nodes/SoftmaxLayerNode.h
+++ b/arm_compute/graph2/nodes/SoftmaxLayerNode.h
@@ -30,6 +30,7 @@ namespace arm_compute
 {
 namespace graph2
 {
+/** Softmax Layer node */
 class SoftmaxLayerNode final : public INode
 {
 public:
diff --git a/arm_compute/graph2/printers/DotGraphPrinter.h b/arm_compute/graph2/printers/DotGraphPrinter.h
index 3b1879c505..0efdf772e3 100644
--- a/arm_compute/graph2/printers/DotGraphPrinter.h
+++ b/arm_compute/graph2/printers/DotGraphPrinter.h
@@ -34,6 +34,7 @@ namespace arm_compute
 {
 namespace graph2
 {
+/** Graph printer visitor. */
 class DotGraphVisitor final : public DefaultNodeVisitor
 {
 public:
diff --git a/arm_compute/runtime/Array.h b/arm_compute/runtime/Array.h
index 61e0953edd..4fc79026e0 100644
--- a/arm_compute/runtime/Array.h
+++ b/arm_compute/runtime/Array.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -61,15 +61,27 @@ private:
     std::unique_ptr<T[]> _values;
 };
 
-using KeyPointArray        = Array<KeyPoint>;
-using Coordinates2DArray   = Array<Coordinates2D>;
+/** Array of Key Points. */
+using KeyPointArray = Array<KeyPoint>;
+/** Array of 2D Coordinates. */
+using Coordinates2DArray = Array<Coordinates2D>;
+/** Array of Detection Windows. */
 using DetectionWindowArray = Array<DetectionWindow>;
-using Size2DArray          = Array<Size2D>;
-using UInt8Array           = Array<uint8_t>;
-using UInt16Array          = Array<uint16_t>;
-using UInt32Array          = Array<uint32_t>;
-using Int16Array           = Array<int16_t>;
-using Int32Array           = Array<int32_t>;
-using FloatArray           = Array<float>;
+/** Array of ROIs. */
+using ROIArray = Array<ROI>;
+/** Array of 2D Sizes. */
+using Size2DArray = Array<Size2D>;
+/** Array of uint8s. */
+using UInt8Array = Array<uint8_t>;
+/** Array of uint16s. */
+using UInt16Array = Array<uint16_t>;
+/** Array of uint32s. */
+using UInt32Array = Array<uint32_t>;
+/** Array of int16s. */
+using Int16Array = Array<int16_t>;
+/** Array of int32s. */
+using Int32Array = Array<int32_t>;
+/** Array of floats. */
+using FloatArray = Array<float>;
 }
 #endif /* __ARM_COMPUTE_ARRAY_H__ */
diff --git a/arm_compute/runtime/CL/CLArray.h b/arm_compute/runtime/CL/CLArray.h
index dda26e2e89..01c6d8df3d 100644
--- a/arm_compute/runtime/CL/CLArray.h
+++ b/arm_compute/runtime/CL/CLArray.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -46,7 +46,9 @@ public:
     CLArray(const CLArray &) = delete;
     /** Prevent instances of this class from being copied (As this class contains pointers) */
     CLArray &operator=(const CLArray &) = delete;
-    CLArray(CLArray &&)                 = default;
+    /** Allow instances of this class to be move constructed */
+    CLArray(CLArray &&) = default;
+    /** Allow instances of this class to be moved */
     CLArray &operator=(CLArray &&) = default;
     /** Constructor: initializes an array which can contain up to max_num_points values
      *
@@ -101,16 +103,27 @@ private:
     cl::Buffer _buffer;
 };
 
-using CLKeyPointArray        = CLArray<KeyPoint>;
-using CLCoordinates2DArray   = CLArray<Coordinates2D>;
+/** OpenCL Array of Key Points. */
+using CLKeyPointArray = CLArray<KeyPoint>;
+/** OpenCL Array of 2D Coordinates. */
+using CLCoordinates2DArray = CLArray<Coordinates2D>;
+/** OpenCL Array of Detection Windows. */
 using CLDetectionWindowArray = CLArray<DetectionWindow>;
-using CLROIArray             = CLArray<ROI>;
-using CLSize2DArray          = CLArray<Size2D>;
-using CLUInt8Array           = CLArray<cl_uchar>;
-using CLUInt16Array          = CLArray<cl_ushort>;
-using CLUInt32Array          = CLArray<cl_uint>;
-using CLInt16Array           = CLArray<cl_short>;
-using CLInt32Array           = CLArray<cl_int>;
-using CLFloatArray           = CLArray<cl_float>;
+/** OpenCL Array of ROIs. */
+using CLROIArray = CLArray<ROI>;
+/** OpenCL Array of 2D Sizes. */
+using CLSize2DArray = CLArray<Size2D>;
+/** OpenCL Array of uint8s. */
+using CLUInt8Array = CLArray<cl_uchar>;
+/** OpenCL Array of uint16s. */
+using CLUInt16Array = CLArray<cl_ushort>;
+/** OpenCL Array of uint32s. */
+using CLUInt32Array = CLArray<cl_uint>;
+/** OpenCL Array of int16s. */
+using CLInt16Array = CLArray<cl_short>;
+/** OpenCL Array of int32s. */
+using CLInt32Array = CLArray<cl_int>;
+/** OpenCL Array of floats. */
+using CLFloatArray = CLArray<cl_float>;
 }
 #endif /* __ARM_COMPUTE_CLARRAY_H__ */
diff --git a/arm_compute/runtime/CL/CLDistribution1D.h b/arm_compute/runtime/CL/CLDistribution1D.h
index 55dd1247ed..f077893bc0 100644
--- a/arm_compute/runtime/CL/CLDistribution1D.h
+++ b/arm_compute/runtime/CL/CLDistribution1D.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -44,9 +44,9 @@ public:
      * @param[in] range    The total number of the consecutive values of the distribution interval.
      */
     CLDistribution1D(size_t num_bins, int32_t offset, uint32_t range);
-    /** Prevent instances of this class from being copied (As this class contains pointers). */
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
     CLDistribution1D(const CLDistribution1D &) = delete;
-    /** Prevent instances of this class from being copied (As this class contains pointers). */
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
     CLDistribution1D &operator=(const CLDistribution1D &) = delete;
     /** Enqueue a map operation of the allocated buffer.
      *
diff --git a/arm_compute/runtime/CL/CLLutAllocator.h b/arm_compute/runtime/CL/CLLutAllocator.h
index 4648ffb51f..851e625f84 100644
--- a/arm_compute/runtime/CL/CLLutAllocator.h
+++ b/arm_compute/runtime/CL/CLLutAllocator.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -40,13 +40,19 @@ public:
     CLLutAllocator();
     /** Default destructor. */
     ~CLLutAllocator() = default;
-    /** Prevent instances of this class from being copied (As this class contains pointers). */
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
     CLLutAllocator(const CLLutAllocator &) = delete;
-    /** Prevent instances of this class from being copy assigned (As this class contains pointers). */
+    /** Prevent instances of this class from being copy assigned (As this class contains pointers) */
     const CLLutAllocator &operator=(const CLLutAllocator &) = delete;
-    /** Interface to be implemented by the child class to return the pointer to the mapped data. */
+    /** Interface to be implemented by the child class to return the pointer to the mapped data.
+     *
+     * @return pointer to the mapped data.
+     */
     uint8_t *data();
-    /** Interface to be implemented by the child class to return the pointer to the CL data. */
+    /** Interface to be implemented by the child class to return the pointer to the CL data.
+     *
+     * @return pointer to the CL data.
+     */
     const cl::Buffer &cl_data() const;
     /** Enqueue a map operation of the allocated buffer on the given queue.
      *
diff --git a/arm_compute/runtime/CL/CLMemoryGroup.h b/arm_compute/runtime/CL/CLMemoryGroup.h
index a6f3eb1c3c..db23720253 100644
--- a/arm_compute/runtime/CL/CLMemoryGroup.h
+++ b/arm_compute/runtime/CL/CLMemoryGroup.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -31,6 +31,7 @@
 
 namespace arm_compute
 {
+/** Memory Group in OpenCL */
 using CLMemoryGroup = MemoryGroupBase<CLTensor>;
 
 template <>
diff --git a/arm_compute/runtime/CL/CLTensor.h b/arm_compute/runtime/CL/CLTensor.h
index 2c685d1ed1..e05f307621 100644
--- a/arm_compute/runtime/CL/CLTensor.h
+++ b/arm_compute/runtime/CL/CLTensor.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -76,6 +76,7 @@ private:
     mutable CLTensorAllocator _allocator; /**< Instance of the OpenCL tensor allocator */
 };
 
+/** OpenCL Image */
 using CLImage = CLTensor;
 }
 #endif /*__ARM_COMPUTE_CLTENSOR_H__ */
diff --git a/arm_compute/runtime/CL/CLTensorAllocator.h b/arm_compute/runtime/CL/CLTensorAllocator.h
index 682de174a8..7515074afd 100644
--- a/arm_compute/runtime/CL/CLTensorAllocator.h
+++ b/arm_compute/runtime/CL/CLTensorAllocator.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -35,28 +35,38 @@ namespace arm_compute
 class CLTensor;
 template <typename>
 class MemoryGroupBase;
+/** Memory Group in OpenCL */
 using CLMemoryGroup = MemoryGroupBase<CLTensor>;
 
 /** Basic implementation of a CL memory tensor allocator. */
 class CLTensorAllocator : public ITensorAllocator
 {
 public:
-    /** Default constructor. */
+    /** Default constructor.
+     *
+     * @param[in] owner (Optional) Owner of the allocator.
+     */
     CLTensorAllocator(CLTensor *owner = nullptr);
     /** Default destructor */
     ~CLTensorAllocator();
-    /** Prevent instances of this class from being copied (As this class contains pointers). */
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
     CLTensorAllocator(const CLTensorAllocator &) = delete;
-    /** Prevent instances of this class from being copy assigned (As this class contains pointers). */
+    /** Prevent instances of this class from being copy assigned (As this class contains pointers) */
     CLTensorAllocator &operator=(const CLTensorAllocator &) = delete;
     /** Allow instances of this class to be moved */
     CLTensorAllocator(CLTensorAllocator &&) = default;
     /** Allow instances of this class to be moved */
     CLTensorAllocator &operator=(CLTensorAllocator &&) = default;
 
-    /** Interface to be implemented by the child class to return the pointer to the mapped data. */
+    /** Interface to be implemented by the child class to return the pointer to the mapped data.
+     *
+     * @return pointer to the mapped data.
+     */
     uint8_t *data();
-    /** Interface to be implemented by the child class to return the pointer to the CL data. */
+    /** Interface to be implemented by the child class to return the pointer to the CL data.
+     *
+     * @return pointer to the CL data.
+     */
     const cl::Buffer &cl_data() const;
     /** Enqueue a map operation of the allocated buffer on the given queue.
      *
diff --git a/arm_compute/runtime/CL/CLTuner.h b/arm_compute/runtime/CL/CLTuner.h
index 1c71709a7a..c1fbfd249c 100644
--- a/arm_compute/runtime/CL/CLTuner.h
+++ b/arm_compute/runtime/CL/CLTuner.h
@@ -71,7 +71,7 @@ public:
 
     /** Give read access to the LWS table
      *
-     * return The lws table as unordered_map container
+     * @return The lws table as unordered_map container
      */
     const std::unordered_map<std::string, cl::NDRange> &lws_table() const;
 
@@ -83,6 +83,7 @@ public:
      */
     void set_cl_kernel_event(cl_event kernel_event);
 
+    /** clEnqueueNDRangeKernel symbol */
     std::function<decltype(clEnqueueNDRangeKernel)> real_clEnqueueNDRangeKernel;
 
     /** Load the LWS table from file
diff --git a/arm_compute/runtime/CL/functions/CLGEMM.h b/arm_compute/runtime/CL/functions/CLGEMM.h
index 2e82457ee2..f2dd60340c 100644
--- a/arm_compute/runtime/CL/functions/CLGEMM.h
+++ b/arm_compute/runtime/CL/functions/CLGEMM.h
@@ -51,7 +51,10 @@ class ICLTensor;
 class CLGEMM : public IFunction
 {
 public:
-    /** Default constructor. */
+    /** Default constructor.
+     *
+     * @param[in] memory_manager (Optional) Memory manager.
+     */
     CLGEMM(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
     /** Initialise the kernel's inputs and output
      *
diff --git a/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h
index ca805d90f1..0548ce7be7 100644
--- a/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h
+++ b/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h
@@ -98,7 +98,10 @@ private:
 class CLGEMMConvolutionLayer : public IFunction
 {
 public:
-    /** Default constructor */
+    /** Default constructor
+     *
+     * @param[in] memory_manager (Optional) Memory manager.
+     */
     CLGEMMConvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
     /** Set the input and output tensors.
      *
@@ -128,6 +131,8 @@ public:
      * @param[in]  conv_info    Contains padding and stride information described in @ref PadStrideInfo.
      * @param[in]  weights_info Specifies if the weights tensor has been reshaped with CLWeightsReshapeKernel. If this is not part of the fully connected layer the weights
      *                          tensor has also been transposed with CLGEMMTranspose1xWKernel. Data type supported: Same as @p input.
+     *
+     * @return a status
      */
     static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
                            const WeightsInfo &weights_info = WeightsInfo());
diff --git a/arm_compute/runtime/CL/functions/CLGaussian5x5.h b/arm_compute/runtime/CL/functions/CLGaussian5x5.h
index 3c60cc66a3..892fe146dd 100644
--- a/arm_compute/runtime/CL/functions/CLGaussian5x5.h
+++ b/arm_compute/runtime/CL/functions/CLGaussian5x5.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -49,7 +49,10 @@ class ICLTensor;
 class CLGaussian5x5 : public IFunction
 {
 public:
-    /** Default Constructor. */
+    /** Default Constructor.
+     *
+     * @param[in] memory_manager (Optional) Memory manager.
+     */
     CLGaussian5x5(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
     /** Initialise the function's source, destinations and border mode.
      *
diff --git a/arm_compute/runtime/CL/functions/CLHistogram.h b/arm_compute/runtime/CL/functions/CLHistogram.h
index 455b61812d..05b08db9da 100644
--- a/arm_compute/runtime/CL/functions/CLHistogram.h
+++ b/arm_compute/runtime/CL/functions/CLHistogram.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -31,7 +31,6 @@ namespace arm_compute
 {
 class ICLDistribution1D;
 class ICLTensor;
-using ICLTensor = ICLImage;
 
 /** Basic function to execute histogram. This function calls the following OpenCL kernels:
  *
diff --git a/arm_compute/runtime/CL/functions/CLOpticalFlow.h b/arm_compute/runtime/CL/functions/CLOpticalFlow.h
index 94dda186bf..e2aaf404ce 100644
--- a/arm_compute/runtime/CL/functions/CLOpticalFlow.h
+++ b/arm_compute/runtime/CL/functions/CLOpticalFlow.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -43,9 +43,12 @@ namespace arm_compute
 {
 class CLPyramid;
 
+/** OpenCL Array of Internal Keypoints */
 using CLLKInternalKeypointArray = CLArray<CLLKInternalKeypoint>;
-using CLCoefficientTableArray   = CLArray<CLCoefficientTable>;
-using CLOldValueArray           = CLArray<CLOldValue>;
+/** OpenCL Array of Coefficient Tables */
+using CLCoefficientTableArray = CLArray<CLCoefficientTable>;
+/** OpenCL Array of Old Values */
+using CLOldValueArray = CLArray<CLOldValue>;
 
 /** Basic function to execute optical flow. This function calls the following OpenCL kernels and functions:
  *
diff --git a/arm_compute/runtime/CL/functions/CLReductionOperation.h b/arm_compute/runtime/CL/functions/CLReductionOperation.h
index 4ce17ae3a9..abec9b8dc5 100644
--- a/arm_compute/runtime/CL/functions/CLReductionOperation.h
+++ b/arm_compute/runtime/CL/functions/CLReductionOperation.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -45,7 +45,10 @@ class ICLTensor;
 class CLReductionOperation : public IFunction
 {
 public:
-    /* Constructor */
+    /** Default Constructor.
+     *
+     * @param[in] memory_manager (Optional) Memory manager.
+     */
     CLReductionOperation(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
 
     /** Set the input and output tensors.
diff --git a/arm_compute/runtime/CL/functions/CLSobel5x5.h b/arm_compute/runtime/CL/functions/CLSobel5x5.h
index 3e603f8311..2b5807b54a 100644
--- a/arm_compute/runtime/CL/functions/CLSobel5x5.h
+++ b/arm_compute/runtime/CL/functions/CLSobel5x5.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -49,7 +49,10 @@ class ICLTensor;
 class CLSobel5x5 : public IFunction
 {
 public:
-    /** Default Constructor. */
+    /** Default Constructor.
+     *
+     * @param[in] memory_manager (Optional) Memory manager.
+     */
     CLSobel5x5(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
     /** Initialise the function's source, destinations and border mode.
      *
diff --git a/arm_compute/runtime/CL/functions/CLSobel7x7.h b/arm_compute/runtime/CL/functions/CLSobel7x7.h
index 0dc0a1c5e9..65b3cf2da0 100644
--- a/arm_compute/runtime/CL/functions/CLSobel7x7.h
+++ b/arm_compute/runtime/CL/functions/CLSobel7x7.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -49,7 +49,10 @@ class ICLTensor;
 class CLSobel7x7 : public IFunction
 {
 public:
-    /** Default Constructor. */
+    /** Default Constructor.
+     *
+     * @param[in] memory_manager (Optional) Memory manager.
+     */
     CLSobel7x7(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
     /** Initialise the function's source, destinations and border mode.
      *
diff --git a/arm_compute/runtime/GLES_COMPUTE/GCTensor.h b/arm_compute/runtime/GLES_COMPUTE/GCTensor.h
index 0f5f194266..05305f9b1b 100644
--- a/arm_compute/runtime/GLES_COMPUTE/GCTensor.h
+++ b/arm_compute/runtime/GLES_COMPUTE/GCTensor.h
@@ -40,10 +40,10 @@ public:
     /** Default constructor */
     GCTensor();
 
-    /** Prevent instances of this class from being copied (As this class contains pointers). */
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
     GCTensor(const GCTensor &) = delete;
 
-    /** Prevent instances of this class from being copy assigned (As this class contains pointers). */
+    /** Prevent instances of this class from being copy assigned (As this class contains pointers) */
     GCTensor &operator=(const GCTensor &) = delete;
 
     /** Allow instances of this class to be moved */
@@ -94,6 +94,7 @@ private:
     mutable GCTensorAllocator _allocator; /**< Instance of the OpenGL ES tensor allocator */
 };
 
+/** OpenGL ES Image */
 using GCImage = GCTensor;
 }
 
diff --git a/arm_compute/runtime/GLES_COMPUTE/GCTensorAllocator.h b/arm_compute/runtime/GLES_COMPUTE/GCTensorAllocator.h
index fc14f04ac2..1bd3582b6c 100644
--- a/arm_compute/runtime/GLES_COMPUTE/GCTensorAllocator.h
+++ b/arm_compute/runtime/GLES_COMPUTE/GCTensorAllocator.h
@@ -59,10 +59,10 @@ public:
     /** Default constructor. */
     GCTensorAllocator(GCTensor *owner = nullptr);
 
-    /** Prevent instances of this class from being copied (As this class contains pointers). */
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
     GCTensorAllocator(const GCTensorAllocator &) = delete;
 
-    /** Prevent instances of this class from being copy assigned (As this class contains pointers). */
+    /** Prevent instances of this class from being copy assigned (As this class contains pointers) */
     GCTensorAllocator &operator=(const GCTensorAllocator &) = delete;
 
     /** Allow instances of this class to be moved */
@@ -74,7 +74,10 @@ public:
     /** Default destructor */
     ~GCTensorAllocator();
 
-    /** Interface to be implemented by the child class to return the pointer to the mapped data. */
+    /** Interface to be implemented by the child class to return the pointer to the mapped data.
+     *
+     * @return a pointer to the data.
+     */
     uint8_t *data();
 
     /** Get the OpenGL ES buffer object name
diff --git a/arm_compute/runtime/ILifetimeManager.h b/arm_compute/runtime/ILifetimeManager.h
index 6f2c68d372..36743ac404 100644
--- a/arm_compute/runtime/ILifetimeManager.h
+++ b/arm_compute/runtime/ILifetimeManager.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -43,7 +43,7 @@ public:
     virtual ~ILifetimeManager() = default;
     /** Registers a group to the lifetime manager and assigns a group id
      *
-     * @return The group id of the group
+     * @param[in] group The memory group to register with the lifetime manager
      */
     virtual void register_group(IMemoryGroup *group) = 0;
     /** Registers and starts lifetime of an object
diff --git a/arm_compute/runtime/LutAllocator.h b/arm_compute/runtime/LutAllocator.h
index 76b596bfa0..077b4693af 100644
--- a/arm_compute/runtime/LutAllocator.h
+++ b/arm_compute/runtime/LutAllocator.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -37,7 +37,10 @@ class LutAllocator : public ILutAllocator
 public:
     /** Default constructor. */
     LutAllocator();
-    /** Interface to be implemented by the child class to return the pointer to the allocate data. */
+    /** Interface to be implemented by the child class to return the pointer to the allocated data.
+     *
+     * @return a pointer to the data.
+     */
     uint8_t *data() const;
 
 protected:
diff --git a/arm_compute/runtime/MemoryGroup.h b/arm_compute/runtime/MemoryGroup.h
index d3f647e7d4..9127d0849d 100644
--- a/arm_compute/runtime/MemoryGroup.h
+++ b/arm_compute/runtime/MemoryGroup.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -30,6 +30,7 @@
 
 namespace arm_compute
 {
+/** Memory Group */
 using MemoryGroup = MemoryGroupBase<Tensor>;
 
 template <>
diff --git a/arm_compute/runtime/MemoryGroupBase.h b/arm_compute/runtime/MemoryGroupBase.h
index 19e9834923..dc640f10a6 100644
--- a/arm_compute/runtime/MemoryGroupBase.h
+++ b/arm_compute/runtime/MemoryGroupBase.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -44,9 +44,9 @@ public:
     MemoryGroupBase(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
     /** Default destructor */
     ~MemoryGroupBase() = default;
-    /** Prevent instances of this class from being copied (As this class contains pointers). */
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
     MemoryGroupBase(const MemoryGroupBase &) = delete;
-    /** Prevent instances of this class from being copy assigned (As this class contains pointers). */
+    /** Prevent instances of this class from being copy assigned (As this class contains pointers) */
     MemoryGroupBase &operator=(const MemoryGroupBase &) = delete;
     /** Allow instances of this class to be moved */
     MemoryGroupBase(MemoryGroupBase &&) = default;
diff --git a/arm_compute/runtime/NEON/AssemblyHelper.h b/arm_compute/runtime/NEON/AssemblyHelper.h
index 2b304b8022..e2d27cf941 100644
--- a/arm_compute/runtime/NEON/AssemblyHelper.h
+++ b/arm_compute/runtime/NEON/AssemblyHelper.h
@@ -40,26 +40,38 @@
 
 namespace arm_compute
 {
+/** Assembly kernel glue */
 template <typename TypeInput, typename TypeOutput>
 class AssemblyKernelGlue final
 {
 public:
+    /** Operator type */
     using TypeOperator = TypeInput;
-    using TypeResult   = TypeOutput;
+    /** Result type */
+    using TypeResult = TypeOutput;
+    /** Default constructor. */
     AssemblyKernelGlue()
         : _gemm_kernel_asm(nullptr), _optimised_kernel(nullptr), _a(nullptr), _b(nullptr), _d(nullptr)
     {
     }
+    /** Assembly Gemm */
     using AssemblyGemm = arm_gemm::GemmCommon<TypeInput, TypeOutput>;
 
+    /** Prevent instances of this class from being copy assigned */
     const AssemblyKernelGlue<TypeInput, TypeOutput> &operator=(const AssemblyKernelGlue<TypeInput, TypeOutput> &) = delete;
+    /** Prevent instances of this class from being copied */
     AssemblyKernelGlue(const AssemblyKernelGlue<TypeInput, TypeOutput> &) = delete;
 
+    /** Assembly Gemm kernel */
     std::unique_ptr<AssemblyGemm> _gemm_kernel_asm;
-    std::unique_ptr<INEKernel>    _optimised_kernel;
-    const ITensor                *_a;
-    const ITensor                *_b;
-    ITensor                      *_d;
+    /** Optimised NEON kernel */
+    std::unique_ptr<INEKernel> _optimised_kernel;
+    /** Input A */
+    const ITensor *_a;
+    /** Input B */
+    const ITensor *_b;
+    /** Output */
+    ITensor *_d;
 
     /** Configures the arrays pointers and strides in the assembly kernel and executes the assembly kernel.
      *  The call to set_arrays is needed to deal with the input sizes containing batches (dims > 2)
@@ -91,10 +103,21 @@ public:
     }
 };
 
-using AssemblyKernelGlueF32   = AssemblyKernelGlue<float, float>;
+/** Float 32 assembly kernel glue */
+using AssemblyKernelGlueF32 = AssemblyKernelGlue<float, float>;
+/** Uint 8 to Uint 32 kernel glue */
 using AssemblyKernelGlueU8U32 = AssemblyKernelGlue<uint8_t, uint32_t>;
+/** Int 8 to Int 32 kernel glue */
 using AssemblyKernelGlueS8S32 = AssemblyKernelGlue<int8_t, int32_t>;
 
+/** Allocate a workspace tensor.
+ *
+ * @param[in]  workspace_size Size to allocate.
+ * @param[out] workspace      Tensor to allocate.
+ * @param[in]  memory_group   Tensor memory group.
+ * @param[in]  alignment      Workspace memory alignment.
+ * @param[in]  num_threads    Number of workspace threads.
+ */
 inline void allocate_workspace(size_t workspace_size, Tensor &workspace, MemoryGroup &memory_group, size_t alignment, unsigned int num_threads)
 {
     ARM_COMPUTE_ERROR_ON_MSG(workspace_size == 0, "size cannot be 0");
@@ -102,6 +125,17 @@ inline void allocate_workspace(size_t workspace_size, Tensor &workspace, MemoryG
     workspace.allocator()->allocate();
 }
 
+/** Create a wrapper kernel.
+ *
+ * @param[in]  a     Input tensor A.
+ * @param[in]  b     Input tensor B.
+ * @param[in]  c     (Optional) Input tensor C.
+ * @param[out] d     Output tensor.
+ * @param[in]  alpha Alpha value.
+ * @param[in]  beta  Beta value.
+ *
+ * @return the wrapper kernel.
+ */
 template <typename T>
 std::unique_ptr<NEGEMMAssemblyWrapper<T>> create_wrapper_kernel(const ITensor *a, const ITensor *b, const ITensor *c, ITensor *d, float alpha, float beta)
 {
@@ -128,6 +162,20 @@ std::unique_ptr<NEGEMMAssemblyWrapper<T>> create_wrapper_kernel(const ITensor *a
     return nullptr;
 }
 
+/** Setup assembly kernel.
+ *
+ * @param[in]  a            Input tensor A.
+ * @param[in]  b            Input tensor B.
+ * @param[in]  c            (Optional) Input tensor C.
+ * @param[out] d            Output tensor.
+ * @param[in]  alpha        Alpha value.
+ * @param[in]  beta         Beta value.
+ * @param[out] workspace    Workspace tensor.
+ * @param[in]  memory_group Tensor memory group.
+ * @param[out] asm_glue     Assembly glue kernel.
+ *
+ * @return True if the assembly kernel is setup correctly.
+ */
 template <typename T>
 inline bool setup_assembly_kernel(const ITensor *a, const ITensor *b, const ITensor *c, ITensor *d, float alpha, float beta,
                                   Tensor &workspace, MemoryGroup &memory_group, T &asm_glue)
diff --git a/arm_compute/runtime/NEON/NEScheduler.h b/arm_compute/runtime/NEON/NEScheduler.h
index 94c82b2f03..affc281cc2 100644
--- a/arm_compute/runtime/NEON/NEScheduler.h
+++ b/arm_compute/runtime/NEON/NEScheduler.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -28,6 +28,7 @@
 
 namespace arm_compute
 {
+/** NEON Scheduler */
 using NEScheduler = Scheduler;
 }
 #endif /*__ARM_COMPUTE_NESCHEDULER_H__ */
diff --git a/arm_compute/runtime/NEON/functions/NECannyEdge.h b/arm_compute/runtime/NEON/functions/NECannyEdge.h
index b7e0ffbcf1..17fefcc0ab 100644
--- a/arm_compute/runtime/NEON/functions/NECannyEdge.h
+++ b/arm_compute/runtime/NEON/functions/NECannyEdge.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -56,6 +56,8 @@ public:
     /** Constructor
      *
      * Initialize Sobel kernel to nullptr.
+     *
+     * @param[in] memory_manager (Optional) Memory manager.
      */
     NECannyEdge(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
     /** Prevent instances of this class from being copied (As this class contains pointers) */
diff --git a/arm_compute/runtime/NEON/functions/NEHarrisCorners.h b/arm_compute/runtime/NEON/functions/NEHarrisCorners.h
index a9a6786db5..6ea14a38e5 100644
--- a/arm_compute/runtime/NEON/functions/NEHarrisCorners.h
+++ b/arm_compute/runtime/NEON/functions/NEHarrisCorners.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -64,6 +64,8 @@ public:
     /** Constructor
      *
      * Initialize _sobel, _harris_score and _corner_list to nullptr.
+     *
+     * @param[in] memory_manager (Optional) Memory manager.
      */
     NEHarrisCorners(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
     /** Initialize the function's source, destination, conv and border_mode.
diff --git a/arm_compute/runtime/NEON/functions/NEOpticalFlow.h b/arm_compute/runtime/NEON/functions/NEOpticalFlow.h
index 5d1fbe3a22..ad703f0788 100644
--- a/arm_compute/runtime/NEON/functions/NEOpticalFlow.h
+++ b/arm_compute/runtime/NEON/functions/NEOpticalFlow.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -42,6 +42,7 @@ namespace arm_compute
 {
 class Pyramid;
 
+/** Array of LK Internal Keypoints */
 using LKInternalKeypointArray = Array<NELKInternalKeypoint>;
 /** Basic function to execute optical flow. This function calls the following NEON kernels and functions:
  *
@@ -52,7 +53,10 @@ using LKInternalKeypointArray = Array<NELKInternalKeypoint>;
 class NEOpticalFlow : public IFunction
 {
 public:
-    /** Constructor */
+    /** Constructor
+     *
+     * @param[in] memory_manager (Optional) Memory manager.
+     */
     NEOpticalFlow(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
     /** Prevent instances of this class from being copied (As this class contains pointers) */
     NEOpticalFlow(const NEOpticalFlow &) = delete;
diff --git a/arm_compute/runtime/Scheduler.h b/arm_compute/runtime/Scheduler.h
index 524622f89d..7e10461b5a 100644
--- a/arm_compute/runtime/Scheduler.h
+++ b/arm_compute/runtime/Scheduler.h
@@ -33,12 +33,13 @@ namespace arm_compute
 class Scheduler
 {
 public:
+    /** Scheduler type */
     enum class Type
     {
-        ST,    // Single thread.
-        CPP,   // C++11 threads.
-        OMP,   // OpenMP.
-        CUSTOM // Provided by the user.
+        ST,    /**< Single thread. */
+        CPP,   /**< C++11 threads. */
+        OMP,   /**< OpenMP. */
+        CUSTOM /**< Provided by the user. */
     };
     /** Sets the user defined scheduler and makes it the active scheduler.
      *
@@ -63,6 +64,8 @@ public:
      */
     static Type get_type();
     /** Returns true if the given scheduler type is supported. False otherwise.
+     *
+     * @param[in] t the type of the scheduler to check.
      *
      * @return true if the given scheduler type is supported. False otherwise.
      */
diff --git a/arm_compute/runtime/Tensor.h b/arm_compute/runtime/Tensor.h
index 1fe73a2353..7022afff87 100644
--- a/arm_compute/runtime/Tensor.h
+++ b/arm_compute/runtime/Tensor.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -60,6 +60,7 @@ private:
     mutable TensorAllocator _allocator; /**< Instance of the basic CPU allocator.*/
 };
 
+/** Image */
 using Image = Tensor;
 }
 #endif /*__ARM_COMPUTE_TENSOR_H__ */
diff --git a/arm_compute/runtime/TensorAllocator.h b/arm_compute/runtime/TensorAllocator.h
index 9af100c129..2ad37d0576 100644
--- a/arm_compute/runtime/TensorAllocator.h
+++ b/arm_compute/runtime/TensorAllocator.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -44,13 +44,16 @@ using MemoryGroup = MemoryGroupBase<Tensor>;
 class TensorAllocator : public ITensorAllocator
 {
 public:
-    /** Default constructor. */
+    /** Default constructor.
+     *
+     * @param[in] owner Owner of the tensor allocator.
+     */
     TensorAllocator(Tensor *owner = nullptr);
     /** Default destructor */
     ~TensorAllocator();
-    /** Prevent instances of this class from being copied (As this class contains pointers). */
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
     TensorAllocator(const TensorAllocator &) = delete;
-    /** Prevent instances of this class from being copy assigned (As this class contains pointers). */
+    /** Prevent instances of this class from being copy assigned (As this class contains pointers) */
     TensorAllocator &operator=(const TensorAllocator &) = delete;
     /** Allow instances of this class to be moved */
     TensorAllocator(TensorAllocator &&) noexcept;
@@ -71,7 +74,10 @@ public:
      */
     void init(const TensorAllocator &allocator, const Coordinates &coords, TensorInfo sub_info);
 
-    /** Returns the pointer to the allocated data. */
+    /** Returns the pointer to the allocated data.
+     *
+     * @return a pointer to the allocated data.
+     */
     uint8_t *data() const;
 
     /** Allocate size specified by TensorInfo of CPU memory.
-- 
cgit v1.2.1