From 49b9e100bfbb3b8da01472a0ff48b2bd92944e01 Mon Sep 17 00:00:00 2001
From: surmeh01 <surabhi.mehta@arm.com>
Date: Thu, 17 May 2018 14:11:25 +0100
Subject: Release 18.05

---
 Android.bp                   |  65 ++-
 Android.mk                   |   8 +-
 ArmnnDriver.cpp              |   1 +
 LICENSE                      |  12 +-
 ModelToINetworkConverter.cpp | 270 +++++++++----
 NnapiSupport.txt             |  51 +++
 README.md                    |   2 +
 ReleaseNotes.txt             |  59 ---
 RequestThread.cpp            |  26 +-
 service.cpp                  |  28 +-
 test/Android.mk              |  15 +-
 test/Concurrent.cpp          | 109 +++++
 test/Convolution2D.cpp       | 110 +++++
 test/DriverTestHelpers.cpp   | 218 ++++++++++
 test/DriverTestHelpers.hpp   | 135 +++++++
 test/FullyConnected.cpp      | 254 ++++++++++++
 test/GenericLayerTests.cpp   | 196 +++++++++
 test/Merger.cpp              | 408 +++++++++++++++++++
 test/SystemProperties.cpp    |  57 +++
 test/TestTensor.cpp          |  32 ++
 test/TestTensor.hpp          |  32 ++
 test/Tests.cpp               | 933 +------------------------------------------
 test/UtilsTests.cpp          |   6 +-
 23 files changed, 1922 insertions(+), 1105 deletions(-)
 create mode 100644 NnapiSupport.txt
 delete mode 100644 ReleaseNotes.txt
 create mode 100644 test/Concurrent.cpp
 create mode 100644 test/Convolution2D.cpp
 create mode 100644 test/DriverTestHelpers.cpp
 create mode 100644 test/DriverTestHelpers.hpp
 create mode 100644 test/FullyConnected.cpp
 create mode 100644 test/GenericLayerTests.cpp
 create mode 100644 test/Merger.cpp
 create mode 100644 test/SystemProperties.cpp
 create mode 100644 test/TestTensor.cpp
 create mode 100644 test/TestTensor.hpp

diff --git a/Android.bp b/Android.bp
index 0daab064..03b2ded4 100644
--- a/Android.bp
+++ b/Android.bp
@@ -19,6 +19,7 @@ cc_library_static {
         "clframework/src/core/AccessWindowAutoPadding.cpp",
         "clframework/src/core/AccessWindowStatic.cpp",
         "clframework/src/core/AccessWindowTranspose.cpp",
+        "clframework/src/core/GPUTarget.cpp",
         "clframework/src/core/CL/CLHelpers.cpp",
         "clframework/src/core/CL/CLKernelLibrary.cpp",
         "clframework/src/core/CL/ICLDistribution1D.cpp",
@@ -48,10 +49,12 @@ cc_library_static {
         "clframework/src/core/CL/kernels/CLCol2ImKernel.cpp",
         "clframework/src/core/CL/kernels/CLColorConvertKernel.cpp",
         "clframework/src/core/CL/kernels/CLConvolutionKernel.cpp",
+        "clframework/src/core/CL/kernels/CLCopyKernel.cpp",
         "clframework/src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.cpp",
         "clframework/src/core/CL/kernels/CLDepthConcatenateLayerKernel.cpp",
         "clframework/src/core/CL/kernels/CLDepthConvertLayerKernel.cpp",
-        "clframework/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3Kernel.cpp",
+        "clframework/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp",
+        "clframework/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp",
         "clframework/src/core/CL/kernels/CLDepthwiseIm2ColKernel.cpp",
         "clframework/src/core/CL/kernels/CLDepthwiseVectorToTensorKernel.cpp",
         "clframework/src/core/CL/kernels/CLDepthwiseWeightsReshapeKernel.cpp",
@@ -115,12 +118,17 @@ cc_library_static {
         "clframework/src/core/CL/kernels/CLWarpAffineKernel.cpp",
         "clframework/src/core/CL/kernels/CLWarpPerspectiveKernel.cpp",
         "clframework/src/core/CL/kernels/CLWeightsReshapeKernel.cpp",
+        "clframework/src/core/CL/kernels/CLWinogradFilterTransformKernel.cpp",
+        "clframework/src/core/CL/kernels/CLWinogradInputTransformKernel.cpp",
+        "clframework/src/core/CL/kernels/CLWinogradOutputTransformKernel.cpp",
         "clframework/src/core/CL/OpenCL.cpp",
+        "clframework/src/core/CPP/CPPTypes.cpp",
         "clframework/src/core/CPP/ICPPSimpleKernel.cpp",
         "clframework/src/core/CPP/kernels/CPPCornerCandidatesKernel.cpp",
         "clframework/src/core/CPP/kernels/CPPDetectionWindowNonMaximaSuppressionKernel.cpp",
         "clframework/src/core/CPP/kernels/CPPPermuteKernel.cpp",
         "clframework/src/core/CPP/kernels/CPPSortEuclideanDistanceKernel.cpp",
+        "clframework/src/core/CPP/kernels/CPPUpsampleKernel.cpp",
         "clframework/src/core/Error.cpp",
         "clframework/src/core/Helpers.cpp",
         "clframework/src/core/HOGInfo.cpp",
@@ -130,6 +138,12 @@ cc_library_static {
         "clframework/src/core/IKernel.cpp",
         "clframework/src/core/ITensor.cpp",
         "clframework/src/core/MultiImageInfo.cpp",
+        "clframework/src/core/NEON/kernels/arm_gemm/gemm_fp16.cpp",
+        "clframework/src/core/NEON/kernels/arm_gemm/gemm_fp32.cpp",
+        "clframework/src/core/NEON/kernels/arm_gemm/gemm_int16.cpp",
+        "clframework/src/core/NEON/kernels/arm_gemm/gemm_int8.cpp",
+        "clframework/src/core/NEON/kernels/arm_gemm/gemm_uint16.cpp",
+        "clframework/src/core/NEON/kernels/arm_gemm/gemm_uint8.cpp",
         "clframework/src/core/NEON/kernels/convolution/common/utils.cpp",
         "clframework/src/core/NEON/kernels/convolution/depthwise/depthwise_2x2_3x3_1x1_fp32_fp32.cpp",
         "clframework/src/core/NEON/kernels/convolution/depthwise/depthwise_2x2_3x3_2x2_fp32_fp32.cpp",
@@ -187,7 +201,6 @@ cc_library_static {
         "clframework/src/core/NEON/kernels/NEGaussian5x5Kernel.cpp",
         "clframework/src/core/NEON/kernels/NEGaussianPyramidKernel.cpp",
         "clframework/src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.cpp",
-        "clframework/src/core/NEON/kernels/NEGEMMInterleaveBlockedKernel.cpp",
         "clframework/src/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.cpp",
         "clframework/src/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.cpp",
         "clframework/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.cpp",
@@ -234,7 +247,7 @@ cc_library_static {
         "clframework/src/core/NEON/kernels/NETransposeKernel.cpp",
         "clframework/src/core/NEON/kernels/NEWarpKernel.cpp",
         "clframework/src/core/NEON/kernels/NEWeightsReshapeKernel.cpp",
-        "clframework/src/core/NEON/kernels/NEWinogradLayerKernel.cpp",
+        "clframework/src/core/NEON/kernels/NEWinogradConvolutionLayerKernel.cpp",
         "clframework/src/core/PyramidInfo.cpp",
         "clframework/src/core/Rounding.cpp",
         "clframework/src/core/SubTensorInfo.cpp",
@@ -255,6 +268,8 @@ cc_library_static {
         "clframework/src/runtime/CL/CLHOG.cpp",
         "clframework/src/runtime/CL/CLLutAllocator.cpp",
         "clframework/src/runtime/CL/CLLut.cpp",
+        "clframework/src/runtime/CL/CLMemory.cpp",
+        "clframework/src/runtime/CL/CLMemoryRegion.cpp",
         "clframework/src/runtime/CL/CLMultiHOG.cpp",
         "clframework/src/runtime/CL/CLMultiImage.cpp",
         "clframework/src/runtime/CL/CLPyramid.cpp",
@@ -280,6 +295,7 @@ cc_library_static {
         "clframework/src/runtime/CL/functions/CLColorConvert.cpp",
         "clframework/src/runtime/CL/functions/CLConvolution.cpp",
         "clframework/src/runtime/CL/functions/CLConvolutionLayer.cpp",
+        "clframework/src/runtime/CL/functions/CLCopy.cpp",
         "clframework/src/runtime/CL/functions/CLDeconvolutionLayer.cpp",
         "clframework/src/runtime/CL/functions/CLDeconvolutionLayerUpsample.cpp",
         "clframework/src/runtime/CL/functions/CLDepthConcatenateLayer.cpp",
@@ -333,6 +349,7 @@ cc_library_static {
         "clframework/src/runtime/CL/functions/CLReductionOperation.cpp",
         "clframework/src/runtime/CL/functions/CLRemap.cpp",
         "clframework/src/runtime/CL/functions/CLReshapeLayer.cpp",
+        "clframework/src/runtime/CL/functions/CLRNNLayer.cpp",
         "clframework/src/runtime/CL/functions/CLROIPoolingLayer.cpp",
         "clframework/src/runtime/CL/functions/CLScale.cpp",
         "clframework/src/runtime/CL/functions/CLScharr3x3.cpp",
@@ -345,11 +362,16 @@ cc_library_static {
         "clframework/src/runtime/CL/functions/CLTranspose.cpp",
         "clframework/src/runtime/CL/functions/CLWarpAffine.cpp",
         "clframework/src/runtime/CL/functions/CLWarpPerspective.cpp",
+        "clframework/src/runtime/CL/functions/CLWinogradConvolutionLayer.cpp",
+        "clframework/src/runtime/CL/functions/CLWinogradInputTransform.cpp",
         "clframework/src/runtime/CL/ICLSimpleFunction.cpp",
+        "clframework/src/runtime/CL/tuners/BifrostTuner.cpp",
         "clframework/src/runtime/CPP/CPPScheduler.cpp",
         "clframework/src/runtime/CPP/functions/CPPPermute.cpp",
+        "clframework/src/runtime/CPP/functions/CPPUpsample.cpp",
         "clframework/src/runtime/CPP/ICPPSimpleFunction.cpp",
         "clframework/src/runtime/CPP/SingleThreadScheduler.cpp",
+        "clframework/src/runtime/CPUUtils.cpp",
         "clframework/src/runtime/Distribution1D.cpp",
         "clframework/src/runtime/HOG.cpp",
         "clframework/src/runtime/ILutAllocator.cpp",
@@ -446,7 +468,7 @@ cc_library_static {
         "clframework/src/runtime/NEON/functions/NETranspose.cpp",
         "clframework/src/runtime/NEON/functions/NEWarpAffine.cpp",
         "clframework/src/runtime/NEON/functions/NEWarpPerspective.cpp",
-        "clframework/src/runtime/NEON/functions/NEWinogradLayer.cpp",
+        "clframework/src/runtime/NEON/functions/NEWinogradConvolutionLayer.cpp",
         "clframework/src/runtime/NEON/INESimpleFunction.cpp",
         "clframework/src/runtime/OffsetLifetimeManager.cpp",
         "clframework/src/runtime/OffsetMemoryPool.cpp",
@@ -463,16 +485,32 @@ cc_library_static {
     ],
     arch: {
         arm: {
-            srcs: ["clframework/src/core/NEON/kernels/arm32/NEGEMMAArch32Kernel.cpp"],
+            srcs: [
+                "clframework/src/core/NEON/kernels/arm_gemm/kernels/a32_sgemm_8x6/a53.cpp",
+                "clframework/src/core/NEON/kernels/arm_gemm/kernels/a32_sgemm_8x6/a55r1.cpp",
+                "clframework/src/core/NEON/kernels/arm_gemm/kernels/a32_sgemm_8x6/generic.cpp"
+            ],
         },
         arm64: {
-            srcs: ["clframework/src/core/NEON/kernels/arm64/NEGEMMAArch64Kernel.cpp",
-                   "clframework/src/core/NEON/kernels/arm64/NEGEMMAArch64NativeKernel.cpp",
-                   "clframework/src/core/NEON/kernels/arm64/NEGEMMLowpAArch64A53Kernel.cpp",
-                   "clframework/src/core/NEON/kernels/arm64/NEGEMMLowpAArch64Kernel.cpp",
-                   "clframework/src/core/NEON/kernels/arm64/NEGEMMLowpAArch64V8P4Kernel.cpp",
-                   "clframework/src/core/NEON/kernels/arm64/NEGEMVAArch64Kernel.cpp",
-                   "clframework/src/core/NEON/kernels/arm64/NEHGEMMAArch64FP16Kernel.cpp"],
+            srcs: [
+                "clframework/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s16_12x8/generic.cpp",
+                "clframework/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s8_12x8/a55r1.cpp",
+                "clframework/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s8_12x8/generic.cpp",
+                "clframework/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s8_4x4/generic.cpp",
+                "clframework/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u16_12x8/generic.cpp",
+                "clframework/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_12x8/a55r1.cpp",
+                "clframework/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_12x8/generic.cpp",
+                "clframework/src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_4x4/generic.cpp",
+                "clframework/src/core/NEON/kernels/arm_gemm/kernels/a64_hgemm_24x8/a55r1.cpp",
+                "clframework/src/core/NEON/kernels/arm_gemm/kernels/a64_hgemm_24x8/generic.cpp",
+                "clframework/src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_12x8/a53.cpp",
+                "clframework/src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_12x8/a55.cpp",
+                "clframework/src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_12x8/a55r1.cpp",
+                "clframework/src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_12x8/generic.cpp",
+                "clframework/src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_native_16x4/generic.cpp",
+                "clframework/src/core/NEON/kernels/arm_gemm/kernels/a64_sgemv_pretransposed/generic.cpp",
+                "clframework/src/core/NEON/kernels/arm_gemm/kernels/a64_sgemv_trans/generic.cpp"
+            ],
         },
     },
     cppflags: [
@@ -481,6 +519,8 @@ cc_library_static {
         "-DEMBEDDED_KERNELS",
         "-DARM_COMPUTE_ASSERTS_ENABLED",
         "-Wno-unused-parameter",
+        "-DNO_DOT_IN_TOOLCHAIN",
+        "-no-integrated-as"
     ],
     rtti: true,
 }
@@ -492,6 +532,7 @@ cc_library_static {
 ////////////////////////////////////////////
 cc_defaults {
     name: "libboost-defaults",
+    proprietary: true,
     export_include_dirs: ["boost_1_64_0"],
     cflags: [
         "-O3",
diff --git a/Android.mk b/Android.mk
index 48868790..d6b013e9 100644
--- a/Android.mk
+++ b/Android.mk
@@ -7,7 +7,7 @@ ANDROID_NN_DRIVER_LOCAL_PATH := $(call my-dir)
 LOCAL_PATH := $(ANDROID_NN_DRIVER_LOCAL_PATH)
 
 # Configure these paths if you move the source or Khronos headers
-OPENCL_HEADER_PATH := $(LOCAL_PATH)/../mali/product/khronos/original
+OPENCL_HEADER_PATH := $(LOCAL_PATH)/clframework/include
 NN_HEADER_PATH := $(LOCAL_PATH)/../../../frameworks/ml/nn/runtime/include
 
 ###################
@@ -18,6 +18,7 @@ include $(CLEAR_VARS)
 LOCAL_MODULE := libarmnn-driver
 LOCAL_MODULE_TAGS := eng optional
 LOCAL_ARM_MODE := arm
+LOCAL_PROPRIETARY_MODULE := true
 # Mark source files as dependent on Android.mk
 LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/Android.mk
 
@@ -108,9 +109,8 @@ LOCAL_SHARED_LIBRARIES :=  \
 	libhidlmemory \
 	libdl \
 	libhardware \
-	libtextclassifier \
-	libtextclassifier_hash \
 	liblog \
+	libtextclassifier_hash \
 	libutils \
 	android.hardware.neuralnetworks@1.0 \
 	android.hidl.allocator@1.0 \
@@ -125,4 +125,4 @@ include $(BUILD_EXECUTABLE)
 # Note we use ANDROID_NN_DRIVER_LOCAL_PATH rather than LOCAL_PATH because LOCAL_PATH will be overwritten
 # when including other .mk files that set it.
 include $(ANDROID_NN_DRIVER_LOCAL_PATH)/armnn/Android.mk
-include $(ANDROID_NN_DRIVER_LOCAL_PATH)/test/Android.mk
\ No newline at end of file
+include $(ANDROID_NN_DRIVER_LOCAL_PATH)/test/Android.mk
diff --git a/ArmnnDriver.cpp b/ArmnnDriver.cpp
index 19624649..92487ccd 100644
--- a/ArmnnDriver.cpp
+++ b/ArmnnDriver.cpp
@@ -43,6 +43,7 @@ DriverOptions::DriverOptions(armnn::Compute computeDevice)
 : m_ComputeDevice(computeDevice)
 , m_VerboseLogging(false)
 , m_UseAndroidNnCpuExecutor(false)
+, m_ClTunedParametersMode(armnn::IClTunedParameters::Mode::UseTunedParameters)
 {
 }
 
diff --git a/LICENSE b/LICENSE
index 18e83ec1..af3b5100 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,12 +1,12 @@
-Copyright (c) 2017 ARM Limited.
+MIT License
 
-SPDX-License-Identifier: MIT
+Copyright (c) 2017 ARM Limited.
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to
-deal in the Software without restriction, including without limitation the
-rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
-sell copies of the Software, and to permit persons to whom the Software is
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
 furnished to do so, subject to the following conditions:
 
 The above copyright notice and this permission notice shall be included in all
diff --git a/ModelToINetworkConverter.cpp b/ModelToINetworkConverter.cpp
index 68ebef00..bd2443e2 100644
--- a/ModelToINetworkConverter.cpp
+++ b/ModelToINetworkConverter.cpp
@@ -19,6 +19,41 @@
 #include <boost/test/tools/floating_point_comparison.hpp>
 #include <boost/cast.hpp>
 
+namespace armnn_driver
+{
+class LayerInputHandle // Couples an armnn output slot with its TensorInfo and a validity flag.
+{
+public:
+    LayerInputHandle() // Default state: no output slot, flagged invalid.
+        : m_OutputSlot(nullptr)
+        , m_Valid(false)
+    {}
+    // Stores the given validity flag, the output slot pointer and a copy of tensorInfo.
+    LayerInputHandle(bool valid, armnn::IOutputSlot* outputSlot, armnn::TensorInfo tensorInfo)
+        : m_OutputSlot(outputSlot)
+        , m_Valid(valid)
+        , m_TensorInfo(tensorInfo)
+    {}
+    // Reports the validity flag that was supplied at construction.
+    bool IsValid() const { return m_Valid; }
+    void Connect(armnn::IInputSlot& inputSlot) // Wires the stored output slot, if any, to inputSlot.
+    {
+        assert(IsValid());
+        // If no output slot is stored, nothing is connected.
+        if (m_OutputSlot)
+        {
+            m_OutputSlot->Connect(inputSlot);
+        }
+    }
+    const armnn::TensorInfo& GetTensorInfo() const { return m_TensorInfo; }
+
+private:
+    armnn::IOutputSlot* m_OutputSlot; // May be null; Connect() checks before use.
+    bool m_Valid;
+    armnn::TensorInfo m_TensorInfo;
+};
+} // armnn_driver
+
 namespace
 {
 using namespace armnn_driver;
@@ -140,7 +175,10 @@ void SanitizeBiasQuantizationScale(armnn::TensorInfo& biasInfo,
     }
 }
 
+const armnn::PermutationVector IdentityPermutation({ 0U, 1U, 2U, 3U });
 const armnn::PermutationVector NHWCToArmNN({ 0U, 2U, 3U, 1U });
+const armnn::PermutationVector ArmNNToNHWC({ 0U, 3U, 1U, 2U });
+const armnn::PermutationVector SwapDim1And2({ 0U, 2U, 1U, 3U });
 
 template <typename OSlot>
 armnn::IConnectableLayer& AddPermuteLayer(armnn::INetwork& network, OSlot& input,
@@ -165,8 +203,6 @@ armnn::IConnectableLayer& SwizzleInDeswizzleOut(armnn::INetwork& network, LayerI
                                                 armnn::IConnectableLayer& firstLayer,
                                                 armnn::IConnectableLayer& lastLayer)
 {
-    static const armnn::PermutationVector ArmNNToNHWC({ 0U, 3U, 1U, 2U });
-
     // Add swizzle layer
     armnn::IConnectableLayer& swizzleLayer = AddPermuteLayer(network, input, NHWCToArmNN);
 
@@ -184,6 +220,71 @@ armnn::IConnectableLayer& SwizzleInDeswizzleOut(armnn::INetwork& network, LayerI
 {
     return SwizzleInDeswizzleOut(network, input, layer, layer);
 }
+
+// Checks outputShape against the result of concatenating inputShapes (already validated by the caller)
+// along concatDim. Returns true on a match, otherwise the result of Fail() after reporting the mismatch.
+bool ValidateConcatOutputShape(const std::vector<armnn::TensorShape> & inputShapes,
+                               const armnn::TensorShape & outputShape,
+                               uint32_t concatDim)
+{
+    // inputShapes[0] is the reference shape below, so an empty list cannot be validated.
+    if (inputShapes.empty())
+    {
+        return Fail("%s: No input shapes to validate", __func__);
+    }
+
+    const unsigned int numDimensions = inputShapes[0].GetNumDimensions();
+    if (outputShape.GetNumDimensions() != numDimensions)
+    {
+        return Fail("%s: Output shape has wrong number of dimensions", __func__);
+    }
+
+    unsigned int outputSizeAlongConcatenatedDimension = 0;
+    for (const armnn::TensorShape& inputShape : inputShapes)
+    {
+        outputSizeAlongConcatenatedDimension += inputShape[concatDim];
+    }
+
+    for (unsigned int i = 0; i < numDimensions; ++i)
+    {
+        if (i == concatDim)
+        {
+            if (outputShape[i] != outputSizeAlongConcatenatedDimension)
+            {
+                return Fail("%s: Invalid output shape for dimension %u (%u != %u)",
+                            __func__, i, outputShape[i], outputSizeAlongConcatenatedDimension);
+            }
+        }
+        else if (outputShape[i] != inputShapes[0][i])
+        {
+            return Fail("%s: Invalid output shape", __func__);
+        }
+    }
+
+    return true;
+}
+
+// Routes every input through a permute layer built from `mapping`, then overwrites the handle and
+// the shape stored at the same index with the permute layer's output. Identity mappings are a no-op.
+void SwizzleInputs(armnn::INetwork& network,
+                   std::vector<LayerInputHandle>& inputs,
+                   std::vector<armnn::TensorShape>& inputShapes,
+                   const armnn::PermutationVector& mapping)
+{
+    if (mapping.IsEqual(IdentityPermutation))
+    {
+        return;
+    }
+
+    for (size_t idx = 0, count = inputs.size(); idx < count; ++idx)
+    {
+        armnn::IOutputSlot& permutedSlot = AddPermuteLayer(network, inputs[idx], mapping).GetOutputSlot(0);
+        // From here on the handle refers to the permuted tensor instead of the original input.
+        inputs[idx] = LayerInputHandle(true, &permutedSlot, permutedSlot.GetTensorInfo());
+        inputShapes[idx] = inputs[idx].GetTensorInfo().GetShape();
+    }
+}
+
 } // namespace
 
 namespace armnn_driver
@@ -399,37 +500,6 @@ bool ModelToINetworkConverter::ConvertOperation(const Operation& operation)
     }
 }
 
-class LayerInputHandle
-{
-public:
-    LayerInputHandle()
-        : m_OutputSlot(nullptr)
-        , m_Valid(false)
-    {}
-
-    LayerInputHandle(bool valid, armnn::IOutputSlot* outputSlot, armnn::TensorInfo tensorInfo)
-        : m_OutputSlot(outputSlot)
-        , m_Valid(valid)
-        , m_TensorInfo(tensorInfo)
-    {}
-
-    bool IsValid() const { return m_Valid; }
-    void Connect(armnn::IInputSlot& inputSlot)
-    {
-        assert(IsValid());
-
-        if (m_OutputSlot)
-        {
-            m_OutputSlot->Connect(inputSlot);
-        }
-    }
-    const armnn::TensorInfo& GetTensorInfo() const { return m_TensorInfo; }
-
-private:
-    armnn::IOutputSlot* m_OutputSlot;
-    bool m_Valid;
-    armnn::TensorInfo m_TensorInfo;
-};
 
 bool ModelToINetworkConverter::ConvertAdd(const Operation& operation)
 {
@@ -540,6 +610,65 @@ bool ModelToINetworkConverter::ConvertConcatenation(const Operation& operation)
     // Get inputs and outputs
     const std::size_t numInputTensors = operation.inputs.size() - 1;
 
+    int32_t concatDim;
+    if (!GetInputScalar(operation, numInputTensors, OperandType::INT32, concatDim))
+    {
+        return Fail("%s: Operation has invalid inputs", __func__);
+    }
+
+    const Operand* const outputOperand = GetOutputOperand(operation, 0);
+    if (!outputOperand)
+    {
+        return Fail("%s: Operation has no outputs", __func__);
+    }
+
+    armnn::TensorInfo  outputInfo  = GetTensorInfoForOperand(*outputOperand);
+    armnn::TensorShape outputShape = outputInfo.GetShape();
+
+    //
+    // handle negative concat dims along the lines of tensorflow as described here:
+    //    https://www.tensorflow.org/api_docs/python/tf/concat
+    // "negative axis refers to axis + rank(values)-th dimension"
+    //
+    if (concatDim < 0)
+    {
+        concatDim += outputShape.GetNumDimensions();
+    }
+
+    if (concatDim >= static_cast<int32_t>(outputShape.GetNumDimensions()) || concatDim < 0)
+    {
+        return Fail("%s: Operation has invalid concat axis: %d", __func__, concatDim);
+    }
+
+    // ArmNN uses Compute Library subtensors to perform concatenation
+    // This only works when concatenating along dimension 0 or 1 for a 4-D tensor,
+    // or along dimension 0 for a 3-D tensor.
+    const armnn::PermutationVector* permuteVectorIn = &IdentityPermutation;
+    const armnn::PermutationVector* permuteVectorOut = &IdentityPermutation;
+
+    assert(permuteVectorOut != nullptr);
+
+    if (outputShape.GetNumDimensions() == 4) {
+        if (concatDim == 3) {
+            concatDim = 1;
+            permuteVectorIn = &NHWCToArmNN;
+            permuteVectorOut = &ArmNNToNHWC;
+            outputShape = armnnUtils::Permuted(outputShape, *permuteVectorIn);
+            outputInfo.SetShape(outputShape);
+        } else if (concatDim == 2) {
+            concatDim = 1;
+            permuteVectorIn = &SwapDim1And2;
+            permuteVectorOut = &SwapDim1And2;
+            outputShape = armnnUtils::Permuted(outputShape, *permuteVectorIn);
+            outputInfo.SetShape(outputShape);
+        }
+    }
+    else if (!(outputShape.GetNumDimensions() == 3 && concatDim == 0))
+    {
+        // Operation unsupported
+        return false;
+    }
+
     std::vector<LayerInputHandle> inputHandles;
     std::vector<armnn::TensorShape> inputShapes;
 
@@ -556,6 +685,8 @@ bool ModelToINetworkConverter::ConvertConcatenation(const Operation& operation)
 
         inputShapes.emplace_back(GetTensorShapeForOperand(*operand));
         inputHandles.emplace_back(ConvertToLayerInputHandle(operation, i));
+
+
         if (!inputHandles.back().IsValid())
         {
             return Fail("%s: Operation has invalid inputs", __func__);
@@ -564,60 +695,30 @@ bool ModelToINetworkConverter::ConvertConcatenation(const Operation& operation)
 
     assert(inputShapes.size() == inputHandles.size());
 
-    uint32_t concatDim;
-    if (!GetInputScalar(operation, numInputTensors, OperandType::INT32, concatDim))
-    {
-        return Fail("%s: Operation has invalid inputs", __func__);
-    }
-
-    const Operand* const outputOperand = GetOutputOperand(operation, 0);
-    if (!outputOperand)
-    {
-        return Fail("%s: Operation has no outputs", __func__);
-    }
-    const armnn::TensorShape outputShape = GetTensorShapeForOperand(*outputOperand);
+    // this is no-op for identity swizzles, otherwise it replaces both
+    // the handles and shapes with the swizzled layer output handles and shapes
+    SwizzleInputs(*m_Network, inputHandles, inputShapes, *permuteVectorIn);
 
     // Create an armnn merger layer descriptor - this will also perform validation on the input shapes
     armnn::OriginsDescriptor mergerDescriptor;
     try
     {
-        mergerDescriptor = armnn::CreateMergerDescriptorForConcatenation(inputShapes.begin(), inputShapes.end(),
-            concatDim);
+        // The merger descriptor is always created across the only supported concat
+        // dimension, which is 0 or 1
+        mergerDescriptor =
+            armnn::CreateMergerDescriptorForConcatenation(
+                inputShapes.begin(), inputShapes.end(), concatDim);
     }
     catch (const armnn::Exception& error)
     {
         return Fail("%s: Error preparing merger descriptor. %s", __func__, error.what());
     }
 
-    // Validate the output shape is correct given the input shapes (which have just been validated)
-    unsigned int numDimensions = inputShapes[0].GetNumDimensions();
-    if (outputShape.GetNumDimensions() != numDimensions)
-    {
-        return Fail("%s: Output shape has wrong number of dimensions", __func__);
-    }
-
-    unsigned int outputSizeAlongConcatenatedDimension = 0;
-    for (unsigned int i = 0; i < inputShapes.size(); i++)
-    {
-        outputSizeAlongConcatenatedDimension += inputShapes[i][concatDim];
-    }
-
-    for (unsigned int i = 0; i < numDimensions; ++i)
+    // Validate the output shape is correct given the input shapes based on the
+    // only valid concat dimension which is 0 or 1
+    if (!ValidateConcatOutputShape(inputShapes, outputShape, concatDim))
     {
-        if (i == concatDim)
-        {
-            if (outputShape[i] != outputSizeAlongConcatenatedDimension)
-            {
-                return Fail("%s: Invalid output shape", __func__);
-            }
-        }
-        else
-        {
-            if (outputShape[i] != inputShapes[0][i])
-            {
-                return Fail("%s: Invalid output shape", __func__);
-            }
-        }
+        return Fail("%s: Error validating the output shape for concat", __func__);
     }
 
     std::vector<const armnn::TensorInfo*> inputTensorInfos;
@@ -634,15 +735,26 @@ bool ModelToINetworkConverter::ConvertConcatenation(const Operation& operation)
 
     armnn::IConnectableLayer* layer = m_Network->AddMergerLayer(mergerDescriptor);
     assert(layer != nullptr);
+    layer->GetOutputSlot(0).SetTensorInfo(outputInfo);
 
     // Connect inputs to the layer
     const int numInputSlots = layer->GetNumInputSlots();
     assert(static_cast<std::size_t>(numInputSlots) == inputHandles.size());
     for (int i = 0; i < numInputSlots; ++i)
     {
+        // connect the input directly to the merge (concat) layer
         inputHandles[static_cast<unsigned int>(i)].Connect(layer->GetInputSlot(i));
     }
 
+    if (permuteVectorOut != &IdentityPermutation)
+    {
+        // Add permutation layer and connect the output to it, the permutation becomes the output layer
+        armnn::IConnectableLayer& deswizzleLayer = AddPermuteLayer(*m_Network,
+                                                                   layer->GetOutputSlot(0),
+                                                                   *permuteVectorOut);
+        layer = &deswizzleLayer;
+    }
+
     return SetupAndTrackLayerOutputSlot(operation, 0, *layer);
 }
 
@@ -726,8 +838,10 @@ bool ModelToINetworkConverter::ConvertConv2d(const Operation& operation)
                           armnn::IsConvolution2dSupported,
                           m_Compute,
                           swizzledInputInfo,
+                          swizzledOutputInfo,
                           desc,
-                          weights.GetInfo()))
+                          weights.GetInfo(),
+                          bias.GetInfo()))
     {
         return false;
     }
@@ -1047,7 +1161,7 @@ bool ModelToINetworkConverter::ConvertLocalResponseNormalization(const Operation
 bool ModelToINetworkConverter::ConvertLogistic(const Operation& operation)
 {
     armnn::ActivationDescriptor desc;
-    desc.m_Function == armnn::ActivationFunction::Sigmoid;
+    desc.m_Function = armnn::ActivationFunction::Sigmoid;
 
     return ConvertToActivation(operation, __func__, desc);
 }
@@ -1845,4 +1959,4 @@ bool ModelToINetworkConverter::IsOperationSupported(uint32_t operationIndex) con
 }
 
 
-} // armnn_driver
\ No newline at end of file
+} // armnn_driver
diff --git a/NnapiSupport.txt b/NnapiSupport.txt
new file mode 100644
index 00000000..8973d901
--- /dev/null
+++ b/NnapiSupport.txt
@@ -0,0 +1,51 @@
+------ ArmNN for Android NNAPI supported operations ------
+
+This release of ArmNN for Android supports use as a driver for the Android Neural Networks API. It implements the android.hardware.neuralnetworks@1.0 interface.
+
+For more information on the Android Neural Networks API, see https://developer.android.com/ndk/guides/neuralnetworks/index.html
+
+For integration and usage documentation, please see README.md.
+
+--- Support for Android Neural Networks HAL operations ---
+
+The following AndroidNN operations are currently supported.
+
+AndroidNN operator           Tensor type supported
+ADD                          (FLOAT32)
+AVERAGE_POOL_2D              (FLOAT32,QUANT8_ASYMM)
+CONCATENATION                (FLOAT32)
+CONV_2D                      (FLOAT32,QUANT8_ASYMM)
+DEPTHWISE_CONV_2D*           (FLOAT32,QUANT8_ASYMM)
+FLOOR                        (FLOAT32)
+FULLY_CONNECTED              (FLOAT32)
+L2_NORMALIZATION             (FLOAT32)
+L2_POOL_2D                   (FLOAT32)
+LOCAL_RESPONSE_NORMALIZATION (FLOAT32)
+LOGISTIC                     (FLOAT32,QUANT8_ASYMM)
+MAX_POOL_2D                  (FLOAT32,QUANT8_ASYMM)
+MUL                          (FLOAT32)
+RELU                         (FLOAT32,QUANT8_ASYMM)
+RELU1                        (FLOAT32,QUANT8_ASYMM)
+RELU6                        (FLOAT32,QUANT8_ASYMM)
+RESHAPE                      (FLOAT32,QUANT8_ASYMM)
+RESIZE_BILINEAR              (FLOAT32)
+SOFTMAX                      (FLOAT32,QUANT8_ASYMM)
+TANH                         (FLOAT32)
+
+* Depthwise convolution only supports a value of 1 for the depth multiplier. In addition, the QUANT8_ASYMM version only supports 3x3 kernels.
+
+--- Unsupported operators ---
+
+The following AndroidNN operations are currently not supported.
+
+DEPTH_TO_SPACE
+DEQUANTIZE
+EMBEDDING_LOOKUP
+HASHTABLE_LOOKUP
+LSH_PROJECTION
+LSTM
+RNN
+SPACE_TO_DEPTH
+SVDF
+
+Where operations are not supported by the ArmNN Android NN Driver, the driver indicates this to the framework appropriately and the framework implements those operations using a CPU implementation.
diff --git a/README.md b/README.md
index f549d2c2..4f780e7a 100644
--- a/README.md
+++ b/README.md
@@ -2,6 +2,8 @@
 
 This directory contains the ArmNN driver for the Android Neural Networks API, implementing the android.hardware.neuralnetworks@1.0 HAL.
 
+For more information about supported operations and configurations, see NnapiSupport.txt.
+
 ## Integration guide
 
 ### Prerequisites
diff --git a/ReleaseNotes.txt b/ReleaseNotes.txt
deleted file mode 100644
index 89f7e761..00000000
--- a/ReleaseNotes.txt
+++ /dev/null
@@ -1,59 +0,0 @@
------- ArmNN for Android 18.02 Release Notes ------
-
-This release of ArmNN for Android supports use as a driver for the Android Neural Networks API. It implements the android.hardware.neuralnetworks@1.0 interface.
-
-For more information on the Android Neural Networks API, see https://developer.android.com/ndk/guides/neuralnetworks/index.html
-
-For integration and usage documentation, please see README.md.
-
---- Support for Android Neural Networks HAL operations ---
-
-The following AndroidNN operations are currently supported.
-
-AndroidNN operator           Tensor type supported
-ADD                          (FLOAT32)
-AVERAGE_POOL_2D              (FLOAT32,QUANT8_ASYMM)
-CONCATENATION                (FLOAT32)
-CONV_2D                      (FLOAT32,QUANT8_ASYMM**)
-DEPTHWISE_CONV_2D***         (FLOAT32,QUANT8_ASYMM)
-FLOOR                        (FLOAT32)
-FULLY_CONNECTED              (FLOAT32)
-L2_NORMALIZATION             (FLOAT32)
-L2_POOL_2D                   (FLOAT32)
-LOCAL_RESPONSE_NORMALIZATION (FLOAT32)
-LOGISTIC                     (FLOAT32,QUANT8_ASYMM)
-MAX_POOL_2D                  (FLOAT32,QUANT8_ASYMM)
-MUL*                         (FLOAT32)
-RELU                         (FLOAT32,QUANT8_ASYMM)
-RELU1                        (FLOAT32,QUANT8_ASYMM)
-RELU6                        (FLOAT32,QUANT8_ASYMM)
-RESHAPE                      (FLOAT32,QUANT8_ASYMM)
-RESIZE_BILINEAR              (FLOAT32)
-SOFTMAX                      (FLOAT32,QUANT8_ASYMM)
-TANH                         (FLOAT32)
-
-* MUL currently does not support mixing of different tensor sizes.
-
-** QUANT8_ASYMM version does not support asymmetric padding. In addition, only the following configurations are supported:
-    1) 1x1 convolution with strides of 1 or 2 or 3
-    2) 3x3 convolution with strides of 1 or 2
-    3) 5x5 convolution with strides of 1 or 2
-
-*** Depthwise convolution only supports a value of 1 for the depth multiplier. In addition, the QUANT8_ASYMM version only supports 3x3 kernels.
-
-
---- Unsupported operators ---
-
-The following AndroidNN operations are currently not supported.
-
-DEPTH_TO_SPACE
-DEQUANTIZE
-EMBEDDING_LOOKUP
-HASHTABLE_LOOKUP
-LSH_PROJECTION
-LSTM
-RNN
-SPACE_TO_DEPTH
-SVDF
-
-Where operations are not supported by the ArmNN Android NN Driver, the driver indicates this to the framework appropriately and the framework implements those operations using a CPU implementation.
diff --git a/RequestThread.cpp b/RequestThread.cpp
index 708a46c8..abaee90c 100644
--- a/RequestThread.cpp
+++ b/RequestThread.cpp
@@ -26,12 +26,26 @@ RequestThread::RequestThread()
 RequestThread::~RequestThread()
 {
     ALOGV("RequestThread::~RequestThread()");
-    // post an EXIT message to the thread
-    std::shared_ptr<AsyncExecuteData> nulldata(nullptr);
-    auto pMsg = std::make_shared<ThreadMsg>(ThreadMsgType::EXIT, nulldata);
-    PostMsg(pMsg);
-    // Wait for the thread to terminate, it is deleted automatically
-    m_Thread->join();
+
+    try
+    {
+        // Coverity fix: The following code may throw an exception of type std::length_error.
+
+        // This code is meant to terminate the inner thread gracefully by posting an EXIT message
+        // to the thread's message queue. However, according to Coverity, this code could throw an exception and fail.
+        // Since only one static instance of RequestThread is used in the driver (in ArmnnPreparedModel),
+        // this destructor is called only when the application has been closed, which means that
+        // the inner thread will be terminated anyway, although abruptly, in the event that the destructor code throws.
+        // Wrapping the destructor's code with a try-catch block simply fixes the Coverity bug.
+
+        // Post an EXIT message to the thread
+        std::shared_ptr<AsyncExecuteData> nulldata(nullptr);
+        auto pMsg = std::make_shared<ThreadMsg>(ThreadMsgType::EXIT, nulldata);
+        PostMsg(pMsg);
+        // Wait for the thread to terminate, it is deleted automatically
+        m_Thread->join();
+    }
+    catch (const std::exception&) { } // Swallow any exception.
 }
 
 void RequestThread::PostMsg(ArmnnPreparedModel* model,
diff --git a/service.cpp b/service.cpp
index 742091ef..4ab59c85 100644
--- a/service.cpp
+++ b/service.cpp
@@ -18,15 +18,35 @@ using namespace std;
 
 int main(int argc, char** argv)
 {
-    android::sp<ArmnnDriver> driver = new ArmnnDriver(DriverOptions(argc, argv));
+    android::sp<ArmnnDriver> driver;
+    try
+    {
+        driver = new ArmnnDriver(DriverOptions(argc, argv));
+    }
+    catch (const std::exception& e)
+    {
+        ALOGE("Could not create driver: %s", e.what());
+        return EXIT_FAILURE;
+    }
 
     android::hardware::configureRpcThreadpool(1, true);
-    if (driver->registerAsService("armnn") != android::OK)
+    android::status_t status = android::UNKNOWN_ERROR;
+    try
+    {
+        status = driver->registerAsService("armnn");
+    }
+    catch (const std::exception& e)
+    {
+        ALOGE("Could not register service: %s", e.what());
+        return EXIT_FAILURE;
+    }
+    if (status != android::OK)
     {
         ALOGE("Could not register service");
-        return 1;
+        return EXIT_FAILURE;
     }
+
     android::hardware::joinRpcThreadpool();
     ALOGE("Service exited!");
-    return 1;
+    return EXIT_FAILURE;
 }
diff --git a/test/Android.mk b/test/Android.mk
index 95de4617..d74afecc 100644
--- a/test/Android.mk
+++ b/test/Android.mk
@@ -28,7 +28,15 @@ LOCAL_CFLAGS := \
 
 LOCAL_SRC_FILES :=	\
 	Tests.cpp \
-	UtilsTests.cpp
+	UtilsTests.cpp \
+	Concurrent.cpp  \
+	Convolution2D.cpp  \
+	FullyConnected.cpp  \
+	GenericLayerTests.cpp \
+	DriverTestHelpers.cpp \
+	SystemProperties.cpp \
+	Merger.cpp \
+	TestTensor.cpp
 
 LOCAL_STATIC_LIBRARIES := \
 	libarmnn-driver \
@@ -45,9 +53,8 @@ LOCAL_SHARED_LIBRARIES :=  \
 	libhidlbase \
 	libhidltransport \
 	libhidlmemory \
-	libtextclassifier \
-	libtextclassifier_hash \
 	liblog \
+	libtextclassifier_hash \
 	libutils \
 	android.hardware.neuralnetworks@1.0 \
 	android.hidl.allocator@1.0 \
@@ -63,6 +70,8 @@ LOCAL_ARM_MODE := arm
 # Mark source files as dependent on Android.mk
 LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/Android.mk
 
+LOCAL_PROPRIETARY_MODULE := true
+
 include $(BUILD_EXECUTABLE)
 
 
diff --git a/test/Concurrent.cpp b/test/Concurrent.cpp
new file mode 100644
index 00000000..16734dc3
--- /dev/null
+++ b/test/Concurrent.cpp
@@ -0,0 +1,109 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+#include "DriverTestHelpers.hpp"
+#include <boost/test/unit_test.hpp>
+#include <log/log.h>
+
+BOOST_AUTO_TEST_SUITE(ConcurrentDriverTests)
+
+using ArmnnDriver = armnn_driver::ArmnnDriver;
+using DriverOptions = armnn_driver::DriverOptions;
+using namespace android::nn;
+using namespace driverTestHelpers;
+
+// Add our own test for concurrent execution
+// The main point of this test is to check that multiple requests can be
+// executed without waiting for the callback from previous execution.
+// The operations performed are not significant.
+BOOST_AUTO_TEST_CASE(ConcurrentExecute)
+{
+    ALOGI("ConcurrentExecute: entry");
+
+    auto driver = std::make_unique<ArmnnDriver>(DriverOptions(armnn::Compute::CpuRef));
+    Model model = {};
+
+    // add operands
+    int32_t actValue      = 0;
+    float   weightValue[] = {2, 4, 1};
+    float   biasValue[]   = {4};
+
+    AddInputOperand(model, hidl_vec<uint32_t>{1, 3});
+    AddTensorOperand(model, hidl_vec<uint32_t>{1, 3}, weightValue);
+    AddTensorOperand(model, hidl_vec<uint32_t>{1}, biasValue);
+    AddIntOperand(model, actValue);
+    AddOutputOperand(model, hidl_vec<uint32_t>{1, 1});
+
+    // make the fully connected operation
+    model.operations.resize(1);
+    model.operations[0].type = OperationType::FULLY_CONNECTED;
+    model.operations[0].inputs  = hidl_vec<uint32_t>{0, 1, 2, 3};
+    model.operations[0].outputs = hidl_vec<uint32_t>{4};
+
+    // make the prepared models
+    const size_t maxRequests = 5;
+    android::sp<IPreparedModel> preparedModels[maxRequests];
+    for (size_t i = 0; i < maxRequests; ++i)
+    {
+        preparedModels[i] = PrepareModel(model, *driver);
+    }
+
+    // construct the request data
+    DataLocation inloc = {};
+    inloc.poolIndex = 0;
+    inloc.offset    = 0;
+    inloc.length    = 3 * sizeof(float);
+    RequestArgument input = {};
+    input.location = inloc;
+    input.dimensions = hidl_vec<uint32_t>{};
+
+    DataLocation outloc = {};
+    outloc.poolIndex = 1;
+    outloc.offset    = 0;
+    outloc.length    = 1 * sizeof(float);
+    RequestArgument output = {};
+    output.location  = outloc;
+    output.dimensions = hidl_vec<uint32_t>{};
+
+    // build the requests
+    Request requests[maxRequests];
+    android::sp<IMemory> outMemory[maxRequests];
+    float* outdata[maxRequests];
+    for (size_t i = 0; i < maxRequests; ++i)
+    {
+        requests[i].inputs  = hidl_vec<RequestArgument>{input};
+        requests[i].outputs = hidl_vec<RequestArgument>{output};
+        // set the input data (matching source test)
+        float indata[] = {2, 32, 16};
+        AddPoolAndSetData(3, requests[i], indata);
+        // add memory for the output
+        outMemory[i] = AddPoolAndGetData(1, requests[i]);
+        outdata[i] = static_cast<float*>(static_cast<void*>(outMemory[i]->getPointer()));
+    }
+
+    // invoke the execution of the requests
+    ALOGI("ConcurrentExecute: executing requests");
+    android::sp<ExecutionCallback> cb[maxRequests];
+    for (size_t i = 0; i < maxRequests; ++i)
+    {
+        cb[i] = ExecuteNoWait(preparedModels[i], requests[i]);
+    }
+
+    // wait for the requests to complete
+    ALOGI("ConcurrentExecute: waiting for callbacks");
+    for (size_t i = 0; i < maxRequests; ++i)
+    {
+        cb[i]->wait();
+    }
+
+    // check the results
+    ALOGI("ConcurrentExecute: validating results");
+    for (size_t i = 0; i < maxRequests; ++i)
+    {
+        BOOST_TEST(outdata[i][0] == 152);
+    }
+    ALOGI("ConcurrentExecute: exit");
+}
+
+BOOST_AUTO_TEST_SUITE_END()
diff --git a/test/Convolution2D.cpp b/test/Convolution2D.cpp
new file mode 100644
index 00000000..90edb415
--- /dev/null
+++ b/test/Convolution2D.cpp
@@ -0,0 +1,110 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+#include "DriverTestHelpers.hpp"
+#include <boost/test/unit_test.hpp>
+#include <log/log.h>
+
+#include "OperationsUtils.h"
+
+BOOST_AUTO_TEST_SUITE(Convolution2DTests)
+
+using ArmnnDriver = armnn_driver::ArmnnDriver;
+using DriverOptions = armnn_driver::DriverOptions;
+using namespace driverTestHelpers;
+
+namespace
+{
+
+void PaddingTestImpl(android::nn::PaddingScheme paddingScheme)
+{
+    auto driver = std::make_unique<ArmnnDriver>(DriverOptions(armnn::Compute::CpuRef));
+    Model model  = {};
+
+    uint32_t outSize = paddingScheme == android::nn::kPaddingSame ? 2 : 1;
+
+    // add operands
+    float weightValue[] = {1, -1, 0, 1};
+    float biasValue[]   = {0};
+
+    AddInputOperand(model, hidl_vec<uint32_t>{1, 2, 3, 1});
+    AddTensorOperand(model, hidl_vec<uint32_t>{1, 2, 2, 1}, weightValue);
+    AddTensorOperand(model, hidl_vec<uint32_t>{1}, biasValue);
+    AddIntOperand(model, (int32_t)paddingScheme); // padding
+    AddIntOperand(model, 2); // stride x
+    AddIntOperand(model, 2); // stride y
+    AddIntOperand(model, 0); // no activation
+    AddOutputOperand(model, hidl_vec<uint32_t>{1, 1, outSize, 1});
+
+    // make the convolution operation
+    model.operations.resize(1);
+    model.operations[0].type = OperationType::CONV_2D;
+    model.operations[0].inputs  = hidl_vec<uint32_t>{0, 1, 2, 3, 4, 5, 6};
+    model.operations[0].outputs = hidl_vec<uint32_t>{7};
+
+    // make the prepared model
+    android::sp<IPreparedModel> preparedModel = PrepareModel(model, *driver);
+
+    // construct the request
+    DataLocation inloc    = {};
+    inloc.poolIndex       = 0;
+    inloc.offset          = 0;
+    inloc.length          = 6 * sizeof(float);
+    RequestArgument input = {};
+    input.location        = inloc;
+    input.dimensions      = hidl_vec<uint32_t>{};
+
+    DataLocation outloc    = {};
+    outloc.poolIndex       = 1;
+    outloc.offset          = 0;
+    outloc.length          = outSize * sizeof(float);
+    RequestArgument output = {};
+    output.location        = outloc;
+    output.dimensions      = hidl_vec<uint32_t>{};
+
+    Request request = {};
+    request.inputs  = hidl_vec<RequestArgument>{input};
+    request.outputs = hidl_vec<RequestArgument>{output};
+
+
+    // set the input data (matching source test)
+    float indata[] = {4, 1, 0, 3, -1, 2};
+    AddPoolAndSetData(6, request, indata);
+
+    // add memory for the output
+    android::sp<IMemory> outMemory = AddPoolAndGetData(outSize, request);
+    float*               outdata   = static_cast<float*>(static_cast<void*>(outMemory->getPointer()));
+
+    // run the execution
+    Execute(preparedModel, request);
+
+    // check the result
+    if (paddingScheme == android::nn::kPaddingValid)
+    {
+        BOOST_TEST(outdata[0] == 2);
+    }
+    else if (paddingScheme == android::nn::kPaddingSame)
+    {
+        BOOST_TEST(outdata[0] == 2);
+        BOOST_TEST(outdata[1] == 0);
+    }
+    else
+    {
+        BOOST_TEST(false);
+    }
+}
+
+} // namespace <anonymous>
+
+BOOST_AUTO_TEST_CASE(ConvValidPadding)
+{
+    PaddingTestImpl(android::nn::kPaddingValid);
+}
+
+BOOST_AUTO_TEST_CASE(ConvSamePadding)
+{
+    PaddingTestImpl(android::nn::kPaddingSame);
+}
+
+BOOST_AUTO_TEST_SUITE_END()
diff --git a/test/DriverTestHelpers.cpp b/test/DriverTestHelpers.cpp
new file mode 100644
index 00000000..5b371921
--- /dev/null
+++ b/test/DriverTestHelpers.cpp
@@ -0,0 +1,218 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+#include "DriverTestHelpers.hpp"
+#include <log/log.h>
+#include <boost/test/unit_test.hpp>
+
+namespace android
+{
+namespace hardware
+{
+namespace neuralnetworks
+{
+namespace V1_0
+{
+
+std::ostream& operator<<(std::ostream& os, ErrorStatus stat)
+{
+   return os << static_cast<int>(stat);
+}
+
+} // namespace android::hardware::neuralnetworks::V1_0
+} // namespace android::hardware::neuralnetworks
+} // namespace android::hardware
+} // namespace android
+
+
+namespace driverTestHelpers
+{
+
+Return<void> ExecutionCallback::notify(ErrorStatus status)
+{
+    (void)status;
+    ALOGI("ExecutionCallback::notify invoked");
+    std::lock_guard<std::mutex> executionLock(mMutex);
+    mNotified = true;
+    mCondition.notify_one();
+    return Void();
+}
+
+Return<void> ExecutionCallback::wait()
+{
+    ALOGI("ExecutionCallback::wait invoked");
+    std::unique_lock<std::mutex> executionLock(mMutex);
+    while (!mNotified)
+    {
+        mCondition.wait(executionLock);
+    }
+    mNotified = false;
+    return Void();
+}
+
+Return<void> PreparedModelCallback::notify(ErrorStatus status,
+                                           const android::sp<IPreparedModel>& preparedModel)
+{
+    m_ErrorStatus = status;
+    m_PreparedModel = preparedModel;
+    return Void();
+}
+
+// Lifted from common/Utils.cpp: allocates size bytes of "ashmem"; returns a default-constructed hidl_memory on failure.
+hidl_memory allocateSharedMemory(int64_t size)
+{
+    hidl_memory memory;
+
+    const std::string& type      = "ashmem";
+    android::sp<IAllocator>     allocator = IAllocator::getService(type);
+    allocator->allocate(size, [&](bool success, const hidl_memory& mem) {
+        if (!success)
+        {
+            ALOGE("unable to allocate %lld bytes of %s", static_cast<long long>(size), type.c_str());
+        }
+        else
+        {
+            memory = mem;
+        }
+    });
+
+    return memory;
+}
+
+android::sp<IMemory> AddPoolAndGetData(uint32_t size, Request& request)
+{
+    // Allocate an "ashmem" pool large enough for `size` floats and append it to the request's pools.
+    hidl_memory pool;
+    android::sp<IAllocator> allocator = IAllocator::getService("ashmem");
+    allocator->allocate(sizeof(float) * size, [&](bool success, const hidl_memory& mem) {
+        BOOST_TEST(success);
+        pool = mem;
+    });
+    request.pools.resize(request.pools.size() + 1);
+    request.pools[request.pools.size() - 1] = pool;
+    // Abort the test case cleanly rather than dereferencing a null mapping if the pool could not be mapped.
+    android::sp<IMemory> mapped = mapMemory(pool);
+    BOOST_REQUIRE_MESSAGE(mapped.get() != nullptr, "AddPoolAndGetData: failed to map the memory pool");
+    mapped->update();
+    return mapped;
+}
+
+void AddPoolAndSetData(uint32_t size, Request& request, const float* data)
+{
+    android::sp<IMemory> memory = AddPoolAndGetData(size, request);
+
+    float* dst = static_cast<float*>(static_cast<void*>(memory->getPointer()));
+
+    memcpy(dst, data, size * sizeof(float));
+}
+
+void AddOperand(Model& model, const Operand& op)
+{
+    model.operands.resize(model.operands.size() + 1);
+    model.operands[model.operands.size() - 1] = op;
+}
+
+void AddIntOperand(Model& model, int32_t value)
+{
+    DataLocation location = {};
+    location.offset = model.operandValues.size();
+    location.length = sizeof(int32_t);
+
+    Operand op    = {};
+    op.type = OperandType::INT32;
+    op.dimensions = hidl_vec<uint32_t>{};
+    op.lifetime   = OperandLifeTime::CONSTANT_COPY;
+    op.location   = location;
+
+    model.operandValues.resize(model.operandValues.size() + location.length);
+    *reinterpret_cast<int32_t*>(&model.operandValues[location.offset]) = value;
+
+    AddOperand(model, op);
+}
+
+void AddInputOperand(Model& model, hidl_vec<uint32_t> dimensions)
+{
+    Operand op    = {};
+    op.type       = OperandType::TENSOR_FLOAT32;
+    op.dimensions = dimensions;
+    op.lifetime   = OperandLifeTime::MODEL_INPUT;
+
+    AddOperand(model, op);
+
+    model.inputIndexes.resize(model.inputIndexes.size() + 1);
+    model.inputIndexes[model.inputIndexes.size() - 1] = model.operands.size() - 1;
+}
+
+void AddOutputOperand(Model& model, hidl_vec<uint32_t> dimensions)
+{
+    Operand op = {};
+    op.type       = OperandType::TENSOR_FLOAT32;
+    op.dimensions = dimensions;
+    op.lifetime   = OperandLifeTime::MODEL_OUTPUT;
+
+    AddOperand(model, op);
+
+    model.outputIndexes.resize(model.outputIndexes.size() + 1);
+    model.outputIndexes[model.outputIndexes.size() - 1] = model.operands.size() - 1;
+}
+
+
+android::sp<IPreparedModel> PrepareModelWithStatus(const Model& model,
+                                                   armnn_driver::ArmnnDriver& driver,
+                                                   ErrorStatus & prepareStatus,
+                                                   ErrorStatus expectedStatus)
+{
+
+    android::sp<PreparedModelCallback> cb(new PreparedModelCallback());
+    driver.prepareModel(model, cb);
+
+    prepareStatus = cb->GetErrorStatus();
+    BOOST_TEST(prepareStatus == expectedStatus);
+    if (expectedStatus == ErrorStatus::NONE)
+    {
+        BOOST_TEST((cb->GetPreparedModel() != nullptr));
+    }
+    return cb->GetPreparedModel();
+}
+
+android::sp<IPreparedModel> PrepareModel(const Model& model,
+                                         armnn_driver::ArmnnDriver& driver)
+{
+    ErrorStatus prepareStatus = ErrorStatus::NONE;
+    return PrepareModelWithStatus(model, driver, prepareStatus);
+}
+
+ErrorStatus Execute(android::sp<IPreparedModel> preparedModel,
+                    const Request& request,
+                    ErrorStatus expectedStatus)
+{
+    android::sp<ExecutionCallback> cb(new ExecutionCallback());
+    ErrorStatus execStatus = preparedModel->execute(request, cb);
+    BOOST_TEST(execStatus == expectedStatus);
+    ALOGI("Execute: waiting for callback to be invoked");
+    cb->wait();
+    return execStatus;
+}
+
+android::sp<ExecutionCallback> ExecuteNoWait(android::sp<IPreparedModel> preparedModel, const Request& request)
+{
+    android::sp<ExecutionCallback> cb(new ExecutionCallback());
+    BOOST_TEST(preparedModel->execute(request, cb) == ErrorStatus::NONE);
+    ALOGI("ExecuteNoWait: returning callback object");
+    return cb;
+}
+
+template<>
+OperandType TypeToOperandType<float>()
+{
+    return OperandType::TENSOR_FLOAT32;
+};
+
+template<>
+OperandType TypeToOperandType<int32_t>()
+{
+    return OperandType::TENSOR_INT32;
+};
+
+} // namespace driverTestHelpers
diff --git a/test/DriverTestHelpers.hpp b/test/DriverTestHelpers.hpp
new file mode 100644
index 00000000..e90f7ecf
--- /dev/null
+++ b/test/DriverTestHelpers.hpp
@@ -0,0 +1,135 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+#pragma once
+
+#ifndef LOG_TAG
+#define LOG_TAG "ArmnnDriverTests"
+#endif // LOG_TAG
+
+#include "../ArmnnDriver.hpp"
+#include <iosfwd>
+
+namespace android
+{
+namespace hardware
+{
+namespace neuralnetworks
+{
+namespace V1_0
+{
+
+std::ostream& operator<<(std::ostream& os, ErrorStatus stat);
+
+} // namespace android::hardware::neuralnetworks::V1_0
+} // namespace android::hardware::neuralnetworks
+} // namespace android::hardware
+} // namespace android
+
+namespace driverTestHelpers
+{
+
+std::ostream& operator<<(std::ostream& os, android::hardware::neuralnetworks::V1_0::ErrorStatus stat);
+
+struct ExecutionCallback : public IExecutionCallback
+{
+    ExecutionCallback() : mNotified(false) {}
+    Return<void> notify(ErrorStatus status) override;
+    /// wait until the callback has notified us that it is done
+    Return<void> wait();
+
+private:
+    // use a mutex and a condition variable to wait for asynchronous callbacks
+    std::mutex mMutex;
+    std::condition_variable mCondition;
+    // and a flag, in case we are notified before the wait call
+    bool mNotified;
+};
+
+class PreparedModelCallback : public IPreparedModelCallback
+{
+public:
+    PreparedModelCallback()
+        : m_ErrorStatus(ErrorStatus::NONE)
+        , m_PreparedModel()
+    { }
+    ~PreparedModelCallback() override { }
+
+    Return<void> notify(ErrorStatus status,
+                        const android::sp<IPreparedModel>& preparedModel) override;
+    ErrorStatus GetErrorStatus() { return m_ErrorStatus; }
+    android::sp<IPreparedModel> GetPreparedModel() { return m_PreparedModel; }
+
+private:
+    ErrorStatus                  m_ErrorStatus;
+    android::sp<IPreparedModel>  m_PreparedModel;
+};
+
+hidl_memory allocateSharedMemory(int64_t size);
+
+android::sp<IMemory> AddPoolAndGetData(uint32_t size, Request& request);
+
+void AddPoolAndSetData(uint32_t size, Request& request, const float* data);
+
+void AddOperand(Model& model, const Operand& op);
+
+void AddIntOperand(Model& model, int32_t value);
+
+template<typename T>
+OperandType TypeToOperandType();
+
+template<>
+OperandType TypeToOperandType<float>();
+
+template<>
+OperandType TypeToOperandType<int32_t>();
+
+template<typename T>
+void AddTensorOperand(Model& model, hidl_vec<uint32_t> dimensions, T* values)
+{
+    uint32_t totalElements = 1;
+    for (uint32_t dim : dimensions)
+    {
+        totalElements *= dim;
+    }
+
+    DataLocation location = {};
+    location.offset = model.operandValues.size();
+    location.length = totalElements * sizeof(T);
+
+    Operand op    = {};
+    op.type       = TypeToOperandType<T>();
+    op.dimensions = dimensions;
+    op.lifetime   = OperandLifeTime::CONSTANT_COPY;
+    op.location   = location;
+
+    model.operandValues.resize(model.operandValues.size() + location.length);
+    for (uint32_t i = 0; i < totalElements; i++)
+    {
+        *(reinterpret_cast<T*>(&model.operandValues[location.offset]) + i) = values[i];
+    }
+
+    AddOperand(model, op);
+}
+
+void AddInputOperand(Model& model, hidl_vec<uint32_t> dimensions);
+
+void AddOutputOperand(Model& model, hidl_vec<uint32_t> dimensions);
+
+android::sp<IPreparedModel> PrepareModel(const Model& model,
+                                         armnn_driver::ArmnnDriver& driver);
+
+android::sp<IPreparedModel> PrepareModelWithStatus(const Model& model,
+                                                   armnn_driver::ArmnnDriver& driver,
+                                                   ErrorStatus & prepareStatus,
+                                                   ErrorStatus expectedStatus=ErrorStatus::NONE);
+
+ErrorStatus Execute(android::sp<IPreparedModel> preparedModel,
+                    const Request& request,
+                    ErrorStatus expectedStatus=ErrorStatus::NONE);
+
+android::sp<ExecutionCallback> ExecuteNoWait(android::sp<IPreparedModel> preparedModel,
+                                             const Request& request);
+
+} // namespace driverTestHelpers
diff --git a/test/FullyConnected.cpp b/test/FullyConnected.cpp
new file mode 100644
index 00000000..ea6c8715
--- /dev/null
+++ b/test/FullyConnected.cpp
@@ -0,0 +1,254 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+#include "DriverTestHelpers.hpp"
+#include <boost/test/unit_test.hpp>
+#include <log/log.h>
+
+BOOST_AUTO_TEST_SUITE(FullyConnectedTests)
+
+using ArmnnDriver = armnn_driver::ArmnnDriver;
+using DriverOptions = armnn_driver::DriverOptions;
+using namespace driverTestHelpers;
+
+// Add our own test here since we fail the fc tests which Google supplies (because of non-const weights)
+BOOST_AUTO_TEST_CASE(FullyConnected)
+{
+    // this should ideally replicate fully_connected_float.model.cpp
+    // but that uses slightly weird dimensions which I don't think we need to support for now
+
+    auto driver = std::make_unique<ArmnnDriver>(DriverOptions(armnn::Compute::CpuRef));
+    Model model = {};
+
+    // add operands
+    int32_t actValue      = 0;
+    float   weightValue[] = {2, 4, 1};
+    float   biasValue[]   = {4};
+
+    AddInputOperand(model, hidl_vec<uint32_t>{1, 3});
+    AddTensorOperand(model, hidl_vec<uint32_t>{1, 3}, weightValue);
+    AddTensorOperand(model, hidl_vec<uint32_t>{1}, biasValue);
+    AddIntOperand(model, actValue);
+    AddOutputOperand(model, hidl_vec<uint32_t>{1, 1});
+
+    // make the fully connected operation
+    model.operations.resize(1);
+    model.operations[0].type = OperationType::FULLY_CONNECTED;
+    model.operations[0].inputs  = hidl_vec<uint32_t>{0, 1, 2, 3};
+    model.operations[0].outputs = hidl_vec<uint32_t>{4};
+
+    // make the prepared model
+    android::sp<IPreparedModel> preparedModel = PrepareModel(model, *driver);
+
+    // construct the request
+    DataLocation inloc = {};
+    inloc.poolIndex = 0;
+    inloc.offset    = 0;
+    inloc.length    = 3 * sizeof(float);
+    RequestArgument input = {};
+    input.location = inloc;
+    input.dimensions = hidl_vec<uint32_t>{};
+
+    DataLocation outloc = {};
+    outloc.poolIndex = 1;
+    outloc.offset    = 0;
+    outloc.length    = 1 * sizeof(float);
+    RequestArgument output = {};
+    output.location  = outloc;
+    output.dimensions = hidl_vec<uint32_t>{};
+
+    Request request = {};
+    request.inputs  = hidl_vec<RequestArgument>{input};
+    request.outputs = hidl_vec<RequestArgument>{output};
+
+    // set the input data (matching source test)
+    float indata[] = {2, 32, 16};
+    AddPoolAndSetData(3, request, indata);
+
+    // add memory for the output
+    android::sp<IMemory> outMemory = AddPoolAndGetData(1, request);
+    float* outdata = static_cast<float*>(static_cast<void*>(outMemory->getPointer()));
+
+    // run the execution
+    Execute(preparedModel, request);
+
+    // check the result
+    BOOST_TEST(outdata[0] == 152);
+}
+
+BOOST_AUTO_TEST_CASE(TestFullyConnected4dInput)
+{
+    auto driver = std::make_unique<ArmnnDriver>(DriverOptions(armnn::Compute::CpuRef));
+
+    ErrorStatus error;
+    std::vector<bool> sup;
+
+    ArmnnDriver::getSupportedOperations_cb cb = [&](ErrorStatus status, const std::vector<bool>& supported)
+        {
+            error = status;
+            sup = supported;
+        };
+
+    Model model = {};
+
+    // operands
+    int32_t actValue      = 0;
+    float   weightValue[] = {1, 0, 0, 0, 0, 0, 0, 0,
+                             0, 1, 0, 0, 0, 0, 0, 0,
+                             0, 0, 1, 0, 0, 0, 0, 0,
+                             0, 0, 0, 1, 0, 0, 0, 0,
+                             0, 0, 0, 0, 1, 0, 0, 0,
+                             0, 0, 0, 0, 0, 1, 0, 0,
+                             0, 0, 0, 0, 0, 0, 1, 0,
+                             0, 0, 0, 0, 0, 0, 0, 1}; //identity
+    float   biasValue[]   = {0, 0, 0, 0, 0, 0, 0, 0};
+
+    // fully connected operation
+    AddInputOperand(model, hidl_vec<uint32_t>{1, 1, 1, 8});
+    AddTensorOperand(model, hidl_vec<uint32_t>{8, 8}, weightValue);
+    AddTensorOperand(model, hidl_vec<uint32_t>{8}, biasValue);
+    AddIntOperand(model, actValue);
+    AddOutputOperand(model, hidl_vec<uint32_t>{1, 8});
+
+    model.operations.resize(1);
+
+    model.operations[0].type = OperationType::FULLY_CONNECTED;
+    model.operations[0].inputs  = hidl_vec<uint32_t>{0,1,2,3};
+    model.operations[0].outputs = hidl_vec<uint32_t>{4};
+
+    // make the prepared model
+    android::sp<IPreparedModel> preparedModel = PrepareModel(model, *driver);
+
+
+    // construct the request
+    DataLocation inloc = {};
+    inloc.poolIndex = 0;
+    inloc.offset    = 0;
+    inloc.length    = 8 * sizeof(float);
+    RequestArgument input = {};
+    input.location = inloc;
+    input.dimensions = hidl_vec<uint32_t>{};
+
+    DataLocation outloc = {};
+    outloc.poolIndex = 1;
+    outloc.offset    = 0;
+    outloc.length    = 8 * sizeof(float);
+    RequestArgument output = {};
+    output.location  = outloc;
+    output.dimensions = hidl_vec<uint32_t>{};
+
+    Request request = {};
+    request.inputs  = hidl_vec<RequestArgument>{input};
+    request.outputs = hidl_vec<RequestArgument>{output};
+
+    // set the input data
+    float indata[] = {1,2,3,4,5,6,7,8};
+    AddPoolAndSetData(8, request, indata);
+
+    // add memory for the output
+    android::sp<IMemory> outMemory = AddPoolAndGetData(8, request);
+    float* outdata = static_cast<float*>(static_cast<void*>(outMemory->getPointer()));
+
+    // run the execution
+    Execute(preparedModel, request);
+
+    // check the result
+    BOOST_TEST(outdata[0] == 1);
+    BOOST_TEST(outdata[1] == 2);
+    BOOST_TEST(outdata[2] == 3);
+    BOOST_TEST(outdata[3] == 4);
+    BOOST_TEST(outdata[4] == 5);
+    BOOST_TEST(outdata[5] == 6);
+    BOOST_TEST(outdata[6] == 7);
+    BOOST_TEST(outdata[7] == 8);
+}
+
+BOOST_AUTO_TEST_CASE(TestFullyConnected4dInputReshape)
+{
+    auto driver = std::make_unique<ArmnnDriver>(DriverOptions(armnn::Compute::CpuRef));
+
+    ErrorStatus error;
+    std::vector<bool> sup;
+
+    ArmnnDriver::getSupportedOperations_cb cb = [&](ErrorStatus status, const std::vector<bool>& supported)
+        {
+            error = status;
+            sup = supported;
+        };
+
+    Model model = {};
+
+    // operands
+    int32_t actValue      = 0;
+    float   weightValue[] = {1, 0, 0, 0, 0, 0, 0, 0,
+                             0, 1, 0, 0, 0, 0, 0, 0,
+                             0, 0, 1, 0, 0, 0, 0, 0,
+                             0, 0, 0, 1, 0, 0, 0, 0,
+                             0, 0, 0, 0, 1, 0, 0, 0,
+                             0, 0, 0, 0, 0, 1, 0, 0,
+                             0, 0, 0, 0, 0, 0, 1, 0,
+                             0, 0, 0, 0, 0, 0, 0, 1}; //identity
+    float   biasValue[]   = {0, 0, 0, 0, 0, 0, 0, 0};
+
+    // fully connected operation
+    AddInputOperand(model, hidl_vec<uint32_t>{1, 2, 2, 2});
+    AddTensorOperand(model, hidl_vec<uint32_t>{8, 8}, weightValue);
+    AddTensorOperand(model, hidl_vec<uint32_t>{8}, biasValue);
+    AddIntOperand(model, actValue);
+    AddOutputOperand(model, hidl_vec<uint32_t>{1, 8});
+
+    model.operations.resize(1);
+
+    model.operations[0].type = OperationType::FULLY_CONNECTED;
+    model.operations[0].inputs  = hidl_vec<uint32_t>{0,1,2,3};
+    model.operations[0].outputs = hidl_vec<uint32_t>{4};
+
+    // make the prepared model
+    android::sp<IPreparedModel> preparedModel = PrepareModel(model, *driver);
+
+
+    // construct the request
+    DataLocation inloc = {};
+    inloc.poolIndex = 0;
+    inloc.offset    = 0;
+    inloc.length    = 8 * sizeof(float);
+    RequestArgument input = {};
+    input.location = inloc;
+    input.dimensions = hidl_vec<uint32_t>{};
+
+    DataLocation outloc = {};
+    outloc.poolIndex = 1;
+    outloc.offset    = 0;
+    outloc.length    = 8 * sizeof(float);
+    RequestArgument output = {};
+    output.location  = outloc;
+    output.dimensions = hidl_vec<uint32_t>{};
+
+    Request request = {};
+    request.inputs  = hidl_vec<RequestArgument>{input};
+    request.outputs = hidl_vec<RequestArgument>{output};
+
+    // set the input data
+    float indata[] = {1,2,3,4,5,6,7,8};
+    AddPoolAndSetData(8, request, indata);
+
+    // add memory for the output
+    android::sp<IMemory> outMemory = AddPoolAndGetData(8, request);
+    float* outdata = static_cast<float*>(static_cast<void*>(outMemory->getPointer()));
+
+    // run the execution
+    Execute(preparedModel, request);
+
+    // check the result
+    BOOST_TEST(outdata[0] == 1);
+    BOOST_TEST(outdata[1] == 2);
+    BOOST_TEST(outdata[2] == 3);
+    BOOST_TEST(outdata[3] == 4);
+    BOOST_TEST(outdata[4] == 5);
+    BOOST_TEST(outdata[5] == 6);
+    BOOST_TEST(outdata[6] == 7);
+    BOOST_TEST(outdata[7] == 8);
+}
+
+BOOST_AUTO_TEST_SUITE_END()
diff --git a/test/GenericLayerTests.cpp b/test/GenericLayerTests.cpp
new file mode 100644
index 00000000..5c6c041d
--- /dev/null
+++ b/test/GenericLayerTests.cpp
@@ -0,0 +1,196 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+#include "DriverTestHelpers.hpp"
+#include <boost/test/unit_test.hpp>
+#include <log/log.h>
+
+BOOST_AUTO_TEST_SUITE(GenericLayerTests)
+
+using ArmnnDriver = armnn_driver::ArmnnDriver;
+using DriverOptions = armnn_driver::DriverOptions;
+using namespace driverTestHelpers;
+
+BOOST_AUTO_TEST_CASE(GetSupportedOperations)
+{
+    // Checks the status and the per-operation support flags reported for four different models.
+    auto driver = std::make_unique<ArmnnDriver>(DriverOptions(armnn::Compute::CpuRef));
+    ErrorStatus error;
+    std::vector<bool> sup;
+
+    ArmnnDriver::getSupportedOperations_cb cb = [&](ErrorStatus status, const std::vector<bool>& supported)
+    {
+        error = status;
+        sup = supported;
+    };
+
+    Model model1 = {};
+
+    // operands 0-4 of model1: input, weights, bias, activation and output of a fully connected layer
+    int32_t actValue      = 0;
+    float   weightValue[] = {2, 4, 1};
+    float   biasValue[]   = {4};
+
+    AddInputOperand(model1, hidl_vec<uint32_t>{1, 3});
+    AddTensorOperand(model1, hidl_vec<uint32_t>{1, 3}, weightValue);
+    AddTensorOperand(model1, hidl_vec<uint32_t>{1}, biasValue);
+    AddIntOperand(model1, actValue);
+    AddOutputOperand(model1, hidl_vec<uint32_t>{1, 1});
+
+    // make a correct fully connected operation
+    model1.operations.resize(2);
+    model1.operations[0].type = OperationType::FULLY_CONNECTED;
+    model1.operations[0].inputs  = hidl_vec<uint32_t>{0, 1, 2, 3};
+    model1.operations[0].outputs = hidl_vec<uint32_t>{4};
+
+    // make an incorrect fully connected operation (a single input instead of the four used above)
+    AddIntOperand(model1, actValue);
+    AddOutputOperand(model1, hidl_vec<uint32_t>{1, 1});
+    model1.operations[1].type = OperationType::FULLY_CONNECTED;
+    model1.operations[1].inputs = hidl_vec<uint32_t>{4};
+    model1.operations[1].outputs = hidl_vec<uint32_t>{5};
+
+    driver->getSupportedOperations(model1, cb);
+    BOOST_TEST((int)error == (int)ErrorStatus::NONE);
+    BOOST_TEST(sup[0] == true);
+    BOOST_TEST(sup[1] == false);
+
+    // Broadcast add/mul are not supported
+    Model model2 = {};
+
+    AddInputOperand(model2, hidl_vec<uint32_t>{1, 1, 3, 4});
+    AddInputOperand(model2, hidl_vec<uint32_t>{4});
+    AddOutputOperand(model2, hidl_vec<uint32_t>{1, 1, 3, 4});
+    AddOutputOperand(model2, hidl_vec<uint32_t>{1, 1, 3, 4});
+
+    model2.operations.resize(2);
+
+    model2.operations[0].type = OperationType::ADD;
+    model2.operations[0].inputs = hidl_vec<uint32_t>{0,1};
+    model2.operations[0].outputs = hidl_vec<uint32_t>{2};
+
+    model2.operations[1].type = OperationType::MUL;
+    model2.operations[1].inputs = hidl_vec<uint32_t>{0,1};
+    model2.operations[1].outputs = hidl_vec<uint32_t>{3};
+
+    driver->getSupportedOperations(model2, cb);
+    BOOST_TEST((int)error == (int)ErrorStatus::NONE);
+    BOOST_TEST(sup[0] == false);
+    BOOST_TEST(sup[1] == false);
+
+    Model model3 = {};
+
+    // Add unsupported operation, should return no error but we don't support it
+    AddInputOperand(model3, hidl_vec<uint32_t>{1, 1, 1, 8});
+    AddIntOperand(model3, 2);
+    AddOutputOperand(model3, hidl_vec<uint32_t>{1, 2, 2, 2});
+    model3.operations.resize(1);
+    model3.operations[0].type = OperationType::DEPTH_TO_SPACE;
+    model3.operations[0].inputs = hidl_vec<uint32_t>{0, 1}; // wire model3's own operands (was model1)
+    model3.operations[0].outputs = hidl_vec<uint32_t>{2};
+
+    driver->getSupportedOperations(model3, cb);
+    BOOST_TEST((int)error == (int)ErrorStatus::NONE);
+    BOOST_TEST(sup[0] == false);
+
+    // Add invalid operation: the operation type is not a valid OperationType value
+    Model model4 = {};
+    AddIntOperand(model4, 0);
+    model4.operations.resize(1);
+    model4.operations[0].type = static_cast<OperationType>(100);
+    model4.operations[0].outputs = hidl_vec<uint32_t>{0};
+
+    driver->getSupportedOperations(model4, cb);
+    BOOST_TEST((int)error == (int)ErrorStatus::INVALID_ARGUMENT);
+}
+
+// Verifies that getSupportedOperations() does not stop at the first unsupported operation:
+//      every operation in the model must still get its own supported/unsupported flag.
+//      As per IVGCVSW-710.
+BOOST_AUTO_TEST_CASE(UnsupportedLayerContinueOnFailure)
+{
+    const auto driver = std::make_unique<ArmnnDriver>(DriverOptions(armnn::Compute::CpuRef));
+
+    ErrorStatus reportedStatus;
+    std::vector<bool> supportFlags;
+
+    ArmnnDriver::getSupportedOperations_cb onResult = [&](ErrorStatus s, const std::vector<bool>& flags)
+    {
+        reportedStatus = s;
+        supportFlags = flags;
+    };
+
+    Model mixedModel = {};
+
+    // constant values backing the fully connected operands
+    int32_t fcActivation = 0;
+    float   fcWeights[]  = {2, 4, 1};
+    float   fcBias[]     = {4};
+
+    // operands 0-2: broadcast add, unsupported when this test was written (any unsupported layer would do)
+    AddInputOperand(mixedModel, hidl_vec<uint32_t>{1, 1, 3, 4});
+    AddInputOperand(mixedModel, hidl_vec<uint32_t>{4});
+    AddOutputOperand(mixedModel, hidl_vec<uint32_t>{1, 1, 3, 4});
+
+    // operands 3-7: fully connected
+    AddInputOperand(mixedModel, hidl_vec<uint32_t>{1, 3});
+    AddTensorOperand(mixedModel, hidl_vec<uint32_t>{1, 3}, fcWeights);
+    AddTensorOperand(mixedModel, hidl_vec<uint32_t>{1}, fcBias);
+    AddIntOperand(mixedModel, fcActivation);
+    AddOutputOperand(mixedModel, hidl_vec<uint32_t>{1, 1});
+
+    // operand 8: output of the (unsupported) broadcast mul
+    AddOutputOperand(mixedModel, hidl_vec<uint32_t>{1, 1, 3, 4});
+
+    mixedModel.operations.resize(3);
+
+    // operation 0: unsupported
+    mixedModel.operations[0].type    = OperationType::ADD;
+    mixedModel.operations[0].inputs  = hidl_vec<uint32_t>{0, 1};
+    mixedModel.operations[0].outputs = hidl_vec<uint32_t>{2};
+
+    // operation 1: supported
+    mixedModel.operations[1].type    = OperationType::FULLY_CONNECTED;
+    mixedModel.operations[1].inputs  = hidl_vec<uint32_t>{3, 4, 5, 6};
+    mixedModel.operations[1].outputs = hidl_vec<uint32_t>{7};
+
+    // operation 2: unsupported
+    mixedModel.operations[2].type    = OperationType::MUL;
+    mixedModel.operations[2].inputs  = hidl_vec<uint32_t>{0, 1};
+    mixedModel.operations[2].outputs = hidl_vec<uint32_t>{8};
+
+    // the unsupported layers must be flagged false while the query still completes
+    //      without an error and still flags the supported layer as true.
+    driver->getSupportedOperations(mixedModel, onResult);
+    BOOST_TEST(static_cast<int>(reportedStatus) == static_cast<int>(ErrorStatus::NONE));
+    BOOST_TEST(supportFlags[0] == false);
+    BOOST_TEST(supportFlags[1] == true);
+    BOOST_TEST(supportFlags[2] == false);
+}
+
+// Verifies that a failure while mapping the model's memory pools
+//      is reported to the framework as an error through the callback
+BOOST_AUTO_TEST_CASE(ModelToINetworkConverterMemPoolFail)
+{
+    const auto driver = std::make_unique<ArmnnDriver>(armnn::Compute::CpuRef);
+
+    ErrorStatus reportedStatus;
+    std::vector<bool> supportFlags;
+
+    ArmnnDriver::getSupportedOperations_cb onResult = [&](ErrorStatus s, const std::vector<bool>& flags)
+    {
+        reportedStatus = s;
+        supportFlags = flags;
+    };
+
+    Model poolOnlyModel = {};
+
+    poolOnlyModel.pools = hidl_vec<hidl_memory>{hidl_memory("Unsuported hidl memory type", nullptr, 0)};
+
+    // mapping this pool is expected to fail, and the driver should report that as an error
+    driver->getSupportedOperations(poolOnlyModel, onResult);
+    BOOST_TEST(static_cast<int>(reportedStatus) == static_cast<int>(ErrorStatus::GENERAL_FAILURE));
+}
+
+BOOST_AUTO_TEST_SUITE_END()
diff --git a/test/Merger.cpp b/test/Merger.cpp
new file mode 100644
index 00000000..6c069a86
--- /dev/null
+++ b/test/Merger.cpp
@@ -0,0 +1,408 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+#include "DriverTestHelpers.hpp"
+#include "TestTensor.hpp"
+#include <boost/test/unit_test.hpp>
+#include <log/log.h>
+
+
+BOOST_AUTO_TEST_SUITE(MergerTests)
+
+using ArmnnDriver = armnn_driver::ArmnnDriver;
+using DriverOptions = armnn_driver::DriverOptions;
+using namespace driverTestHelpers;
+
+namespace
+{
+// Builds a one-operation CONCATENATION model from the inputs, prepares and executes it, and checks statuses/output.
+void
+MergerTestImpl(const std::vector<const TestTensor*> & inputs, // tensors to concatenate, in order
+                int32_t concatAxis, // value stored in the axis operand
+                const TestTensor & expectedOutputTensor, // output operand shape and expected result data
+                ErrorStatus expectedPrepareStatus=ErrorStatus::NONE, // status the prepare step should report
+                ErrorStatus expectedExecStatus=ErrorStatus::NONE) // status the execution should report
+{
+    std::unique_ptr<ArmnnDriver> driver = std::make_unique<ArmnnDriver>(DriverOptions(armnn::Compute::CpuRef));
+    Model model{};
+
+    hidl_vec<uint32_t> modelInputIds;
+    modelInputIds.resize(inputs.size()+1);
+    for (uint32_t i = 0; i<inputs.size(); ++i)
+    {
+        modelInputIds[i] = i;
+        AddInputOperand(model, inputs[i]->GetDimensions());
+    }
+    modelInputIds[inputs.size()] = inputs.size(); // add an id for the axis too
+    AddIntOperand(model, concatAxis);
+    AddOutputOperand(model, expectedOutputTensor.GetDimensions());
+
+    // make the concat operation; its output id follows the input tensors and the axis
+    model.operations.resize(1);
+    model.operations[0].type = OperationType::CONCATENATION;
+    model.operations[0].inputs  = modelInputIds;
+    model.operations[0].outputs = hidl_vec<uint32_t>{static_cast<uint32_t>(inputs.size()+1)};
+
+    // make the prepared model
+    ErrorStatus prepareStatus=ErrorStatus::NONE;
+    android::sp<IPreparedModel> preparedModel = PrepareModelWithStatus(model,
+                                                                       *driver,
+                                                                       prepareStatus,
+                                                                       expectedPrepareStatus);
+    BOOST_TEST(prepareStatus == expectedPrepareStatus);
+    if (prepareStatus != ErrorStatus::NONE)
+    {
+        // prepare failed (possibly as the test expected), we cannot continue
+        return;
+    }
+
+    BOOST_TEST(preparedModel.get() != nullptr);
+    if (preparedModel.get() == nullptr)
+    {
+        // don't spoil other tests if prepare failed
+        return;
+    }
+
+    // construct the request
+    hidl_vec<RequestArgument> inputArguments;
+    hidl_vec<RequestArgument> outputArguments;
+    inputArguments.resize(inputs.size());
+    outputArguments.resize(1);
+
+    // the request's memory pools will follow the same order as
+    // the inputs
+    for (uint32_t i = 0; i<inputs.size(); ++i)
+    {
+        DataLocation inloc = {};
+        inloc.poolIndex = i;
+        inloc.offset = 0;
+        inloc.length = inputs[i]->GetNumElements() * sizeof(float);
+        RequestArgument input = {};
+        input.location = inloc;
+        input.dimensions = inputs[i]->GetDimensions();
+        inputArguments[i] = input;
+    }
+
+    // and an additional memory pool is needed for the output
+    {
+        DataLocation outloc = {};
+        outloc.poolIndex = inputs.size(); // the output pool comes after all the input pools
+        outloc.offset = 0;
+        outloc.length = expectedOutputTensor.GetNumElements() * sizeof(float);
+        RequestArgument output = {};
+        output.location = outloc;
+        output.dimensions = expectedOutputTensor.GetDimensions();
+        outputArguments[0] = output;
+    }
+
+    // make the request based on the arguments
+    Request request = {};
+    request.inputs  = inputArguments;
+    request.outputs = outputArguments;
+
+    // set the input data, one pool per input, in the order the pool indices above assume
+    for (uint32_t i = 0; i<inputs.size(); ++i)
+    {
+        AddPoolAndSetData(inputs[i]->GetNumElements(),
+                            request,
+                            inputs[i]->GetData());
+    }
+
+    // add memory for the output
+    android::sp<IMemory> outMemory = AddPoolAndGetData(expectedOutputTensor.GetNumElements(), request);
+    float* outdata = static_cast<float*>(static_cast<void*>(outMemory->getPointer()));
+
+    // run the execution
+    auto execStatus = Execute(preparedModel, request, expectedExecStatus);
+    BOOST_TEST(execStatus == expectedExecStatus);
+
+    if (execStatus == ErrorStatus::NONE)
+    {
+        // check the result if there was no error
+        const float * expectedOutput = expectedOutputTensor.GetData();
+        for (unsigned int i=0; i<expectedOutputTensor.GetNumElements();++i)
+        {
+            BOOST_TEST(outdata[i] == expectedOutput[i]); // element-wise exact comparison
+        }
+    }
+}
+
+} // namespace <anonymous>
+
+BOOST_AUTO_TEST_CASE(SimpleConcatAxis0)
+{
+    const int32_t concatAxis = 0;
+    const TestTensor in0{armnn::TensorShape{1,1,1,1},{0}};
+    const TestTensor in1{armnn::TensorShape{1,1,1,1},{1}};
+    const TestTensor in2{armnn::TensorShape{1,1,1,1},{2}};
+    // Three single-element inputs joined on axis 0 give a 3x1x1x1 tensor.
+    const TestTensor merged{armnn::TensorShape{3,1,1,1},{0,1,2}};
+
+    MergerTestImpl({&in0, &in1, &in2}, concatAxis, merged);
+}
+
+BOOST_AUTO_TEST_CASE(ConcatAxis0_NoInterleave)
+{
+    const int32_t concatAxis = 0;
+    const TestTensor in0{armnn::TensorShape{2,1,2,1},{0,  1,
+                                                      2,  3}};
+    const TestTensor in1{armnn::TensorShape{3,1,2,1},{4,  5,
+                                                      6,  7,
+                                                      8,  9}};
+    const TestTensor in2{armnn::TensorShape{1,1,2,1},{10, 11}};
+    // Joining on axis 0: the expected output is the inputs' data back to back.
+    const TestTensor merged{armnn::TensorShape{6,1,2,1},{0,  1,
+                                                         2,  3,
+                                                         4,  5,
+                                                         6,  7,
+                                                         8,  9,
+                                                         10, 11}};
+
+    MergerTestImpl({&in0, &in1, &in2}, concatAxis, merged);
+}
+
+BOOST_AUTO_TEST_CASE(SimpleConcatAxis1)
+{
+    const int32_t concatAxis = 1;
+    const TestTensor in0{armnn::TensorShape{1,1,1,1},{0}};
+    const TestTensor in1{armnn::TensorShape{1,1,1,1},{1}};
+    const TestTensor in2{armnn::TensorShape{1,1,1,1},{2}};
+    // Three single-element inputs joined on axis 1 give a 1x3x1x1 tensor.
+    const TestTensor merged{armnn::TensorShape{1,3,1,1},{0,1,2}};
+
+    MergerTestImpl({&in0, &in1, &in2}, concatAxis, merged);
+}
+
+BOOST_AUTO_TEST_CASE(ConcatAxis1_NoInterleave)
+{
+    const int32_t concatAxis = 1;
+    const TestTensor in0{armnn::TensorShape{1,2,2,1},{0,  1,
+                                                      2,  3}};
+    const TestTensor in1{armnn::TensorShape{1,3,2,1},{4,  5,
+                                                      6,  7,
+                                                      8,  9}};
+    const TestTensor in2{armnn::TensorShape{1,1,2,1},{10, 11}};
+    // The dimension before axis 1 is 1, so the expected output is the inputs' data back to back.
+    const TestTensor merged{armnn::TensorShape{1,6,2,1},{0,  1,
+                                                         2,  3,
+                                                         4,  5,
+                                                         6,  7,
+                                                         8,  9,
+                                                         10, 11}};
+
+    MergerTestImpl({&in0, &in1, &in2}, concatAxis, merged);
+}
+
+BOOST_AUTO_TEST_CASE(SimpleConcatAxis1_DoInterleave)
+{
+    const int32_t concatAxis = 1;
+    const TestTensor in0{armnn::TensorShape{2,2,1,1},{0,  1,
+                                                      2,  3}};
+    const TestTensor in1{armnn::TensorShape{2,3,1,1},{4,  5,  6,
+                                                      7,  8,  9}};
+    const TestTensor in2{armnn::TensorShape{2,1,1,1},{10,
+                                                      11}};
+    // The leading dimension is 2, so each half of the output holds a row of in0, in1 and in2 in turn.
+    const TestTensor merged{armnn::TensorShape{2,6,1,1},{0, 1, 4, 5, 6, 10,
+                                                         2, 3, 7, 8, 9, 11}};
+
+    MergerTestImpl({&in0, &in1, &in2}, concatAxis, merged);
+}
+
+BOOST_AUTO_TEST_CASE(SimpleConcatAxis2)
+{
+    const int32_t concatAxis = 2;
+    const TestTensor in0{armnn::TensorShape{1,1,1,1},{0}};
+    const TestTensor in1{armnn::TensorShape{1,1,1,1},{1}};
+    const TestTensor in2{armnn::TensorShape{1,1,1,1},{2}};
+    // Three single-element inputs joined on axis 2 give a 1x1x3x1 tensor.
+    const TestTensor merged{armnn::TensorShape{1,1,3,1},{0,1,2}};
+
+    MergerTestImpl({&in0, &in1, &in2}, concatAxis, merged);
+}
+
+BOOST_AUTO_TEST_CASE(ConcatAxis2_NoInterleave)
+{
+    const int32_t concatAxis = 2;
+    const TestTensor in0{armnn::TensorShape{1,1,2,2},{0,  1,
+                                                      2,  3}};
+    const TestTensor in1{armnn::TensorShape{1,1,3,2},{4,  5,
+                                                      6,  7,
+                                                      8,  9}};
+    const TestTensor in2{armnn::TensorShape{1,1,1,2},{10, 11}};
+    // The dimensions before axis 2 are all 1, so the expected output is the inputs' data back to back.
+    const TestTensor merged{armnn::TensorShape{1,1,6,2},{0,  1,
+                                                         2,  3,
+                                                         4,  5,
+                                                         6,  7,
+                                                         8,  9,
+                                                         10, 11}};
+
+    MergerTestImpl({&in0, &in1, &in2}, concatAxis, merged);
+}
+
+BOOST_AUTO_TEST_CASE(SimpleConcatAxis2_DoInterleave)
+{
+    const int32_t concatAxis = 2;
+    const TestTensor in0{armnn::TensorShape{1,2,2,1},{0,  1,
+                                                      2,  3}};
+    const TestTensor in1{armnn::TensorShape{1,2,3,1},{4,  5,  6,
+                                                      7,  8,  9}};
+    const TestTensor in2{armnn::TensorShape{1,2,1,1},{10,
+                                                      11}};
+    // Dimension 1 is 2, so each half of the output holds a row of in0, in1 and in2 in turn.
+    const TestTensor merged{armnn::TensorShape{1,2,6,1},{0, 1, 4, 5, 6, 10,
+                                                         2, 3, 7, 8, 9, 11}};
+
+    MergerTestImpl({&in0, &in1, &in2}, concatAxis, merged);
+}
+
+BOOST_AUTO_TEST_CASE(SimpleConcatAxis3)
+{
+    const int32_t concatAxis = 3;
+    const TestTensor in0{armnn::TensorShape{1,1,1,1},{0}};
+    const TestTensor in1{armnn::TensorShape{1,1,1,1},{1}};
+    const TestTensor in2{armnn::TensorShape{1,1,1,1},{2}};
+    // Three single-element inputs joined on axis 3 give a 1x1x1x3 tensor.
+    const TestTensor merged{armnn::TensorShape{1,1,1,3},{0,1,2}};
+
+    MergerTestImpl({&in0, &in1, &in2}, concatAxis, merged);
+}
+
+BOOST_AUTO_TEST_CASE(SimpleConcatAxis3_DoInterleave)
+{
+    const int32_t concatAxis = 3;
+    const TestTensor in0{armnn::TensorShape{1,1,2,2},{0,  1,
+                                                      2,  3}};
+    const TestTensor in1{armnn::TensorShape{1,1,2,3},{4,  5,  6,
+                                                      7,  8,  9}};
+    const TestTensor in2{armnn::TensorShape{1,1,2,1},{10,
+                                                      11}};
+    // Dimension 2 is 2, so each output row holds a row of in0, in1 and in2 in turn.
+    const TestTensor merged{armnn::TensorShape{1,1,2,6},{0, 1, 4, 5, 6, 10,
+                                                         2, 3, 7, 8, 9, 11}};
+
+    MergerTestImpl({&in0, &in1, &in2}, concatAxis, merged);
+}
+
+BOOST_AUTO_TEST_CASE(AxisTooBig)
+{
+    const int32_t concatAxis = 4;
+    const TestTensor in0{armnn::TensorShape{1,1,1,1},{0}};
+    const TestTensor in1{armnn::TensorShape{1,1,1,1},{0}};
+
+    // Valid axes lie in [-rank(values), rank(values)); 4 is just past the end for these rank-4 inputs
+    // see: https://www.tensorflow.org/api_docs/python/tf/concat
+    const TestTensor placeholderOutput{armnn::TensorShape{1,1,1,1},{0}};
+    const ErrorStatus prepareFailure = ErrorStatus::GENERAL_FAILURE;
+    MergerTestImpl({&in0, &in1}, concatAxis, placeholderOutput, prepareFailure);
+}
+
+BOOST_AUTO_TEST_CASE(AxisTooSmall)
+{
+    const int32_t concatAxis = -5;
+    const TestTensor in0{armnn::TensorShape{1,1,1,1},{0}};
+    const TestTensor in1{armnn::TensorShape{1,1,1,1},{0}};
+
+    // Valid axes lie in [-rank(values), rank(values)); -5 is just below the start for these rank-4 inputs
+    // see: https://www.tensorflow.org/api_docs/python/tf/concat
+    const TestTensor placeholderOutput{armnn::TensorShape{1,1,1,1},{0}};
+    const ErrorStatus prepareFailure = ErrorStatus::GENERAL_FAILURE;
+    MergerTestImpl({&in0, &in1}, concatAxis, placeholderOutput, prepareFailure);
+}
+
+BOOST_AUTO_TEST_CASE(TooFewInputs)
+{
+    const int32_t concatAxis = 0;
+    const TestTensor soleInput{armnn::TensorShape{1,1,1,1},{0}};
+
+    // Concatenation needs at least two tensors, so preparing a one-input model should fail
+    const ErrorStatus prepareFailure = ErrorStatus::GENERAL_FAILURE;
+    MergerTestImpl({&soleInput}, concatAxis, soleInput, prepareFailure);
+}
+
+BOOST_AUTO_TEST_CASE(MismatchedInputDimensions)
+{
+    const int32_t concatAxis = 3;
+    const TestTensor in0{armnn::TensorShape{1,1,2,2},{0,  1,
+                                                      2,  3}};
+    const TestTensor in1{armnn::TensorShape{1,1,2,3},{4,  5,  6,
+                                                      7,  8,  9}};
+    const TestTensor oddInput{armnn::TensorShape{1,1,1,1},{10}};
+
+    const TestTensor merged{armnn::TensorShape{1,1,2,6},{0, 1, 4, 5, 6, 10,
+                                                         2, 3, 7, 8, 9, 11}};
+
+    // oddInput differs from the others on dimension 2, which is not the concat axis
+    const ErrorStatus prepareFailure = ErrorStatus::GENERAL_FAILURE;
+    MergerTestImpl({&in0, &in1, &oddInput}, concatAxis, merged, prepareFailure);
+}
+
+BOOST_AUTO_TEST_CASE(MismatchedInputRanks)
+{
+    const int32_t concatAxis = 2;
+    const TestTensor in0{armnn::TensorShape{1,1,2},{0,1}};
+    const TestTensor in1{armnn::TensorShape{1,1},{4}};
+    const TestTensor merged{armnn::TensorShape{1,1,3},{0,1,4}};
+
+    // The inputs have different ranks (3 and 2), so preparing the model should fail
+    const ErrorStatus prepareFailure = ErrorStatus::GENERAL_FAILURE;
+    MergerTestImpl({&in0, &in1}, concatAxis, merged, prepareFailure);
+}
+
+BOOST_AUTO_TEST_CASE(MismatchedOutputDimensions)
+{
+    const int32_t concatAxis = 3;
+    const TestTensor in0{armnn::TensorShape{1,1,2,2},{0,  1,
+                                                      2,  3}};
+    const TestTensor in1{armnn::TensorShape{1,1,2,3},{4,  5,  6,
+                                                      7,  8,  9}};
+    const TestTensor in2{armnn::TensorShape{1,1,2,1},{10,
+                                                      11}};
+
+    const TestTensor wrongShape{armnn::TensorShape{1,1,6,2},{0, 1, 4, 5, 6, 10,
+                                                             2, 3, 7, 8, 9, 11}};
+
+    // The declared output is 1x1x6x2, which does not match these inputs joined on axis 3
+    const ErrorStatus prepareFailure = ErrorStatus::GENERAL_FAILURE;
+    MergerTestImpl({&in0, &in1, &in2}, concatAxis, wrongShape, prepareFailure);
+}
+
+BOOST_AUTO_TEST_CASE(MismatchedOutputRank)
+{
+    const int32_t concatAxis = 3;
+    const TestTensor in0{armnn::TensorShape{1,1,2,2},{0,  1,
+                                                      2,  3}};
+    const TestTensor in1{armnn::TensorShape{1,1,2,3},{4,  5,  6,
+                                                      7,  8,  9}};
+    const TestTensor in2{armnn::TensorShape{1,1,2,1},{10,
+                                                      11}};
+
+    const TestTensor wrongRank{armnn::TensorShape{6,2},{0, 1, 4, 5, 6, 10,
+                                                        2, 3, 7, 8, 9, 11}};
+
+    // The declared output has rank 2 while the inputs have rank 4; the ranks must match
+    const ErrorStatus prepareFailure = ErrorStatus::GENERAL_FAILURE;
+    MergerTestImpl({&in0, &in1, &in2}, concatAxis, wrongRank, prepareFailure);
+}
+
+BOOST_AUTO_TEST_CASE(ValidNegativeAxis)
+{
+    // for these rank-4 inputs an axis of -1 is the same as 3
+    // see: https://www.tensorflow.org/api_docs/python/tf/concat
+    const int32_t concatAxis = -1;
+    const TestTensor in0{armnn::TensorShape{1,1,2,2},{0,  1,
+                                                      2,  3}};
+    const TestTensor in1{armnn::TensorShape{1,1,2,3},{4,  5,  6,
+                                                      7,  8,  9}};
+    const TestTensor in2{armnn::TensorShape{1,1,2,1},{10,
+                                                      11}};
+
+    const TestTensor merged{armnn::TensorShape{1,1,2,6},{0, 1, 4, 5, 6, 10,
+                                                         2, 3, 7, 8, 9, 11}};
+
+    MergerTestImpl({&in0, &in1, &in2}, concatAxis, merged);
+}
+
+BOOST_AUTO_TEST_SUITE_END()
diff --git a/test/SystemProperties.cpp b/test/SystemProperties.cpp
new file mode 100644
index 00000000..9bdf151e
--- /dev/null
+++ b/test/SystemProperties.cpp
@@ -0,0 +1,57 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+#include "DriverTestHelpers.hpp"
+#include <boost/test/unit_test.hpp>
+#include <log/log.h>
+#include "../SystemPropertiesUtils.hpp"
+
+BOOST_AUTO_TEST_SUITE(SystemProperiesTests)
+
+BOOST_AUTO_TEST_CASE(SystemProperties)
+{
+    // A property that was never set: the lookup finds nothing and parsing yields the default
+    {
+        const auto missingProp = __system_property_find("thisDoesNotExist");
+        BOOST_TEST((missingProp == nullptr));
+
+        const int fallbackInt = ParseSystemProperty("thisDoesNotExist", -4);
+        BOOST_TEST((fallbackInt == -4));
+    }
+
+    // A value that cannot be parsed as the requested type also yields the default
+    {
+        __system_property_set("thisIsNotFloat", "notfloat");
+        const float fallbackFloat = ParseSystemProperty("thisIsNotFloat", 0.1f);
+        BOOST_TEST((fallbackFloat == 0.1f));
+    }
+
+    // Bool properties: "1" reads back as true and "0" as false, whatever the default is
+    {
+        __system_property_set("myTestBool", "1");
+        const bool parsedTrue = ParseSystemProperty("myTestBool", false);
+        BOOST_TEST((parsedTrue == true));
+    }
+    {
+        __system_property_set("myTestBool", "0");
+        const bool parsedFalse = ParseSystemProperty("myTestBool", true);
+        BOOST_TEST((parsedFalse == false));
+    }
+
+    // Int property: the stored value wins over the default
+    {
+        __system_property_set("myTestInt", "567");
+        const int parsedInt = ParseSystemProperty("myTestInt", 890);
+        BOOST_TEST((parsedInt == 567));
+    }
+
+    // Float property: the stored value wins over the default
+    {
+        __system_property_set("myTestFloat", "1.2f");
+        const float parsedFloat = ParseSystemProperty("myTestFloat", 3.4f);
+        BOOST_TEST((parsedFloat == 1.2f));
+    }
+}
+
+BOOST_AUTO_TEST_SUITE_END()
diff --git a/test/TestTensor.cpp b/test/TestTensor.cpp
new file mode 100644
index 00000000..0766ef50
--- /dev/null
+++ b/test/TestTensor.cpp
@@ -0,0 +1,32 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+#include "TestTensor.hpp"
+
+namespace driverTestHelpers
+{
+// Returns the tensor's shape as a hidl_vec holding one entry per dimension.
+hidl_vec<uint32_t> TestTensor::GetDimensions() const
+{
+    hidl_vec<uint32_t> dims;
+    dims.resize(m_Shape.GetNumDimensions());
+    for (uint32_t axis = 0; axis < dims.size(); ++axis)
+    {
+        dims[axis] = m_Shape[axis];
+    }
+    return dims;
+}
+// Returns the number of elements reported by the tensor's shape.
+unsigned int TestTensor::GetNumElements() const
+{
+    return m_Shape.GetNumElements();
+}
+// Returns a read-only pointer to the first data value; asserts that the data is not empty.
+const float * TestTensor::GetData() const
+{
+    BOOST_ASSERT(!m_Data.empty());
+    return m_Data.data();
+}
+
+} // namespace driverTestHelpers
diff --git a/test/TestTensor.hpp b/test/TestTensor.hpp
new file mode 100644
index 00000000..974e7b93
--- /dev/null
+++ b/test/TestTensor.hpp
@@ -0,0 +1,32 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+#pragma once
+
+#include "../ArmnnDriver.hpp"
+
+namespace driverTestHelpers
+{
+// Pairs an armnn::TensorShape with float data so tests can describe input and expected output tensors.
+class TestTensor
+{
+public:
+    TestTensor(const armnn::TensorShape & shape,
+               const std::vector<float> & data)
+    : m_Shape{shape}
+    , m_Data{data}
+    {
+        BOOST_ASSERT(m_Shape.GetNumElements() == m_Data.size()); // one data value per shape element
+    }
+
+    hidl_vec<uint32_t> GetDimensions() const; // shape as a HIDL dimensions vector
+    unsigned int GetNumElements() const;      // element count of the shape
+    const float * GetData() const;            // pointer to the first data value
+
+private:
+    armnn::TensorShape   m_Shape; // copy of the shape passed to the constructor
+    std::vector<float>   m_Data;  // copy of the data passed to the constructor
+};
+
+} // driverTestHelpers
diff --git a/test/Tests.cpp b/test/Tests.cpp
index 0ab2908b..37aece7c 100644
--- a/test/Tests.cpp
+++ b/test/Tests.cpp
@@ -2,43 +2,18 @@
 // Copyright © 2017 Arm Ltd. All rights reserved.
 // See LICENSE file in the project root for full license information.
 //
-
 #define LOG_TAG "ArmnnDriverTests"
 #define BOOST_TEST_MODULE armnn_driver_tests
 #include <boost/test/unit_test.hpp>
 #include <log/log.h>
 
-#include "../ArmnnDriver.hpp"
-#include "../SystemPropertiesUtils.hpp"
-
-#include "OperationsUtils.h"
-
-#include <condition_variable>
-
-namespace android
-{
-namespace hardware
-{
-namespace neuralnetworks
-{
-namespace V1_0
-{
-
-std::ostream& operator<<(std::ostream& os, ErrorStatus stat)
-{
-   return os << static_cast<int>(stat);
-}
-
-}
-}
-}
-}
+#include "DriverTestHelpers.hpp"
 
 BOOST_AUTO_TEST_SUITE(DriverTests)
 
-using namespace armnn_driver;
-using namespace android::nn;
-using namespace android;
+using ArmnnDriver = armnn_driver::ArmnnDriver;
+using DriverOptions = armnn_driver::DriverOptions;
+using namespace driverTestHelpers;
 
 BOOST_AUTO_TEST_CASE(Init)
 {
@@ -73,904 +48,4 @@ BOOST_AUTO_TEST_CASE(TestCapabilities)
     BOOST_TEST(cap.quantized8Performance.powerUsage > 0.f);
 }
 
-BOOST_AUTO_TEST_CASE(SystemProperties)
-{
-    // Test default value
-    {
-        auto p = __system_property_find("thisDoesNotExist");
-        BOOST_TEST((p == nullptr));
-
-        int defaultValue = ParseSystemProperty("thisDoesNotExist", -4);
-        BOOST_TEST((defaultValue == -4));
-    }
-
-    //  Test default value from bad data type
-    {
-        __system_property_set("thisIsNotFloat", "notfloat");
-        float defaultValue = ParseSystemProperty("thisIsNotFloat", 0.1f);
-        BOOST_TEST((defaultValue == 0.1f));
-    }
-
-    // Test fetching bool values
-    {
-        __system_property_set("myTestBool", "1");
-        bool b = ParseSystemProperty("myTestBool", false);
-        BOOST_TEST((b == true));
-    }
-    {
-        __system_property_set("myTestBool", "0");
-        bool b = ParseSystemProperty("myTestBool", true);
-        BOOST_TEST((b == false));
-    }
-
-    // Test fetching int
-    {
-        __system_property_set("myTestInt", "567");
-        int i = ParseSystemProperty("myTestInt", 890);
-        BOOST_TEST((i==567));
-    }
-
-    // Test fetching float
-    {
-        __system_property_set("myTestFloat", "1.2f");
-        float f = ParseSystemProperty("myTestFloat", 3.4f);
-        BOOST_TEST((f==1.2f));
-    }
-}
-
-// The following are helpers for writing unit tests for the driver
-namespace
-{
-
-struct ExecutionCallback : public IExecutionCallback
-{
-    ExecutionCallback()
-        : mNotified(false)
-    {
-    }
-
-    Return<void> notify(ErrorStatus status) override
-    {
-        (void)status;
-        ALOGI("ExecutionCallback::notify invoked");
-        std::lock_guard<std::mutex> executionLock(mMutex);
-        mNotified = true;
-        mCondition.notify_one();
-        return Void();
-    }
-
-    /// wait until the callback has notified us that it is done
-    Return<void> wait()
-    {
-        ALOGI("ExecutionCallback::wait invoked");
-        std::unique_lock<std::mutex> executionLock(mMutex);
-        while (!mNotified)
-        {
-            mCondition.wait(executionLock);
-        }
-        mNotified = false;
-        return Void();
-    }
-
-private:
-    // use a mutex and a condition variable to wait for asynchronous callbacks
-    std::mutex mMutex;
-    std::condition_variable mCondition;
-    // and a flag, in case we are notified before the wait call
-    bool mNotified;
-};
-
-class PreparedModelCallback : public IPreparedModelCallback
-{
-public:
-    PreparedModelCallback()
-    {
-    }
-
-    ~PreparedModelCallback() override
-    {
-    }
-
-    Return<void> notify(ErrorStatus status, const sp<IPreparedModel>& preparedModel) override
-    {
-        m_ErrorStatus = status;
-        m_PreparedModel = preparedModel;
-        return Void();
-    }
-
-    ErrorStatus GetErrorStatus()
-    {
-        return m_ErrorStatus;
-    }
-
-    sp<IPreparedModel> GetPreparedModel()
-    {
-        return m_PreparedModel;
-    }
-
-
-private:
-    ErrorStatus        m_ErrorStatus;
-    sp<IPreparedModel> m_PreparedModel;
-};
-
-// lifted from common/Utils.cpp
-hidl_memory allocateSharedMemory(int64_t size)
-{
-    hidl_memory memory;
-
-    const std::string& type      = "ashmem";
-    android::sp<IAllocator>     allocator = IAllocator::getService(type);
-    allocator->allocate(size, [&](bool success, const hidl_memory& mem) {
-        if (!success)
-        {
-            ALOGE("unable to allocate %li bytes of %s", size, type.c_str());
-        }
-        else
-        {
-            memory = mem;
-        }
-    });
-
-    return memory;
-}
-
-
-android::sp<IMemory> AddPoolAndGetData(uint32_t size, Request& request)
-{
-    hidl_memory pool;
-
-    android::sp<IAllocator> allocator = IAllocator::getService("ashmem");
-    allocator->allocate(sizeof(float) * size, [&](bool success, const hidl_memory& mem) {
-        BOOST_TEST(success);
-        pool = mem;
-    });
-
-    request.pools.resize(request.pools.size() + 1);
-    request.pools[request.pools.size() - 1] = pool;
-
-    android::sp<IMemory> mapped = mapMemory(pool);
-    mapped->update();
-    return mapped;
-}
-
-void AddPoolAndSetData(uint32_t size, Request& request, float* data)
-{
-    android::sp<IMemory> memory = AddPoolAndGetData(size, request);
-
-    float* dst = static_cast<float*>(static_cast<void*>(memory->getPointer()));
-
-    memcpy(dst, data, size * sizeof(float));
-}
-
-void AddOperand(Model& model, const Operand& op)
-{
-    model.operands.resize(model.operands.size() + 1);
-    model.operands[model.operands.size() - 1] = op;
-}
-
-void AddIntOperand(Model& model, int32_t value)
-{
-    DataLocation location = {};
-    location.offset = model.operandValues.size();
-    location.length = sizeof(int32_t);
-
-    Operand op    = {};
-    op.type = OperandType::INT32;
-    op.dimensions = hidl_vec<uint32_t>{};
-    op.lifetime   = OperandLifeTime::CONSTANT_COPY;
-    op.location   = location;
-
-    model.operandValues.resize(model.operandValues.size() + location.length);
-    *reinterpret_cast<int32_t*>(&model.operandValues[location.offset]) = value;
-
-    AddOperand(model, op);
-}
-
-template<typename T>
-OperandType TypeToOperandType();
-
-template<>
-OperandType TypeToOperandType<float>()
-{
-    return OperandType::TENSOR_FLOAT32;
-};
-
-template<>
-OperandType TypeToOperandType<int32_t>()
-{
-    return OperandType::TENSOR_INT32;
-};
-
-
-
-template<typename T>
-void AddTensorOperand(Model& model, hidl_vec<uint32_t> dimensions, T* values)
-{
-    uint32_t totalElements = 1;
-    for (uint32_t dim : dimensions)
-    {
-        totalElements *= dim;
-    }
-
-    DataLocation location = {};
-    location.offset = model.operandValues.size();
-    location.length = totalElements * sizeof(T);
-
-    Operand op    = {};
-    op.type       = TypeToOperandType<T>();
-    op.dimensions = dimensions;
-    op.lifetime   = OperandLifeTime::CONSTANT_COPY;
-    op.location   = location;
-
-    model.operandValues.resize(model.operandValues.size() + location.length);
-    for (uint32_t i = 0; i < totalElements; i++)
-    {
-        *(reinterpret_cast<T*>(&model.operandValues[location.offset]) + i) = values[i];
-    }
-
-    AddOperand(model, op);
-}
-
-void AddInputOperand(Model& model, hidl_vec<uint32_t> dimensions)
-{
-    Operand op    = {};
-    op.type       = OperandType::TENSOR_FLOAT32;
-    op.dimensions = dimensions;
-    op.lifetime   = OperandLifeTime::MODEL_INPUT;
-
-    AddOperand(model, op);
-
-    model.inputIndexes.resize(model.inputIndexes.size() + 1);
-    model.inputIndexes[model.inputIndexes.size() - 1] = model.operands.size() - 1;
-}
-
-void AddOutputOperand(Model& model, hidl_vec<uint32_t> dimensions)
-{
-    Operand op = {};
-    op.type       = OperandType::TENSOR_FLOAT32;
-    op.dimensions = dimensions;
-    op.lifetime   = OperandLifeTime::MODEL_OUTPUT;
-
-    AddOperand(model, op);
-
-    model.outputIndexes.resize(model.outputIndexes.size() + 1);
-    model.outputIndexes[model.outputIndexes.size() - 1] = model.operands.size() - 1;
-}
-
-android::sp<IPreparedModel> PrepareModel(const Model& model, ArmnnDriver& driver)
-{
-
-    sp<PreparedModelCallback> cb(new PreparedModelCallback());
-    driver.prepareModel(model, cb);
-
-    BOOST_TEST((cb->GetErrorStatus() == ErrorStatus::NONE));
-    BOOST_TEST((cb->GetPreparedModel() != nullptr));
-
-    return cb->GetPreparedModel();
-}
-
-void Execute(android::sp<IPreparedModel> preparedModel, const Request& request)
-{
-    sp<ExecutionCallback> cb(new ExecutionCallback());
-    BOOST_TEST(preparedModel->execute(request, cb) == ErrorStatus::NONE);
-    ALOGI("Execute: waiting for callback to be invoked");
-    cb->wait();
-}
-
-sp<ExecutionCallback> ExecuteNoWait(android::sp<IPreparedModel> preparedModel, const Request& request)
-{
-    sp<ExecutionCallback> cb(new ExecutionCallback());
-    BOOST_TEST(preparedModel->execute(request, cb) == ErrorStatus::NONE);
-    ALOGI("ExecuteNoWait: returning callback object");
-    return cb;
-}
-}
-
-// Add our own test here since we fail the fc tests which Google supplies (because of non-const weights)
-BOOST_AUTO_TEST_CASE(FullyConnected)
-{
-    // this should ideally replicate fully_connected_float.model.cpp
-    // but that uses slightly weird dimensions which I don't think we need to support for now
-
-    auto driver = std::make_unique<ArmnnDriver>(DriverOptions(armnn::Compute::CpuRef));
-    Model model = {};
-
-    // add operands
-    int32_t actValue      = 0;
-    float   weightValue[] = {2, 4, 1};
-    float   biasValue[]   = {4};
-
-    AddInputOperand(model, hidl_vec<uint32_t>{1, 3});
-    AddTensorOperand(model, hidl_vec<uint32_t>{1, 3}, weightValue);
-    AddTensorOperand(model, hidl_vec<uint32_t>{1}, biasValue);
-    AddIntOperand(model, actValue);
-    AddOutputOperand(model, hidl_vec<uint32_t>{1, 1});
-
-    // make the fully connected operation
-    model.operations.resize(1);
-    model.operations[0].type = OperationType::FULLY_CONNECTED;
-    model.operations[0].inputs  = hidl_vec<uint32_t>{0, 1, 2, 3};
-    model.operations[0].outputs = hidl_vec<uint32_t>{4};
-
-    // make the prepared model
-    android::sp<IPreparedModel> preparedModel = PrepareModel(model, *driver);
-
-    // construct the request
-    DataLocation inloc = {};
-    inloc.poolIndex = 0;
-    inloc.offset    = 0;
-    inloc.length    = 3 * sizeof(float);
-    RequestArgument input = {};
-    input.location = inloc;
-    input.dimensions = hidl_vec<uint32_t>{};
-
-    DataLocation outloc = {};
-    outloc.poolIndex = 1;
-    outloc.offset    = 0;
-    outloc.length    = 1 * sizeof(float);
-    RequestArgument output = {};
-    output.location  = outloc;
-    output.dimensions = hidl_vec<uint32_t>{};
-
-    Request request = {};
-    request.inputs  = hidl_vec<RequestArgument>{input};
-    request.outputs = hidl_vec<RequestArgument>{output};
-
-    // set the input data (matching source test)
-    float indata[] = {2, 32, 16};
-    AddPoolAndSetData(3, request, indata);
-
-    // add memory for the output
-    android::sp<IMemory> outMemory = AddPoolAndGetData(1, request);
-    float* outdata = static_cast<float*>(static_cast<void*>(outMemory->getPointer()));
-
-    // run the execution
-    Execute(preparedModel, request);
-
-    // check the result
-    BOOST_TEST(outdata[0] == 152);
-}
-
-// Add our own test for concurrent execution
-// The main point of this test is to check that multiple requests can be
-// executed without waiting for the callback from previous execution.
-// The operations performed are not significant.
-BOOST_AUTO_TEST_CASE(ConcurrentExecute)
-{
-    ALOGI("ConcurrentExecute: entry");
-
-    auto driver = std::make_unique<ArmnnDriver>(DriverOptions(armnn::Compute::CpuRef));
-    Model model = {};
-
-    // add operands
-    int32_t actValue      = 0;
-    float   weightValue[] = {2, 4, 1};
-    float   biasValue[]   = {4};
-
-    AddInputOperand(model, hidl_vec<uint32_t>{1, 3});
-    AddTensorOperand(model, hidl_vec<uint32_t>{1, 3}, weightValue);
-    AddTensorOperand(model, hidl_vec<uint32_t>{1}, biasValue);
-    AddIntOperand(model, actValue);
-    AddOutputOperand(model, hidl_vec<uint32_t>{1, 1});
-
-    // make the fully connected operation
-    model.operations.resize(1);
-    model.operations[0].type = OperationType::FULLY_CONNECTED;
-    model.operations[0].inputs  = hidl_vec<uint32_t>{0, 1, 2, 3};
-    model.operations[0].outputs = hidl_vec<uint32_t>{4};
-
-    // make the prepared models
-    const size_t maxRequests = 5;
-    android::sp<IPreparedModel> preparedModels[maxRequests];
-    for (size_t i = 0; i < maxRequests; ++i)
-    {
-        preparedModels[i] = PrepareModel(model, *driver);
-    }
-
-    // construct the request data
-    DataLocation inloc = {};
-    inloc.poolIndex = 0;
-    inloc.offset    = 0;
-    inloc.length    = 3 * sizeof(float);
-    RequestArgument input = {};
-    input.location = inloc;
-    input.dimensions = hidl_vec<uint32_t>{};
-
-    DataLocation outloc = {};
-    outloc.poolIndex = 1;
-    outloc.offset    = 0;
-    outloc.length    = 1 * sizeof(float);
-    RequestArgument output = {};
-    output.location  = outloc;
-    output.dimensions = hidl_vec<uint32_t>{};
-
-    // build the requests
-    Request requests[maxRequests];
-    android::sp<IMemory> outMemory[maxRequests];
-    float* outdata[maxRequests];
-    for (size_t i = 0; i < maxRequests; ++i)
-    {
-        requests[i].inputs  = hidl_vec<RequestArgument>{input};
-        requests[i].outputs = hidl_vec<RequestArgument>{output};
-        // set the input data (matching source test)
-        float indata[] = {2, 32, 16};
-        AddPoolAndSetData(3, requests[i], indata);
-        // add memory for the output
-        outMemory[i] = AddPoolAndGetData(1, requests[i]);
-        outdata[i] = static_cast<float*>(static_cast<void*>(outMemory[i]->getPointer()));
-    }
-
-    // invoke the execution of the requests
-    ALOGI("ConcurrentExecute: executing requests");
-    sp<ExecutionCallback> cb[maxRequests];
-    for (size_t i = 0; i < maxRequests; ++i)
-    {
-        cb[i] = ExecuteNoWait(preparedModels[i], requests[i]);
-    }
-
-    // wait for the requests to complete
-    ALOGI("ConcurrentExecute: waiting for callbacks");
-    for (size_t i = 0; i < maxRequests; ++i)
-    {
-        cb[i]->wait();
-    }
-
-    // check the results
-    ALOGI("ConcurrentExecute: validating results");
-    for (size_t i = 0; i < maxRequests; ++i)
-    {
-        BOOST_TEST(outdata[i][0] == 152);
-    }
-    ALOGI("ConcurrentExecute: exit");
-}
-
-BOOST_AUTO_TEST_CASE(GetSupportedOperations)
-{
-    auto driver = std::make_unique<ArmnnDriver>(DriverOptions(armnn::Compute::CpuRef));
-
-    ErrorStatus error;
-    std::vector<bool> sup;
-
-    ArmnnDriver::getSupportedOperations_cb cb = [&](ErrorStatus status, const std::vector<bool>& supported)
-    {
-        error = status;
-        sup = supported;
-    };
-
-    Model model1 = {};
-
-    // add operands
-    int32_t actValue      = 0;
-    float   weightValue[] = {2, 4, 1};
-    float   biasValue[]   = {4};
-
-    AddInputOperand(model1, hidl_vec<uint32_t>{1, 3});
-    AddTensorOperand(model1, hidl_vec<uint32_t>{1, 3}, weightValue);
-    AddTensorOperand(model1, hidl_vec<uint32_t>{1}, biasValue);
-    AddIntOperand(model1, actValue);
-    AddOutputOperand(model1, hidl_vec<uint32_t>{1, 1});
-
-    // make a correct fully connected operation
-    model1.operations.resize(2);
-    model1.operations[0].type = OperationType::FULLY_CONNECTED;
-    model1.operations[0].inputs  = hidl_vec<uint32_t>{0, 1, 2, 3};
-    model1.operations[0].outputs = hidl_vec<uint32_t>{4};
-
-    // make an incorrect fully connected operation
-    AddIntOperand(model1, actValue);
-    AddOutputOperand(model1, hidl_vec<uint32_t>{1, 1});
-    model1.operations[1].type = OperationType::FULLY_CONNECTED;
-    model1.operations[1].inputs = hidl_vec<uint32_t>{4};
-    model1.operations[1].outputs = hidl_vec<uint32_t>{5};
-
-    driver->getSupportedOperations(model1, cb);
-    BOOST_TEST((int)error == (int)ErrorStatus::NONE);
-    BOOST_TEST(sup[0] == true);
-    BOOST_TEST(sup[1] == false);
-
-    // Broadcast add/mul are not supported
-    Model model2 = {};
-
-    AddInputOperand(model2, hidl_vec<uint32_t>{1, 1, 3, 4});
-    AddInputOperand(model2, hidl_vec<uint32_t>{4});
-    AddOutputOperand(model2, hidl_vec<uint32_t>{1, 1, 3, 4});
-    AddOutputOperand(model2, hidl_vec<uint32_t>{1, 1, 3, 4});
-
-    model2.operations.resize(2);
-
-    model2.operations[0].type = OperationType::ADD;
-    model2.operations[0].inputs = hidl_vec<uint32_t>{0,1};
-    model2.operations[0].outputs = hidl_vec<uint32_t>{2};
-
-    model2.operations[1].type = OperationType::MUL;
-    model2.operations[1].inputs = hidl_vec<uint32_t>{0,1};
-    model2.operations[1].outputs = hidl_vec<uint32_t>{3};
-
-    driver->getSupportedOperations(model2, cb);
-    BOOST_TEST((int)error == (int)ErrorStatus::NONE);
-    BOOST_TEST(sup[0] == false);
-    BOOST_TEST(sup[1] == false);
-
-    Model model3 = {};
-
-    // Add unsupported operation, should return no error but we don't support it
-    AddInputOperand(model3, hidl_vec<uint32_t>{1, 1, 1, 8});
-    AddIntOperand(model3, 2);
-    AddOutputOperand(model3, hidl_vec<uint32_t>{1, 2, 2, 2});
-    model3.operations.resize(1);
-    model3.operations[0].type = OperationType::DEPTH_TO_SPACE;
-    model1.operations[0].inputs = hidl_vec<uint32_t>{0, 1};
-    model3.operations[0].outputs = hidl_vec<uint32_t>{2};
-
-    driver->getSupportedOperations(model3, cb);
-    BOOST_TEST((int)error == (int)ErrorStatus::NONE);
-    BOOST_TEST(sup[0] == false);
-
-    // Add invalid operation
-    Model model4 = {};
-    AddIntOperand(model4, 0);
-    model4.operations.resize(1);
-    model4.operations[0].type = static_cast<OperationType>(100);
-    model4.operations[0].outputs = hidl_vec<uint32_t>{0};
-
-    driver->getSupportedOperations(model4, cb);
-    BOOST_TEST((int)error == (int)ErrorStatus::INVALID_ARGUMENT);
-}
-
-// The purpose of this test is to ensure that when encountering an unsupported operation
-//      it is skipped and getSupportedOperations() continues (rather than failing and stopping).
-//      As per IVGCVSW-710.
-BOOST_AUTO_TEST_CASE(UnsupportedLayerContinueOnFailure)
-{
-    auto driver = std::make_unique<ArmnnDriver>(DriverOptions(armnn::Compute::CpuRef));
-
-    ErrorStatus error;
-    std::vector<bool> sup;
-
-    ArmnnDriver::getSupportedOperations_cb cb = [&](ErrorStatus status, const std::vector<bool>& supported)
-    {
-        error = status;
-        sup = supported;
-    };
-
-    Model model = {};
-
-    // operands
-    int32_t actValue      = 0;
-    float   weightValue[] = {2, 4, 1};
-    float   biasValue[]   = {4};
-
-    // broadcast add is unsupported at the time of writing this test, but any unsupported layer will do
-    AddInputOperand(model, hidl_vec<uint32_t>{1, 1, 3, 4});
-    AddInputOperand(model, hidl_vec<uint32_t>{4});
-    AddOutputOperand(model, hidl_vec<uint32_t>{1, 1, 3, 4});
-
-    // fully connected
-    AddInputOperand(model, hidl_vec<uint32_t>{1, 3});
-    AddTensorOperand(model, hidl_vec<uint32_t>{1, 3}, weightValue);
-    AddTensorOperand(model, hidl_vec<uint32_t>{1}, biasValue);
-    AddIntOperand(model, actValue);
-    AddOutputOperand(model, hidl_vec<uint32_t>{1, 1});
-
-    // broadcast mul is unsupported
-    AddOutputOperand(model, hidl_vec<uint32_t>{1, 1, 3, 4});
-
-    model.operations.resize(3);
-
-    // unsupported
-    model.operations[0].type = OperationType::ADD;
-    model.operations[0].inputs = hidl_vec<uint32_t>{0,1};
-    model.operations[0].outputs = hidl_vec<uint32_t>{2};
-
-    // supported
-    model.operations[1].type = OperationType::FULLY_CONNECTED;
-    model.operations[1].inputs  = hidl_vec<uint32_t>{3, 4, 5, 6};
-    model.operations[1].outputs = hidl_vec<uint32_t>{7};
-
-    // unsupported
-    model.operations[2].type = OperationType::MUL;
-    model.operations[2].inputs = hidl_vec<uint32_t>{0,1};
-    model.operations[2].outputs = hidl_vec<uint32_t>{8};
-
-    // we are testing that the unsupported layers return false and the test continues
-    //      rather than failing and stopping.
-    driver->getSupportedOperations(model, cb);
-    BOOST_TEST((int)error == (int)ErrorStatus::NONE);
-    BOOST_TEST(sup[0] == false);
-    BOOST_TEST(sup[1] == true);
-    BOOST_TEST(sup[2] == false);
-}
-
-// The purpose of this test is to ensure that when encountering an failure
-//      during mem pool mapping we properly report an error to the framework via a callback
-BOOST_AUTO_TEST_CASE(ModelToINetworkConverterMemPoolFail)
-{
-    auto driver = std::make_unique<ArmnnDriver>(armnn::Compute::CpuRef);
-
-    ErrorStatus error;
-    std::vector<bool> sup;
-
-    ArmnnDriver::getSupportedOperations_cb cb = [&](ErrorStatus status, const std::vector<bool>& supported)
-    {
-        error = status;
-        sup = supported;
-    };
-
-    Model model = {};
-
-    model.pools = hidl_vec<hidl_memory>{hidl_memory("Unsuported hidl memory type", nullptr, 0)};
-
-    //memory pool mapping should fail, we should report an error
-    driver->getSupportedOperations(model, cb);
-    BOOST_TEST((int)error == (int)ErrorStatus::GENERAL_FAILURE);
-}
-
-namespace
-{
-
-void PaddingTestImpl(android::nn::PaddingScheme paddingScheme)
-{
-    auto driver = std::make_unique<ArmnnDriver>(DriverOptions(armnn::Compute::CpuRef));
-    Model model  = {};
-
-    uint32_t outSize = paddingScheme == kPaddingSame ? 2 : 1;
-
-    // add operands
-    float weightValue[] = {1, -1, 0, 1};
-    float biasValue[]   = {0};
-
-    AddInputOperand(model, hidl_vec<uint32_t>{1, 2, 3, 1});
-    AddTensorOperand(model, hidl_vec<uint32_t>{1, 2, 2, 1}, weightValue);
-    AddTensorOperand(model, hidl_vec<uint32_t>{1}, biasValue);
-    AddIntOperand(model, (int32_t)paddingScheme); // padding
-    AddIntOperand(model, 2); // stride x
-    AddIntOperand(model, 2); // stride y
-    AddIntOperand(model, 0); // no activation
-    AddOutputOperand(model, hidl_vec<uint32_t>{1, 1, outSize, 1});
-
-    // make the convolution operation
-    model.operations.resize(1);
-    model.operations[0].type = OperationType::CONV_2D;
-    model.operations[0].inputs  = hidl_vec<uint32_t>{0, 1, 2, 3, 4, 5, 6};
-    model.operations[0].outputs = hidl_vec<uint32_t>{7};
-
-    // make the prepared model
-    android::sp<IPreparedModel> preparedModel = PrepareModel(model, *driver);
-
-    // construct the request
-    DataLocation inloc    = {};
-    inloc.poolIndex       = 0;
-    inloc.offset          = 0;
-    inloc.length          = 6 * sizeof(float);
-    RequestArgument input = {};
-    input.location        = inloc;
-    input.dimensions      = hidl_vec<uint32_t>{};
-
-    DataLocation outloc    = {};
-    outloc.poolIndex       = 1;
-    outloc.offset          = 0;
-    outloc.length          = outSize * sizeof(float);
-    RequestArgument output = {};
-    output.location        = outloc;
-    output.dimensions      = hidl_vec<uint32_t>{};
-
-    Request request = {};
-    request.inputs  = hidl_vec<RequestArgument>{input};
-    request.outputs = hidl_vec<RequestArgument>{output};
-
-
-    // set the input data (matching source test)
-    float indata[] = {4, 1, 0, 3, -1, 2};
-    AddPoolAndSetData(6, request, indata);
-
-    // add memory for the output
-    android::sp<IMemory> outMemory = AddPoolAndGetData(outSize, request);
-    float*               outdata   = static_cast<float*>(static_cast<void*>(outMemory->getPointer()));
-
-    // run the execution
-    Execute(preparedModel, request);
-
-    // check the result
-    if (paddingScheme == kPaddingValid)
-    {
-        BOOST_TEST(outdata[0] == 2);
-    }
-    else if (paddingScheme == kPaddingSame)
-    {
-        BOOST_TEST(outdata[0] == 2);
-        BOOST_TEST(outdata[1] == 0);
-    }
-    else
-    {
-        BOOST_TEST(false);
-    }
-}
-
-}
-
-BOOST_AUTO_TEST_CASE(ConvValidPadding)
-{
-    PaddingTestImpl(kPaddingValid);
-}
-
-BOOST_AUTO_TEST_CASE(ConvSamePadding)
-{
-    PaddingTestImpl(kPaddingSame);
-}
-
-BOOST_AUTO_TEST_CASE(TestFullyConnected4dInput)
-{
-    auto driver = std::make_unique<ArmnnDriver>(DriverOptions(armnn::Compute::CpuRef));
-
-    ErrorStatus error;
-    std::vector<bool> sup;
-
-    ArmnnDriver::getSupportedOperations_cb cb = [&](ErrorStatus status, const std::vector<bool>& supported)
-        {
-            error = status;
-            sup = supported;
-        };
-
-    Model model = {};
-
-    // operands
-    int32_t actValue      = 0;
-    float   weightValue[] = {1, 0, 0, 0, 0, 0, 0, 0,
-                             0, 1, 0, 0, 0, 0, 0, 0,
-                             0, 0, 1, 0, 0, 0, 0, 0,
-                             0, 0, 0, 1, 0, 0, 0, 0,
-                             0, 0, 0, 0, 1, 0, 0, 0,
-                             0, 0, 0, 0, 0, 1, 0, 0,
-                             0, 0, 0, 0, 0, 0, 1, 0,
-                             0, 0, 0, 0, 0, 0, 0, 1}; //identity
-    float   biasValue[]   = {0, 0, 0, 0, 0, 0, 0, 0};
-
-    // fully connected operation
-    AddInputOperand(model, hidl_vec<uint32_t>{1, 1, 1, 8});
-    AddTensorOperand(model, hidl_vec<uint32_t>{8, 8}, weightValue);
-    AddTensorOperand(model, hidl_vec<uint32_t>{8}, biasValue);
-    AddIntOperand(model, actValue);
-    AddOutputOperand(model, hidl_vec<uint32_t>{1, 8});
-
-    model.operations.resize(1);
-
-    model.operations[0].type = OperationType::FULLY_CONNECTED;
-    model.operations[0].inputs  = hidl_vec<uint32_t>{0,1,2,3};
-    model.operations[0].outputs = hidl_vec<uint32_t>{4};
-
-    // make the prepared model
-    android::sp<IPreparedModel> preparedModel = PrepareModel(model, *driver);
-
-
-    // construct the request
-    DataLocation inloc = {};
-    inloc.poolIndex = 0;
-    inloc.offset    = 0;
-    inloc.length    = 8 * sizeof(float);
-    RequestArgument input = {};
-    input.location = inloc;
-    input.dimensions = hidl_vec<uint32_t>{};
-
-    DataLocation outloc = {};
-    outloc.poolIndex = 1;
-    outloc.offset    = 0;
-    outloc.length    = 8 * sizeof(float);
-    RequestArgument output = {};
-    output.location  = outloc;
-    output.dimensions = hidl_vec<uint32_t>{};
-
-    Request request = {};
-    request.inputs  = hidl_vec<RequestArgument>{input};
-    request.outputs = hidl_vec<RequestArgument>{output};
-
-    // set the input data
-    float indata[] = {1,2,3,4,5,6,7,8};
-    AddPoolAndSetData(8, request, indata);
-
-    // add memory for the output
-    android::sp<IMemory> outMemory = AddPoolAndGetData(8, request);
-    float* outdata = static_cast<float*>(static_cast<void*>(outMemory->getPointer()));
-
-    // run the execution
-    Execute(preparedModel, request);
-
-    // check the result
-    BOOST_TEST(outdata[0] == 1);
-    BOOST_TEST(outdata[1] == 2);
-    BOOST_TEST(outdata[2] == 3);
-    BOOST_TEST(outdata[3] == 4);
-    BOOST_TEST(outdata[4] == 5);
-    BOOST_TEST(outdata[5] == 6);
-    BOOST_TEST(outdata[6] == 7);
-    BOOST_TEST(outdata[7] == 8);
-}
-
-BOOST_AUTO_TEST_CASE(TestFullyConnected4dInputReshape)
-{
-    auto driver = std::make_unique<ArmnnDriver>(DriverOptions(armnn::Compute::CpuRef));
-
-    ErrorStatus error;
-    std::vector<bool> sup;
-
-    ArmnnDriver::getSupportedOperations_cb cb = [&](ErrorStatus status, const std::vector<bool>& supported)
-        {
-            error = status;
-            sup = supported;
-        };
-
-    Model model = {};
-
-    // operands
-    int32_t actValue      = 0;
-    float   weightValue[] = {1, 0, 0, 0, 0, 0, 0, 0,
-                             0, 1, 0, 0, 0, 0, 0, 0,
-                             0, 0, 1, 0, 0, 0, 0, 0,
-                             0, 0, 0, 1, 0, 0, 0, 0,
-                             0, 0, 0, 0, 1, 0, 0, 0,
-                             0, 0, 0, 0, 0, 1, 0, 0,
-                             0, 0, 0, 0, 0, 0, 1, 0,
-                             0, 0, 0, 0, 0, 0, 0, 1}; //identity
-    float   biasValue[]   = {0, 0, 0, 0, 0, 0, 0, 0};
-
-    // fully connected operation
-    AddInputOperand(model, hidl_vec<uint32_t>{1, 2, 2, 2});
-    AddTensorOperand(model, hidl_vec<uint32_t>{8, 8}, weightValue);
-    AddTensorOperand(model, hidl_vec<uint32_t>{8}, biasValue);
-    AddIntOperand(model, actValue);
-    AddOutputOperand(model, hidl_vec<uint32_t>{1, 8});
-
-    model.operations.resize(1);
-
-    model.operations[0].type = OperationType::FULLY_CONNECTED;
-    model.operations[0].inputs  = hidl_vec<uint32_t>{0,1,2,3};
-    model.operations[0].outputs = hidl_vec<uint32_t>{4};
-
-    // make the prepared model
-    android::sp<IPreparedModel> preparedModel = PrepareModel(model, *driver);
-
-
-    // construct the request
-    DataLocation inloc = {};
-    inloc.poolIndex = 0;
-    inloc.offset    = 0;
-    inloc.length    = 8 * sizeof(float);
-    RequestArgument input = {};
-    input.location = inloc;
-    input.dimensions = hidl_vec<uint32_t>{};
-
-    DataLocation outloc = {};
-    outloc.poolIndex = 1;
-    outloc.offset    = 0;
-    outloc.length    = 8 * sizeof(float);
-    RequestArgument output = {};
-    output.location  = outloc;
-    output.dimensions = hidl_vec<uint32_t>{};
-
-    Request request = {};
-    request.inputs  = hidl_vec<RequestArgument>{input};
-    request.outputs = hidl_vec<RequestArgument>{output};
-
-    // set the input data
-    float indata[] = {1,2,3,4,5,6,7,8};
-    AddPoolAndSetData(8, request, indata);
-
-    // add memory for the output
-    android::sp<IMemory> outMemory = AddPoolAndGetData(8, request);
-    float* outdata = static_cast<float*>(static_cast<void*>(outMemory->getPointer()));
-
-    // run the execution
-    Execute(preparedModel, request);
-
-    // check the result
-    BOOST_TEST(outdata[0] == 1);
-    BOOST_TEST(outdata[1] == 2);
-    BOOST_TEST(outdata[2] == 3);
-    BOOST_TEST(outdata[3] == 4);
-    BOOST_TEST(outdata[4] == 5);
-    BOOST_TEST(outdata[5] == 6);
-    BOOST_TEST(outdata[6] == 7);
-    BOOST_TEST(outdata[7] == 8);
-}
-
 BOOST_AUTO_TEST_SUITE_END()
diff --git a/test/UtilsTests.cpp b/test/UtilsTests.cpp
index 49941e56..b429920c 100644
--- a/test/UtilsTests.cpp
+++ b/test/UtilsTests.cpp
@@ -3,12 +3,10 @@
 // See LICENSE file in the project root for full license information.
 //
 
-#define LOG_TAG "ArmnnDriverUtilsTests"
-//#define BOOST_TEST_MODULE armnn_driver_utils_tests
+#include "DriverTestHelpers.hpp"
 #include <boost/test/unit_test.hpp>
 #include <log/log.h>
 
-#include "../ArmnnDriver.hpp"
 #include "../Utils.hpp"
 
 #include <fstream>
@@ -59,7 +57,7 @@ public:
         m_FileStream.close();
 
         // Ignore any error (such as file not found).
-        remove(m_FileName.c_str());
+        (void)remove(m_FileName.c_str());
     }
 
     bool FileExists()
-- 
cgit v1.2.1