aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--include/armnn/Descriptors.hpp3
-rwxr-xr-xsrc/backends/backendsCommon/test/LayerTests.cpp182
-rw-r--r--src/backends/backendsCommon/test/LayerTests.hpp30
-rw-r--r--src/backends/cl/ClLayerSupport.cpp13
-rw-r--r--src/backends/cl/ClLayerSupport.hpp5
-rw-r--r--src/backends/cl/ClWorkloadFactory.cpp2
-rw-r--r--src/backends/cl/backend.mk1
-rwxr-xr-xsrc/backends/cl/test/ClLayerTests.cpp17
-rw-r--r--src/backends/cl/workloads/CMakeLists.txt2
-rw-r--r--src/backends/cl/workloads/ClBatchToSpaceNdWorkload.cpp64
-rw-r--r--src/backends/cl/workloads/ClBatchToSpaceNdWorkload.hpp30
-rw-r--r--src/backends/cl/workloads/ClWorkloads.hpp1
-rw-r--r--src/backends/reference/test/RefLayerTests.cpp8
13 files changed, 355 insertions, 3 deletions
diff --git a/include/armnn/Descriptors.hpp b/include/armnn/Descriptors.hpp
index 22dd0d2991..c905eb2c97 100644
--- a/include/armnn/Descriptors.hpp
+++ b/include/armnn/Descriptors.hpp
@@ -303,7 +303,8 @@ struct BatchNormalizationDescriptor
struct BatchToSpaceNdDescriptor
{
BatchToSpaceNdDescriptor()
- : m_Crops({{0, 0}, {0, 0}})
+ : m_BlockShape({1, 1})
+ , m_Crops({{0, 0}, {0, 0}})
, m_DataLayout(DataLayout::NCHW)
{}
diff --git a/src/backends/backendsCommon/test/LayerTests.cpp b/src/backends/backendsCommon/test/LayerTests.cpp
index 648bee657d..8cb4e0998a 100755
--- a/src/backends/backendsCommon/test/LayerTests.cpp
+++ b/src/backends/backendsCommon/test/LayerTests.cpp
@@ -7281,6 +7281,60 @@ LayerTestResult<float, 4> BatchToSpaceNdNchwFloat32Test1(
crops, outputShape, expectedOutput);
}
+LayerTestResult<float, 4> BatchToSpaceNdNchwFloat32Test2(
+ armnn::IWorkloadFactory& workloadFactory,
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+ const unsigned int inputShape[] = {4, 1, 1, 1};
+ const unsigned int outputShape[] = {1, 1, 2, 2};
+
+ std::vector<float> input
+ ({
+ // Input is Batch (4) x Channel (1) x 1 x 1 — one value per batch
+ 1.0f, 2.0f, 3.0f, 4.0f
+ });
+
+ std::vector<float> expectedOutput({1.0f, 2.0f, 3.0f, 4.0f});
+
+ std::vector<unsigned int> blockShape({2, 2});
+ std::vector<std::pair<unsigned int, unsigned int>> crops = {{0, 0}, {0, 0}};
+
+ return BatchToSpaceNdHelper<float, 4, 4>(workloadFactory, memoryManager,
+ armnn::DataLayout::NCHW, inputShape, input, blockShape,
+ crops, outputShape, expectedOutput);
+}
+
+LayerTestResult<float, 4> BatchToSpaceNdNchwFloat32Test3(
+ armnn::IWorkloadFactory& workloadFactory,
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+ const unsigned int inputShape[] = {4, 3, 1, 1};
+ const unsigned int outputShape[] = {1, 3, 2, 2};
+
+ std::vector<float> input({ 1.0f, 3.0f, 5.0f, 7.0f, 9.0f, 11.0f, 2.0f, 4.0f, 6.0f, 8.0f, 10.0f, 12.0f });
+
+ std::vector<float> expectedOutput
+ ({
+ // Batch 0, Channel 0, Height (2) x Width (2)
+ 1.0f, 7.0f,
+ 2.0f, 8.0f,
+
+ // Batch 0, Channel 1, Height (2) x Width (2)
+ 3.0f, 9.0f,
+ 4.0f, 10.0f,
+
+ // Batch 0, Channel 2, Height (2) x Width (2)
+ 5.0f, 11.0f,
+ 6.0f, 12.0f,
+ });
+
+ std::vector<unsigned int> blockShape({2, 2});
+ std::vector<std::pair<unsigned int, unsigned int>> crops = {{0, 0}, {0, 0}};
+
+ return BatchToSpaceNdHelper<float, 4, 4>(workloadFactory, memoryManager,
+ armnn::DataLayout::NCHW, inputShape, input, blockShape,
+ crops, outputShape, expectedOutput);
+}
LayerTestResult<uint8_t, 4> BatchToSpaceNdNhwcUintTest1(
armnn::IWorkloadFactory& workloadFactory,
@@ -7424,3 +7478,131 @@ LayerTestResult<uint8_t, 2> StridedSlice2DReverseUint8Test(
{
return StridedSlice2DReverseTest<uint8_t>(workloadFactory, memoryManager);
}
+LayerTestResult<uint8_t, 4> BatchToSpaceNdNhwcUintTest2(
+ armnn::IWorkloadFactory& workloadFactory,
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+ const unsigned int inputShape[] = {4, 1, 1, 1};
+ const unsigned int outputShape[] = {1, 2, 2, 1};
+
+ std::vector<uint8_t> input
+ ({
+ // Input is Batch (4) x 1 x 1 x Channel (1) — one value per batch
+ 1, 2, 3, 4
+ });
+
+ std::vector<uint8_t> expectedOutput({1, 2, 3, 4});
+
+ std::vector<unsigned int> blockShape({2, 2});
+ std::vector<std::pair<unsigned int, unsigned int>> crops = {{0, 0}, {0, 0}};
+
+ return BatchToSpaceNdHelper<uint8_t, 4, 4>(workloadFactory, memoryManager,
+ armnn::DataLayout::NHWC, inputShape, input, blockShape,
+ crops, outputShape, expectedOutput);
+}
+
+LayerTestResult<uint8_t, 4> BatchToSpaceNdNhwcUintTest3(
+ armnn::IWorkloadFactory& workloadFactory,
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+ const unsigned int inputShape[] = {4, 1, 1, 3};
+ const unsigned int outputShape[] = {1, 2, 2, 3};
+
+ std::vector<uint8_t> input({ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12 });
+
+ std::vector<uint8_t> expectedOutput({ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12 });
+
+ std::vector<unsigned int> blockShape({2, 2});
+ std::vector<std::pair<unsigned int, unsigned int>> crops = {{0, 0}, {0, 0}};
+
+ return BatchToSpaceNdHelper<uint8_t, 4, 4>(workloadFactory, memoryManager,
+ armnn::DataLayout::NHWC, inputShape, input, blockShape,
+ crops, outputShape, expectedOutput);
+}
+
+
+LayerTestResult<uint8_t, 4> BatchToSpaceNdNchwUintTest1(
+ armnn::IWorkloadFactory &workloadFactory,
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+ const unsigned int inputShape[] = {4, 3, 1, 1};
+ const unsigned int outputShape[] = {1, 3, 2, 2};
+
+ std::vector<uint8_t> input({ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12 });
+
+ std::vector<uint8_t> expectedOutput
+ ({
+ // Batch 0, Channel 0, Height (2) x Width (2)
+ 1, 4,
+ 7, 10,
+
+ // Batch 0, Channel 1, Height (2) x Width (2)
+ 2, 5,
+ 8, 11,
+
+ // Batch 0, Channel 2, Height (2) x Width (2)
+ 3, 6,
+ 9, 12,
+ });
+
+ std::vector<unsigned int> blockShape({2, 2});
+ std::vector<std::pair<unsigned int, unsigned int>> crops = {{0, 0}, {0, 0}};
+
+ return BatchToSpaceNdHelper<uint8_t, 4, 4>(workloadFactory, memoryManager,
+ armnn::DataLayout::NCHW, inputShape, input, blockShape,
+ crops, outputShape, expectedOutput);
+}
+
+LayerTestResult<uint8_t, 4> BatchToSpaceNdNchwUintTest2(
+ armnn::IWorkloadFactory& workloadFactory,
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+ const unsigned int inputShape[] = {4, 1, 1, 1};
+ const unsigned int outputShape[] = {1, 1, 2, 2};
+
+ std::vector<uint8_t> input
+ ({
+ // Input is Batch (4) x Channel (1) x 1 x 1 — one value per batch
+ 1, 2, 3, 4
+ });
+
+ std::vector<uint8_t> expectedOutput({1, 2, 3, 4});
+
+ std::vector<unsigned int> blockShape({2, 2});
+ std::vector<std::pair<unsigned int, unsigned int>> crops = {{0, 0}, {0, 0}};
+
+ return BatchToSpaceNdHelper<uint8_t, 4, 4>(workloadFactory, memoryManager,
+ armnn::DataLayout::NCHW, inputShape, input, blockShape,
+ crops, outputShape, expectedOutput);
+}
+
+LayerTestResult<uint8_t, 4> BatchToSpaceNdNchwUintTest3(
+ armnn::IWorkloadFactory& workloadFactory,
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+ const unsigned int inputShape[] = {4, 3, 1, 1};
+ const unsigned int outputShape[] = {1, 3, 2, 2};
+
+ std::vector<uint8_t> input({ 1, 3, 5, 7, 9, 11, 2, 4, 6, 8, 10, 12 });
+
+ std::vector<uint8_t> expectedOutput
+ ({
+ // Batch 0, Channel 0, Height (2) x Width (2)
+ 1, 7,
+ 2, 8,
+
+ // Batch 0, Channel 1, Height (2) x Width (2)
+ 3, 9,
+ 4, 10,
+
+ // Batch 0, Channel 2, Height (2) x Width (2)
+ 5, 11,
+ 6, 12,
+ });
+ std::vector<unsigned int> blockShape({2, 2});
+ std::vector<std::pair<unsigned int, unsigned int>> crops = {{0, 0}, {0, 0}};
+
+ return BatchToSpaceNdHelper<uint8_t, 4, 4>(workloadFactory, memoryManager,
+ armnn::DataLayout::NCHW, inputShape, input, blockShape,
+ crops, outputShape, expectedOutput);
+} \ No newline at end of file
diff --git a/src/backends/backendsCommon/test/LayerTests.hpp b/src/backends/backendsCommon/test/LayerTests.hpp
index f851b36712..bfb9e04ad0 100644
--- a/src/backends/backendsCommon/test/LayerTests.hpp
+++ b/src/backends/backendsCommon/test/LayerTests.hpp
@@ -1068,14 +1068,42 @@ LayerTestResult<float, 4> BatchToSpaceNdNchwFloat32Test1(
armnn::IWorkloadFactory &workloadFactory,
const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+LayerTestResult<float, 4> BatchToSpaceNdNchwFloat32Test2(
+ armnn::IWorkloadFactory &workloadFactory,
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<float, 4> BatchToSpaceNdNchwFloat32Test3(
+ armnn::IWorkloadFactory &workloadFactory,
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
LayerTestResult<uint8_t, 4> BatchToSpaceNdNhwcUintTest1(
armnn::IWorkloadFactory &workloadFactory,
const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+LayerTestResult<uint8_t, 4> BatchToSpaceNdNhwcUintTest2(
+ armnn::IWorkloadFactory &workloadFactory,
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<uint8_t, 4> BatchToSpaceNdNhwcUintTest3(
+ armnn::IWorkloadFactory &workloadFactory,
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<uint8_t, 4> BatchToSpaceNdNchwUintTest1(
+ armnn::IWorkloadFactory &workloadFactory,
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<uint8_t, 4> BatchToSpaceNdNchwUintTest2(
+ armnn::IWorkloadFactory &workloadFactory,
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<uint8_t, 4> BatchToSpaceNdNchwUintTest3(
+ armnn::IWorkloadFactory &workloadFactory,
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
LayerTestResult<float, 4> StridedSlice4DFloat32Test(
armnn::IWorkloadFactory& workloadFactory,
const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
+
LayerTestResult<float, 4> StridedSlice4DReverseFloat32Test(
armnn::IWorkloadFactory& workloadFactory,
const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
diff --git a/src/backends/cl/ClLayerSupport.cpp b/src/backends/cl/ClLayerSupport.cpp
index 218cc9b3d9..a877a5f25f 100644
--- a/src/backends/cl/ClLayerSupport.cpp
+++ b/src/backends/cl/ClLayerSupport.cpp
@@ -18,6 +18,7 @@
#include "workloads/ClAdditionWorkload.hpp"
#include "workloads/ClActivationWorkload.hpp"
#include "workloads/ClBatchNormalizationFloatWorkload.hpp"
+#include "workloads/ClBatchToSpaceNdWorkload.hpp"
#include "workloads/ClConvertFp16ToFp32Workload.hpp"
#include "workloads/ClConvertFp32ToFp16Workload.hpp"
#include "workloads/ClConvolution2dWorkload.hpp"
@@ -165,6 +166,18 @@ bool ClLayerSupport::IsBatchNormalizationSupported(const TensorInfo& input,
descriptor);
}
+bool ClLayerSupport::IsBatchToSpaceNdSupported(const TensorInfo& input,
+ const TensorInfo& output,
+ const BatchToSpaceNdDescriptor& descriptor,
+ Optional<std::string&> reasonIfUnsupported) const
+{
+ FORWARD_WORKLOAD_VALIDATE_FUNC(ClBatchToSpaceNdWorkloadValidate,
+ reasonIfUnsupported,
+ input,
+ output,
+ descriptor);
+}
+
bool ClLayerSupport::IsConstantSupported(const TensorInfo& output,
Optional<std::string&> reasonIfUnsupported) const
{
diff --git a/src/backends/cl/ClLayerSupport.hpp b/src/backends/cl/ClLayerSupport.hpp
index b4d278995d..cd1b72286a 100644
--- a/src/backends/cl/ClLayerSupport.hpp
+++ b/src/backends/cl/ClLayerSupport.hpp
@@ -31,6 +31,11 @@ public:
const BatchNormalizationDescriptor& descriptor,
Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override;
+ bool IsBatchToSpaceNdSupported(const TensorInfo& input,
+ const TensorInfo& output,
+ const BatchToSpaceNdDescriptor& descriptor,
+ Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override;
+
bool IsConstantSupported(const TensorInfo& output,
Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override;
diff --git a/src/backends/cl/ClWorkloadFactory.cpp b/src/backends/cl/ClWorkloadFactory.cpp
index 6199648d84..3c2e2875b4 100644
--- a/src/backends/cl/ClWorkloadFactory.cpp
+++ b/src/backends/cl/ClWorkloadFactory.cpp
@@ -311,7 +311,7 @@ std::unique_ptr<IWorkload> ClWorkloadFactory::CreatePad(const PadQueueDescriptor
std::unique_ptr<IWorkload> ClWorkloadFactory::CreateBatchToSpaceNd(const BatchToSpaceNdQueueDescriptor& descriptor,
const WorkloadInfo& info) const
{
- return MakeWorkload<NullWorkload, NullWorkload>(descriptor, info);
+ return MakeWorkload<ClBatchToSpaceNdWorkload>(descriptor, info);
}
std::unique_ptr<IWorkload> ClWorkloadFactory::CreateStridedSlice(const StridedSliceQueueDescriptor& descriptor,
diff --git a/src/backends/cl/backend.mk b/src/backends/cl/backend.mk
index 551e1a0e4b..f0019790b0 100644
--- a/src/backends/cl/backend.mk
+++ b/src/backends/cl/backend.mk
@@ -17,6 +17,7 @@ BACKEND_SOURCES := \
workloads/ClActivationWorkload.cpp \
workloads/ClAdditionWorkload.cpp \
workloads/ClBatchNormalizationFloatWorkload.cpp \
+ workloads/ClBatchToSpaceNdWorkload.cpp \
workloads/ClConstantWorkload.cpp \
workloads/ClConvertFp16ToFp32Workload.cpp \
workloads/ClConvertFp32ToFp16Workload.cpp \
diff --git a/src/backends/cl/test/ClLayerTests.cpp b/src/backends/cl/test/ClLayerTests.cpp
index b9b26aca2d..862648e4cc 100755
--- a/src/backends/cl/test/ClLayerTests.cpp
+++ b/src/backends/cl/test/ClLayerTests.cpp
@@ -42,6 +42,23 @@ ARMNN_AUTO_TEST_CASE(SimpleSoftmaxBeta2Uint8, SimpleSoftmaxUint8Test, 2.0f)
ARMNN_AUTO_TEST_CASE(ReLu1Uint8, BoundedReLuUint8UpperAndLowerBoundTest)
ARMNN_AUTO_TEST_CASE(ReLu6Uint8, BoundedReLuUint8UpperBoundOnlyTest)
+// Batch To Space
+ARMNN_AUTO_TEST_CASE(BatchToSpaceNdNhwcFloat321, BatchToSpaceNdNhwcFloat32Test1)
+ARMNN_AUTO_TEST_CASE(BatchToSpaceNdNhwcFloat322, BatchToSpaceNdNhwcFloat32Test2)
+ARMNN_AUTO_TEST_CASE(BatchToSpaceNdNhwcFloat323, BatchToSpaceNdNhwcFloat32Test3)
+
+ARMNN_AUTO_TEST_CASE(BatchToSpaceNdNchwFloat321, BatchToSpaceNdNchwFloat32Test1)
+ARMNN_AUTO_TEST_CASE(BatchToSpaceNdNchwFloat322, BatchToSpaceNdNchwFloat32Test2)
+ARMNN_AUTO_TEST_CASE(BatchToSpaceNdNchwFloat323, BatchToSpaceNdNchwFloat32Test3)
+
+ARMNN_AUTO_TEST_CASE(BatchToSpaceNdNhwcUint1, BatchToSpaceNdNhwcUintTest1)
+ARMNN_AUTO_TEST_CASE(BatchToSpaceNdNhwcUint2, BatchToSpaceNdNhwcUintTest2)
+ARMNN_AUTO_TEST_CASE(BatchToSpaceNdNhwcUint3, BatchToSpaceNdNhwcUintTest3)
+
+ARMNN_AUTO_TEST_CASE(BatchToSpaceNdNchwUint1, BatchToSpaceNdNchwUintTest1)
+ARMNN_AUTO_TEST_CASE(BatchToSpaceNdNchwUint2, BatchToSpaceNdNchwUintTest2)
+ARMNN_AUTO_TEST_CASE(BatchToSpaceNdNchwUint3, BatchToSpaceNdNchwUintTest3)
+
// Fully Connected
ARMNN_AUTO_TEST_CASE(SimpleFullyConnected, FullyConnectedFloat32Test, false, false)
ARMNN_AUTO_TEST_CASE(SimpleFullyConnectedWithBias, FullyConnectedFloat32Test, true, false)
diff --git a/src/backends/cl/workloads/CMakeLists.txt b/src/backends/cl/workloads/CMakeLists.txt
index 736cf5c4e5..b7571f6895 100644
--- a/src/backends/cl/workloads/CMakeLists.txt
+++ b/src/backends/cl/workloads/CMakeLists.txt
@@ -10,6 +10,8 @@ list(APPEND armnnClBackendWorkloads_sources
ClAdditionWorkload.hpp
ClBatchNormalizationFloatWorkload.cpp
ClBatchNormalizationFloatWorkload.hpp
+ ClBatchToSpaceNdWorkload.cpp
+ ClBatchToSpaceNdWorkload.hpp
ClConstantWorkload.cpp
ClConstantWorkload.hpp
ClConvertFp16ToFp32Workload.cpp
diff --git a/src/backends/cl/workloads/ClBatchToSpaceNdWorkload.cpp b/src/backends/cl/workloads/ClBatchToSpaceNdWorkload.cpp
new file mode 100644
index 0000000000..a714e031e4
--- /dev/null
+++ b/src/backends/cl/workloads/ClBatchToSpaceNdWorkload.cpp
@@ -0,0 +1,64 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "ClBatchToSpaceNdWorkload.hpp"
+
+#include <cl/ClTensorHandle.hpp>
+#include <backendsCommon/CpuTensorHandle.hpp>
+#include <aclCommon/ArmComputeTensorUtils.hpp>
+
+#include "ClWorkloadUtils.hpp"
+
+namespace armnn
+{
+using namespace armcomputetensorutils;
+
+ClBatchToSpaceNdWorkload::ClBatchToSpaceNdWorkload(const BatchToSpaceNdQueueDescriptor& desc,
+ const WorkloadInfo& info)
+ : BaseWorkload<BatchToSpaceNdQueueDescriptor>(desc, info)
+{
+ m_Data.ValidateInputsOutputs("ClBatchToSpaceNdWorkload", 1, 1);
+
+ arm_compute::DataLayout aclDataLayout = ConvertDataLayout(m_Data.m_Parameters.m_DataLayout);
+
+ arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+ input.info()->set_data_layout(aclDataLayout);
+
+ // ArmNN blockShape is ordered [H, W], but the CL layer expects (W, H)
+ int32_t blockHeight = boost::numeric_cast<int32_t>(desc.m_Parameters.m_BlockShape[0]);
+ int32_t blockWidth = boost::numeric_cast<int32_t>(desc.m_Parameters.m_BlockShape[1]);
+
+ arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+ output.info()->set_data_layout(aclDataLayout);
+
+ m_Layer.configure(&input, blockWidth, blockHeight, &output);
+}
+
+void ClBatchToSpaceNdWorkload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT_CL("ClBatchToSpaceNdWorkload_Execute");
+ RunClFunction(m_Layer, CHECK_LOCATION());
+}
+
+arm_compute::Status ClBatchToSpaceNdWorkloadValidate(const TensorInfo& input,
+ const TensorInfo& output,
+ const BatchToSpaceNdDescriptor& desc) {
+ DataLayout dataLayout = desc.m_DataLayout;
+ const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, dataLayout);
+
+ // ArmNN blockShape is ordered [H, W], but the CL layer expects (W, H)
+ int32_t blockHeight = boost::numeric_cast<int32_t>(desc.m_BlockShape[0]);
+ int32_t blockWidth = boost::numeric_cast<int32_t>(desc.m_BlockShape[1]);
+
+ const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, dataLayout);
+
+ const arm_compute::Status aclStatus = arm_compute::CLBatchToSpaceLayer::validate(&aclInputInfo,
+ blockWidth,
+ blockHeight,
+ &aclOutputInfo);
+ return aclStatus;
+}
+
+} //namespace armnn
diff --git a/src/backends/cl/workloads/ClBatchToSpaceNdWorkload.hpp b/src/backends/cl/workloads/ClBatchToSpaceNdWorkload.hpp
new file mode 100644
index 0000000000..4db84a2787
--- /dev/null
+++ b/src/backends/cl/workloads/ClBatchToSpaceNdWorkload.hpp
@@ -0,0 +1,30 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <backendsCommon/Workload.hpp>
+#include <arm_compute/runtime/CL/CLFunctions.h>
+
+namespace armnn
+{
+
+arm_compute::Status ClBatchToSpaceNdWorkloadValidate(const TensorInfo& input,
+ const TensorInfo& output,
+ const BatchToSpaceNdDescriptor& desc);
+
+class ClBatchToSpaceNdWorkload : public BaseWorkload<BatchToSpaceNdQueueDescriptor>
+{
+public:
+ ClBatchToSpaceNdWorkload(const BatchToSpaceNdQueueDescriptor& descriptor, const WorkloadInfo& info);
+
+ void Execute() const override;
+
+private:
+
+ mutable arm_compute::CLBatchToSpaceLayer m_Layer;
+};
+
+} //namespace armnn
diff --git a/src/backends/cl/workloads/ClWorkloads.hpp b/src/backends/cl/workloads/ClWorkloads.hpp
index eeca40364c..39ecc53dc3 100644
--- a/src/backends/cl/workloads/ClWorkloads.hpp
+++ b/src/backends/cl/workloads/ClWorkloads.hpp
@@ -8,6 +8,7 @@
#include "ClAdditionWorkload.hpp"
#include "ClConstantWorkload.hpp"
#include "ClBatchNormalizationFloatWorkload.hpp"
+#include "ClBatchToSpaceNdWorkload.hpp"
#include "ClConvolution2dWorkload.hpp"
#include "ClDepthwiseConvolutionWorkload.hpp"
#include "ClDivisionFloatWorkload.hpp"
diff --git a/src/backends/reference/test/RefLayerTests.cpp b/src/backends/reference/test/RefLayerTests.cpp
index b16849929e..df52d1df12 100644
--- a/src/backends/reference/test/RefLayerTests.cpp
+++ b/src/backends/reference/test/RefLayerTests.cpp
@@ -409,8 +409,16 @@ ARMNN_AUTO_TEST_CASE(BatchToSpaceNdNhwcFloat322, BatchToSpaceNdNhwcFloat32Test2)
ARMNN_AUTO_TEST_CASE(BatchToSpaceNdNhwcFloat323, BatchToSpaceNdNhwcFloat32Test3)
ARMNN_AUTO_TEST_CASE(BatchToSpaceNdNchwFloat321, BatchToSpaceNdNchwFloat32Test1)
+ARMNN_AUTO_TEST_CASE(BatchToSpaceNdNchwFloat322, BatchToSpaceNdNchwFloat32Test2)
+ARMNN_AUTO_TEST_CASE(BatchToSpaceNdNchwFloat323, BatchToSpaceNdNchwFloat32Test3)
ARMNN_AUTO_TEST_CASE(BatchToSpaceNdNhwcUint1, BatchToSpaceNdNhwcUintTest1)
+ARMNN_AUTO_TEST_CASE(BatchToSpaceNdNhwcUint2, BatchToSpaceNdNhwcUintTest2)
+ARMNN_AUTO_TEST_CASE(BatchToSpaceNdNhwcUint3, BatchToSpaceNdNhwcUintTest3)
+
+ARMNN_AUTO_TEST_CASE(BatchToSpaceNdNchwUint1, BatchToSpaceNdNchwUintTest1)
+ARMNN_AUTO_TEST_CASE(BatchToSpaceNdNchwUint2, BatchToSpaceNdNchwUintTest2)
+ARMNN_AUTO_TEST_CASE(BatchToSpaceNdNchwUint3, BatchToSpaceNdNchwUintTest3)
// Strided Slice
ARMNN_AUTO_TEST_CASE(StridedSlice4DFloat32, StridedSlice4DFloat32Test)