From 1710133b05a9ed1fcc1cc68624c2ce0e09eae495 Mon Sep 17 00:00:00 2001
From: Michele Di Giorgio <michele.digiorgio@arm.com>
Date: Mon, 1 Jun 2020 12:07:50 +0100
Subject: COMPMID-2395: Add support for U8 datatype to CropResize on NEON

Change-Id: Ia356b3545e01ccc98056951f3a20ed2712240dac
Signed-off-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3283
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Sang-Hoon Park <sang-hoon.park@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
---
 arm_compute/core/NEON/kernels/NECropKernel.h      |  4 ++--
 arm_compute/runtime/NEON/functions/NECropResize.h |  7 +++---
 src/core/NEON/kernels/NECropKernel.cpp            | 11 ++++++++-
 tests/validation/NEON/CropResize.cpp              | 27 +++++++++++++++--------
 tests/validation/reference/CropResize.cpp         |  4 +++-
 5 files changed, 36 insertions(+), 17 deletions(-)
diff --git a/arm_compute/core/NEON/kernels/NECropKernel.h b/arm_compute/core/NEON/kernels/NECropKernel.h
index 557a7a8ff3..ba58ab1e58 100644
--- a/arm_compute/core/NEON/kernels/NECropKernel.h
+++ b/arm_compute/core/NEON/kernels/NECropKernel.h
@@ -58,7 +58,7 @@ public:
      * @note Supported tensor rank: up to 4
      * @note Padding not supported.
      *
-     * @param[in]  input               Source tensor. Data type supported: U16/S16/U32/S32/F16/F32. Data layouts supported: NHWC.
+     * @param[in]  input               Source tensor. Data type supported: U8/U16/S16/U32/S32/F16/F32. Data layouts supported: NHWC.
      * @param[in]  crop_boxes          Tensor containing all possible boxes used to crop the image, each represented by 4 normalized values.
      *                                 Data type supported: F32
      * @param[in]  box_ind             One dimensional tensor mapping the @p crop_box_ind to the index of the 3D image in @p input.
@@ -74,7 +74,7 @@ public:
      * @note Supported tensor rank: up to 4
      * @note Padding not supported.
      *
-     * @param[in] input               Source tensor info. Data type supported: U16/S16/U32/S32/F16/F32. Data layouts supported: NHWC.
+     * @param[in] input               Source tensor info. Data type supported: U8/U16/S16/U32/S32/F16/F32. Data layouts supported: NHWC.
      * @param[in] crop_boxes          Tensor info for tensor containing all possible boxes used to crop the image. Data type supported: F32
      * @param[in] box_ind             Tensor info for the one dimensional tensor mapping the @p crop_box_ind to the index of the 3D image
      *                                in @p input. Data type supported: F32
diff --git a/arm_compute/runtime/NEON/functions/NECropResize.h b/arm_compute/runtime/NEON/functions/NECropResize.h
index 9f961a13e6..1c15beded4 100644
--- a/arm_compute/runtime/NEON/functions/NECropResize.h
+++ b/arm_compute/runtime/NEON/functions/NECropResize.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019 ARM Limited.
+ * Copyright (c) 2019-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -27,7 +27,6 @@
 #include "arm_compute/core/NEON/kernels/NECropKernel.h"
 #include "arm_compute/runtime/NEON/functions/NEScale.h"
 
-#include <cstdint>
 #include <memory>
 
 namespace arm_compute
@@ -58,7 +57,7 @@ public:
      * @note Box indices may be outside of the bounds, in which case @p extrapolation_value is used.
      * @note Start and end indices of boxes are inclusive.
      *
-     * @param[in]  input               Source tensor containing N batches of 3D images to be cropped. Data type supported: U16/S16/U32/S32/F16/F32
+     * @param[in]  input               Source tensor containing N batches of 3D images to be cropped. Data type supported: U8/U16/S16/U32/S32/F16/F32
      * @param[in]  boxes               Tensor containing the boxes used to crop the images. Data type supported: F32
      * @param[in]  box_ind             One dimensional tensor containing the batch index of the 3D image in @p input that the corresponding
      *                                 box in @p boxes will be applied to. Data type supported: F32
@@ -76,7 +75,7 @@ public:
      * @note Box indices may be outside of the bounds, in which case @p extrapolation_value is used.
      * @note Start and end indices of boxes are inclusive.
      *
-     * @param[in] input               Source tensor containing N batches of 3D images to be cropped. Data type supported: U16/S16/U32/S32/F16/F32
+     * @param[in] input               Source tensor containing N batches of 3D images to be cropped. Data type supported: U8/U16/S16/U32/S32/F16/F32
      * @param[in] boxes               Tensor info for the tensor containing the boxes used to crop the images. Data type supported: F32
      * @param[in] box_ind             Tensor info for the one dimensional tensor containing the batch index of the 3D image in @p input
      *                                that the corresponding box in @p boxes will be applied to. Data type supported: F32
diff --git a/src/core/NEON/kernels/NECropKernel.cpp b/src/core/NEON/kernels/NECropKernel.cpp
index 4257611f0e..21a8436289 100644
--- a/src/core/NEON/kernels/NECropKernel.cpp
+++ b/src/core/NEON/kernels/NECropKernel.cpp
@@ -76,6 +76,12 @@ inline float32x4_t load_as_f32(uint16_t *ptr)
     return vcvtq_f32_u32(vmovl_u16(wrapper::vload(ptr)));
 }
 
+template <>
+inline float32x4_t load_as_f32(uint8_t *ptr) // U8 specialisation: produces four F32 lanes from unsigned 8-bit input values
+{ // NOTE(review): vmovl_u8 consumes an 8-lane vector, so the load presumably fetches 8 bytes while only the low 4 are used - confirm reading past ptr[3] is safe at row tails
+    return vcvtq_f32_u32(vmovl_u16(vget_low_u16(vmovl_u8(wrapper::vload(ptr))))); // widen u8 -> u16, keep the low four lanes, widen u16 -> u32, convert u32 -> f32
+}
+
 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
 template <>
 inline float32x4_t load_as_f32(float16_t *ptr)
@@ -267,6 +273,9 @@ void NECropKernel::configure(const ITensor *input, const ITensor *crop_boxes, co
         case DataType::S16:
             _in_bounds_crop_function = &in_bounds_crop_window<int16_t>;
             break;
+        case DataType::U8:
+            _in_bounds_crop_function = &in_bounds_crop_window<uint8_t>;
+            break;
         default:
             ARM_COMPUTE_ERROR("Datatype not supported");
     }
@@ -276,7 +285,7 @@ Status NECropKernel::validate(const ITensorInfo *input, const ITensorInfo *crop_
 {
     ARM_COMPUTE_UNUSED(extrapolation_value);
     ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input);
-    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U16, DataType::S16, DataType::F16, DataType::U32, DataType::S32, DataType::F32);
+    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::U16, DataType::S16, DataType::F16, DataType::U32, DataType::S32, DataType::F32);
     ARM_COMPUTE_RETURN_ERROR_ON_DATA_LAYOUT_NOT_IN(input, DataLayout::NHWC);
     ARM_COMPUTE_RETURN_ERROR_ON(input->tensor_shape().num_dimensions() > 4);
     ARM_COMPUTE_RETURN_ERROR_ON(crop_boxes->tensor_shape()[0] != 4);
diff --git a/tests/validation/NEON/CropResize.cpp b/tests/validation/NEON/CropResize.cpp
index 1feed3d9d2..c6a1046b7e 100644
--- a/tests/validation/NEON/CropResize.cpp
+++ b/tests/validation/NEON/CropResize.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019 ARM Limited.
+ * Copyright (c) 2019-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -52,7 +52,6 @@ using NECropResizeFixture = CropResizeFixture<Tensor, Accessor, NECropResize, T>
 // clang-format off
 DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(
                framework::dataset::make("InputInfo", { TensorInfo(TensorShape(15U, 30U, 40U, 10U), 1, DataType::S32),
-                                                       TensorInfo(TensorShape(15U, 30U, 40U, 10U), 1, DataType::U8),  // Invalid input data type.
                                                        TensorInfo(TensorShape(15U, 30U, 40U, 10U), 1, DataType::S32), // Invalid box_ind shape.
                                                        TensorInfo(TensorShape(15U, 30U, 40U, 10U), 1, DataType::S32), // Invalid output shape.
                                                        TensorInfo(TensorShape(15U, 30U, 40U, 10U), 1, DataType::S32), // Invalid output data type.
@@ -64,11 +63,9 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(
                                                        TensorInfo(TensorShape(4, 20), 1, DataType::F32),
                                                        TensorInfo(TensorShape(4, 20), 1, DataType::F32),
                                                        TensorInfo(TensorShape(4, 20), 1, DataType::F32),
-                                                       TensorInfo(TensorShape(4, 20), 1, DataType::F32),
                                                        TensorInfo(TensorShape(3, 20), 1, DataType::F32),
                                                      })),
                framework::dataset::make("BoxIndInfo",{ TensorInfo(TensorShape(20), 1, DataType::S32),
-                                                       TensorInfo(TensorShape(20), 1, DataType::S32),
                                                        TensorInfo(TensorShape(10), 1, DataType::S32),
                                                        TensorInfo(TensorShape(20), 1, DataType::S32),
                                                        TensorInfo(TensorShape(20), 1, DataType::S32),
@@ -76,14 +73,13 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(
                                                        TensorInfo(TensorShape(20), 1, DataType::S32),
                                                      })),
                framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(15U, 5, 5, 20U), 1, DataType::F32),
-                                                       TensorInfo(TensorShape(15U, 5, 5, 20U), 1, DataType::F32),
                                                        TensorInfo(TensorShape(15U, 5, 5, 20U), 1, DataType::F32),
                                                        TensorInfo(TensorShape(15U, 5, 5, 10U), 1, DataType::F32),
                                                        TensorInfo(TensorShape(15U, 5, 5, 20U), 1, DataType::S32),
                                                        TensorInfo(TensorShape(5U, 5, 5, 20U), 1, DataType::F32),
                                                        TensorInfo(TensorShape(15U, 5, 5, 20U), 1, DataType::F32),
                                                      })),
-               framework::dataset::make("Expected", { true, false, false, false, false, false, false})),
+               framework::dataset::make("Expected", { true, false, false, false, false, false})),
                input, boxes, box_ind, output, expected)
 {
     ARM_COMPUTE_EXPECT(bool(NECropResize::validate(&input.clone()->set_data_layout(DataLayout::NHWC).set_is_resizable(false),
@@ -100,7 +96,7 @@ TEST_SUITE(Float)
 TEST_SUITE(F16)
 FIXTURE_DATA_TEST_CASE(RunSmall,
                        NECropResizeFixture<half>,
-                       framework::DatasetMode::PRECOMMIT,
+                       framework::DatasetMode::ALL,
                        combine(datasets::SmallCropResizeDataset(),
                                combine(framework::dataset::make("IsOutOfBounds", { true, false }),
                                        framework::dataset::make("DataType", DataType::F16))))
@@ -114,7 +110,7 @@ TEST_SUITE_END() // F16
 TEST_SUITE(F32)
 FIXTURE_DATA_TEST_CASE(RunSmall,
                        NECropResizeFixture<float>,
-                       framework::DatasetMode::PRECOMMIT,
+                       framework::DatasetMode::ALL,
                        combine(datasets::SmallCropResizeDataset(),
                                combine(framework::dataset::make("IsOutOfBounds", { true, false }),
                                        framework::dataset::make("DataType", DataType::F32))))
@@ -125,10 +121,23 @@ FIXTURE_DATA_TEST_CASE(RunSmall,
 TEST_SUITE_END() // F32
 TEST_SUITE_END() // Float
 
+TEST_SUITE(U8)
+FIXTURE_DATA_TEST_CASE(RunSmall,
+                       NECropResizeFixture<uint8_t>,
+                       framework::DatasetMode::ALL,
+                       combine(datasets::SmallCropResizeDataset(),
+                               combine(framework::dataset::make("IsOutOfBounds", { true, false }),
+                                       framework::dataset::make("DataType", DataType::U8))))
+{
+    // Validate output: the reference crop_and_resize returns SimpleTensor<float>, so the FP32 tolerance is used even though the input is U8
+    validate(Accessor(_target), _reference, tolerance_fp32, 0.01);
+}
+TEST_SUITE_END() // U8
+
 TEST_SUITE(U16)
 FIXTURE_DATA_TEST_CASE(RunSmall,
                        NECropResizeFixture<uint16_t>,
-                       framework::DatasetMode::PRECOMMIT,
+                       framework::DatasetMode::ALL,
                        combine(datasets::SmallCropResizeDataset(),
                                combine(framework::dataset::make("IsOutOfBounds", { true, false }),
                                        framework::dataset::make("DataType", DataType::U16))))
diff --git a/tests/validation/reference/CropResize.cpp b/tests/validation/reference/CropResize.cpp
index 68ee4557fb..f1b49f6673 100644
--- a/tests/validation/reference/CropResize.cpp
+++ b/tests/validation/reference/CropResize.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019 ARM Limited.
+ * Copyright (c) 2019-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -193,6 +193,8 @@ template SimpleTensor<float> crop_and_resize(const SimpleTensor<int32_t> &src, c
                                              Coordinates2D crop_size, InterpolationPolicy method, float extrapolation_value);
 template SimpleTensor<float> crop_and_resize(const SimpleTensor<half> &src, const SimpleTensor<float> &boxes, SimpleTensor<int32_t> box_ind,
                                              Coordinates2D crop_size, InterpolationPolicy method, float extrapolation_value);
+template SimpleTensor<float> crop_and_resize(const SimpleTensor<uint8_t> &src, const SimpleTensor<float> &boxes, SimpleTensor<int32_t> box_ind,
+                                             Coordinates2D crop_size, InterpolationPolicy method, float extrapolation_value);
 } // namespace reference
 } // namespace validation
 } // namespace test
-- 
cgit v1.2.1