author     George Wort <george.wort@arm.com>     2019-01-25 15:38:33 +0000
committer  Pablo Marquez <pablo.tello@arm.com>   2019-03-05 11:21:01 +0000
commit     05398a948a2b43584b16d91f6efdda9eb361ec74
tree       01963cd67610dd69915076be8577b28e025eb848
parent     f112ede50530374b48ea2f87c1f0e02262cffc78
download   ComputeLibrary-05398a948a2b43584b16d91f6efdda9eb361ec74.tar.gz
COMPMID-1843: Implement NECrop
Change-Id: I27e8b1a00c2315c72106e8e596f84ad48fb770e3
Signed-off-by: George Wort <george.wort@arm.com>
Reviewed-on: https://review.mlplatform.org/c/648
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Pablo Marquez <pablo.tello@arm.com>
-rw-r--r--  arm_compute/core/NEON/NEKernels.h                  |   1
-rw-r--r--  arm_compute/core/NEON/kernels/NECropKernel.h       | 123
-rw-r--r--  arm_compute/core/NEON/kernels/NEScaleKernel.h      |  46
-rw-r--r--  arm_compute/runtime/NEON/NEFunctions.h             |   1
-rw-r--r--  arm_compute/runtime/NEON/functions/NECropResize.h  | 107
-rw-r--r--  arm_compute/runtime/NEON/functions/NEScale.h       |   9
-rw-r--r--  src/core/NEON/kernels/NECropKernel.cpp             | 400
-rw-r--r--  src/core/NEON/kernels/NEScaleKernel.cpp            | 137
-rw-r--r--  src/runtime/NEON/functions/NECropResize.cpp        | 113
-rw-r--r--  src/runtime/NEON/functions/NEScale.cpp             |  30
-rw-r--r--  tests/datasets/CropResizeDataset.h                 | 141
-rw-r--r--  tests/validation/NEON/CropResize.cpp               | 184
-rw-r--r--  tests/validation/fixtures/CropResizeFixture.h      | 139
-rw-r--r--  tests/validation/reference/CropResize.cpp          | 199
-rw-r--r--  tests/validation/reference/CropResize.h            |  44
15 files changed, 1590 insertions(+), 84 deletions(-)
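
Editor's note: as orientation for reviewers, the following is a minimal usage sketch of the new NECropResize function. It is not part of the patch; the function name, tensor names, the 32x32 crop size and the allocation order are illustrative assumptions based on the NECropResize.h header below.

    #include "arm_compute/runtime/NEON/functions/NECropResize.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    void crop_resize_example(Tensor &input, Tensor &boxes, Tensor &box_ind)
    {
        // input:   NHWC tensor holding N batches of 3D images.
        // boxes:   4 x num_boxes F32 tensor, each box is [y0, x0, y1, x1], normalized.
        // box_ind: num_boxes F32 tensor mapping each box to a batch index in input.
        Tensor output; // F32 destination: one crop_size image per box (setup assumed).

        NECropResize crop_resize;
        crop_resize.configure(&input, &boxes, &box_ind, &output,
                              Coordinates2D{ 32, 32 },             // crop_size of each resized crop
                              InterpolationPolicy::BILINEAR, 0.f); // resize method, extrapolation value

        output.allocator()->allocate();
        crop_resize.run();
    }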
diff --git a/arm_compute/core/NEON/NEKernels.h b/arm_compute/core/NEON/NEKernels.h
index 8b37b2f603..f1d94c89db 100644
--- a/arm_compute/core/NEON/NEKernels.h
+++ b/arm_compute/core/NEON/NEKernels.h
@@ -46,6 +46,7 @@
#include "arm_compute/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.h"
#include "arm_compute/core/NEON/kernels/NEConvolutionKernel.h"
#include "arm_compute/core/NEON/kernels/NECopyKernel.h"
+#include "arm_compute/core/NEON/kernels/NECropKernel.h"
#include "arm_compute/core/NEON/kernels/NECumulativeDistributionKernel.h"
#include "arm_compute/core/NEON/kernels/NEDepthConcatenateLayerKernel.h"
#include "arm_compute/core/NEON/kernels/NEDepthConvertLayerKernel.h"
diff --git a/arm_compute/core/NEON/kernels/NECropKernel.h b/arm_compute/core/NEON/kernels/NECropKernel.h
new file mode 100644
index 0000000000..6713a40c86
--- /dev/null
+++ b/arm_compute/core/NEON/kernels/NECropKernel.h
@@ -0,0 +1,123 @@
+/*
+ * Copyright (c) 2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NEON_CROP_KERNEL_H__
+#define __ARM_COMPUTE_NEON_CROP_KERNEL_H__
+
+#include "arm_compute/core/NEON/INEKernel.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/core/Types.h"
+
+#include <cstdint>
+#include <map>
+
+namespace arm_compute
+{
+// Forward declarations
+class ITensor;
+
+/** Interface for the kernel to perform tensor cropping */
+class NECropKernel : public INEKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NECropKernel";
+ }
+ /** Default constructor */
+ NECropKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NECropKernel(const NECropKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NECropKernel &operator=(const NECropKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NECropKernel(NECropKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NECropKernel &operator=(NECropKernel &&) = default;
+ /** Default destructor */
+ ~NECropKernel() = default;
+ /** Configure kernel
+ *
+ * @note Supported tensor rank: up to 4
+ * @note Padding not supported.
+ *
+ * @param[in] input Source tensor. Data type supported: U16/S16/U32/S32/F16/F32. Data layouts supported: NHWC.
+ * @param[in] crop_boxes Tensor containing all possible boxes used to crop the image, each represented by 4 normalized values.
+ * Data type supported: F32
+ * @param[in] box_ind One dimensional tensor mapping the @p crop_box_ind to the index of the 3D image in @p input.
+ * Data type supported: F32
+ * @param[out] output Destination tensor. Data type supported: F32
+ * @param[in] crop_box_ind Index of the crop box to be used from @p crop_boxes. Default is 0.
+ * @param[in] extrapolation_value Value to be used for values outside of the image. Default is 0.
+ */
+ void configure(const ITensor *input, const ITensor *crop_boxes, const ITensor *box_ind, ITensor *output, uint32_t crop_box_ind = 0, float extrapolation_value = 0);
+
+ /** Static function to check if given info will lead to a valid configuration of @ref NECropKernel
+ *
+ * @note Supported tensor rank: up to 4
+ * @note Padding not supported.
+ *
+ * @param[in] input Source tensor info. Data type supported: U16/S16/U32/S32/F16/F32. Data layouts supported: NHWC.
+ * @param[in] crop_boxes Tensor info for the tensor containing all possible boxes used to crop the image. Data type supported: F32
+ * @param[in] box_ind Tensor info for the one dimensional tensor mapping the @p crop_box_ind to the index of the 3D image
+ * in @p input. Data type supported: F32
+ * @param[in] output Destination tensor info. Data type supported: F32
+ * @param[in] crop_box_ind Index of the crop box to be used from @p crop_boxes. Default is 0.
+ * @param[in] extrapolation_value Value to be used for values outside of the image. Default is 0.
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *crop_boxes, const ITensorInfo *box_ind, const ITensorInfo *output, uint32_t crop_box_ind = 0, float extrapolation_value = 0);
+
+ /** Configure output tensor's shape as this can only be determined at runtime. */
+ void configure_output_shape();
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+
+ /** Function to use for in bounds crop for the particular tensor types passed to configure() */
+ using InBoundsCropFunction = void(const ITensor *, const ITensor *, float *, Coordinates, int32_t, int32_t, int32_t);
+
+private:
+ const ITensor *_input;
+ const ITensor *_crop_boxes;
+ const ITensor *_box_ind;
+ ITensor *_output;
+
+ Coordinates _start;
+ Coordinates _end;
+ uint32_t _crop_box_ind;
+ float _extrapolation_value;
+ /** The number of rows out of bounds at the start and end of output. */
+ uint32_t _rows_out_of_bounds[2];
+ /** The number of columns out of bounds at the start and end of output. */
+ uint32_t _cols_out_of_bounds[2];
+
+ std::pair<NECropKernel::InBoundsCropFunction *, NECropKernel::InBoundsCropFunction *> _in_bounds_crop_functions;
+ NECropKernel::InBoundsCropFunction *_in_bounds_crop_function;
+
+ using CropFunction = void(const ITensor *, const ITensor *, Coordinates, float, const uint32_t *, const uint32_t *,
+ NECropKernel::InBoundsCropFunction *);
+
+ NECropKernel::CropFunction *_crop_function;
+};
+} // namespace arm_compute
+#endif /*__ARM_COMPUTE_NEON_CROP_KERNEL_H__ */
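
Editor's note: because the crop box values are only known at runtime, the kernel above is configured in two stages: configure() binds the tensors, and configure_output_shape() later resolves the output shape from the selected box. A hedged sketch of that call order follows; the scheduler split dimension, the allocation flow and the assumption that output already carries F32/NHWC info are the editor's, not taken from this patch.

    #include "arm_compute/core/NEON/kernels/NECropKernel.h"
    #include "arm_compute/runtime/NEON/NEScheduler.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    void crop_one_box(Tensor &input, Tensor &crop_boxes, Tensor &box_ind, Tensor &output)
    {
        NECropKernel crop;
        // Stage 1: static configuration; crop_boxes/box_ind contents are not read yet.
        crop.configure(&input, &crop_boxes, &box_ind, &output,
                       /*crop_box_ind=*/0, /*extrapolation_value=*/0.f);

        // Stage 2: once crop_boxes holds valid data, resolve the output shape, then allocate
        // (without padding, as the kernel requires) and run.
        crop.configure_output_shape();
        output.allocator()->allocate();

        NEScheduler::get().schedule(&crop, Window::DimZ);
    }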
diff --git a/arm_compute/core/NEON/kernels/NEScaleKernel.h b/arm_compute/core/NEON/kernels/NEScaleKernel.h
index 83d99643dc..b132bb57b6 100644
--- a/arm_compute/core/NEON/kernels/NEScaleKernel.h
+++ b/arm_compute/core/NEON/kernels/NEScaleKernel.h
@@ -55,33 +55,41 @@ public:
/** Initialise the kernel's inputs, output and interpolation policy
*
* @note dx, dy and offsets have the same dimensions (width and height) of the output tensor
+ * @note Using @p policy Area only supports data layout NCHW and input data type U8.
*
- * @param[in] input Source tensor. Data types supported: U8/S16/F16/F32.
- * @param[in] dx Pixel's distance between the X real coordinate and the smallest X following integer. Data type supported: F32
- * @param[in] dy Pixel's distance between the Y real coordinate and the smallest Y following integer. Data type supported: F32
- * @param[in] offsets Offset to access the pixel with NEAREST interpolation or the top-left pixel with BILINEAR interpolation in the input tensor. Data type supported: S32.
- * @param[out] output Destination tensor. Data types supported: Same as @p input. All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane.
- * @param[in] policy Interpolation type to use
- * @param[in] border_mode Border mode policy
- * @param[in] sampling_policy (Optional) Sampling policy used by the interpolation. Defaults to @ref SamplingPolicy::CENTER
+ * @param[in] input Source tensor. Data types supported: U8/S16/F16/F32.
+ * @param[in] dx Pixel's distance between the X real coordinate and the smallest X following integer. Data type supported: F32
+ * @param[in] dy Pixel's distance between the Y real coordinate and the smallest Y following integer. Data type supported: F32
+ * @param[in] offsets Offset to access the pixel with NEAREST interpolation or the top-left pixel with BILINEAR interpolation in the input tensor. Data type supported: S32.
+ * @param[out] output Destination tensor. Data types supported: Same as @p input. All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane.
+ * @param[in] policy Interpolation type to use
+ * @param[in] border_mode Border mode policy
+ * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT and use_padding is set to false.
+ * @param[in] sampling_policy (Optional) Sampling policy used by the interpolation. Defaults to @ref SamplingPolicy::CENTER
+ * @param[in] use_padding (Optional) Is padding in use or not. Defaults to true.
*/
void configure(const ITensor *input, const ITensor *dx, const ITensor *dy, const ITensor *offsets, ITensor *output,
- InterpolationPolicy policy, BorderMode border_mode, SamplingPolicy sampling_policy = SamplingPolicy::CENTER);
+ InterpolationPolicy policy, BorderMode border_mode, PixelValue constant_border_value = PixelValue(),
+ SamplingPolicy sampling_policy = SamplingPolicy::CENTER, bool use_padding = true);
/** Static function to check if given info will lead to a valid configuration of @ref NEScaleKernel
*
* @note dx, dy and offsets have the same dimensions (width and height) of the output tensor
+ * @note Using @p policy Area only supports data layout NCHW and input data type U8.
*
- * @param[in] input Source tensor. Data types supported: U8/S16/F16/F32.
- * @param[in] dx Pixel's distance between the X real coordinate and the smallest X following integer. Data type supported: F32
- * @param[in] dy Pixel's distance between the Y real coordinate and the smallest Y following integer. Data type supported: F32
- * @param[in] offsets Offset to access the pixel with NEAREST interpolation or the top-left pixel with BILINEAR interpolation in the input tensor. Data type supported: S32.
- * @param[in] output Destination tensor. Data types supported: Same as @p input. All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane.
- * @param[in] policy Interpolation type to use
- * @param[in] border_mode Border mode policy
- * @param[in] sampling_policy (Optional) Sampling policy used by the interpolation. Defaults to @ref SamplingPolicy::CENTER
+ * @param[in] input Source tensor. Data types supported: U8/S16/F16/F32.
+ * @param[in] dx Pixel's distance between the X real coordinate and the smallest X following integer. Data type supported: F32
+ * @param[in] dy Pixel's distance between the Y real coordinate and the smallest Y following integer. Data type supported: F32
+ * @param[in] offsets Offset to access the pixel with NEAREST interpolation or the top-left pixel with BILINEAR interpolation in the input tensor. Data type supported: S32.
+ * @param[in] output Destination tensor. Data types supported: Same as @p input. All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane.
+ * @param[in] policy Interpolation type to use
+ * @param[in] border_mode Border mode policy
+ * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT and use_padding is set to false.
+ * @param[in] sampling_policy (Optional) Sampling policy used by the interpolation. Defaults to @ref SamplingPolicy::CENTER
+ * @param[in] use_padding (Optional) Is padding in use or not. Defaults to true.
*/
static Status validate(const ITensorInfo *input, const ITensorInfo *dx, const ITensorInfo *dy, const ITensorInfo *offsets, ITensorInfo *output,
- InterpolationPolicy policy, BorderMode border_mode, SamplingPolicy sampling_policy = SamplingPolicy::CENTER);
+ InterpolationPolicy policy, BorderMode border_mode, PixelValue constant_border_value = PixelValue(),
+ SamplingPolicy sampling_policy = SamplingPolicy::CENTER, bool use_padding = true);
// Inherited methods overridden:
void run(const Window &window, const ThreadInfo &info) override;
@@ -110,7 +118,9 @@ private:
InterpolationPolicy _policy;
BorderSize _border_size;
BorderMode _border_mode;
+ PixelValue _constant_border_value;
float _sampling_offset;
+ bool _use_padding;
};
} // namespace arm_compute
#endif /*__ARM_COMPUTE_NESCALEKERNEL_H__ */
diff --git a/arm_compute/runtime/NEON/NEFunctions.h b/arm_compute/runtime/NEON/NEFunctions.h
index 15ce4e3d66..432c751308 100644
--- a/arm_compute/runtime/NEON/NEFunctions.h
+++ b/arm_compute/runtime/NEON/NEFunctions.h
@@ -49,6 +49,7 @@
#include "arm_compute/runtime/NEON/functions/NEConvolution.h"
#include "arm_compute/runtime/NEON/functions/NEConvolutionLayer.h"
#include "arm_compute/runtime/NEON/functions/NECopy.h"
+#include "arm_compute/runtime/NEON/functions/NECropResize.h"
#include "arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h"
#include "arm_compute/runtime/NEON/functions/NEDepthConcatenateLayer.h"
#include "arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h"
diff --git a/arm_compute/runtime/NEON/functions/NECropResize.h b/arm_compute/runtime/NEON/functions/NECropResize.h
new file mode 100644
index 0000000000..e790e68b5f
--- /dev/null
+++ b/arm_compute/runtime/NEON/functions/NECropResize.h
@@ -0,0 +1,107 @@
+/*
+ * Copyright (c) 2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NEON_CROP_RESIZE_H__
+#define __ARM_COMPUTE_NEON_CROP_RESIZE_H__
+
+#include "arm_compute/core/NEON/kernels/NECropKernel.h"
+#include "arm_compute/runtime/NEON/functions/NEScale.h"
+
+#include <cstdint>
+#include <memory>
+
+namespace arm_compute
+{
+// Forward Declarations
+class ITensor;
+
+/** Function to perform cropping and resizing */
+class NECropResize : public IFunction
+{
+public:
+ /** Default constructor */
+ NECropResize();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NECropResize(const NECropResize &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NECropResize &operator=(const NECropResize &) = delete;
+ /** Allow instances of this class to be moved */
+ NECropResize(NECropResize &&) = default;
+ /** Allow instances of this class to be moved */
+ NECropResize &operator=(NECropResize &&) = default;
+ /** Default destructor */
+ virtual ~NECropResize() = default;
+
+ /** Configure kernel
+ *
+ * @note Supported tensor rank: up to 4
+ * @note Box indices may be outside of the bounds, in which case @p extrapolation_value is used.
+ * @note Start and end indices of boxes are inclusive.
+ *
+ * @param[in] input Source tensor containing N batches of 3D images to be cropped. Data type supported: U16/S16/U32/S32/F16/F32
+ * @param[in] boxes Tensor containing the boxes used to crop the images. Data type supported: F32
+ * @param[in] box_ind One dimensional tensor containing the batch index of the 3D image in @p input that the corresponding
+ * box in @p boxes will be applied to. Data type supported: F32
+ * @param[out] output Destination tensor containing a cropped and resized image for each box in @p boxes. Data type supported: F32
+ * @param[in] crop_size The dimensions that each cropped image will be resized to.
+ * @param[in] method The policy to be used when resizing image. Default is bilinear.
+ * @param[in] extrapolation_value Value to be used for values outside of the image for cropping and resizing. Default is 0.
+ */
+ void configure(const ITensor *input, const ITensor *boxes, const ITensor *box_ind, ITensor *output, Coordinates2D crop_size,
+ InterpolationPolicy method = InterpolationPolicy::BILINEAR, float extrapolation_value = 0);
+
+ /** Static function to check if given info will lead to a valid configuration of @ref NECropResize
+ *
+ * @note Supported tensor rank: up to 4
+ * @note Box indices may be outside of the bounds, in which case @p extrapolation_value is used.
+ * @note Start and end indices of boxes are inclusive.
+ *
+ * @param[in] input Source tensor containing N batches of 3D images to be cropped. Data type supported: U16/S16/U32/S32/F16/F32
+ * @param[in] boxes Tensor info for the tensor containing the boxes used to crop the images. Data type supported: F32
+ * @param[in] box_ind Tensor info for the one dimensional tensor containing the batch index of the 3D image in @p input
+ * that the corresponding box in @p boxes will be applied to. Data type supported: F32
+ * @param[in] output Tensor info for the destination tensor containing a cropped and resized image for each box in @p boxes.
+ * Data type supported: F32
+ * @param[in] crop_size The dimensions that each cropped image will be resized to.
+ * @param[in] method The policy to be used when resizing image. Default is bilinear.
+ * @param[in] extrapolation_value Value to be used for values outside of the image for cropping and resizing. Default is 0.
+ *
+ * @return A status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *boxes, const ITensorInfo *box_ind, const ITensorInfo *output,
+ Coordinates2D crop_size, InterpolationPolicy method, float extrapolation_value);
+
+ void run() override;
+
+ ITensor *_output;
+ size_t _num_boxes;
+ InterpolationPolicy _method;
+ float _extrapolation_value;
+
+ std::unique_ptr<NECropKernel[]> _crop;
+ std::unique_ptr<NEScale[]> _scale;
+ std::unique_ptr<Tensor[]> _crop_results{ nullptr };
+ std::unique_ptr<Tensor[]> _scaled_results{ nullptr };
+};
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_NEON_CROP_RESIZE_H__ */
diff --git a/arm_compute/runtime/NEON/functions/NEScale.h b/arm_compute/runtime/NEON/functions/NEScale.h
index d59e3cccb6..d7dfbbfc9f 100644
--- a/arm_compute/runtime/NEON/functions/NEScale.h
+++ b/arm_compute/runtime/NEON/functions/NEScale.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2018 ARM Limited.
+ * Copyright (c) 2016-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -53,9 +53,10 @@ public:
* @param[in] border_mode Strategy to use for borders.
* @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
* @param[in] sampling_policy (Optional) Sampling policy used by the interpolation. Defaults to @ref SamplingPolicy::CENTER
+ * @param[in] use_padding (Optional) Is padding in use or not. Defaults to true.
*/
void configure(ITensor *input, ITensor *output, InterpolationPolicy policy, BorderMode border_mode, PixelValue constant_border_value = PixelValue(),
- SamplingPolicy sampling_policy = SamplingPolicy::CENTER);
+ SamplingPolicy sampling_policy = SamplingPolicy::CENTER, bool use_padding = true);
/** Static function to check if given info will lead to a valid configuration of @ref NEScale
*
* @param[in] input Source tensor. Data type supported: U8/S16/F16/F32. (Written to only for @p border_mode != UNDEFINED)
@@ -64,11 +65,12 @@ public:
* @param[in] border_mode Strategy to use for borders.
* @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
* @param[in] sampling_policy (Optional) Sampling policy used by the interpolation. Defaults to @ref SamplingPolicy::CENTER
+ * @param[in] use_padding (Optional) Is padding in use or not. Defaults to true.
*
* @return a status
*/
static Status validate(const ITensorInfo *input, const ITensorInfo *output, InterpolationPolicy policy, BorderMode border_mode,
- PixelValue constant_border_value = PixelValue(), SamplingPolicy sampling_policy = SamplingPolicy::CENTER);
+ PixelValue constant_border_value = PixelValue(), SamplingPolicy sampling_policy = SamplingPolicy::CENTER, bool use_padding = true);
// Inherited methods overridden:
void run() override;
@@ -79,6 +81,7 @@ private:
Tensor _dy; /**< Element's distance between the Y real coordinate and the smallest Y following integer */
NEScaleKernel _scale_kernel; /**< Kernel to perform the scaling */
NEFillBorderKernel _border_handler; /**< kernel to handle tensor borders */
+ bool _use_padding; /**< Is padding used on the tensors */
};
}
#endif /*__ARM_COMPUTE_NESCALEIMAGE_H__ */
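
Editor's note: the new use_padding flag allows scaling tensors that carry no padding; the validation added in NEScaleKernel.cpp below requires BorderMode::CONSTANT in that case, and the border value then comes from constant_border_value rather than the padded area. A minimal sketch of calling NEScale this way (tensor setup and policies are illustrative assumptions):

    #include "arm_compute/runtime/NEON/functions/NEScale.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    void scale_without_padding(Tensor &src, Tensor &dst)
    {
        NEScale scale;
        // With use_padding = false, BorderMode::CONSTANT is required and the
        // PixelValue below (zero) supplies the border, not the padding area.
        scale.configure(&src, &dst,
                        InterpolationPolicy::BILINEAR,
                        BorderMode::CONSTANT,
                        PixelValue(),
                        SamplingPolicy::TOP_LEFT,
                        /*use_padding=*/false);
        scale.run();
    }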
diff --git a/src/core/NEON/kernels/NECropKernel.cpp b/src/core/NEON/kernels/NECropKernel.cpp
new file mode 100644
index 0000000000..b6fe5819e4
--- /dev/null
+++ b/src/core/NEON/kernels/NECropKernel.cpp
@@ -0,0 +1,400 @@
+/*
+ * Copyright (c) 2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/NEON/kernels/NECropKernel.h"
+
+#include "arm_compute/core/CPP/Validate.h"
+#include "arm_compute/core/IAccessWindow.h"
+#include "arm_compute/core/ITensor.h"
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/core/Window.h"
+
+#include "arm_compute/core/NEON/wrapper/wrapper.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/core/utils/helpers/bit_ops.h"
+#include "arm_compute/core/utils/helpers/tensor_transform.h"
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+
+#include <map>
+
+namespace arm_compute
+{
+namespace
+{
+template <typename T>
+inline float32x4_t load_as_f32(T *ptr)
+{
+ ARM_COMPUTE_UNUSED(ptr);
+ ARM_COMPUTE_ERROR("Type not supported.");
+}
+
+template <>
+inline float32x4_t load_as_f32(float *ptr)
+{
+ return wrapper::vloadq(ptr);
+}
+
+template <>
+inline float32x4_t load_as_f32(int32_t *ptr)
+{
+ return vcvtq_f32_s32(wrapper::vloadq(ptr));
+}
+
+template <>
+inline float32x4_t load_as_f32(uint32_t *ptr)
+{
+ return vcvtq_f32_u32(wrapper::vloadq(ptr));
+}
+
+template <>
+inline float32x4_t load_as_f32(int16_t *ptr)
+{
+ return vcvtq_f32_s32(vmovl_s16(wrapper::vload(ptr)));
+}
+
+template <>
+inline float32x4_t load_as_f32(uint16_t *ptr)
+{
+ return vcvtq_f32_u32(vmovl_u16(wrapper::vload(ptr)));
+}
+
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+template <>
+inline float32x4_t load_as_f32(float16_t *ptr)
+{
+ return vcvt_f32_f16(wrapper::vload(ptr));
+}
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
+
+template <typename T, bool input_has_single_channel, bool is_width_flipped>
+inline void in_bounds_crop_window(const ITensor *input, const ITensor *output, float *output_ptr, Coordinates input_offset,
+ int32_t window_step_x, int32_t output_width_start, int32_t output_width_limit)
+{
+ // Reverse elements if width flipped.
+ if(is_width_flipped)
+ {
+ // Collapse first dimension if possible.
+ if(input_has_single_channel)
+ {
+ int32_t x = output_width_start;
+ Coordinates negative_offset(input_offset);
+ negative_offset.set(1, negative_offset[1] - window_step_x + 1);
+ for(; x <= output_width_limit - window_step_x; x += window_step_x, negative_offset[1] -= window_step_x)
+ {
+ auto in = load_as_f32(reinterpret_cast<T *>(input->ptr_to_element(negative_offset)));
+
+ in = wrapper::vrev64(in);
+ in = wrapper::vcombine(wrapper::vgethigh(in), wrapper::vgetlow(in));
+
+ wrapper::vstore(output_ptr + x, in);
+ }
+ input_offset[1] = negative_offset[1] + window_step_x - 1;
+ for(; x < output_width_limit; ++x, --input_offset[1])
+ {
+ *(output_ptr + x) = static_cast<float>(*reinterpret_cast<T *>(input->ptr_to_element(input_offset)));
+ }
+ }
+ else
+ {
+ for(int32_t x = output_width_start; x < output_width_limit; ++x, --input_offset[1])
+ {
+ input_offset.set(0, 0);
+ int32_t c = 0;
+ for(; c <= static_cast<int32_t>(input->info()->dimension(0)) - window_step_x; c += window_step_x, input_offset[0] += window_step_x)
+ {
+ auto in = load_as_f32(reinterpret_cast<T *>(input->ptr_to_element(input_offset)));
+ wrapper::vstore(output_ptr + x * output->info()->dimension(0) + c, in);
+ }
+ for(; c < static_cast<int32_t>(input->info()->dimension(0)); ++c, ++input_offset[0])
+ {
+ *(output_ptr + x * output->info()->dimension(0) + c) = static_cast<float>(*reinterpret_cast<T *>(input->ptr_to_element(input_offset)));
+ }
+ }
+ }
+ }
+ else
+ {
+ // Use memcpy if the elements don't need converting to float.
+ if(std::is_same<T, float>::value)
+ {
+ memcpy(static_cast<void *>(output_ptr + output_width_start * output->info()->dimension(0)),
+ reinterpret_cast<const void *>(input->ptr_to_element(input_offset)),
+ (output_width_limit - output_width_start) * output->info()->dimension(0) * output->info()->element_size());
+ }
+ else
+ {
+ int32_t x = 0;
+ int32_t limit = (output_width_limit - output_width_start) * static_cast<int32_t>(output->info()->dimension(0));
+ float *output_start_ptr = output_ptr + output_width_start * output->info()->dimension(0);
+ for(; x <= limit - window_step_x; x += window_step_x, input_offset[0] += window_step_x)
+ {
+ auto in = load_as_f32(reinterpret_cast<T *>(input->ptr_to_element(input_offset)));
+ wrapper::vstore(output_start_ptr + x, in);
+ }
+ for(; x < limit; ++x, ++input_offset[0])
+ {
+ *(output_start_ptr + x) = static_cast<float>(*reinterpret_cast<T *>(input->ptr_to_element(input_offset)));
+ }
+ }
+ }
+}
+
+inline void out_of_bounds_crop_window(const ITensor *output, float *output_ptr, float extrapolation_value,
+ int32_t window_step_x, int32_t output_width_start, int32_t output_width_limit)
+{
+ auto in = wrapper::vdup_n(extrapolation_value, wrapper::traits::vector_128_tag());
+ int32_t x = 0;
+ int32_t limit = (output_width_limit - output_width_start) * static_cast<int32_t>(output->info()->dimension(0));
+ float *output_start_ptr = output_ptr + output_width_start * output->info()->dimension(0);
+ for(; x <= limit - window_step_x; x += window_step_x)
+ {
+ wrapper::vstore(output_start_ptr + x, in);
+ }
+ for(; x < limit; ++x)
+ {
+ *(output_start_ptr + x) = extrapolation_value;
+ }
+}
+
+template <bool is_height_flipped, bool has_cols_in_bounds, bool has_cols_out_of_bounds_before, bool has_cols_out_of_bounds_after>
+inline void execute_window(const ITensor *input, const ITensor *output, Coordinates input_offset, float extrapolation_value,
+ const uint32_t rows_out_of_bounds[], const uint32_t cols_out_of_bounds[], NECropKernel::InBoundsCropFunction *in_bounds_crop_function)
+{
+ // Output is always float.
+ const int window_step_x = 16 / sizeof(float);
+ auto *output_ptr = reinterpret_cast<float *>(output->buffer());
+ // Output window:
+ // --------------------------------
+ // | Out of bounds |
+ // | rows before |
+ // |------------------------------|
+ // | Out of | In | Out of |
+ // | bounds | bounds | bounds |
+ // | cols | elements | cols |
+ // | before | copied | after |
+ // | | from input | |
+ // --------------------------------
+ // | Out of bounds |
+ // | rows after |
+ // |------------------------------|
+ // Fill all output rows that have no elements that are within the input bounds with the extrapolation value.
+ // First for the rows before the in bounds rows.
+ out_of_bounds_crop_window(output, output_ptr, extrapolation_value, window_step_x, 0, rows_out_of_bounds[0] * output->info()->dimension(1));
+ output_ptr += rows_out_of_bounds[0] * output->info()->dimension(1) * output->info()->dimension(0);
+ // Iterate through each row that has any elements within the input bounds.
+ for(uint32_t row = rows_out_of_bounds[0]; static_cast<int32_t>(row) < static_cast<int32_t>(output->info()->dimension(2) - rows_out_of_bounds[1]);
+ ++row, is_height_flipped ? --input_offset[2] : ++input_offset[2])
+ {
+ // Fill all elements in the row that are out of bounds with the extrapolation value.
+ // First for the elements before the in bounds elements.
+ if(has_cols_out_of_bounds_before)
+ {
+ out_of_bounds_crop_window(output, output_ptr, extrapolation_value, window_step_x, 0, cols_out_of_bounds[0]);
+ }
+ // Copy all elements within the input bounds from the input tensor.
+ if(has_cols_in_bounds)
+ {
+ (*in_bounds_crop_function)(input, output, output_ptr, input_offset, window_step_x, cols_out_of_bounds[0], output->info()->dimension(1) - cols_out_of_bounds[1]);
+ }
+ // Fill all elements after the in bounds elements with the extrapolation value.
+ if(has_cols_out_of_bounds_after)
+ {
+ out_of_bounds_crop_window(output, output_ptr, extrapolation_value, window_step_x, output->info()->dimension(1) - cols_out_of_bounds[1], output->info()->dimension(1));
+ }
+ output_ptr += output->info()->dimension(1) * output->info()->dimension(0);
+ }
+ // Fill all rows after the in bounds elements with the extrapolation value.
+ out_of_bounds_crop_window(output, output_ptr, extrapolation_value, window_step_x, 0, rows_out_of_bounds[1] * output->info()->dimension(1));
+}
+} // namespace
+
+NECropKernel::NECropKernel()
+ : _input(nullptr), _crop_boxes(nullptr), _box_ind(nullptr), _output(nullptr), _start(), _end(), _crop_box_ind(0), _extrapolation_value(0), _rows_out_of_bounds(), _cols_out_of_bounds(),
+ _in_bounds_crop_functions(), _in_bounds_crop_function(nullptr), _crop_function(nullptr)
+{
+}
+
+void NECropKernel::configure(const ITensor *input, const ITensor *crop_boxes, const ITensor *box_ind, ITensor *output, uint32_t crop_box_ind, float extrapolation_value)
+{
+ ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
+ ARM_COMPUTE_ERROR_THROW_ON(validate(input->info(), crop_boxes->info(), box_ind->info(), output->info(), crop_box_ind, extrapolation_value));
+
+ _input = input;
+ _crop_boxes = crop_boxes;
+ _box_ind = box_ind;
+ _output = output;
+ _crop_box_ind = crop_box_ind;
+ _extrapolation_value = extrapolation_value;
+
+ const static std::map<std::pair<DataType, bool>, std::pair<NECropKernel::InBoundsCropFunction *, NECropKernel::InBoundsCropFunction *>> in_map_function =
+ {
+ { { DataType::F32, false }, { &in_bounds_crop_window<float, false, false>, &in_bounds_crop_window<float, false, true> } },
+ { { DataType::F32, true }, { &in_bounds_crop_window<float, true, false>, &in_bounds_crop_window<float, true, true> } },
+ { { DataType::U16, false }, { &in_bounds_crop_window<uint16_t, false, false>, &in_bounds_crop_window<uint16_t, false, true> } },
+ { { DataType::U16, true }, { &in_bounds_crop_window<uint16_t, true, false>, &in_bounds_crop_window<uint16_t, true, true> } },
+ { { DataType::S16, false }, { &in_bounds_crop_window<int16_t, false, false>, &in_bounds_crop_window<int16_t, false, true> } },
+ { { DataType::S16, true }, { &in_bounds_crop_window<int16_t, true, false>, &in_bounds_crop_window<int16_t, true, true> } },
+ { { DataType::U32, false }, { &in_bounds_crop_window<uint32_t, false, false>, &in_bounds_crop_window<uint32_t, false, true> } },
+ { { DataType::U32, true }, { &in_bounds_crop_window<uint32_t, true, false>, &in_bounds_crop_window<uint32_t, true, true> } },
+ { { DataType::S32, false }, { &in_bounds_crop_window<int32_t, false, false>, &in_bounds_crop_window<int32_t, false, true> } },
+ { { DataType::S32, true }, { &in_bounds_crop_window<int32_t, true, false>, &in_bounds_crop_window<int32_t, true, true> } },
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+ { { DataType::F16, false }, { &in_bounds_crop_window<float16_t, false, false>, &in_bounds_crop_window<float16_t, false, true> } },
+ { { DataType::F16, false }, { &in_bounds_crop_window<float16_t, true, false>, &in_bounds_crop_window<float16_t, true, true> } }
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
+ };
+
+ auto in_it = in_map_function.find({ input->info()->data_type(), input->info()->dimension(0) == 1 });
+
+ if(in_it != in_map_function.end())
+ {
+ _in_bounds_crop_functions = in_it->second;
+ }
+}
+
+Status NECropKernel::validate(const ITensorInfo *input, const ITensorInfo *crop_boxes, const ITensorInfo *box_ind, const ITensorInfo *output, uint32_t crop_box_ind, float extrapolation_value)
+{
+ ARM_COMPUTE_UNUSED(extrapolation_value);
+ ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U16, DataType::S16, DataType::F16, DataType::U32, DataType::S32, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_LAYOUT_NOT_IN(input, DataLayout::NHWC);
+ ARM_COMPUTE_RETURN_ERROR_ON(input->tensor_shape().num_dimensions() > 4);
+ ARM_COMPUTE_RETURN_ERROR_ON(crop_boxes->tensor_shape()[0] != 4);
+ ARM_COMPUTE_RETURN_ERROR_ON(crop_boxes->tensor_shape()[1] != box_ind->tensor_shape()[0]);
+ ARM_COMPUTE_RETURN_ERROR_ON(crop_boxes->tensor_shape()[1] <= crop_box_ind);
+ ARM_COMPUTE_RETURN_ERROR_ON(box_ind->tensor_shape()[0] <= crop_box_ind);
+ if(output->total_size() > 0)
+ {
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_NOT_IN(output, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(input, output);
+ ARM_COMPUTE_RETURN_ERROR_ON(output->num_dimensions() != 3);
+ ARM_COMPUTE_RETURN_ERROR_ON(output->has_padding());
+ }
+ return Status{};
+}
+
+void NECropKernel::configure_output_shape()
+{
+ // _crop_box_ind is used to index _crop_boxes and retrieve the appropriate crop box.
+ // The crop box is specified by normalized coordinates [y0, x0, y1, x1].
+ const float x0 = *reinterpret_cast<const float *>(_crop_boxes->ptr_to_element(Coordinates(1, _crop_box_ind)));
+ const float y0 = *reinterpret_cast<const float *>(_crop_boxes->ptr_to_element(Coordinates(0, _crop_box_ind)));
+ const float x1 = *reinterpret_cast<const float *>(_crop_boxes->ptr_to_element(Coordinates(3, _crop_box_ind)));
+ const float y1 = *reinterpret_cast<const float *>(_crop_boxes->ptr_to_element(Coordinates(2, _crop_box_ind)));
+ // The normalized coordinates are scaled to retrieve the floating point image coordinates which are rounded to integers.
+ _start = Coordinates(std::floor(x0 * (_input->info()->tensor_shape()[1] - 1) + 0.5f),
+ std::floor(y0 * (_input->info()->tensor_shape()[2] - 1) + 0.5f));
+ _end = Coordinates(std::floor(x1 * (_input->info()->tensor_shape()[1] - 1) + 0.5f),
+ std::floor(y1 * (_input->info()->tensor_shape()[2] - 1) + 0.5f));
+ const TensorShape out_shape(_input->info()->tensor_shape()[0], abs(_end[0] - _start[0]) + 1, abs(_end[1] - _start[1]) + 1);
+ _output->info()->set_tensor_shape(out_shape);
+
+ _in_bounds_crop_function = _start[0] <= _end[0] ? _in_bounds_crop_functions.first : _in_bounds_crop_functions.second;
+
+ bool is_width_flipped = _end[0] < _start[0];
+ bool is_height_flipped = _end[1] < _start[1];
+ if(is_height_flipped)
+ {
+ _rows_out_of_bounds[0] = _start[1] >= static_cast<int32_t>(_input->info()->dimension(2)) ? std::min(static_cast<uint32_t>(_start[1] - _input->info()->dimension(2) + 1),
+ static_cast<uint32_t>(_output->info()->dimension(2))) :
+ 0;
+ _rows_out_of_bounds[1] = _end[1] < 0 ? std::min(static_cast<uint32_t>(-_end[1]),
+ static_cast<uint32_t>(_output->info()->dimension(2))) :
+ 0;
+ }
+ else
+ {
+ _rows_out_of_bounds[0] = _start[1] < 0 ? std::min(static_cast<uint32_t>(-_start[1]),
+ static_cast<uint32_t>(_output->info()->dimension(2))) :
+ 0;
+ _rows_out_of_bounds[1] = _end[1] >= static_cast<int32_t>(_input->info()->dimension(2)) ? std::min(static_cast<uint32_t>(_end[1] - _input->info()->dimension(2) + 1),
+ static_cast<uint32_t>(_output->info()->dimension(2))) :
+ 0;
+ }
+ if(is_width_flipped)
+ {
+ _cols_out_of_bounds[0] = _start[0] >= static_cast<int32_t>(_input->info()->dimension(1)) ? std::min(static_cast<uint32_t>(_start[0] - _input->info()->dimension(1) + 1),
+ static_cast<uint32_t>(_output->info()->dimension(1))) :
+ 0;
+ _cols_out_of_bounds[1] = _end[0] < 0 ? std::min(static_cast<uint32_t>(-_end[0]),
+ static_cast<uint32_t>(_output->info()->dimension(1))) :
+ 0;
+ }
+ else
+ {
+ _cols_out_of_bounds[0] = _start[0] < 0 ? std::min(static_cast<uint32_t>(-_start[0]),
+ static_cast<uint32_t>(_output->info()->dimension(1))) :
+ 0;
+ _cols_out_of_bounds[1] = _end[0] >= static_cast<int32_t>(_input->info()->dimension(1)) ? std::min(static_cast<uint32_t>(_end[0] - _input->info()->dimension(1) + 1),
+ static_cast<uint32_t>(_output->info()->dimension(1))) :
+ 0;
+ }
+
+ const static std::map<std::tuple<bool, bool, bool, bool>, NECropKernel::CropFunction *> map_function =
+ {
+ { std::make_tuple(false, false, false, false), &execute_window<false, false, false, false> },
+ { std::make_tuple(false, false, false, true), &execute_window<false, false, false, true> },
+ { std::make_tuple(false, false, true, false), &execute_window<false, false, true, false> },
+ { std::make_tuple(false, false, true, true), &execute_window<false, false, true, true> },
+ { std::make_tuple(false, true, false, false), &execute_window<false, true, false, false> },
+ { std::make_tuple(false, true, false, true), &execute_window<false, true, false, true> },
+ { std::make_tuple(false, true, true, false), &execute_window<false, true, true, false> },
+ { std::make_tuple(false, true, true, true), &execute_window<false, true, true, true> },
+ { std::make_tuple(true, false, false, false), &execute_window<true, false, false, false> },
+ { std::make_tuple(true, false, false, true), &execute_window<true, false, false, true> },
+ { std::make_tuple(true, false, true, false), &execute_window<true, false, true, false> },
+ { std::make_tuple(true, false, true, true), &execute_window<true, false, true, true> },
+ { std::make_tuple(true, true, false, false), &execute_window<true, true, false, false> },
+ { std::make_tuple(true, true, false, true), &execute_window<true, true, false, true> },
+ { std::make_tuple(true, true, true, false), &execute_window<true, true, true, false> },
+ { std::make_tuple(true, true, true, true), &execute_window<true, true, true, true> },
+ };
+
+ auto it = map_function.find(std::make_tuple(is_height_flipped,
+ _cols_out_of_bounds[0] + _cols_out_of_bounds[1] < _output->info()->dimension(1),
+ _cols_out_of_bounds[0] > 0,
+ _cols_out_of_bounds[1] > 0));
+
+ if(it != map_function.end())
+ {
+ _crop_function = it->second;
+ }
+
+ INEKernel::configure(calculate_max_window(*_output->info()));
+}
+
+void NECropKernel::run(const Window &window, const ThreadInfo &info)
+{
+ ARM_COMPUTE_UNUSED(window, info);
+ ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
+ ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);
+
+ ARM_COMPUTE_ERROR_ON(_input->info()->has_padding());
+ ARM_COMPUTE_ERROR_ON(_output->info()->has_padding());
+
+ uint32_t batch_index = *(reinterpret_cast<int32_t *>(_box_ind->ptr_to_element(Coordinates(_crop_box_ind))));
+ Coordinates input_offset(0, _end[0] < _start[0] ? _start[0] - _cols_out_of_bounds[0] : _start[0] + _cols_out_of_bounds[0],
+ _end[1] < _start[1] ? _start[1] - _rows_out_of_bounds[0] : _start[1] + _rows_out_of_bounds[0], batch_index);
+ (*_crop_function)(_input, _output, input_offset, _extrapolation_value, _rows_out_of_bounds, _cols_out_of_bounds, _in_bounds_crop_function);
+}
+} // namespace arm_compute
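
Editor's note: to make the rounding in configure_output_shape() concrete, here is a small standalone example of the same formula; the 100x100 plane and the box values are made up for illustration.

    #include <cmath>
    #include <cstdio>
    #include <cstdlib>

    int main()
    {
        // Hypothetical 100x100 (width x height) input plane and a normalized
        // crop box [y0, x0, y1, x1] = [0.2, 0.1, 0.8, 0.9].
        const int   width = 100, height = 100;
        const float box[4] = { 0.2f, 0.1f, 0.8f, 0.9f };

        // Same rounding as configure_output_shape(): scale by (dim - 1), add 0.5, floor.
        const int x0 = static_cast<int>(std::floor(box[1] * (width - 1) + 0.5f));  // 10
        const int y0 = static_cast<int>(std::floor(box[0] * (height - 1) + 0.5f)); // 20
        const int x1 = static_cast<int>(std::floor(box[3] * (width - 1) + 0.5f));  // 89
        const int y1 = static_cast<int>(std::floor(box[2] * (height - 1) + 0.5f)); // 79

        // Output plane per channel: (|x1 - x0| + 1) x (|y1 - y0| + 1) = 80 x 60.
        std::printf("%d x %d\n", std::abs(x1 - x0) + 1, std::abs(y1 - y0) + 1);
        return 0;
    }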
diff --git a/src/core/NEON/kernels/NEScaleKernel.cpp b/src/core/NEON/kernels/NEScaleKernel.cpp
index 3d300ef26b..64f35290ba 100644
--- a/src/core/NEON/kernels/NEScaleKernel.cpp
+++ b/src/core/NEON/kernels/NEScaleKernel.cpp
@@ -45,7 +45,7 @@ namespace
{
Status validate_arguments(const ITensorInfo *input, const ITensorInfo *dx, const ITensorInfo *dy,
const ITensorInfo *offsets, ITensorInfo *output, InterpolationPolicy policy,
- BorderMode border_mode, SamplingPolicy sampling_policy)
+ BorderMode border_mode, PixelValue constant_border_value, SamplingPolicy sampling_policy, bool use_padding)
{
ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input);
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::S16, DataType::F16, DataType::F32, DataType::QASYMM8);
@@ -53,7 +53,8 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *dx, const
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
ARM_COMPUTE_RETURN_ERROR_ON(output == input);
ARM_COMPUTE_RETURN_ERROR_ON(sampling_policy != SamplingPolicy::CENTER && sampling_policy != SamplingPolicy::TOP_LEFT);
- ARM_COMPUTE_UNUSED(border_mode);
+ ARM_COMPUTE_RETURN_ERROR_ON(!use_padding && border_mode != BorderMode::CONSTANT);
+ ARM_COMPUTE_UNUSED(constant_border_value);
const DataLayout data_layout = input->data_layout();
ARM_COMPUTE_RETURN_ERROR_ON(output->dimension(get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH)) == 0);
@@ -121,40 +122,44 @@ std::pair<Status, Window> validate_and_configure_window_nchw(ITensorInfo *input,
std::pair<Status, Window> validate_and_configure_window_nhwc(ITensorInfo *input, ITensorInfo *output,
InterpolationPolicy policy, bool border_undefined,
- SamplingPolicy sampling_policy, BorderSize border_size)
+ SamplingPolicy sampling_policy, BorderSize border_size, bool use_padding)
{
bool window_changed{ false };
Window win{};
- const unsigned int num_elems_processed_per_iteration = (policy == InterpolationPolicy::NEAREST_NEIGHBOR) ? 16 / input->element_size() : 1;
+ const unsigned int num_elems_processed_per_iteration = (use_padding && policy == InterpolationPolicy::NEAREST_NEIGHBOR) ? 16 / input->element_size() : 1;
// Configure kernel window
win = calculate_max_window(*output, Steps(num_elems_processed_per_iteration));
- AccessWindowStatic input_access(input, 0, -border_size.top,
- ceil_to_multiple(input->tensor_shape()[0], num_elems_processed_per_iteration),
- input->tensor_shape()[1]);
- AccessWindowHorizontal output_access(output, 0, num_elems_processed_per_iteration);
-
- window_changed = update_window_and_padding(win, input_access, output_access);
- output->set_valid_region(calculate_valid_region_scale(*input, output->tensor_shape(),
- policy, sampling_policy, border_undefined));
+ if(use_padding)
+ {
+ AccessWindowStatic input_access(input, 0, -border_size.top, use_padding ? ceil_to_multiple(input->tensor_shape()[0], num_elems_processed_per_iteration) : num_elems_processed_per_iteration,
+ input->tensor_shape()[1]);
+ AccessWindowHorizontal output_access(output, 0, num_elems_processed_per_iteration);
+ window_changed = update_window_and_padding(win, input_access, output_access);
+ output->set_valid_region(calculate_valid_region_scale(*input, output->tensor_shape(), policy, sampling_policy, border_undefined));
+ }
Status err = (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{};
return std::make_pair(err, win);
}
std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *dx, ITensorInfo *dy, ITensorInfo *offsets, ITensorInfo *output,
- InterpolationPolicy policy, bool border_undefined, SamplingPolicy sampling_policy, BorderSize border_size)
+ InterpolationPolicy policy, bool border_undefined, SamplingPolicy sampling_policy, BorderSize border_size, bool use_padding)
{
std::pair<Status, Window> win_config;
switch(input->data_layout())
{
case DataLayout::NCHW:
+ if(!use_padding)
+ {
+ return std::make_pair(ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Padding required for NCHW"), Window{});
+ }
win_config = validate_and_configure_window_nchw(input, dx, dy, offsets, output, policy, border_undefined, sampling_policy, border_size);
break;
case DataLayout::NHWC:
- win_config = validate_and_configure_window_nhwc(input, output, policy, border_undefined, sampling_policy, border_size);
+ win_config = validate_and_configure_window_nhwc(input, output, policy, border_undefined, sampling_policy, border_size, use_padding);
break;
default:
win_config = std::make_pair(ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Unsupported data layout!"), Window{});
@@ -167,6 +172,12 @@ template <typename T>
inline void scale_nearest_nhwc_core(const ITensor *input, const ITensor *offsets, ITensor *output,
float hr, Window window, const Window &win_in, size_t stride_w, size_t stride_h, size_t stride_c)
{
+ const int window_step_x = 16 / sizeof(T);
+ const auto window_start_x = static_cast<int32_t>(window.x().start());
+ const auto window_end_x = static_cast<int32_t>(window.x().end());
+
+ window.set(Window::DimX, Window::Dimension(0, 1, 1));
+
Iterator in(input, win_in);
Iterator out(output, window);
@@ -174,18 +185,28 @@ inline void scale_nearest_nhwc_core(const ITensor *input, const ITensor *offsets
execute_window_loop(window, [&](const Coordinates & id)
{
- const auto offset = *reinterpret_cast<const int32_t *>(offsets->ptr_to_element(Coordinates(id.y(), id.z())));
- const int in_yi = (id.z() + 0.5f) * hr;
- const int offset_row = in_yi * stride_h + id.x() * stride_c;
- wrapper::vstore(reinterpret_cast<T *>(out.ptr()),
- wrapper::vloadq(reinterpret_cast<const T *>(in.ptr() + offset * offsets_stride + offset_row)));
+ const int32_t offset = *reinterpret_cast<const int32_t *>(offsets->ptr_to_element(Coordinates(id.y(), id.z())));
+ const int in_yi = (id.z() + 0.5f) * hr;
+ const int offset_row = in_yi * stride_h;
+ int32_t x = window_start_x;
+ for(; x < window_end_x - window_step_x; x += window_step_x)
+ {
+ wrapper::vstore(reinterpret_cast<T *>(out.ptr()) + x,
+ wrapper::vloadq(reinterpret_cast<const T *>(in.ptr() + offset * offsets_stride + offset_row + x * stride_c)));
+ }
+ for(; x < window_end_x; ++x)
+ {
+ *(reinterpret_cast<T *>(out.ptr()) + x) =
+ *(reinterpret_cast<const T *>(in.ptr() + offset * offsets_stride + offset_row + x * stride_c));
+ }
},
in, out);
}
-template <typename T>
+template <typename T, typename ConstType>
inline void scale_bilinear_nhwc_core(const ITensor *input, const ITensor *offsets, const ITensor *dx, const ITensor *dy, ITensor *output,
- float hr, float sampling_offset, Window window, const Window &win_in, size_t stride_w, size_t stride_h, size_t stride_c, BorderMode border_mode)
+ float hr, float sampling_offset, Window window, const Window &win_in, size_t stride_w, size_t stride_h,
+ size_t stride_c, BorderMode border_mode, PixelValue constant_border_value, bool use_padding)
{
Iterator in(input, win_in);
Iterator out(output, window);
@@ -196,7 +217,15 @@ inline void scale_bilinear_nhwc_core(const ITensor *input, const ITensor *offset
const int input_width = input->info()->dimension(1);
const int input_height = input->info()->dimension(2);
- const T *border_area = reinterpret_cast<T *>(input->buffer() + input->info()->offset_first_element_in_bytes() - stride_w);
+ T border_value;
+ if(use_padding)
+ {
+ border_value = *reinterpret_cast<T *>(input->buffer() + input->info()->offset_first_element_in_bytes() - stride_w);
+ }
+ else
+ {
+ border_value = static_cast<T>(constant_border_value.get<ConstType>());
+ }
auto is_valid = [](int x, int low_x, int high_x, int y, int low_y, int high_y)
{
@@ -224,10 +253,10 @@ inline void scale_bilinear_nhwc_core(const ITensor *input, const ITensor *offset
if(border_mode == BorderMode::CONSTANT)
{
- a00 = is_valid(offset, 0, input_width - 1, in_yi, 0, input_height - 1) ? *in_ptr : *border_area;
- a01 = is_valid(offset + 1, 0, input_width - 1, in_yi, 0, input_height - 1) ? *(in_ptr + stride_w_elems) : *border_area;
- a10 = is_valid(offset, 0, input_width - 1, in_yi + 1, 0, input_height - 1) ? *(in_ptr + stride_h_elems) : *border_area;
- a11 = is_valid(offset + 1, 0, input_width - 1, in_yi + 1, 0, input_height - 1) ? *(in_ptr + stride_h_elems + stride_w_elems) : *border_area;
+ a00 = is_valid(offset, 0, input_width - 1, in_yi, 0, input_height - 1) ? *in_ptr : border_value;
+ a01 = is_valid(offset + 1, 0, input_width - 1, in_yi, 0, input_height - 1) ? *(in_ptr + stride_w_elems) : border_value;
+ a10 = is_valid(offset, 0, input_width - 1, in_yi + 1, 0, input_height - 1) ? *(in_ptr + stride_h_elems) : border_value;
+ a11 = is_valid(offset + 1, 0, input_width - 1, in_yi + 1, 0, input_height - 1) ? *(in_ptr + stride_h_elems + stride_w_elems) : border_value;
}
else if(border_mode == BorderMode::REPLICATE)
{
@@ -279,7 +308,7 @@ inline void scale_bilinear_nhwc_core(const ITensor *input, const ITensor *offset
{
if(border_mode == BorderMode::CONSTANT)
{
- *reinterpret_cast<T *>(out.ptr()) = *border_area;
+ *reinterpret_cast<T *>(out.ptr()) = border_value;
}
else if(border_mode == BorderMode::REPLICATE)
{
@@ -294,7 +323,8 @@ inline void scale_bilinear_nhwc_core(const ITensor *input, const ITensor *offset
} // namespace
NEScaleKernel::NEScaleKernel()
- : _func(nullptr), _offsets(nullptr), _dx(nullptr), _dy(nullptr), _input(nullptr), _output(nullptr), _policy(), _border_size(1), _border_mode(), _sampling_offset(0)
+ : _func(nullptr), _offsets(nullptr), _dx(nullptr), _dy(nullptr), _input(nullptr), _output(nullptr), _policy(), _border_size(1), _border_mode(), _constant_border_value(0), _sampling_offset(0),
+ _use_padding(true)
{
}
@@ -304,31 +334,33 @@ BorderSize NEScaleKernel::border_size() const
}
void NEScaleKernel::configure(const ITensor *input, const ITensor *dx, const ITensor *dy, const ITensor *offsets,
- ITensor *output, InterpolationPolicy policy, BorderMode border_mode, SamplingPolicy sampling_policy)
+ ITensor *output, InterpolationPolicy policy, BorderMode border_mode, PixelValue constant_border_value, SamplingPolicy sampling_policy,
+ bool use_padding)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
-
// Perform validation step
ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(),
dx != nullptr ? dx->info() : nullptr,
dy != nullptr ? dy->info() : nullptr,
offsets != nullptr ? offsets->info() : nullptr,
output->info(),
- policy, border_mode, sampling_policy));
+ policy, border_mode, constant_border_value, sampling_policy, use_padding));
// Get data layout and width/height indices
const DataLayout data_layout = input->info()->data_layout();
const int idx_width = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
const int idx_height = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
- _input = input;
- _output = output;
- _offsets = offsets;
- _dx = dx;
- _dy = dy;
- _policy = policy;
- _border_size = BorderSize(1);
- _border_mode = border_mode;
+ _input = input;
+ _output = output;
+ _offsets = offsets;
+ _dx = dx;
+ _dy = dy;
+ _policy = policy;
+ _border_size = BorderSize(1);
+ _border_mode = border_mode;
+ _constant_border_value = constant_border_value;
+ _use_padding = use_padding;
if(sampling_policy == SamplingPolicy::CENTER)
{
@@ -342,7 +374,7 @@ void NEScaleKernel::configure(const ITensor *input, const ITensor *dx, const ITe
// Add constant border only on top in case of NHWC layout
if(data_layout == DataLayout::NHWC)
{
- _border_size = (border_mode == BorderMode::CONSTANT && policy == InterpolationPolicy::BILINEAR) ? BorderSize(1, 0, 0, 0) : BorderSize(0);
+ _border_size = (border_mode == BorderMode::CONSTANT && policy == InterpolationPolicy::BILINEAR && use_padding) ? BorderSize(1, 0, 0, 0) : BorderSize(0);
}
// Area interpolation behaves as Nearest Neighbour in case of up-sampling
@@ -379,7 +411,8 @@ void NEScaleKernel::configure(const ITensor *input, const ITensor *dx, const ITe
dy != nullptr ? dy->info() : nullptr,
offsets != nullptr ? offsets->info() : nullptr,
output->info(),
- policy, border_mode == BorderMode::UNDEFINED, sampling_policy, border_size());
+ policy, border_mode == BorderMode::UNDEFINED, sampling_policy, border_size(), use_padding);
+
ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
INEKernel::configure(win_config.second);
}
@@ -904,8 +937,8 @@ void NEScaleKernel::scale_nhwc(const Window &window)
}
else
{
- scale_bilinear_nhwc_core<uint8_t>(_input, _offsets, _dx, _dy, _output, hr, _sampling_offset,
- window, win_in, input_stride_w, input_stride_h, input_stride_c, _border_mode);
+ scale_bilinear_nhwc_core<uint8_t, uint8_t>(_input, _offsets, _dx, _dy, _output, hr, _sampling_offset,
+ window, win_in, input_stride_w, input_stride_h, input_stride_c, _border_mode, _constant_border_value, _use_padding);
}
break;
}
@@ -917,8 +950,8 @@ void NEScaleKernel::scale_nhwc(const Window &window)
}
else
{
- scale_bilinear_nhwc_core<int16_t>(_input, _offsets, _dx, _dy, _output, hr, _sampling_offset,
- window, win_in, input_stride_w, input_stride_h, input_stride_c, _border_mode);
+ scale_bilinear_nhwc_core<int16_t, int16_t>(_input, _offsets, _dx, _dy, _output, hr, _sampling_offset,
+ window, win_in, input_stride_w, input_stride_h, input_stride_c, _border_mode, _constant_border_value, _use_padding);
}
break;
}
@@ -932,8 +965,8 @@ void NEScaleKernel::scale_nhwc(const Window &window)
}
else
{
- scale_bilinear_nhwc_core<float16_t>(_input, _offsets, _dx, _dy, _output, hr, _sampling_offset,
- window, win_in, input_stride_w, input_stride_h, input_stride_c, _border_mode);
+ scale_bilinear_nhwc_core<float16_t, half>(_input, _offsets, _dx, _dy, _output, hr, _sampling_offset,
+ window, win_in, input_stride_w, input_stride_h, input_stride_c, _border_mode, _constant_border_value, _use_padding);
}
break;
}
@@ -946,8 +979,8 @@ void NEScaleKernel::scale_nhwc(const Window &window)
}
else
{
- scale_bilinear_nhwc_core<float>(_input, _offsets, _dx, _dy, _output, hr, _sampling_offset,
- window, win_in, input_stride_w, input_stride_h, input_stride_c, _border_mode);
+ scale_bilinear_nhwc_core<float, float>(_input, _offsets, _dx, _dy, _output, hr, _sampling_offset,
+ window, win_in, input_stride_w, input_stride_h, input_stride_c, _border_mode, _constant_border_value, _use_padding);
}
break;
}
@@ -959,7 +992,7 @@ void NEScaleKernel::scale_nhwc(const Window &window)
Status NEScaleKernel::validate(const ITensorInfo *input, const ITensorInfo *dx, const ITensorInfo *dy,
const ITensorInfo *offsets, ITensorInfo *output, InterpolationPolicy policy,
- BorderMode border_mode, SamplingPolicy sampling_policy)
+ BorderMode border_mode, PixelValue constant_border_value, SamplingPolicy sampling_policy, bool use_padding)
{
BorderSize border_size(1);
if(input->data_layout() == DataLayout::NHWC)
@@ -967,13 +1000,13 @@ Status NEScaleKernel::validate(const ITensorInfo *input, const ITensorInfo *dx,
border_size = (border_mode == BorderMode::CONSTANT && policy == InterpolationPolicy::BILINEAR) ? BorderSize(1, 0, 0, 0) : BorderSize(0);
}
- ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, dx, dy, offsets, output, policy, border_mode, sampling_policy));
+ ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, dx, dy, offsets, output, policy, border_mode, constant_border_value, sampling_policy, use_padding));
ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input->clone().get(),
dx != nullptr ? dx->clone().get() : nullptr,
dy != nullptr ? dy->clone().get() : nullptr,
offsets != nullptr ? offsets->clone().get() : nullptr,
output->clone().get(),
- policy, border_mode == BorderMode::UNDEFINED, sampling_policy, border_size)
+ policy, border_mode == BorderMode::UNDEFINED, sampling_policy, border_size, use_padding)
.first);
return Status{};
diff --git a/src/runtime/NEON/functions/NECropResize.cpp b/src/runtime/NEON/functions/NECropResize.cpp
new file mode 100644
index 0000000000..4360b50dfb
--- /dev/null
+++ b/src/runtime/NEON/functions/NECropResize.cpp
@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/NEON/NEScheduler.h"
+
+#include "arm_compute/runtime/NEON/functions/NECropResize.h"
+
+#include <cstddef>
+
+namespace arm_compute
+{
+NECropResize::NECropResize()
+ : _output(nullptr), _num_boxes(0), _method(), _extrapolation_value(0), _crop(), _scale()
+{
+}
+
+Status NECropResize::validate(const ITensorInfo *input, const ITensorInfo *boxes, const ITensorInfo *box_ind, const ITensorInfo *output,
+ Coordinates2D crop_size, InterpolationPolicy method, float extrapolation_value)
+{
+ ARM_COMPUTE_RETURN_ERROR_ON(crop_size.x <= 0 || crop_size.y <= 0);
+ ARM_COMPUTE_RETURN_ERROR_ON(method == InterpolationPolicy::AREA);
+ TensorInfo temp_info;
+ ARM_COMPUTE_RETURN_ON_ERROR(NECropKernel::validate(input->clone().get(), boxes->clone().get(), box_ind->clone().get(), &temp_info, boxes->tensor_shape()[1] - 1, extrapolation_value));
+ if(output->total_size() > 0)
+ {
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_NOT_IN(output, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(input, output);
+ TensorShape out_shape(input->tensor_shape()[0], crop_size.x, crop_size.y, boxes->tensor_shape()[1]);
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(output->tensor_shape(), out_shape);
+ }
+ return Status{};
+}
+
+void NECropResize::configure(const ITensor *input, const ITensor *boxes, const ITensor *box_ind, ITensor *output, Coordinates2D crop_size,
+ InterpolationPolicy method, float extrapolation_value)
+{
+ ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
+ ARM_COMPUTE_ERROR_THROW_ON(NECropResize::validate(input->info(), boxes->info(), box_ind->info(), output->info(), crop_size, method, extrapolation_value));
+
+ _num_boxes = boxes->info()->tensor_shape()[1];
+ TensorShape out_shape(input->info()->tensor_shape()[0], crop_size.x, crop_size.y);
+
+ _output = output;
+ _method = method;
+ _extrapolation_value = extrapolation_value;
+
+ // For each crop box:
+ // - A crop kernel is used to extract the initial cropped image as specified by boxes[i] from the 3D image input[box_ind[i]].
+ // - A tensor is required to hold this initial cropped image.
+ // - A scale function is used to resize the cropped image to the size specified by crop_size.
+ // - A tensor is required to hold the final scaled image before it is copied into the 4D output
+ // that will hold all final cropped and scaled 3D images.
+ _crop = arm_compute::support::cpp14::make_unique<NECropKernel[]>(_num_boxes);
+ _crop_results = arm_compute::support::cpp14::make_unique<Tensor[]>(_num_boxes);
+ _scale = arm_compute::support::cpp14::make_unique<NEScale[]>(_num_boxes);
+ _scaled_results = arm_compute::support::cpp14::make_unique<Tensor[]>(_num_boxes);
+
+ for(unsigned int i = 0; i < _num_boxes; ++i)
+ {
+ TensorInfo crop_result_info(1, DataType::F32);
+ crop_result_info.set_data_layout(DataLayout::NHWC);
+ _crop_results[i].allocator()->init(crop_result_info);
+
+ TensorInfo scaled_result_info(out_shape, 1, DataType::F32);
+ scaled_result_info.set_data_layout(DataLayout::NHWC);
+ _scaled_results[i].allocator()->init(scaled_result_info);
+
+ _crop[i].configure(input, boxes, box_ind, &_crop_results[i], i, _extrapolation_value);
+ }
+}
+
+void NECropResize::run()
+{
+ ARM_COMPUTE_ERROR_ON_MSG(_output == nullptr, "Unconfigured function");
+
+ for(unsigned int i = 0; i < _num_boxes; ++i)
+ {
+ // Size of the crop box in _boxes and thus the shape of _crop_results[i]
+ // may not be known until run-time and so the kernels cannot be configured until then.
+ _crop[i].configure_output_shape();
+ _crop_results[i].allocator()->allocate();
+ NEScheduler::get().schedule(&_crop[i], Window::DimZ);
+
+ // Scale the cropped image.
+ _scale[i].configure(&_crop_results[i], &_scaled_results[i], _method, BorderMode::CONSTANT, PixelValue(_extrapolation_value), SamplingPolicy::TOP_LEFT, false);
+ _scaled_results[i].allocator()->allocate();
+ _scale[i].run();
+
+ // Copy scaled image into output.
+ std::copy_n(_scaled_results[i].buffer(), _scaled_results[i].info()->total_size(), _output->ptr_to_element(Coordinates(0, 0, 0, i)));
+ }
+}
+} // namespace arm_compute
\ No newline at end of file
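For reference, a minimal usage sketch of the new runtime function (not part of the patch): the tensor shapes, names and the F32 input data type below are illustrative assumptions, chosen to satisfy the shape and layout constraints checked by NECropResize::validate() above.

#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/functions/NECropResize.h"
#include "arm_compute/runtime/Tensor.h"
#include "arm_compute/runtime/TensorAllocator.h"

using namespace arm_compute;

int main()
{
    // Shapes follow NECropResize::validate(): input is NHWC, boxes is F32 of shape
    // (4, num_boxes), box_ind is S32 of shape (num_boxes), and output is F32 NHWC of
    // shape (channels, crop_w, crop_h, num_boxes).
    Tensor input{}, boxes{}, box_ind{}, output{};
    input.allocator()->init(TensorInfo(TensorShape(15U, 30U, 30U, 10U), 1, DataType::F32));
    input.info()->set_data_layout(DataLayout::NHWC);
    boxes.allocator()->init(TensorInfo(TensorShape(4U, 20U), 1, DataType::F32));
    box_ind.allocator()->init(TensorInfo(TensorShape(20U), 1, DataType::S32));
    output.allocator()->init(TensorInfo(TensorShape(15U, 10U, 10U, 20U), 1, DataType::F32));
    output.info()->set_data_layout(DataLayout::NHWC);

    NECropResize crop_resize{};
    crop_resize.configure(&input, &boxes, &box_ind, &output, Coordinates2D{ 10, 10 },
                          InterpolationPolicy::BILINEAR, 0.0f /* extrapolation_value */);

    input.allocator()->allocate();
    boxes.allocator()->allocate();
    box_ind.allocator()->allocate();
    output.allocator()->allocate();

    // ... fill input, boxes (normalised box coordinates) and box_ind (batch indices)
    // before run(), since the crop kernels read the box values at run-time ...

    crop_resize.run();
    return 0;
}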
diff --git a/src/runtime/NEON/functions/NEScale.cpp b/src/runtime/NEON/functions/NEScale.cpp
index 483aa4c0b5..425ee6c4db 100644
--- a/src/runtime/NEON/functions/NEScale.cpp
+++ b/src/runtime/NEON/functions/NEScale.cpp
@@ -97,14 +97,17 @@ NEScale::NEScale() // NOLINT
_dx(),
_dy(),
_scale_kernel(),
- _border_handler()
+ _border_handler(),
+ _use_padding(true)
{
}
-void NEScale::configure(ITensor *input, ITensor *output, InterpolationPolicy policy, BorderMode border_mode, PixelValue constant_border_value, SamplingPolicy sampling_policy)
+void NEScale::configure(ITensor *input, ITensor *output, InterpolationPolicy policy, BorderMode border_mode, PixelValue constant_border_value, SamplingPolicy sampling_policy, bool use_padding)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
- ARM_COMPUTE_ERROR_THROW_ON(NEScale::validate(input->info(), output->info(), policy, border_mode, constant_border_value, sampling_policy));
+ ARM_COMPUTE_ERROR_THROW_ON(NEScale::validate(input->info(), output->info(), policy, border_mode, constant_border_value, sampling_policy, use_padding));
+
+ _use_padding = use_padding;
// Get data layout and width/height indices
const DataLayout data_layout = input->info()->data_layout();
@@ -134,7 +137,7 @@ void NEScale::configure(ITensor *input, ITensor *output, InterpolationPolicy pol
TensorInfo tensor_info_offsets(shape, Format::S32);
_offsets.allocator()->init(tensor_info_offsets);
- _scale_kernel.configure(input, nullptr, nullptr, &_offsets, output, policy, border_mode, sampling_policy);
+ _scale_kernel.configure(input, nullptr, nullptr, &_offsets, output, policy, border_mode, constant_border_value, sampling_policy, use_padding);
// Allocate once the configure methods have been called
_offsets.allocator()->allocate();
@@ -152,7 +155,7 @@ void NEScale::configure(ITensor *input, ITensor *output, InterpolationPolicy pol
_dx.allocator()->init(tensor_info_dxdy);
_dy.allocator()->init(tensor_info_dxdy);
- _scale_kernel.configure(input, &_dx, &_dy, &_offsets, output, policy, border_mode, sampling_policy);
+ _scale_kernel.configure(input, &_dx, &_dy, &_offsets, output, policy, border_mode, constant_border_value, sampling_policy, use_padding);
// Allocate once the configure methods have been called
_offsets.allocator()->allocate();
@@ -165,18 +168,20 @@ void NEScale::configure(ITensor *input, ITensor *output, InterpolationPolicy pol
}
case InterpolationPolicy::AREA:
{
- _scale_kernel.configure(input, nullptr, nullptr, nullptr, output, policy, border_mode);
+ _scale_kernel.configure(input, nullptr, nullptr, nullptr, output, policy, border_mode, constant_border_value);
break;
}
default:
ARM_COMPUTE_ERROR("Unsupported interpolation mode");
}
-
- _border_handler.configure(input, _scale_kernel.border_size(), border_mode, constant_border_value);
+ if(use_padding)
+ {
+ _border_handler.configure(input, _scale_kernel.border_size(), border_mode, constant_border_value);
+ }
}
Status NEScale::validate(const ITensorInfo *input, const ITensorInfo *output, InterpolationPolicy policy,
- BorderMode border_mode, PixelValue constant_border_value, SamplingPolicy sampling_policy)
+ BorderMode border_mode, PixelValue constant_border_value, SamplingPolicy sampling_policy, bool use_padding)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
ARM_COMPUTE_RETURN_ERROR_ON(sampling_policy != SamplingPolicy::CENTER && sampling_policy != SamplingPolicy::TOP_LEFT);
@@ -213,12 +218,15 @@ Status NEScale::validate(const ITensorInfo *input, const ITensorInfo *output, In
}
ARM_COMPUTE_RETURN_ON_ERROR(NEScaleKernel::validate(input->clone().get(), dx, dy, offsets, output->clone().get(),
- policy, border_mode, sampling_policy));
+ policy, border_mode, constant_border_value, sampling_policy, use_padding));
return Status{};
}
void NEScale::run()
{
- NEScheduler::get().schedule(&_border_handler, Window::DimZ);
+ if(_use_padding)
+ {
+ NEScheduler::get().schedule(&_border_handler, Window::DimZ);
+ }
NEScheduler::get().schedule(&_scale_kernel, Window::DimY);
}
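A hedged sketch of the padding-free NEScale path introduced here, mirroring how NECropResize::run() drives it above; the tensors and the helper name are assumptions, with 'cropped' and 'scaled' taken to be already-initialised NHWC F32 tensors.

#include "arm_compute/core/Error.h"
#include "arm_compute/core/PixelValue.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/functions/NEScale.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

void resize_without_padding(Tensor &cropped, Tensor &scaled)
{
    // Optionally check the configuration first via the extended validate().
    Status status = NEScale::validate(cropped.info(), scaled.info(), InterpolationPolicy::BILINEAR,
                                      BorderMode::CONSTANT, PixelValue(0.0f),
                                      SamplingPolicy::TOP_LEFT, /* use_padding */ false);
    ARM_COMPUTE_ERROR_THROW_ON(status);

    NEScale scale{};
    // With use_padding == false the border handler is neither configured nor scheduled,
    // and the NHWC CONSTANT + BILINEAR case keeps a zero border size.
    scale.configure(&cropped, &scaled, InterpolationPolicy::BILINEAR, BorderMode::CONSTANT,
                    PixelValue(0.0f), SamplingPolicy::TOP_LEFT, /* use_padding */ false);
    scale.run();
}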
diff --git a/tests/datasets/CropResizeDataset.h b/tests/datasets/CropResizeDataset.h
new file mode 100644
index 0000000000..8cee094fc8
--- /dev/null
+++ b/tests/datasets/CropResizeDataset.h
@@ -0,0 +1,141 @@
+/*
+ * Copyright (c) 2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_TEST_CROP_RESIZE_DATASET
+#define ARM_COMPUTE_TEST_CROP_RESIZE_DATASET
+
+#include "utils/TypePrinter.h"
+
+#include "arm_compute/core/Types.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace datasets
+{
+class CropResizeDataset
+{
+public:
+ using type = std::tuple<TensorShape, TensorShape, Coordinates2D, InterpolationPolicy, float>;
+
+ struct iterator
+ {
+ iterator(std::vector<TensorShape>::const_iterator src_shapes_it,
+ std::vector<TensorShape>::const_iterator boxes_shapes_it,
+ std::vector<Coordinates2D>::const_iterator crop_size_values_it,
+ std::vector<InterpolationPolicy>::const_iterator method_values_it,
+ std::vector<float>::const_iterator extrapolation_values_it)
+ : _src_shapes_it{ std::move(src_shapes_it) },
+ _boxes_shapes_it{ std::move(boxes_shapes_it) },
+ _crop_size_values_it{ std::move(crop_size_values_it) },
+ _method_values_it{ std::move(method_values_it) },
+ _extrapolation_values_it{ std::move(extrapolation_values_it) }
+ {
+ }
+
+ std::string description() const
+ {
+ std::stringstream description;
+ description << "Src_Shape=" << *_src_shapes_it << ":";
+ description << "Boxes_Shape=" << *_boxes_shapes_it << ":";
+ description << "Crop_Size=(" << (*_crop_size_values_it).x << "," << (*_crop_size_values_it).y << "):";
+ description << "Method=" << *_method_values_it << ":";
+ description << "Extrapolation_value=" << *_extrapolation_values_it << ":";
+ return description.str();
+ }
+
+ CropResizeDataset::type operator*() const
+ {
+ return std::make_tuple(*_src_shapes_it, *_boxes_shapes_it, *_crop_size_values_it, *_method_values_it, *_extrapolation_values_it);
+ }
+
+ iterator &operator++()
+ {
+ ++_src_shapes_it;
+ ++_boxes_shapes_it;
+ ++_crop_size_values_it;
+ ++_method_values_it;
+ ++_extrapolation_values_it;
+ return *this;
+ }
+
+ private:
+ std::vector<TensorShape>::const_iterator _src_shapes_it;
+ std::vector<TensorShape>::const_iterator _boxes_shapes_it;
+ std::vector<Coordinates2D>::const_iterator _crop_size_values_it;
+ std::vector<InterpolationPolicy>::const_iterator _method_values_it;
+ std::vector<float>::const_iterator _extrapolation_values_it;
+ };
+
+ iterator begin() const
+ {
+ return iterator(_src_shapes.begin(), _boxes_shapes.begin(), _crop_size_values.begin(), _method_values.begin(), _extrapolation_values.begin());
+ }
+
+ int size() const
+ {
+ return std::min(_src_shapes.size(), std::min(_boxes_shapes.size(), std::min(_crop_size_values.size(), std::min(_method_values.size(), _extrapolation_values.size()))));
+ }
+
+ void add_config(TensorShape src_shape, TensorShape boxes_shape, Coordinates2D crop_size, InterpolationPolicy method, float extrapolation_value)
+ {
+ _src_shapes.emplace_back(std::move(src_shape));
+ _boxes_shapes.emplace_back(std::move(boxes_shape));
+ _crop_size_values.emplace_back(std::move(crop_size));
+ _method_values.emplace_back(std::move(method));
+ _extrapolation_values.emplace_back(std::move(extrapolation_value));
+ }
+
+protected:
+ CropResizeDataset() = default;
+ CropResizeDataset(CropResizeDataset &&) = default;
+
+private:
+ std::vector<TensorShape> _src_shapes{};
+ std::vector<TensorShape> _boxes_shapes{};
+ std::vector<Coordinates2D> _crop_size_values{};
+ std::vector<InterpolationPolicy> _method_values{};
+ std::vector<float> _extrapolation_values{};
+};
+
+class SmallCropResizeDataset final : public CropResizeDataset
+{
+public:
+ SmallCropResizeDataset()
+ {
+ add_config(TensorShape(1U, 5U, 5U), TensorShape(4, 5), Coordinates2D{ 2, 2 }, InterpolationPolicy::BILINEAR, 100);
+ add_config(TensorShape(3U, 5U, 5U), TensorShape(4, 5), Coordinates2D{ 2, 2 }, InterpolationPolicy::BILINEAR, 100);
+ add_config(TensorShape(1U, 5U, 5U), TensorShape(4, 5), Coordinates2D{ 10, 10 }, InterpolationPolicy::BILINEAR, 100);
+ add_config(TensorShape(15U, 30U, 30U, 10U), TensorShape(4, 20), Coordinates2D{ 10, 10 }, InterpolationPolicy::BILINEAR, 100);
+
+ add_config(TensorShape(1U, 5U, 5U), TensorShape(4, 5), Coordinates2D{ 2, 2 }, InterpolationPolicy::NEAREST_NEIGHBOR, 100);
+ add_config(TensorShape(3U, 5U, 5U), TensorShape(4, 5), Coordinates2D{ 2, 2 }, InterpolationPolicy::NEAREST_NEIGHBOR, 100);
+ add_config(TensorShape(1U, 5U, 5U), TensorShape(4, 5), Coordinates2D{ 10, 10 }, InterpolationPolicy::NEAREST_NEIGHBOR, 100);
+ add_config(TensorShape(15U, 30U, 30U, 10U), TensorShape(4, 20), Coordinates2D{ 10, 10 }, InterpolationPolicy::NEAREST_NEIGHBOR, 100);
+ }
+};
+} // namespace datasets
+} // namespace test
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_TEST_CROP_RESIZE_DATASET */
\ No newline at end of file
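Further configurations can be added by extending CropResizeDataset in the same way as SmallCropResizeDataset above; a hypothetical example (not part of the patch, shapes are illustrative) that would live alongside it inside the datasets namespace:

class LargeCropResizeDataset final : public CropResizeDataset
{
public:
    LargeCropResizeDataset()
    {
        // (src_shape, boxes_shape, crop_size, method, extrapolation_value)
        add_config(TensorShape(32U, 113U, 113U, 8U), TensorShape(4, 16), Coordinates2D{ 64, 64 }, InterpolationPolicy::BILINEAR, 0);
        add_config(TensorShape(32U, 113U, 113U, 8U), TensorShape(4, 16), Coordinates2D{ 64, 64 }, InterpolationPolicy::NEAREST_NEIGHBOR, 0);
    }
};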
diff --git a/tests/validation/NEON/CropResize.cpp b/tests/validation/NEON/CropResize.cpp
new file mode 100644
index 0000000000..1feed3d9d2
--- /dev/null
+++ b/tests/validation/NEON/CropResize.cpp
@@ -0,0 +1,184 @@
+/*
+ * Copyright (c) 2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/NEON/functions/NECropResize.h"
+#include "arm_compute/runtime/Tensor.h"
+#include "arm_compute/runtime/TensorAllocator.h"
+
+#include "tests/NEON/Accessor.h"
+#include "tests/datasets/CropResizeDataset.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/fixtures/CropResizeFixture.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+TEST_SUITE(NEON)
+TEST_SUITE(CropResize)
+
+RelativeTolerance<float> tolerance_fp32(0.001f);
+
+template <typename T>
+using NECropResizeFixture = CropResizeFixture<Tensor, Accessor, NECropResize, T>;
+
+// *INDENT-OFF*
+// clang-format off
+DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(
+ framework::dataset::make("InputInfo", { TensorInfo(TensorShape(15U, 30U, 40U, 10U), 1, DataType::S32),
+ TensorInfo(TensorShape(15U, 30U, 40U, 10U), 1, DataType::U8), // Invalid input data type.
+ TensorInfo(TensorShape(15U, 30U, 40U, 10U), 1, DataType::S32), // Invalid box_ind shape.
+ TensorInfo(TensorShape(15U, 30U, 40U, 10U), 1, DataType::S32), // Invalid output shape.
+ TensorInfo(TensorShape(15U, 30U, 40U, 10U), 1, DataType::S32), // Invalid output data type.
+ TensorInfo(TensorShape(15U, 30U, 40U, 10U), 1, DataType::S32), // Invalid output shape.
+ TensorInfo(TensorShape(15U, 30U, 40U, 10U), 1, DataType::S32), // Invalid boxes shape.
+ }),
+ framework::dataset::make("BoxesInfo",{ TensorInfo(TensorShape(4, 20), 1, DataType::F32),
+ TensorInfo(TensorShape(4, 20), 1, DataType::F32),
+ TensorInfo(TensorShape(4, 20), 1, DataType::F32),
+ TensorInfo(TensorShape(4, 20), 1, DataType::F32),
+ TensorInfo(TensorShape(4, 20), 1, DataType::F32),
+ TensorInfo(TensorShape(4, 20), 1, DataType::F32),
+ TensorInfo(TensorShape(3, 20), 1, DataType::F32),
+ })),
+ framework::dataset::make("BoxIndInfo",{ TensorInfo(TensorShape(20), 1, DataType::S32),
+ TensorInfo(TensorShape(20), 1, DataType::S32),
+ TensorInfo(TensorShape(10), 1, DataType::S32),
+ TensorInfo(TensorShape(20), 1, DataType::S32),
+ TensorInfo(TensorShape(20), 1, DataType::S32),
+ TensorInfo(TensorShape(20), 1, DataType::S32),
+ TensorInfo(TensorShape(20), 1, DataType::S32),
+ })),
+ framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(15U, 5, 5, 20U), 1, DataType::F32),
+ TensorInfo(TensorShape(15U, 5, 5, 20U), 1, DataType::F32),
+ TensorInfo(TensorShape(15U, 5, 5, 20U), 1, DataType::F32),
+ TensorInfo(TensorShape(15U, 5, 5, 10U), 1, DataType::F32),
+ TensorInfo(TensorShape(15U, 5, 5, 20U), 1, DataType::S32),
+ TensorInfo(TensorShape(5U, 5, 5, 20U), 1, DataType::F32),
+ TensorInfo(TensorShape(15U, 5, 5, 20U), 1, DataType::F32),
+ })),
+ framework::dataset::make("Expected", { true, false, false, false, false, false, false})),
+ input, boxes, box_ind, output, expected)
+{
+ ARM_COMPUTE_EXPECT(bool(NECropResize::validate(&input.clone()->set_data_layout(DataLayout::NHWC).set_is_resizable(false),
+ &boxes.clone()->set_is_resizable(false),
+ &box_ind.clone()->set_is_resizable(false),
+ &output.clone()->set_data_layout(DataLayout::NHWC).set_is_resizable(false),
+ Coordinates2D{ 5, 5 }, InterpolationPolicy::BILINEAR, 100)) == expected, framework::LogLevel::ERRORS);
+}
+// clang-format on
+// *INDENT-ON*
+
+TEST_SUITE(Float)
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+TEST_SUITE(F16)
+FIXTURE_DATA_TEST_CASE(RunSmall,
+ NECropResizeFixture<half>,
+ framework::DatasetMode::PRECOMMIT,
+ combine(datasets::SmallCropResizeDataset(),
+ combine(framework::dataset::make("IsOutOfBounds", { true, false }),
+ framework::dataset::make("DataType", DataType::F16))))
+{
+ // Validate output
+ validate(Accessor(_target), _reference, tolerance_fp32, 0.01);
+}
+TEST_SUITE_END() // F16
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
+
+TEST_SUITE(F32)
+FIXTURE_DATA_TEST_CASE(RunSmall,
+ NECropResizeFixture<float>,
+ framework::DatasetMode::PRECOMMIT,
+ combine(datasets::SmallCropResizeDataset(),
+ combine(framework::dataset::make("IsOutOfBounds", { true, false }),
+ framework::dataset::make("DataType", DataType::F32))))
+{
+ // Validate output
+ validate(Accessor(_target), _reference, tolerance_fp32, 0.01);
+}
+TEST_SUITE_END() // F32
+TEST_SUITE_END() // Float
+
+TEST_SUITE(U16)
+FIXTURE_DATA_TEST_CASE(RunSmall,
+ NECropResizeFixture<uint16_t>,
+ framework::DatasetMode::PRECOMMIT,
+ combine(datasets::SmallCropResizeDataset(),
+ combine(framework::dataset::make("IsOutOfBounds", { true, false }),
+ framework::dataset::make("DataType", DataType::U16))))
+{
+ // Validate output
+ validate(Accessor(_target), _reference, tolerance_fp32, 0.01);
+}
+TEST_SUITE_END() // U16
+
+TEST_SUITE(S16)
+FIXTURE_DATA_TEST_CASE(RunSmall,
+ NECropResizeFixture<int16_t>,
+ framework::DatasetMode::PRECOMMIT,
+ combine(datasets::SmallCropResizeDataset(),
+ combine(framework::dataset::make("IsOutOfBounds", { true, false }),
+ framework::dataset::make("DataType", DataType::S16))))
+{
+ // Validate output
+ validate(Accessor(_target), _reference, tolerance_fp32, 0.01);
+}
+TEST_SUITE_END() // S16
+
+TEST_SUITE(U32)
+FIXTURE_DATA_TEST_CASE(RunSmall,
+ NECropResizeFixture<uint32_t>,
+ framework::DatasetMode::PRECOMMIT,
+ combine(datasets::SmallCropResizeDataset(),
+ combine(framework::dataset::make("IsOutOfBounds", { true, false }),
+ framework::dataset::make("DataType", DataType::U32))))
+{
+ // Validate output
+ validate(Accessor(_target), _reference, tolerance_fp32, 0.01);
+}
+TEST_SUITE_END() // U32
+
+TEST_SUITE(S32)
+FIXTURE_DATA_TEST_CASE(RunSmall,
+ NECropResizeFixture<int32_t>,
+ framework::DatasetMode::PRECOMMIT,
+ combine(datasets::SmallCropResizeDataset(),
+ combine(framework::dataset::make("IsOutOfBounds", { true, false }),
+ framework::dataset::make("DataType", DataType::S32))))
+{
+ // Validate output
+ validate(Accessor(_target), _reference, tolerance_fp32, 0.01);
+}
+TEST_SUITE_END() // S32
+
+TEST_SUITE_END() // CropResize
+TEST_SUITE_END() // NEON
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/fixtures/CropResizeFixture.h b/tests/validation/fixtures/CropResizeFixture.h
new file mode 100644
index 0000000000..d83c4113f5
--- /dev/null
+++ b/tests/validation/fixtures/CropResizeFixture.h
@@ -0,0 +1,139 @@
+/*
+ * Copyright (c) 2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_TEST_CROP_RESIZE_FIXTURE
+#define ARM_COMPUTE_TEST_CROP_RESIZE_FIXTURE
+
+#include "arm_compute/core/TensorShape.h"
+#include "arm_compute/core/Types.h"
+
+#include "tests/AssetsLibrary.h"
+#include "tests/Globals.h"
+#include "tests/IAccessor.h"
+#include "tests/RawLutAccessor.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Fixture.h"
+#include "tests/validation/Helpers.h"
+#include "tests/validation/reference/CropResize.h"
+#include "tests/validation/reference/Permute.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class CropResizeFixture : public framework::Fixture
+{
+public:
+ template <typename...>
+ void setup(TensorShape src_shape, TensorShape boxes_shape, Coordinates2D crop_size, InterpolationPolicy method,
+ float extrapolation_value, bool is_outside_bounds, DataType data_type)
+ {
+ _target = compute_target(src_shape, boxes_shape, crop_size, method, extrapolation_value, is_outside_bounds, data_type);
+ _reference = compute_reference(src_shape, boxes_shape, crop_size, method, extrapolation_value, is_outside_bounds, data_type);
+ }
+
+protected:
+ template <typename U>
+ void fill(U &&tensor, int i)
+ {
+ library->fill_tensor_uniform(tensor, i);
+ }
+
+ template <typename U, typename V>
+ void fill(U &&tensor, int i, V min, V max)
+ {
+ library->fill_tensor_uniform(tensor, i, min, max);
+ }
+
+ TensorType compute_target(const TensorShape &src_shape, const TensorShape &boxes_shape, const Coordinates2D &crop_size, InterpolationPolicy method,
+ float extrapolation_value, bool is_outside_bounds, DataType data_type)
+ {
+ TensorShape dst_shape(src_shape[0], crop_size.x, crop_size.y, boxes_shape[1]);
+
+ // Create tensors
+ TensorType src = create_tensor<TensorType>(src_shape, data_type, 1, QuantizationInfo(), DataLayout::NHWC);
+ TensorType boxes = create_tensor<TensorType>(boxes_shape, DataType::F32);
+ TensorType boxes_ind = create_tensor<TensorType>(TensorShape(boxes_shape[1]), DataType::S32);
+ TensorType dst = create_tensor<TensorType>(dst_shape, DataType::F32, 1, QuantizationInfo(), DataLayout::NHWC);
+
+ // Create and configure function
+ FunctionType crop;
+ crop.configure(&src, &boxes, &boxes_ind, &dst, crop_size, method, extrapolation_value);
+
+ ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
+ ARM_COMPUTE_EXPECT(boxes.info()->is_resizable(), framework::LogLevel::ERRORS);
+ ARM_COMPUTE_EXPECT(boxes_ind.info()->is_resizable(), framework::LogLevel::ERRORS);
+ ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+ // Allocate tensors
+ src.allocator()->allocate();
+ boxes.allocator()->allocate();
+ boxes_ind.allocator()->allocate();
+ dst.allocator()->allocate();
+
+ ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS);
+ ARM_COMPUTE_EXPECT(!boxes.info()->is_resizable(), framework::LogLevel::ERRORS);
+ ARM_COMPUTE_EXPECT(!boxes_ind.info()->is_resizable(), framework::LogLevel::ERRORS);
+ ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+ // Fill tensors
+ fill(AccessorType(src), 0);
+ fill(AccessorType(boxes), 1, is_outside_bounds ? 0.0f - out_of_bounds_reach : 0.0f, is_outside_bounds ? 1.0f + out_of_bounds_reach : 1.0f);
+ fill(AccessorType(boxes_ind), 2, 0, static_cast<int32_t>(src_shape[3] - 1));
+
+ // Compute function
+ crop.run();
+ return dst;
+ }
+
+ SimpleTensor<float> compute_reference(const TensorShape &src_shape, const TensorShape &boxes_shape, const Coordinates2D &crop_size, InterpolationPolicy method,
+ float extrapolation_value, bool is_outside_bounds, DataType data_type)
+ {
+ // Create reference
+ SimpleTensor<T> src{ src_shape, data_type, 1, QuantizationInfo(), DataLayout::NHWC };
+ SimpleTensor<float> boxes{ boxes_shape, DataType::F32 };
+ SimpleTensor<int32_t> boxes_ind{ TensorShape(boxes_shape[1]), DataType::S32 };
+
+ // Fill reference
+ fill(src, 0);
+ fill(boxes, 1, is_outside_bounds ? 0.0f - out_of_bounds_reach : 0.0f, is_outside_bounds ? 1.0f + out_of_bounds_reach : 1.0f);
+ fill(boxes_ind, 2, 0, static_cast<int32_t>(src.shape()[3] - 1));
+
+ SimpleTensor<float> output = reference::crop_and_resize(src, boxes, boxes_ind, crop_size, method, extrapolation_value);
+
+ SimpleTensor<float> permuted = reference::permute(output, PermutationVector(1, 2U, 0U));
+ return permuted;
+ }
+
+ constexpr static float out_of_bounds_reach = 2.0f;
+
+ TensorType _target{};
+ SimpleTensor<float> _reference{};
+};
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_TEST_CROP_RESIZE_FIXTURE */
diff --git a/tests/validation/reference/CropResize.cpp b/tests/validation/reference/CropResize.cpp
new file mode 100644
index 0000000000..8cfce97eec
--- /dev/null
+++ b/tests/validation/reference/CropResize.cpp
@@ -0,0 +1,199 @@
+/*
+ * Copyright (c) 2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "CropResize.h"
+#include "Utils.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+namespace
+{
+SimpleTensor<float> scale_image(const SimpleTensor<float> &in, const TensorShape &out_shape, InterpolationPolicy policy, float extrapolation_value)
+{
+ ARM_COMPUTE_ERROR_ON(in.data_layout() != DataLayout::NHWC);
+
+ SimpleTensor<float> out{ out_shape, DataType::F32, 1, QuantizationInfo(), DataLayout::NHWC };
+ // Compute the ratio between source width/height and destination width/height
+ const auto wr = static_cast<float>(in.shape()[1]) / static_cast<float>(out_shape[1]);
+ const auto hr = static_cast<float>(in.shape()[2]) / static_cast<float>(out_shape[2]);
+
+ const auto width = static_cast<int>(in.shape().y());
+ const auto height = static_cast<int>(in.shape().z());
+
+ Window win;
+ win.use_tensor_dimensions(out_shape);
+ execute_window_loop(win, [&](const Coordinates & out_id)
+ {
+ Coordinates in_id(out_id);
+ int idw = in_id.y();
+ int idh = in_id.z();
+
+ switch(policy)
+ {
+ case InterpolationPolicy::NEAREST_NEIGHBOR:
+ {
+ // Calculating the source coordinates without the -0.5f offset is equivalent to rounding x_src/y_src
+ float x_src = (idw + 0.5f) * wr;
+ float y_src = (idh + 0.5f) * hr;
+ in_id.set(1, x_src);
+ in_id.set(2, y_src);
+
+ // If coordinates in range of tensor's width or height
+ if(is_valid_pixel_index(x_src, y_src, width, height, 0))
+ {
+ *reinterpret_cast<float *>(out(out_id)) = tensor_elem_at(in, in_id, BorderMode::CONSTANT, extrapolation_value);
+ }
+ else
+ {
+ *reinterpret_cast<float *>(out(out_id)) = extrapolation_value;
+ }
+ break;
+ }
+ case InterpolationPolicy::BILINEAR:
+ {
+ float x_src = idw * wr;
+ float y_src = idh * hr;
+ in_id.set(1, std::floor(x_src));
+ in_id.set(2, std::floor(y_src));
+ if(is_valid_pixel_index(x_src, y_src, width, height, 0))
+ {
+ const int id_w = in_id[1];
+ const int id_h = in_id[2];
+
+ const float dx = x_src - id_w;
+ const float dy = y_src - id_h;
+ const float dx_1 = 1.0f - dx;
+ const float dy_1 = 1.0f - dy;
+
+ in_id.set(1, id_w);
+ in_id.set(2, id_h);
+ const float tl = tensor_elem_at(in, in_id, BorderMode::CONSTANT, extrapolation_value);
+ in_id.set(1, id_w + 1);
+ in_id.set(2, id_h);
+ const float tr = tensor_elem_at(in, in_id, BorderMode::CONSTANT, extrapolation_value);
+ in_id.set(1, id_w);
+ in_id.set(2, id_h + 1);
+ const float bl = tensor_elem_at(in, in_id, BorderMode::CONSTANT, extrapolation_value);
+ in_id.set(1, id_w + 1);
+ in_id.set(2, id_h + 1);
+ const float br = tensor_elem_at(in, in_id, BorderMode::CONSTANT, extrapolation_value);
+
+ *reinterpret_cast<float *>(out(out_id)) = tl * (dx_1 * dy_1) + tr * (dx * dy_1) + bl * (dx_1 * dy) + br * (dx * dy);
+ }
+ else
+ {
+ *reinterpret_cast<float *>(out(out_id)) = extrapolation_value;
+ }
+ break;
+ }
+ default:
+ ARM_COMPUTE_ERROR("Unsupported interpolation mode");
+ }
+ });
+
+ return out;
+}
+
+template <typename T>
+SimpleTensor<float> crop_image(const SimpleTensor<T> &src, Coordinates start, Coordinates end, int32_t batch_index, float extrapolation_value)
+{
+ TensorShape out_shape(src.shape()[0], abs(end[0] - start[0]) + 1, abs(end[1] - start[1]) + 1);
+
+ SimpleTensor<float> out{ out_shape, DataType::F32, 1, QuantizationInfo(), DataLayout::NHWC };
+
+ Window win;
+ win.use_tensor_dimensions(out_shape);
+ execute_window_loop(win, [&](const Coordinates & id)
+ {
+ bool out_of_bounds = false;
+ Coordinates offset(id[0], 0, 0, batch_index);
+ for(uint32_t i = 1; i < 3; ++i)
+ {
+ offset.set(i, end[i - 1] < start[i - 1] ? start[i - 1] - id[i] : start[i - 1] + id[i]);
+ if(offset[i] < 0 || static_cast<uint32_t>(offset[i]) > src.shape()[i] - 1)
+ {
+ out_of_bounds = true;
+ break;
+ }
+ }
+ if(!out_of_bounds)
+ {
+ *reinterpret_cast<float *>(out(id)) = static_cast<float>(*reinterpret_cast<const T *>(src(offset)));
+ }
+ else
+ {
+ *reinterpret_cast<float *>(out(id)) = extrapolation_value;
+ }
+ });
+ return out;
+}
+
+} // namespace
+
+template <typename T>
+SimpleTensor<float> crop_and_resize(const SimpleTensor<T> &src, const SimpleTensor<float> &boxes, SimpleTensor<int32_t> box_ind,
+ Coordinates2D crop_size, InterpolationPolicy method, float extrapolation_value)
+{
+ ARM_COMPUTE_ERROR_ON(src.shape().num_dimensions() > 4);
+ ARM_COMPUTE_ERROR_ON(src.data_layout() != DataLayout::NHWC);
+
+ const TensorShape out_shape(src.shape()[0], crop_size.x, crop_size.y, boxes.shape()[1]);
+ SimpleTensor<float> out{ out_shape, DataType::F32, 1, QuantizationInfo(), DataLayout::NHWC };
+
+ const TensorShape scaled_image_shape(src.shape()[0], crop_size.x, crop_size.y);
+
+ for(uint32_t i = 0; i < boxes.shape()[1]; ++i)
+ {
+ Coordinates start = Coordinates(std::floor((*reinterpret_cast<const float *>(boxes(Coordinates(1, i)))) * (src.shape()[1] - 1) + 0.5f),
+ std::floor((*reinterpret_cast<const float *>(boxes(Coordinates(0, i)))) * (src.shape()[2] - 1) + 0.5f));
+ Coordinates end = Coordinates(std::floor((*reinterpret_cast<const float *>(boxes(Coordinates(3, i)))) * (src.shape()[1] - 1) + 0.5f),
+ std::floor((*reinterpret_cast<const float *>(boxes(Coordinates(2, i)))) * (src.shape()[2] - 1) + 0.5f));
+ SimpleTensor<float> cropped = crop_image(src, start, end, box_ind[i], extrapolation_value);
+ SimpleTensor<float> scaled = scale_image(cropped, scaled_image_shape, method, extrapolation_value);
+ std::copy_n(reinterpret_cast<float *>(scaled.data()), scaled.num_elements(), reinterpret_cast<float *>(out(Coordinates(0, 0, 0, i))));
+ }
+ return out;
+}
+
+template SimpleTensor<float> crop_and_resize(const SimpleTensor<float> &src, const SimpleTensor<float> &boxes, SimpleTensor<int32_t> box_ind,
+ Coordinates2D crop_size, InterpolationPolicy method, float extrapolation_value);
+template SimpleTensor<float> crop_and_resize(const SimpleTensor<uint16_t> &src, const SimpleTensor<float> &boxes, SimpleTensor<int32_t> box_ind,
+ Coordinates2D crop_size, InterpolationPolicy method, float extrapolation_value);
+template SimpleTensor<float> crop_and_resize(const SimpleTensor<uint32_t> &src, const SimpleTensor<float> &boxes, SimpleTensor<int32_t> box_ind,
+ Coordinates2D crop_size, InterpolationPolicy method, float extrapolation_value);
+template SimpleTensor<float> crop_and_resize(const SimpleTensor<int16_t> &src, const SimpleTensor<float> &boxes, SimpleTensor<int32_t> box_ind,
+ Coordinates2D crop_size, InterpolationPolicy method, float extrapolation_value);
+template SimpleTensor<float> crop_and_resize(const SimpleTensor<int32_t> &src, const SimpleTensor<float> &boxes, SimpleTensor<int32_t> box_ind,
+ Coordinates2D crop_size, InterpolationPolicy method, float extrapolation_value);
+template SimpleTensor<float> crop_and_resize(const SimpleTensor<half> &src, const SimpleTensor<float> &boxes, SimpleTensor<int32_t> box_ind,
+ Coordinates2D crop_size, InterpolationPolicy method, float extrapolation_value);
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
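The start/end coordinates computed in crop_and_resize() above round normalised box values to the nearest pixel index; a small worked sketch of that mapping (the helper name and the example values are illustrative, not part of the patch):

#include <cmath>
#include <cstdint>

// Maps a normalised box coordinate b in [0, 1] along a dimension of extent N
// to the pixel index used for the crop window: floor(b * (N - 1) + 0.5).
int box_to_pixel(float b, uint32_t extent)
{
    return static_cast<int>(std::floor(b * (extent - 1) + 0.5f));
}

// e.g. for a 5-pixel-wide image, b = 0.6 gives floor(0.6 * 4 + 0.5) = 2.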
diff --git a/tests/validation/reference/CropResize.h b/tests/validation/reference/CropResize.h
new file mode 100644
index 0000000000..517c24bd32
--- /dev/null
+++ b/tests/validation/reference/CropResize.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_TEST_CROP_RESIZE_H__
+#define __ARM_COMPUTE_TEST_CROP_RESIZE_H__
+
+#include "tests/SimpleTensor.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+template <typename T>
+SimpleTensor<float> crop_and_resize(const SimpleTensor<T> &src, const SimpleTensor<float> &boxes, SimpleTensor<int32_t> box_ind,
+ Coordinates2D crop_size, InterpolationPolicy method, float extrapolation_value);
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_TEST_CROP_RESIZE_H__ */