From 04a8f8c4994f1c32b3f16a832c0e6f2599364c02 Mon Sep 17 00:00:00 2001
From: Giorgio Arena
Date: Thu, 23 Nov 2017 11:45:24 +0000
Subject: COMPMID-692 Consistent names for the interfaces

Change-Id: I4b1f3f0da9ff5342c7de7083736fe91871d14e5b
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/110351
Tested-by: BSG Visual Compute Jenkins server to access repositories on http://mpd-gerrit.cambridge.arm.com
Reviewed-by: Georgios Pinitas
Reviewed-by: Anthony Barbier
---
 arm_compute/core/CL/CLKernels.h                    |   8 +-
 .../core/CL/kernels/CLDepthConcatenateKernel.h     |  77 ---
 .../CL/kernels/CLDepthConcatenateLayerKernel.h     |  77 +++
 arm_compute/core/CL/kernels/CLDepthConvertKernel.h |  63 ---
 .../core/CL/kernels/CLDepthConvertLayerKernel.h    |  63 +++
 .../CL/kernels/CLDepthwiseConvolution3x3Kernel.h   |  75 ---
 .../kernels/CLDepthwiseConvolutionLayer3x3Kernel.h |  75 +++
 arm_compute/core/CL/kernels/CLL2NormalizeKernel.h  |  72 ---
 .../core/CL/kernels/CLL2NormalizeLayerKernel.h     |  72 +++
 arm_compute/core/GLES_COMPUTE/GCKernels.h          |   2 +-
 .../kernels/GCDepthConcatenateKernel.h             |  76 ---
 .../kernels/GCDepthConcatenateLayerKernel.h        |  76 +++
 arm_compute/core/NEON/NEKernels.h                  |   8 +-
 .../core/NEON/kernels/NEDepthConcatenateKernel.h   |  80 ----
 .../NEON/kernels/NEDepthConcatenateLayerKernel.h   |  80 ++++
 .../core/NEON/kernels/NEDepthConvertKernel.h       |  85 ----
 .../core/NEON/kernels/NEDepthConvertLayerKernel.h  |  85 ++++
 .../NEON/kernels/NEDepthwiseConvolution3x3Kernel.h |  69 ---
 .../kernels/NEDepthwiseConvolutionLayer3x3Kernel.h |  69 +++
 .../core/NEON/kernels/NEL2NormalizeKernel.h        |  70 ---
 .../core/NEON/kernels/NEL2NormalizeLayerKernel.h   |  70 +++
 arm_compute/graph/nodes/L2NormalizeLayer.h         |   2 +-
 arm_compute/runtime/CL/CLFunctions.h               |   8 +-
 .../runtime/CL/functions/CLDepthConcatenate.h      |  70 ---
 .../runtime/CL/functions/CLDepthConcatenateLayer.h |  70 +++
 arm_compute/runtime/CL/functions/CLDepthConvert.h  |  64 ---
 .../runtime/CL/functions/CLDepthConvertLayer.h     |  64 +++
 .../runtime/CL/functions/CLDepthwiseConvolution.h  | 110 -----
 .../CL/functions/CLDepthwiseConvolutionLayer.h     | 110 +++++
 .../CLDepthwiseSeparableConvolutionLayer.h         |   8 +-
 arm_compute/runtime/CL/functions/CLL2Normalize.h   |  69 ---
 .../runtime/CL/functions/CLL2NormalizeLayer.h      |  69 +++
 .../runtime/CL/functions/CLLaplacianPyramid.h      |   4 +-
 .../runtime/CL/functions/CLLaplacianReconstruct.h  |   6 +-
 arm_compute/runtime/GLES_COMPUTE/GCFunctions.h     |   2 +-
 .../GLES_COMPUTE/functions/GCDepthConcatenate.h    |  67 ---
 .../functions/GCDepthConcatenateLayer.h            |  67 +++
 arm_compute/runtime/NEON/NEFunctions.h             |   8 +-
 .../runtime/NEON/functions/NEDepthConcatenate.h    |  67 ---
 .../NEON/functions/NEDepthConcatenateLayer.h       |  67 +++
 .../runtime/NEON/functions/NEDepthConvert.h        |  69 ---
 .../runtime/NEON/functions/NEDepthConvertLayer.h   |  69 +++
 .../NEON/functions/NEDepthwiseConvolution.h        | 113 -----
 .../NEON/functions/NEDepthwiseConvolutionLayer.h   | 113 +++++
 .../NEDepthwiseSeparableConvolutionLayer.h         |   8 +-
 arm_compute/runtime/NEON/functions/NEL2Normalize.h |  70 ---
 .../runtime/NEON/functions/NEL2NormalizeLayer.h    |  70 +++
 .../runtime/NEON/functions/NELaplacianPyramid.h    |   4 +-
 .../NEON/functions/NELaplacianReconstruct.h        |   6 +-
 docs/00_introduction.dox                           |  10 +-
 src/core/CL/kernels/CLDepthConcatenateKernel.cpp   | 139 ------
 .../CL/kernels/CLDepthConcatenateLayerKernel.cpp   | 139 ++++++
 src/core/CL/kernels/CLDepthConvertKernel.cpp       | 117 -----
 src/core/CL/kernels/CLDepthConvertLayerKernel.cpp  | 117 +++++
 .../CL/kernels/CLDepthwiseConvolution3x3Kernel.cpp | 219 ---------
 .../CLDepthwiseConvolutionLayer3x3Kernel.cpp       | 219 +++++++++
 src/core/CL/kernels/CLL2NormalizeKernel.cpp        | 110 -----
 src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp   | 110 +++++
 .../kernels/GCDepthConcatenateKernel.cpp           | 143 ------
 .../kernels/GCDepthConcatenateLayerKernel.cpp      | 143 ++++++
 src/core/NEON/kernels/NEDepthConcatenateKernel.cpp | 170 -------
 .../NEON/kernels/NEDepthConcatenateLayerKernel.cpp | 170 +++++++
 src/core/NEON/kernels/NEDepthConvertKernel.cpp     | 524 ---------------------
 .../NEON/kernels/NEDepthConvertLayerKernel.cpp     | 524 +++++++++++++++++++++
 .../kernels/NEDepthwiseConvolution3x3Kernel.cpp    | 186 --------
 .../NEDepthwiseConvolutionLayer3x3Kernel.cpp       | 186 ++++++++
 src/core/NEON/kernels/NEL2NormalizeKernel.cpp      | 126 -----
 src/core/NEON/kernels/NEL2NormalizeLayerKernel.cpp | 126 +++++
 src/graph/operations/CLSimpleOperations.cpp        |  10 +-
 src/graph/operations/NESimpleOperations.cpp        |  10 +-
 src/runtime/CL/functions/CLDepthConcatenate.cpp    |  78 ---
 .../CL/functions/CLDepthConcatenateLayer.cpp       |  78 +++
 src/runtime/CL/functions/CLDepthConvert.cpp        |  38 --
 src/runtime/CL/functions/CLDepthConvertLayer.cpp   |  38 ++
 .../CL/functions/CLDepthwiseConvolution.cpp        | 138 ------
 .../CL/functions/CLDepthwiseConvolutionLayer.cpp   | 138 ++++++
 src/runtime/CL/functions/CLL2Normalize.cpp         |  63 ---
 src/runtime/CL/functions/CLL2NormalizeLayer.cpp    |  63 +++
 src/runtime/CL/functions/CLLaplacianPyramid.cpp    |   2 +-
 .../GLES_COMPUTE/functions/GCDepthConcatenate.cpp  |  69 ---
 .../functions/GCDepthConcatenateLayer.cpp          |  69 +++
 src/runtime/NEON/functions/NEDepthConcatenate.cpp  |  74 ---
 .../NEON/functions/NEDepthConcatenateLayer.cpp     |  74 +++
 src/runtime/NEON/functions/NEDepthConvert.cpp      |  38 --
 src/runtime/NEON/functions/NEDepthConvertLayer.cpp |  38 ++
 .../NEON/functions/NEDepthwiseConvolution.cpp      | 126 -----
 .../NEON/functions/NEDepthwiseConvolutionLayer.cpp | 126 +++++
 src/runtime/NEON/functions/NEL2Normalize.cpp       |  57 ---
 src/runtime/NEON/functions/NEL2NormalizeLayer.cpp  |  57 +++
 src/runtime/NEON/functions/NELaplacianPyramid.cpp  |   2 +-
 tests/benchmark/CL/DepthwiseConvolution.cpp        |  51 --
 tests/benchmark/CL/DepthwiseConvolutionLayer.cpp   |  51 ++
 tests/benchmark/CL/SYSTEM/MobileNet.cpp            |   4 +-
 tests/benchmark/CL/SYSTEM/MobileNetV1.cpp          |   6 +-
 .../fixtures/DepthwiseConvolutionFixture.h         |  92 ----
 .../fixtures/DepthwiseConvolutionLayerFixture.h    |  92 ++++
 tests/benchmark/fixtures/MobileNetFixture.h        |   4 +-
 tests/datasets/DepthwiseConvolutionDataset.h       | 187 --------
 tests/datasets/DepthwiseConvolutionLayerDataset.h  | 187 ++++++++
 .../MobileNetDepthwiseConvolutionDataset.h         |  58 ---
 .../MobileNetDepthwiseConvolutionLayerDataset.h    |  58 +++
 tests/datasets/ShapeDatasets.h                     |   6 +-
 tests/networks/MobileNetNetwork.h                  |   8 +-
 tests/validation/CL/DepthConcatenateLayer.cpp      |  10 +-
 tests/validation/CL/DepthConvert.cpp               | 484 -------------------
 tests/validation/CL/DepthConvertLayer.cpp          | 492 ++++++++++++++++++++
 tests/validation/CL/DepthwiseConvolution.cpp       | 114 -----
 tests/validation/CL/DepthwiseConvolutionLayer.cpp  | 117 +++++
 tests/validation/CL/L2Normalize.cpp                |  77 ---
 tests/validation/CL/L2NormalizeLayer.cpp           |  77 +++
 tests/validation/CPP/DepthConvert.cpp              | 157 ------
 tests/validation/CPP/DepthConvert.h                |  56 ---
 tests/validation/CPP/DepthConvertLayer.cpp         | 157 ++++++
 tests/validation/CPP/DepthConvertLayer.h           |  56 +++
 tests/validation/CPP/DepthwiseConvolution.cpp      | 195 --------
 tests/validation/CPP/DepthwiseConvolution.h        |  44 --
 tests/validation/CPP/DepthwiseConvolutionLayer.cpp | 195 ++++++++
 tests/validation/CPP/DepthwiseConvolutionLayer.h   |  44 ++
 .../CPP/DepthwiseSeparableConvolutionLayer.cpp     |   2 +-
 tests/validation/CPP/L2Normalize.cpp               |  88 ----
 tests/validation/CPP/L2Normalize.h                 |  44 --
 tests/validation/CPP/L2NormalizeLayer.cpp          |  88 ++++
 tests/validation/CPP/L2NormalizeLayer.h            |  44 ++
 .../GLES_COMPUTE/DepthConcatenateLayer.cpp         |   6 +-
 tests/validation/NEON/DepthConcatenateLayer.cpp    |  12 +-
 tests/validation/NEON/DepthConvert.cpp             | 484 -------------------
 tests/validation/NEON/DepthConvertLayer.cpp        | 492 ++++++++++++++++++++
 tests/validation/NEON/DepthwiseConvolution.cpp     | 126 -----
 .../validation/NEON/DepthwiseConvolutionLayer.cpp  | 131 ++++++
 tests/validation/NEON/L2Normalize.cpp              |  75 ---
 tests/validation/NEON/L2NormalizeLayer.cpp         |  75 +++
 .../fixtures/DepthConcatenateLayerFixture.h        |   2 +-
 tests/validation/fixtures/DepthConvertFixture.h    | 131 ------
 .../validation/fixtures/DepthConvertLayerFixture.h | 131 ++++++
 .../fixtures/DepthwiseConvolutionFixture.h         | 179 -------
 .../fixtures/DepthwiseConvolutionLayerFixture.h    | 179 +++++++
 tests/validation/fixtures/L2NormalizeFixture.h     | 107 -----
 .../validation/fixtures/L2NormalizeLayerFixture.h  | 107 +++++
 138 files changed, 6638 insertions(+), 6614 deletions(-)
 delete mode 100644 arm_compute/core/CL/kernels/CLDepthConcatenateKernel.h
 create mode 100644 arm_compute/core/CL/kernels/CLDepthConcatenateLayerKernel.h
 delete mode 100644 arm_compute/core/CL/kernels/CLDepthConvertKernel.h
 create mode 100644 arm_compute/core/CL/kernels/CLDepthConvertLayerKernel.h
 delete mode 100644 arm_compute/core/CL/kernels/CLDepthwiseConvolution3x3Kernel.h
 create mode 100644 arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3Kernel.h
 delete mode 100644 arm_compute/core/CL/kernels/CLL2NormalizeKernel.h
 create mode 100644 arm_compute/core/CL/kernels/CLL2NormalizeLayerKernel.h
 delete mode 100644 arm_compute/core/GLES_COMPUTE/kernels/GCDepthConcatenateKernel.h
 create mode 100644 arm_compute/core/GLES_COMPUTE/kernels/GCDepthConcatenateLayerKernel.h
 delete mode 100644 arm_compute/core/NEON/kernels/NEDepthConcatenateKernel.h
 create mode 100644 arm_compute/core/NEON/kernels/NEDepthConcatenateLayerKernel.h
 delete mode 100644 arm_compute/core/NEON/kernels/NEDepthConvertKernel.h
 create mode 100644 arm_compute/core/NEON/kernels/NEDepthConvertLayerKernel.h
 delete mode 100644 arm_compute/core/NEON/kernels/NEDepthwiseConvolution3x3Kernel.h
 create mode 100644 arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.h
 delete mode 100644 arm_compute/core/NEON/kernels/NEL2NormalizeKernel.h
 create mode 100644 arm_compute/core/NEON/kernels/NEL2NormalizeLayerKernel.h
 delete mode 100644 arm_compute/runtime/CL/functions/CLDepthConcatenate.h
 create mode 100644 arm_compute/runtime/CL/functions/CLDepthConcatenateLayer.h
 delete mode 100644 arm_compute/runtime/CL/functions/CLDepthConvert.h
 create mode 100644 arm_compute/runtime/CL/functions/CLDepthConvertLayer.h
 delete mode 100644 arm_compute/runtime/CL/functions/CLDepthwiseConvolution.h
 create mode 100644 arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h
 delete mode 100644 arm_compute/runtime/CL/functions/CLL2Normalize.h
 create mode 100644 arm_compute/runtime/CL/functions/CLL2NormalizeLayer.h
 delete mode 100644 arm_compute/runtime/GLES_COMPUTE/functions/GCDepthConcatenate.h
 create mode 100644 arm_compute/runtime/GLES_COMPUTE/functions/GCDepthConcatenateLayer.h
 delete mode 100644 arm_compute/runtime/NEON/functions/NEDepthConcatenate.h
 create mode 100644 arm_compute/runtime/NEON/functions/NEDepthConcatenateLayer.h
 delete mode 100644 arm_compute/runtime/NEON/functions/NEDepthConvert.h
 create mode 100644 arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h
 delete mode 100644 arm_compute/runtime/NEON/functions/NEDepthwiseConvolution.h
 create mode 100644 arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h
 delete mode 100644 arm_compute/runtime/NEON/functions/NEL2Normalize.h
 create mode 100644 arm_compute/runtime/NEON/functions/NEL2NormalizeLayer.h
 delete mode 100644 src/core/CL/kernels/CLDepthConcatenateKernel.cpp
 create mode 100644 src/core/CL/kernels/CLDepthConcatenateLayerKernel.cpp
 delete mode 100644 src/core/CL/kernels/CLDepthConvertKernel.cpp
 create mode 100644 src/core/CL/kernels/CLDepthConvertLayerKernel.cpp
 delete mode 100644 src/core/CL/kernels/CLDepthwiseConvolution3x3Kernel.cpp
 create mode 100644 src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3Kernel.cpp
 delete mode 100644 src/core/CL/kernels/CLL2NormalizeKernel.cpp
 create mode 100644 src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp
 delete mode 100644 src/core/GLES_COMPUTE/kernels/GCDepthConcatenateKernel.cpp
 create mode 100644 src/core/GLES_COMPUTE/kernels/GCDepthConcatenateLayerKernel.cpp
 delete mode 100644 src/core/NEON/kernels/NEDepthConcatenateKernel.cpp
 create mode 100644 src/core/NEON/kernels/NEDepthConcatenateLayerKernel.cpp
 delete mode 100644 src/core/NEON/kernels/NEDepthConvertKernel.cpp
 create mode 100644 src/core/NEON/kernels/NEDepthConvertLayerKernel.cpp
 delete mode 100644 src/core/NEON/kernels/NEDepthwiseConvolution3x3Kernel.cpp
 create mode 100644 src/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.cpp
 delete mode 100644 src/core/NEON/kernels/NEL2NormalizeKernel.cpp
 create mode 100644 src/core/NEON/kernels/NEL2NormalizeLayerKernel.cpp
 delete mode 100644 src/runtime/CL/functions/CLDepthConcatenate.cpp
 create mode 100644 src/runtime/CL/functions/CLDepthConcatenateLayer.cpp
 delete mode 100644 src/runtime/CL/functions/CLDepthConvert.cpp
 create mode 100644 src/runtime/CL/functions/CLDepthConvertLayer.cpp
 delete mode 100644 src/runtime/CL/functions/CLDepthwiseConvolution.cpp
 create mode 100644 src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp
 delete mode 100644 src/runtime/CL/functions/CLL2Normalize.cpp
 create mode 100644 src/runtime/CL/functions/CLL2NormalizeLayer.cpp
 delete mode 100755 src/runtime/GLES_COMPUTE/functions/GCDepthConcatenate.cpp
 create mode 100755 src/runtime/GLES_COMPUTE/functions/GCDepthConcatenateLayer.cpp
 delete mode 100644 src/runtime/NEON/functions/NEDepthConcatenate.cpp
 create mode 100644 src/runtime/NEON/functions/NEDepthConcatenateLayer.cpp
 delete mode 100644 src/runtime/NEON/functions/NEDepthConvert.cpp
 create mode 100644 src/runtime/NEON/functions/NEDepthConvertLayer.cpp
 delete mode 100644 src/runtime/NEON/functions/NEDepthwiseConvolution.cpp
 create mode 100644 src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp
 delete mode 100644 src/runtime/NEON/functions/NEL2Normalize.cpp
 create mode 100644 src/runtime/NEON/functions/NEL2NormalizeLayer.cpp
 delete mode 100644 tests/benchmark/CL/DepthwiseConvolution.cpp
 create mode 100644 tests/benchmark/CL/DepthwiseConvolutionLayer.cpp
 delete mode 100644 tests/benchmark/fixtures/DepthwiseConvolutionFixture.h
 create mode 100644 tests/benchmark/fixtures/DepthwiseConvolutionLayerFixture.h
 delete mode 100644 tests/datasets/DepthwiseConvolutionDataset.h
 create mode 100644 tests/datasets/DepthwiseConvolutionLayerDataset.h
 delete mode 100644 tests/datasets/MobileNetDepthwiseConvolutionDataset.h
 create mode 100644 tests/datasets/MobileNetDepthwiseConvolutionLayerDataset.h
 delete mode 100644 tests/validation/CL/DepthConvert.cpp
 create mode 100644 tests/validation/CL/DepthConvertLayer.cpp
 delete mode 100644 tests/validation/CL/DepthwiseConvolution.cpp
 create mode 100644 tests/validation/CL/DepthwiseConvolutionLayer.cpp
 delete mode 100644 tests/validation/CL/L2Normalize.cpp
 create mode 100644 tests/validation/CL/L2NormalizeLayer.cpp
 delete mode 100644 tests/validation/CPP/DepthConvert.cpp
 delete mode 100644 tests/validation/CPP/DepthConvert.h
 create mode 100644 tests/validation/CPP/DepthConvertLayer.cpp
 create mode 100644 tests/validation/CPP/DepthConvertLayer.h
 delete mode 100644 tests/validation/CPP/DepthwiseConvolution.cpp
 delete mode 100644 tests/validation/CPP/DepthwiseConvolution.h
 create mode 100644 tests/validation/CPP/DepthwiseConvolutionLayer.cpp
 create mode 100644 tests/validation/CPP/DepthwiseConvolutionLayer.h
 delete mode 100644 tests/validation/CPP/L2Normalize.cpp
 delete mode 100644 tests/validation/CPP/L2Normalize.h
 create mode 100644 tests/validation/CPP/L2NormalizeLayer.cpp
 create mode 100644 tests/validation/CPP/L2NormalizeLayer.h
 delete mode 100644 tests/validation/NEON/DepthConvert.cpp
 create mode 100644 tests/validation/NEON/DepthConvertLayer.cpp
 delete mode 100644 tests/validation/NEON/DepthwiseConvolution.cpp
 create mode 100644 tests/validation/NEON/DepthwiseConvolutionLayer.cpp
 delete mode 100644 tests/validation/NEON/L2Normalize.cpp
 create mode 100644 tests/validation/NEON/L2NormalizeLayer.cpp
 delete mode 100644 tests/validation/fixtures/DepthConvertFixture.h
 create mode 100644 tests/validation/fixtures/DepthConvertLayerFixture.h
 delete mode 100644 tests/validation/fixtures/DepthwiseConvolutionFixture.h
 create mode 100644 tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h
 delete mode 100644 tests/validation/fixtures/L2NormalizeFixture.h
 create mode 100644 tests/validation/fixtures/L2NormalizeLayerFixture.h

diff --git a/arm_compute/core/CL/CLKernels.h b/arm_compute/core/CL/CLKernels.h
index 1ffbad90cf..9da0e5ab3a 100644
--- a/arm_compute/core/CL/CLKernels.h
+++ b/arm_compute/core/CL/CLKernels.h
@@ -42,9 +42,9 @@
 #include "arm_compute/core/CL/kernels/CLCol2ImKernel.h"
 #include "arm_compute/core/CL/kernels/CLColorConvertKernel.h"
 #include "arm_compute/core/CL/kernels/CLConvolutionKernel.h"
-#include "arm_compute/core/CL/kernels/CLDepthConcatenateKernel.h"
-#include "arm_compute/core/CL/kernels/CLDepthConvertKernel.h"
-#include "arm_compute/core/CL/kernels/CLDepthwiseConvolution3x3Kernel.h"
+#include "arm_compute/core/CL/kernels/CLDepthConcatenateLayerKernel.h"
+#include "arm_compute/core/CL/kernels/CLDepthConvertLayerKernel.h"
+#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3Kernel.h"
 #include "arm_compute/core/CL/kernels/CLDepthwiseIm2ColKernel.h"
 #include "arm_compute/core/CL/kernels/CLDepthwiseVectorToTensorKernel.h"
 #include "arm_compute/core/CL/kernels/CLDepthwiseWeightsReshapeKernel.h"
@@ -76,7 +76,7 @@
 #include "arm_compute/core/CL/kernels/CLHistogramKernel.h"
 #include "arm_compute/core/CL/kernels/CLIm2ColKernel.h"
 #include "arm_compute/core/CL/kernels/CLIntegralImageKernel.h"
-#include "arm_compute/core/CL/kernels/CLL2NormalizeKernel.h"
+#include "arm_compute/core/CL/kernels/CLL2NormalizeLayerKernel.h"
 #include "arm_compute/core/CL/kernels/CLLKTrackerKernel.h"
 #include "arm_compute/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.h"
 #include "arm_compute/core/CL/kernels/CLMagnitudePhaseKernel.h"
diff --git a/arm_compute/core/CL/kernels/CLDepthConcatenateKernel.h b/arm_compute/core/CL/kernels/CLDepthConcatenateKernel.h
deleted file mode 100644
index 2833d8ec23..0000000000
--- a/arm_compute/core/CL/kernels/CLDepthConcatenateKernel.h
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
- * Copyright (c) 2017 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef __ARM_COMPUTE_CLDEPTHCONCATENATEKERNEL_H__
-#define __ARM_COMPUTE_CLDEPTHCONCATENATEKERNEL_H__
-
-#include "arm_compute/core/CL/ICLKernel.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the depth concatenate kernel.
- *  The input tensor will be concatenated into the output tensor.
- */
-class CLDepthConcatenateKernel : public ICLKernel
-{
-public:
-    /** Default constructor */
-    CLDepthConcatenateKernel();
-    /** Prevent instances of this class from being copied (As this class contains pointers) */
-    CLDepthConcatenateKernel(const CLDepthConcatenateKernel &) = delete;
-    /** Prevent instances of this class from being copied (As this class contains pointers) */
-    CLDepthConcatenateKernel &operator=(const CLDepthConcatenateKernel &) = delete;
-    /** Allow instances of this class to be moved */
-    CLDepthConcatenateKernel(CLDepthConcatenateKernel &&) = default;
-    /** Allow instances of this class to be moved */
-    CLDepthConcatenateKernel &operator=(CLDepthConcatenateKernel &&) = default;
-    /** Default destructor */
-    ~CLDepthConcatenateKernel() = default;
-    /** Initialise the kernel's inputs and output
-     *
-     * @param[in]     input        Input tensor. Data types supported: QS8/QS16/F16/F32.
-     * @param[in]     depth_offset The offset on the Z axis.
-     * @param[in,out] output       Output tensor. Data types supported: Same as @p input.
-     *
-     * @note: The output tensor's low two dimensions can't be smaller than the input one's.
-     * @note: The gaps between the two lowest dimensions of input and output need to be divisible by 2.
-     *
-     */
-    void configure(const ICLTensor *input, unsigned int depth_offset, ICLTensor *output);
-
-    // Inherited methods overridden:
-    void run(const Window &window, cl::CommandQueue &queue) override;
-    BorderSize border_size() const override;
-
-private:
-    const ICLTensor *_input;
-    ICLTensor       *_output;
-    int              _top_bottom;
-    int              _left_right;
-    unsigned int     _depth_offset;
-};
-} // namespace arm_compute
-#endif /* __ARM_COMPUTE_CLDEPTHCONCATENATEKERNEL_H__ */
diff --git a/arm_compute/core/CL/kernels/CLDepthConcatenateLayerKernel.h b/arm_compute/core/CL/kernels/CLDepthConcatenateLayerKernel.h
new file mode 100644
index 0000000000..467bdfab3b
--- /dev/null
+++ b/arm_compute/core/CL/kernels/CLDepthConcatenateLayerKernel.h
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __ARM_COMPUTE_CLDEPTHCONCATENATELAYERKERNEL_H__
+#define __ARM_COMPUTE_CLDEPTHCONCATENATELAYERKERNEL_H__
+
+#include "arm_compute/core/CL/ICLKernel.h"
+#include "arm_compute/core/Types.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the depth concatenate kernel.
+ *  The input tensor will be concatenated into the output tensor.
+ */
+class CLDepthConcatenateLayerKernel : public ICLKernel
+{
+public:
+    /** Default constructor */
+    CLDepthConcatenateLayerKernel();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    CLDepthConcatenateLayerKernel(const CLDepthConcatenateLayerKernel &) = delete;
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    CLDepthConcatenateLayerKernel &operator=(const CLDepthConcatenateLayerKernel &) = delete;
+    /** Allow instances of this class to be moved */
+    CLDepthConcatenateLayerKernel(CLDepthConcatenateLayerKernel &&) = default;
+    /** Allow instances of this class to be moved */
+    CLDepthConcatenateLayerKernel &operator=(CLDepthConcatenateLayerKernel &&) = default;
+    /** Default destructor */
+    ~CLDepthConcatenateLayerKernel() = default;
+    /** Initialise the kernel's inputs and output
+     *
+     * @param[in]     input        Input tensor. Data types supported: QS8/QS16/F16/F32.
+     * @param[in]     depth_offset The offset on the Z axis.
+     * @param[in,out] output       Output tensor. Data types supported: Same as @p input.
+     *
+     * @note: The output tensor's two lowest dimensions can't be smaller than the input's.
+     * @note: The gaps between the two lowest dimensions of input and output need to be divisible by 2.
+     *
+     */
+    void configure(const ICLTensor *input, unsigned int depth_offset, ICLTensor *output);
+
+    // Inherited methods overridden:
+    void run(const Window &window, cl::CommandQueue &queue) override;
+    BorderSize border_size() const override;
+
+private:
+    const ICLTensor *_input;
+    ICLTensor       *_output;
+    int              _top_bottom;
+    int              _left_right;
+    unsigned int     _depth_offset;
+};
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_CLDEPTHCONCATENATELAYERKERNEL_H__ */
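Reviewer note: since only the class name changes, the calling pattern is untouched. A minimal sketch of driving the renamed kernel directly follows; it is not part of the patch. It assumes an initialised CLScheduler and already-allocated tensors (input0, input1 and output are hypothetical names), and it omits the fill-border kernels that the CLDepthConcatenateLayer runtime function normally schedules alongside this kernel.

#include "arm_compute/core/CL/kernels/CLDepthConcatenateLayerKernel.h"
#include "arm_compute/runtime/CL/CLScheduler.h"

using namespace arm_compute;

// Sketch only: concatenate two inputs along the Z axis into `output`.
void concat_two(ICLTensor &input0, ICLTensor &input1, ICLTensor &output)
{
    CLDepthConcatenateLayerKernel k0;
    CLDepthConcatenateLayerKernel k1;
    // Each input is written into `output` at a different depth offset.
    k0.configure(&input0, 0, &output);
    k1.configure(&input1, static_cast<unsigned int>(input0.info()->dimension(2)), &output);
    CLScheduler::get().enqueue(k0, false);
    CLScheduler::get().enqueue(k1, true); // flush the queue
}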
diff --git a/arm_compute/core/CL/kernels/CLDepthConvertKernel.h b/arm_compute/core/CL/kernels/CLDepthConvertKernel.h
deleted file mode 100644
index da70bff0fd..0000000000
--- a/arm_compute/core/CL/kernels/CLDepthConvertKernel.h
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Copyright (c) 2016, 2017 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef __ARM_COMPUTE_CLDEPTHCONVERTKERNEL_H__
-#define __ARM_COMPUTE_CLDEPTHCONVERTKERNEL_H__
-
-#include "arm_compute/core/CL/ICLSimple2DKernel.h"
-#include "arm_compute/core/Types.h"
-
-#include <cstdint>
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the depth conversion kernel.
- *
- */
-class CLDepthConvertKernel : public ICLSimple2DKernel
-{
-public:
-    /** Set the input and output of the kernel.
-     *
-     * Valid conversions Input -> Output :
-     *
-     *   - QS8 -> F32
-     *   - QS16 -> F32
-     *   - U8 -> U16, S16, U32, S32
-     *   - U16 -> U8, U32, S32
-     *   - S16 -> U8, U32, S32
-     *   - U32 -> U8, U16, S16
-     *   - S32 -> U8, U16, S16
-     *   - F32 -> QS8, QS16
-     *
-     * @param[in]  input  The input tensor to convert. Data types supported: U8/QS8/U16/S16/QS16/U32/S32/F32.
-     * @param[out] output The output tensor. Data types supported: U8/QS8/U16/S16/QS16/U32/S32/F32.
-     * @param[in]  policy Conversion policy
-     * @param[in]  shift  Value for down/up conversions. Must be 0 <= shift < 8.
-     */
-    void configure(const ICLTensor *input, ICLTensor *output, ConvertPolicy policy, uint32_t shift);
-};
-} // namespace arm_compute
-#endif /*__ARM_COMPUTE_CLDEPTHCONVERTKERNEL_H__ */
diff --git a/arm_compute/core/CL/kernels/CLDepthConvertLayerKernel.h b/arm_compute/core/CL/kernels/CLDepthConvertLayerKernel.h
new file mode 100644
index 0000000000..3a6310d69e
--- /dev/null
+++ b/arm_compute/core/CL/kernels/CLDepthConvertLayerKernel.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLDEPTHCONVERTLAYERKERNEL_H__
+#define __ARM_COMPUTE_CLDEPTHCONVERTLAYERKERNEL_H__
+
+#include "arm_compute/core/CL/ICLSimple2DKernel.h"
+#include "arm_compute/core/Types.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the depth conversion kernel.
+ *
+ */
+class CLDepthConvertLayerKernel : public ICLSimple2DKernel
+{
+public:
+    /** Set the input and output of the kernel.
+     *
+     * Valid conversions Input -> Output :
+     *
+     *   - QS8 -> F32
+     *   - QS16 -> F32
+     *   - U8 -> U16, S16, U32, S32
+     *   - U16 -> U8, U32, S32
+     *   - S16 -> U8, U32, S32
+     *   - U32 -> U8, U16, S16
+     *   - S32 -> U8, U16, S16
+     *   - F32 -> QS8, QS16
+     *
+     * @param[in]  input  The input tensor to convert. Data types supported: U8/QS8/U16/S16/QS16/U32/S32/F32.
+     * @param[out] output The output tensor. Data types supported: U8/QS8/U16/S16/QS16/U32/S32/F32.
+     * @param[in]  policy Conversion policy
+     * @param[in]  shift  Value for down/up conversions. Must be 0 <= shift < 8.
+     */
+    void configure(const ICLTensor *input, ICLTensor *output, ConvertPolicy policy, uint32_t shift);
+};
+} // namespace arm_compute
+#endif /*__ARM_COMPUTE_CLDEPTHCONVERTLAYERKERNEL_H__ */
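A minimal usage sketch for the renamed conversion kernel, not part of the patch: it assumes an initialised CLScheduler and two allocated tensors of the same shape, `src` in U8 and `dst` in U16 (hypothetical names).

#include "arm_compute/core/CL/kernels/CLDepthConvertLayerKernel.h"
#include "arm_compute/runtime/CL/CLScheduler.h"

using namespace arm_compute;

// Sketch only: convert a U8 tensor to U16.
void convert_u8_to_u16(const ICLTensor *src, ICLTensor *dst)
{
    CLDepthConvertLayerKernel convert;
    // Up-conversion: SATURATE cannot trigger here; shift 0 keeps values unchanged.
    convert.configure(src, dst, ConvertPolicy::SATURATE, 0);
    CLScheduler::get().enqueue(convert);
}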
diff --git a/arm_compute/core/CL/kernels/CLDepthwiseConvolution3x3Kernel.h b/arm_compute/core/CL/kernels/CLDepthwiseConvolution3x3Kernel.h
deleted file mode 100644
index f9689a4329..0000000000
--- a/arm_compute/core/CL/kernels/CLDepthwiseConvolution3x3Kernel.h
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * Copyright (c) 2017 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef __ARM_COMPUTE_CLDEPTHWISECONVOLUTIONKERNEL3x3_H__
-#define __ARM_COMPUTE_CLDEPTHWISECONVOLUTIONKERNEL3x3_H__
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the kernel to run a 3x3 depthwise convolution on a tensor.
- */
-class CLDepthwiseConvolution3x3Kernel : public ICLKernel
-{
-public:
-    /** Default constructor */
-    CLDepthwiseConvolution3x3Kernel();
-    /** Prevent instances of this class from being copied (As this class contains pointers) */
-    CLDepthwiseConvolution3x3Kernel(const CLDepthwiseConvolution3x3Kernel &) = delete;
-    /** Prevent instances of this class from being copied (As this class contains pointers) */
-    CLDepthwiseConvolution3x3Kernel &operator=(const CLDepthwiseConvolution3x3Kernel &) = delete;
-    /** Default Move Constructor. */
-    CLDepthwiseConvolution3x3Kernel(CLDepthwiseConvolution3x3Kernel &&) = default;
-    /** Default move assignment operator. */
-    CLDepthwiseConvolution3x3Kernel &operator=(CLDepthwiseConvolution3x3Kernel &&) = default;
-    /** Initialize the function's source, destination, conv and border_size.
-     *
-     * @param[in]  input     Source tensor. DataType supported: QASYMM8/F32.
-     * @param[in]  weights   Weights tensor. A 3D tensor with dimensions [3, 3, IFM]. Data type supported: Same as @p input.
-     * @param[in]  biases    (Optional) Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed.
-     *                       Data type supported: Same as @p input.
-     * @param[out] output    Destination tensor. Data type supported: Same as @p input.
-     * @param[in]  conv_info Padding and stride information to use for the convolution.
-     */
-    void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info);
-
-    // Inherited methods overridden:
-    void run(const Window &window, cl::CommandQueue &queue) override;
-    BorderSize border_size() const override;
-
-private:
-    BorderSize       _border_size;
-    const ICLTensor *_input;
-    ICLTensor       *_output;
-    const ICLTensor *_weights;
-    const ICLTensor *_biases;
-    unsigned int     _conv_stride_x;
-    unsigned int     _conv_stride_y;
-    unsigned int     _conv_pad_left;
-    unsigned int     _conv_pad_top;
-};
-} // namespace arm_compute
-#endif /*__ARM_COMPUTE_CLDEPTHWISECONVOLUTIONKERNEL3x3_H__ */
diff --git a/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3Kernel.h b/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3Kernel.h
new file mode 100644
index 0000000000..eb62465f84
--- /dev/null
+++ b/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3Kernel.h
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLDEPTHWISECONVOLUTIONLAYERKERNEL3x3_H__
+#define __ARM_COMPUTE_CLDEPTHWISECONVOLUTIONLAYERKERNEL3x3_H__
+
+#include "arm_compute/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the kernel to run a 3x3 depthwise convolution on a tensor.
+ */
+class CLDepthwiseConvolutionLayer3x3Kernel : public ICLKernel
+{
+public:
+    /** Default constructor */
+    CLDepthwiseConvolutionLayer3x3Kernel();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    CLDepthwiseConvolutionLayer3x3Kernel(const CLDepthwiseConvolutionLayer3x3Kernel &) = delete;
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    CLDepthwiseConvolutionLayer3x3Kernel &operator=(const CLDepthwiseConvolutionLayer3x3Kernel &) = delete;
+    /** Default move constructor */
+    CLDepthwiseConvolutionLayer3x3Kernel(CLDepthwiseConvolutionLayer3x3Kernel &&) = default;
+    /** Default move assignment operator */
+    CLDepthwiseConvolutionLayer3x3Kernel &operator=(CLDepthwiseConvolutionLayer3x3Kernel &&) = default;
+    /** Initialize the function's source, destination, conv and border_size.
+     *
+     * @param[in]  input     Source tensor. DataType supported: QASYMM8/F32.
+     * @param[in]  weights   Weights tensor. A 3D tensor with dimensions [3, 3, IFM]. Data type supported: Same as @p input.
+     * @param[in]  biases    (Optional) Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed.
+     *                       Data type supported: Same as @p input.
+     * @param[out] output    Destination tensor. Data type supported: Same as @p input.
+     * @param[in]  conv_info Padding and stride information to use for the convolution.
+     */
+    void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info);
+
+    // Inherited methods overridden:
+    void run(const Window &window, cl::CommandQueue &queue) override;
+    BorderSize border_size() const override;
+
+private:
+    BorderSize       _border_size;
+    const ICLTensor *_input;
+    ICLTensor       *_output;
+    const ICLTensor *_weights;
+    const ICLTensor *_biases;
+    unsigned int     _conv_stride_x;
+    unsigned int     _conv_stride_y;
+    unsigned int     _conv_pad_left;
+    unsigned int     _conv_pad_top;
+};
+} // namespace arm_compute
+#endif /*__ARM_COMPUTE_CLDEPTHWISECONVOLUTIONLAYERKERNEL3x3_H__ */
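A configuration sketch for the renamed 3x3 depthwise kernel, not part of the patch: stride 1 with one pixel of padding on each side ("same" padding for a 3x3 window). It assumes allocated tensors `input` [W, H, IFM], `weights` [3, 3, IFM] and a matching `output` (hypothetical names), and it omits the border fill that the CLDepthwiseConvolutionLayer runtime function performs before enqueueing this kernel.

#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3Kernel.h"
#include "arm_compute/runtime/CL/CLScheduler.h"

using namespace arm_compute;

// Sketch only: run a bias-less 3x3 depthwise convolution.
void run_depthwise_3x3(const ICLTensor *input, const ICLTensor *weights, ICLTensor *output)
{
    CLDepthwiseConvolutionLayer3x3Kernel dwc;
    const PadStrideInfo conv_info(1 /* stride_x */, 1 /* stride_y */, 1 /* pad_x */, 1 /* pad_y */);
    dwc.configure(input, weights, nullptr /* no biases */, output, conv_info);
    CLScheduler::get().enqueue(dwc);
}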
diff --git a/arm_compute/core/CL/kernels/CLL2NormalizeKernel.h b/arm_compute/core/CL/kernels/CLL2NormalizeKernel.h
deleted file mode 100644
index 2056b4e615..0000000000
--- a/arm_compute/core/CL/kernels/CLL2NormalizeKernel.h
+++ /dev/null
@@ -1,72 +0,0 @@
-/*
- * Copyright (c) 2017 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef __ARM_COMPUTE_CLL2NORMALIZEKERNEL_H__
-#define __ARM_COMPUTE_CLL2NORMALIZEKERNEL_H__
-
-#include "arm_compute/core/CL/ICLKernel.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the reduction operation kernel */
-class CLL2NormalizeKernel : public ICLKernel
-{
-public:
-    /** Default constructor */
-    CLL2NormalizeKernel();
-    /** Prevent instances of this class from being copied (As this class contains pointers) */
-    CLL2NormalizeKernel(const CLL2NormalizeKernel &) = delete;
-    /** Prevent instances of this class from being copied (As this class contains pointers) */
-    CLL2NormalizeKernel &operator=(const CLL2NormalizeKernel &) = delete;
-    /** Allow instances of this class to be moved */
-    CLL2NormalizeKernel(CLL2NormalizeKernel &&) = default;
-    /** Allow instances of this class to be moved */
-    CLL2NormalizeKernel &operator=(CLL2NormalizeKernel &&) = default;
-    /** Default destructor */
-    ~CLL2NormalizeKernel() = default;
-
-    /** Set the input and output tensors.
-     *
-     * @param[in]  input   Source tensor. Data types supported: QS8, QS16, F32.
-     * @param[in]  sum     Sum values tensor. Data types supported: same as @p input.
-     * @param[out] output  Destination tensor. Data types supported: Same as @p input.
-     * @param[in]  axis    Axis along which to reduce. Supported reduction axis : 0
-     * @param[in]  epsilon Lower bound value for the normalization.
-     */
-    void configure(const ICLTensor *input, const ICLTensor *sum, ICLTensor *output, unsigned int axis, float epsilon);
-
-    // Inherited methods overridden:
-    void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
-    const ICLTensor *_input;
-    const ICLTensor *_sum;
-    ICLTensor       *_output;
-    unsigned int     _axis;
-    float            _epsilon;
-};
-} // namespace arm_compute
-#endif /*__ARM_COMPUTE_CLL2NORMALIZEKERNEL_H__ */
diff --git a/arm_compute/core/CL/kernels/CLL2NormalizeLayerKernel.h b/arm_compute/core/CL/kernels/CLL2NormalizeLayerKernel.h
new file mode 100644
index 0000000000..f7d717119b
--- /dev/null
+++ b/arm_compute/core/CL/kernels/CLL2NormalizeLayerKernel.h
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLL2NORMALIZELAYERKERNEL_H__
+#define __ARM_COMPUTE_CLL2NORMALIZELAYERKERNEL_H__
+
+#include "arm_compute/core/CL/ICLKernel.h"
+#include "arm_compute/core/Types.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the reduction operation kernel */
+class CLL2NormalizeLayerKernel : public ICLKernel
+{
+public:
+    /** Default constructor */
+    CLL2NormalizeLayerKernel();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    CLL2NormalizeLayerKernel(const CLL2NormalizeLayerKernel &) = delete;
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    CLL2NormalizeLayerKernel &operator=(const CLL2NormalizeLayerKernel &) = delete;
+    /** Allow instances of this class to be moved */
+    CLL2NormalizeLayerKernel(CLL2NormalizeLayerKernel &&) = default;
+    /** Allow instances of this class to be moved */
+    CLL2NormalizeLayerKernel &operator=(CLL2NormalizeLayerKernel &&) = default;
+    /** Default destructor */
+    ~CLL2NormalizeLayerKernel() = default;
+
+    /** Set the input and output tensors.
+     *
+     * @param[in]  input   Source tensor. Data types supported: QS8, QS16, F32.
+     * @param[in]  sum     Sum values tensor. Data types supported: same as @p input.
+     * @param[out] output  Destination tensor. Data types supported: Same as @p input.
+     * @param[in]  axis    Axis along which to reduce. Supported reduction axis : 0
+     * @param[in]  epsilon Lower bound value for the normalization.
+     */
+    void configure(const ICLTensor *input, const ICLTensor *sum, ICLTensor *output, unsigned int axis, float epsilon);
+
+    // Inherited methods overridden:
+    void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+    const ICLTensor *_input;
+    const ICLTensor *_sum;
+    ICLTensor       *_output;
+    unsigned int     _axis;
+    float            _epsilon;
+};
+} // namespace arm_compute
+#endif /*__ARM_COMPUTE_CLL2NORMALIZELAYERKERNEL_H__ */
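A usage sketch for the renamed L2-normalise kernel, not part of the patch: the kernel consumes a pre-computed sum-of-squares tensor, which the CLL2NormalizeLayer runtime function normally produces with a SUM_SQUARE reduction over the same axis. Tensor names below are hypothetical and assumed allocated.

#include "arm_compute/core/CL/kernels/CLL2NormalizeLayerKernel.h"
#include "arm_compute/runtime/CL/CLScheduler.h"

using namespace arm_compute;

// Sketch only: `sum` is assumed to hold the sum of squares of `input` along axis 0.
void l2_normalize_x(const ICLTensor *input, const ICLTensor *sum, ICLTensor *output)
{
    CLL2NormalizeLayerKernel norm;
    norm.configure(input, sum, output, 0 /* axis */, 1e-12f /* epsilon */);
    CLScheduler::get().enqueue(norm);
}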
diff --git a/arm_compute/core/GLES_COMPUTE/GCKernels.h b/arm_compute/core/GLES_COMPUTE/GCKernels.h
index 57d11d5f18..9831e25299 100644
--- a/arm_compute/core/GLES_COMPUTE/GCKernels.h
+++ b/arm_compute/core/GLES_COMPUTE/GCKernels.h
@@ -29,7 +29,7 @@
 #include "arm_compute/core/GLES_COMPUTE/kernels/GCActivationLayerKernel.h"
 #include "arm_compute/core/GLES_COMPUTE/kernels/GCBatchNormalizationLayerKernel.h"
 #include "arm_compute/core/GLES_COMPUTE/kernels/GCCol2ImKernel.h"
-#include "arm_compute/core/GLES_COMPUTE/kernels/GCDepthConcatenateKernel.h"
+#include "arm_compute/core/GLES_COMPUTE/kernels/GCDepthConcatenateLayerKernel.h"
 #include "arm_compute/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.h"
 #include "arm_compute/core/GLES_COMPUTE/kernels/GCDropoutKernel.h"
 #include "arm_compute/core/GLES_COMPUTE/kernels/GCFillBorderKernel.h"
diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCDepthConcatenateKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCDepthConcatenateKernel.h
deleted file mode 100644
index 9a34a9a9c5..0000000000
--- a/arm_compute/core/GLES_COMPUTE/kernels/GCDepthConcatenateKernel.h
+++ /dev/null
@@ -1,76 +0,0 @@
-/*
- * Copyright (c) 2017 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef __ARM_COMPUTE_GCDEPTHCONCATENATEKERNEL_H__
-#define __ARM_COMPUTE_GCDEPTHCONCATENATEKERNEL_H__
-
-#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-class IGCTensor;
-
-/** Interface for the depth concatenate kernel.
- *  The input tensor will be concatenated into the output tensor.
- */
-class GCDepthConcatenateKernel : public IGCKernel
-{
-public:
-    /** Default constructor */
-    GCDepthConcatenateKernel();
-    /** Prevent instances of this class from being copied (As this class contains pointers) */
-    GCDepthConcatenateKernel(const GCDepthConcatenateKernel &) = delete;
-    /** Prevent instances of this class from being copied (As this class contains pointers) */
-    GCDepthConcatenateKernel &operator=(const GCDepthConcatenateKernel &) = delete;
-    /** Allow instances of this class to be moved */
-    GCDepthConcatenateKernel(GCDepthConcatenateKernel &&) = default;
-    /** Allow instances of this class to be moved */
-    GCDepthConcatenateKernel &operator=(GCDepthConcatenateKernel &&) = default;
-    /** Default destructor */
-    ~GCDepthConcatenateKernel() = default;
-    /** Initialise the kernel's inputs and output
-     *
-     * @param[in]     input        Input tensor. Data types supported: F16/F32.
-     * @param[in]     depth_offset The offset on the Z axis.
-     * @param[in,out] output       Output tensor. Data types supported: Same as @p input.
-     *
-     * @note: The output tensor's low two dimensions can't be smaller than the input one's.
-     * @note: The gaps between the two lowest dimensions of input and output need to be divisible by 2.
-     *
-     */
-    void configure(const IGCTensor *input, unsigned int depth_offset, IGCTensor *output);
-
-    // Inherited methods overridden:
-    void run(const Window &window) override;
-    BorderSize border_size() const override;
-
-private:
-    const IGCTensor *_input;
-    IGCTensor       *_output;
-    int              _top_bottom;
-    int              _left_right;
-};
-}
-#endif /* __ARM_COMPUTE_GCDEPTHCONCATENATEKERNEL_H__ */
diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCDepthConcatenateLayerKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCDepthConcatenateLayerKernel.h
new file mode 100644
index 0000000000..ce220cc564
--- /dev/null
+++ b/arm_compute/core/GLES_COMPUTE/kernels/GCDepthConcatenateLayerKernel.h
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __ARM_COMPUTE_GCDEPTHCONCATENATELAYERKERNEL_H__
+#define __ARM_COMPUTE_GCDEPTHCONCATENATELAYERKERNEL_H__
+
+#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h"
+#include "arm_compute/core/Types.h"
+
+namespace arm_compute
+{
+class IGCTensor;
+
+/** Interface for the depth concatenate kernel.
+ *  The input tensor will be concatenated into the output tensor.
+ */
+class GCDepthConcatenateLayerKernel : public IGCKernel
+{
+public:
+    /** Default constructor */
+    GCDepthConcatenateLayerKernel();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    GCDepthConcatenateLayerKernel(const GCDepthConcatenateLayerKernel &) = delete;
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    GCDepthConcatenateLayerKernel &operator=(const GCDepthConcatenateLayerKernel &) = delete;
+    /** Allow instances of this class to be moved */
+    GCDepthConcatenateLayerKernel(GCDepthConcatenateLayerKernel &&) = default;
+    /** Allow instances of this class to be moved */
+    GCDepthConcatenateLayerKernel &operator=(GCDepthConcatenateLayerKernel &&) = default;
+    /** Default destructor */
+    ~GCDepthConcatenateLayerKernel() = default;
+    /** Initialise the kernel's inputs and output
+     *
+     * @param[in]     input        Input tensor. Data types supported: F16/F32.
+     * @param[in]     depth_offset The offset on the Z axis.
+     * @param[in,out] output       Output tensor. Data types supported: Same as @p input.
+     *
+     * @note: The output tensor's two lowest dimensions can't be smaller than the input's.
+     * @note: The gaps between the two lowest dimensions of input and output need to be divisible by 2.
+     *
+     */
+    void configure(const IGCTensor *input, unsigned int depth_offset, IGCTensor *output);
+
+    // Inherited methods overridden:
+    void run(const Window &window) override;
+    BorderSize border_size() const override;
+
+private:
+    const IGCTensor *_input;
+    IGCTensor       *_output;
+    int              _top_bottom;
+    int              _left_right;
+};
+}
+#endif /* __ARM_COMPUTE_GCDEPTHCONCATENATELAYERKERNEL_H__ */
diff --git a/arm_compute/core/NEON/NEKernels.h b/arm_compute/core/NEON/NEKernels.h
index b23e2ac5a3..6c31fa4fb1 100644
--- a/arm_compute/core/NEON/NEKernels.h
+++ b/arm_compute/core/NEON/NEKernels.h
@@ -44,9 +44,9 @@
 #include "arm_compute/core/NEON/kernels/NEConvolutionKernel.h"
 #include "arm_compute/core/NEON/kernels/NECumulativeDistributionKernel.h"
 #include "arm_compute/core/NEON/kernels/NEDeconvolutionLayerUpsampleKernel.h"
-#include "arm_compute/core/NEON/kernels/NEDepthConcatenateKernel.h"
-#include "arm_compute/core/NEON/kernels/NEDepthConvertKernel.h"
-#include "arm_compute/core/NEON/kernels/NEDepthwiseConvolution3x3Kernel.h"
+#include "arm_compute/core/NEON/kernels/NEDepthConcatenateLayerKernel.h"
+#include "arm_compute/core/NEON/kernels/NEDepthConvertLayerKernel.h"
+#include "arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.h"
 #include "arm_compute/core/NEON/kernels/NEDepthwiseIm2ColKernel.h"
 #include "arm_compute/core/NEON/kernels/NEDepthwiseVectorToTensorKernel.h"
 #include "arm_compute/core/NEON/kernels/NEDepthwiseWeightsReshapeKernel.h"
@@ -83,7 +83,7 @@
 #include "arm_compute/core/NEON/kernels/NEHistogramKernel.h"
 #include "arm_compute/core/NEON/kernels/NEIm2ColKernel.h"
 #include "arm_compute/core/NEON/kernels/NEIntegralImageKernel.h"
-#include "arm_compute/core/NEON/kernels/NEL2NormalizeKernel.h"
+#include "arm_compute/core/NEON/kernels/NEL2NormalizeLayerKernel.h"
 #include "arm_compute/core/NEON/kernels/NELKTrackerKernel.h"
 #include "arm_compute/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.h"
 #include "arm_compute/core/NEON/kernels/NEMagnitudePhaseKernel.h"
diff --git a/arm_compute/core/NEON/kernels/NEDepthConcatenateKernel.h b/arm_compute/core/NEON/kernels/NEDepthConcatenateKernel.h
deleted file mode 100644
index 784dfc3f5c..0000000000
--- a/arm_compute/core/NEON/kernels/NEDepthConcatenateKernel.h
+++ /dev/null
@@ -1,80 +0,0 @@
-/*
- * Copyright (c) 2017 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef __ARM_COMPUTE_NEDEPTHCONCATENATEKERNEL_H__
-#define __ARM_COMPUTE_NEDEPTHCONCATENATEKERNEL_H__
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Interface for the depth concatenate kernel.
- *  The input tensor will be concatenated into the output tensor.
- */
-class NEDepthConcatenateKernel : public INEKernel
-{
-public:
-    /** Default constructor */
-    NEDepthConcatenateKernel();
-    /** Prevent instances of this class from being copied (As this class contains pointers) */
-    NEDepthConcatenateKernel(const NEDepthConcatenateKernel &) = delete;
-    /** Prevent instances of this class from being copied (As this class contains pointers) */
-    NEDepthConcatenateKernel &operator=(const NEDepthConcatenateKernel &) = delete;
-    /** Allow instances of this class to be moved */
-    NEDepthConcatenateKernel(NEDepthConcatenateKernel &&) = default;
-    /** Allow instances of this class to be moved */
-    NEDepthConcatenateKernel &operator=(NEDepthConcatenateKernel &&) = default;
-    /** Default destructor */
-    ~NEDepthConcatenateKernel() = default;
-    /** Initialise the kernel's inputs and output
-     *
-     * @param[in]     input        Input tensor. Data types supported: QS8/QS16/F16/F32.
-     * @param[in]     depth_offset The offset on the Z axis.
-     * @param[in,out] output       Output tensor. Data types supported: Same as @p input.
-     *
-     * @note: The output tensor's low two dimensions can't be smaller than the input one's.
-     * @note: The gaps between the two lowest dimensions of input and output need to be divisible by 2.
-     *
-     */
-    void configure(const ITensor *input, unsigned int depth_offset, ITensor *output);
-
-    // Inherited methods overridden:
-    void run(const Window &window, const ThreadInfo &info) override;
-    BorderSize border_size() const override;
-
-private:
-    using DepthConcatFunction = void(const ITensor *in, ITensor *out, std::pair<int, int> start_xy, int depth_offset, const Window &window);
-
-private:
-    DepthConcatFunction *_func;
-    const ITensor       *_input;
-    ITensor             *_output;
-    int                  _top_bottom;
-    int                  _left_right;
-    unsigned int         _depth_offset;
-};
-} // namespace arm_compute
-#endif /* __ARM_COMPUTE_NEDEPTHCONCATENATEKERNEL_H__ */
diff --git a/arm_compute/core/NEON/kernels/NEDepthConcatenateLayerKernel.h b/arm_compute/core/NEON/kernels/NEDepthConcatenateLayerKernel.h
new file mode 100644
index 0000000000..6029873f22
--- /dev/null
+++ b/arm_compute/core/NEON/kernels/NEDepthConcatenateLayerKernel.h
@@ -0,0 +1,80 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __ARM_COMPUTE_NEDEPTHCONCATENATELAYERKERNEL_H__
+#define __ARM_COMPUTE_NEDEPTHCONCATENATELAYERKERNEL_H__
+
+#include "arm_compute/core/NEON/INEKernel.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Interface for the depth concatenate kernel.
+ *  The input tensor will be concatenated into the output tensor.
+ */
+class NEDepthConcatenateLayerKernel : public INEKernel
+{
+public:
+    /** Default constructor */
+    NEDepthConcatenateLayerKernel();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NEDepthConcatenateLayerKernel(const NEDepthConcatenateLayerKernel &) = delete;
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NEDepthConcatenateLayerKernel &operator=(const NEDepthConcatenateLayerKernel &) = delete;
+    /** Allow instances of this class to be moved */
+    NEDepthConcatenateLayerKernel(NEDepthConcatenateLayerKernel &&) = default;
+    /** Allow instances of this class to be moved */
+    NEDepthConcatenateLayerKernel &operator=(NEDepthConcatenateLayerKernel &&) = default;
+    /** Default destructor */
+    ~NEDepthConcatenateLayerKernel() = default;
+    /** Initialise the kernel's inputs and output
+     *
+     * @param[in]     input        Input tensor. Data types supported: QS8/QS16/F16/F32.
+     * @param[in]     depth_offset The offset on the Z axis.
+     * @param[in,out] output       Output tensor. Data types supported: Same as @p input.
+     *
+     * @note: The output tensor's two lowest dimensions can't be smaller than the input's.
+     * @note: The gaps between the two lowest dimensions of input and output need to be divisible by 2.
+     *
+     */
+    void configure(const ITensor *input, unsigned int depth_offset, ITensor *output);
+
+    // Inherited methods overridden:
+    void run(const Window &window, const ThreadInfo &info) override;
+    BorderSize border_size() const override;
+
+private:
+    using DepthConcatFunction = void(const ITensor *in, ITensor *out, std::pair<int, int> start_xy, int depth_offset, const Window &window);
+
+private:
+    DepthConcatFunction *_func;
+    const ITensor       *_input;
+    ITensor             *_output;
+    int                  _top_bottom;
+    int                  _left_right;
+    unsigned int         _depth_offset;
+};
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_NEDEPTHCONCATENATELAYERKERNEL_H__ */
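For completeness, a CPU-side sketch mirroring the CL example above; it is not part of the patch. It assumes allocated tensors (`input`, `output` are hypothetical names) and omits the border handling that the NEDepthConcatenateLayer runtime function performs; the split dimension passed to the scheduler is an arbitrary but valid choice.

#include "arm_compute/core/NEON/kernels/NEDepthConcatenateLayerKernel.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"

using namespace arm_compute;

// Sketch only: write one input into `output` at depth offset 0.
void concat_on_cpu(ITensor &input, ITensor &output)
{
    NEDepthConcatenateLayerKernel kernel;
    kernel.configure(&input, 0, &output);
    // Split the execution window across worker threads along Y.
    NEScheduler::get().schedule(&kernel, Window::DimY);
}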
+ * + * @note: The output tensor's low two dimensions can't be smaller than the input one's. + * @note: The gaps between the two lowest dimensions of input and output need to be divisible by 2. + * + */ + void configure(const ITensor *input, unsigned int depth_offset, ITensor *output); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + BorderSize border_size() const override; + +private: + using DepthConcatFunction = void(const ITensor *in, ITensor *out, std::pair start_xy, int depth_offset, const Window &window); + +private: + DepthConcatFunction *_func; + const ITensor *_input; + ITensor *_output; + int _top_bottom; + int _left_right; + unsigned int _depth_offset; +}; +} // namespace arm_compute +#endif /* __ARM_COMPUTE_NEDEPTHCONCATENATEKERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NEDepthConvertKernel.h b/arm_compute/core/NEON/kernels/NEDepthConvertKernel.h deleted file mode 100644 index 332406f239..0000000000 --- a/arm_compute/core/NEON/kernels/NEDepthConvertKernel.h +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Copyright (c) 2016, 2017 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
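[Usage sketch, not part of the patch: configuring the renamed NEON kernel. `input` and `output` are illustrative, pre-allocated tensors; further inputs would be configured at increasing depth offsets.]

    NEDepthConcatenateLayerKernel concat_kernel;
    concat_kernel.configure(&input, /* depth_offset */ 0, &output);
    // Kernels are normally dispatched through the scheduler rather than run directly:
    NEScheduler::get().schedule(&concat_kernel, Window::DimY);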
diff --git a/arm_compute/core/NEON/kernels/NEDepthConvertKernel.h b/arm_compute/core/NEON/kernels/NEDepthConvertKernel.h
deleted file mode 100644
index 332406f239..0000000000
--- a/arm_compute/core/NEON/kernels/NEDepthConvertKernel.h
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- * Copyright (c) 2016, 2017 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef __ARM_COMPUTE_DEPTHCONVERTKERNEL_H__
-#define __ARM_COMPUTE_DEPTHCONVERTKERNEL_H__
-
-#include "arm_compute/core/NEON/INEKernel.h"
-#include "arm_compute/core/Types.h"
-
-#include <cstdint>
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Depth conversion kernel */
-class NEDepthConvertKernel : public INEKernel
-{
-public:
-    /** Default constructor*/
-    NEDepthConvertKernel();
-    /** Prevent instances of this class from being copied (As this class contains pointers) */
-    NEDepthConvertKernel(const NEDepthConvertKernel &) = delete;
-    /** Default move constructor */
-    NEDepthConvertKernel(NEDepthConvertKernel &&) = default;
-    /** Prevent instances of this class from being copied (As this class contains pointers) */
-    NEDepthConvertKernel &operator=(const NEDepthConvertKernel &) = delete;
-    /** Default move assignment operator */
-    NEDepthConvertKernel &operator=(NEDepthConvertKernel &&) = default;
-    /** Set the input and output of the kernel
-     *
-     * Valid conversions Input -> Output :
-     *
-     *   - QS8 -> QS8, F32
-     *   - U8 -> U16, S16, S32
-     *   - U16 -> U8, U32
-     *   - S16 -> U8, S32
-     *   - QS16 -> QS16, F32
-     *   - F32 -> QS8
-     *
-     * @warning In case of in-place fixed point position conversion make sure that configure has been called
-     *          before the updated tensor is used in other functions, as the TensorInfo of the tensor will be
-     *          altered. In-place is only supported for QS8 -> QS8, QS16 -> QS16.
-     *
-     * @param[in, out] input  The input tensor to convert (Written in case of in-place computation). Data types supported: U8/QS8/U16/S16/F32.
-     * @param[out]     output The output tensor. Can be null in case of in-place computation. Data types supported: U8/QS8/U16/S16/U32/S32/F32.
-     * @param[in]      policy Conversion policy.
-     * @param[in]      shift  (Optional) Value for down/up conversions. Must be 0 <= shift < 8.
-     *                        In case of fixed point position conversion, it specifies the new fixed point position, if operation is in-place.
-     */
-    void configure(ITensor *input, ITensor *output, ConvertPolicy policy, uint32_t shift = 0);
-
-    // Inherited methods overridden:
-    void run(const Window &window, const ThreadInfo &info) override;
-
-private:
-    ITensor      *_input;
-    ITensor      *_output;
-    ConvertPolicy _policy;
-    uint32_t      _shift;
-    int           _fixed_point_position_input;
-    int           _fixed_point_position_output;
-};
-} // namespace arm_compute
-#endif /*__ARM_COMPUTE_NEDEPTHCONVERTKERNEL_H__ */
diff --git a/arm_compute/core/NEON/kernels/NEDepthConvertLayerKernel.h b/arm_compute/core/NEON/kernels/NEDepthConvertLayerKernel.h
new file mode 100644
index 0000000000..af51ded87a
--- /dev/null
+++ b/arm_compute/core/NEON/kernels/NEDepthConvertLayerKernel.h
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_DEPTHCONVERTKERNEL_H__
+#define __ARM_COMPUTE_DEPTHCONVERTKERNEL_H__
+
+#include "arm_compute/core/NEON/INEKernel.h"
+#include "arm_compute/core/Types.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Depth conversion kernel */
+class NEDepthConvertLayerKernel : public INEKernel
+{
+public:
+    /** Default constructor*/
+    NEDepthConvertLayerKernel();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NEDepthConvertLayerKernel(const NEDepthConvertLayerKernel &) = delete;
+    /** Default move constructor */
+    NEDepthConvertLayerKernel(NEDepthConvertLayerKernel &&) = default;
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NEDepthConvertLayerKernel &operator=(const NEDepthConvertLayerKernel &) = delete;
+    /** Default move assignment operator */
+    NEDepthConvertLayerKernel &operator=(NEDepthConvertLayerKernel &&) = default;
+    /** Set the input and output of the kernel
+     *
+     * Valid conversions Input -> Output :
+     *
+     *   - QS8 -> QS8, F32
+     *   - U8 -> U16, S16, S32
+     *   - U16 -> U8, U32
+     *   - S16 -> U8, S32
+     *   - QS16 -> QS16, F32
+     *   - F32 -> QS8
+     *
+     * @warning In case of in-place fixed point position conversion make sure that configure has been called
+     *          before the updated tensor is used in other functions, as the TensorInfo of the tensor will be
+     *          altered. In-place is only supported for QS8 -> QS8, QS16 -> QS16.
+     *
+     * @param[in, out] input  The input tensor to convert (Written in case of in-place computation). Data types supported: U8/QS8/U16/S16/F32.
+     * @param[out]     output The output tensor. Can be null in case of in-place computation. Data types supported: U8/QS8/U16/S16/U32/S32/F32.
+     * @param[in]      policy Conversion policy.
+     * @param[in]      shift  (Optional) Value for down/up conversions. Must be 0 <= shift < 8.
+     *                        In case of fixed point position conversion, it specifies the new fixed point position, if operation is in-place.
+     */
+    void configure(ITensor *input, ITensor *output, ConvertPolicy policy, uint32_t shift = 0);
+
+    // Inherited methods overridden:
+    void run(const Window &window, const ThreadInfo &info) override;
+
+private:
+    ITensor      *_input;
+    ITensor      *_output;
+    ConvertPolicy _policy;
+    uint32_t      _shift;
+    int           _fixed_point_position_input;
+    int           _fixed_point_position_output;
+};
+} // namespace arm_compute
+#endif /*__ARM_COMPUTE_NEDEPTHCONVERTKERNEL_H__ */
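[Usage sketch, not part of the patch: the renamed conversion kernel exercising the U8 -> S16 path from the list above. `src` and `dst` are illustrative, pre-allocated tensors.]

    NEDepthConvertLayerKernel convert_kernel;
    // Saturating up-conversion with no shift applied.
    convert_kernel.configure(&src, &dst, ConvertPolicy::SATURATE, /* shift */ 0);
    NEScheduler::get().schedule(&convert_kernel, Window::DimY);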
diff --git a/arm_compute/core/NEON/kernels/NEDepthwiseConvolution3x3Kernel.h b/arm_compute/core/NEON/kernels/NEDepthwiseConvolution3x3Kernel.h
deleted file mode 100644
index a32a06b61d..0000000000
--- a/arm_compute/core/NEON/kernels/NEDepthwiseConvolution3x3Kernel.h
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
- * Copyright (c) 2017 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef __ARM_COMPUTE_NEDEPTHWISECONVOLUTIONKERNEL3x3_H__
-#define __ARM_COMPUTE_NEDEPTHWISECONVOLUTIONKERNEL3x3_H__
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Interface for the kernel to run a 3x3 depthwise convolution on a tensor.
- */
-class NEDepthwiseConvolution3x3Kernel : public INEKernel
-{
-public:
-    /** Default constructor */
-    NEDepthwiseConvolution3x3Kernel();
-    /** Prevent instances of this class from being copied (As this class contains pointers) */
-    NEDepthwiseConvolution3x3Kernel(const NEDepthwiseConvolution3x3Kernel &) = delete;
-    /** Prevent instances of this class from being copied (As this class contains pointers) */
-    NEDepthwiseConvolution3x3Kernel &operator=(const NEDepthwiseConvolution3x3Kernel &) = delete;
-    /** Default Move Constructor. */
-    NEDepthwiseConvolution3x3Kernel(NEDepthwiseConvolution3x3Kernel &&) = default;
-    /** Default move assignment operator. */
-    NEDepthwiseConvolution3x3Kernel &operator=(NEDepthwiseConvolution3x3Kernel &&) = default;
-    /** Initialize the function's source, destination, conv and border_size.
-     *
-     * @param[in]  input     Source tensor. DataType supported: F32.
-     * @param[in]  weights   Weights tensor. This is a 3D tensor with dimensions [3, 3, IFM]. Data type supported: Same as @p input.
-     * @param[out] output    Destination tensor. Data type supported: Same as @p input.
-     * @param[in]  conv_info Padding and stride information to use for the convolution.
-     */
-    void configure(const ITensor *input, const ITensor *weights, ITensor *output, const PadStrideInfo &conv_info);
-
-    // Inherited methods overridden:
-    void run(const Window &window, const ThreadInfo &info) override;
-    BorderSize border_size() const override;
-
-private:
-    BorderSize     _border_size;
-    const ITensor *_input;
-    ITensor       *_output;
-    const ITensor *_weights;
-    PadStrideInfo  _conv_info;
-};
-} // namespace arm_compute
-#endif /* __ARM_COMPUTE_NEDEPTHWISECONVOLUTIONKERNEL3x3_H__ */
\ No newline at end of file
diff --git a/arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.h b/arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.h
new file mode 100644
index 0000000000..b8f01cb635
--- /dev/null
+++ b/arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NEDEPTHWISECONVOLUTIONKERNEL3x3_H__
+#define __ARM_COMPUTE_NEDEPTHWISECONVOLUTIONKERNEL3x3_H__
+
+#include "arm_compute/core/NEON/INEKernel.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Interface for the kernel to run a 3x3 depthwise convolution on a tensor.
+ */
+class NEDepthwiseConvolutionLayer3x3Kernel : public INEKernel
+{
+public:
+    /** Default constructor */
+    NEDepthwiseConvolutionLayer3x3Kernel();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NEDepthwiseConvolutionLayer3x3Kernel(const NEDepthwiseConvolutionLayer3x3Kernel &) = delete;
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NEDepthwiseConvolutionLayer3x3Kernel &operator=(const NEDepthwiseConvolutionLayer3x3Kernel &) = delete;
+    /** Default Move Constructor. */
+    NEDepthwiseConvolutionLayer3x3Kernel(NEDepthwiseConvolutionLayer3x3Kernel &&) = default;
+    /** Default move assignment operator. */
+    NEDepthwiseConvolutionLayer3x3Kernel &operator=(NEDepthwiseConvolutionLayer3x3Kernel &&) = default;
+    /** Initialize the function's source, destination, conv and border_size.
+     *
+     * @param[in]  input     Source tensor. DataType supported: F32.
+     * @param[in]  weights   Weights tensor. This is a 3D tensor with dimensions [3, 3, IFM]. Data type supported: Same as @p input.
+     * @param[out] output    Destination tensor. Data type supported: Same as @p input.
+     * @param[in]  conv_info Padding and stride information to use for the convolution.
+     */
+    void configure(const ITensor *input, const ITensor *weights, ITensor *output, const PadStrideInfo &conv_info);
+
+    // Inherited methods overridden:
+    void run(const Window &window, const ThreadInfo &info) override;
+    BorderSize border_size() const override;
+
+private:
+    BorderSize     _border_size;
+    const ITensor *_input;
+    ITensor       *_output;
+    const ITensor *_weights;
+    PadStrideInfo  _conv_info;
+};
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_NEDEPTHWISECONVOLUTIONKERNEL3x3_H__ */
\ No newline at end of file
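[Usage sketch, not part of the patch: wiring the renamed 3x3 depthwise kernel with stride 1 and 1-pixel padding. Tensor names are illustrative and the border filling a real caller would pair with this is omitted.]

    // weights is a 3D tensor of shape [3, 3, IFM], per the documentation above.
    NEDepthwiseConvolutionLayer3x3Kernel dwc_kernel;
    dwc_kernel.configure(&input, &weights, &output, PadStrideInfo(1, 1, 1, 1));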
diff --git a/arm_compute/core/NEON/kernels/NEL2NormalizeKernel.h b/arm_compute/core/NEON/kernels/NEL2NormalizeKernel.h
deleted file mode 100644
index fbbe4bee99..0000000000
--- a/arm_compute/core/NEON/kernels/NEL2NormalizeKernel.h
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- * Copyright (c) 2017 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef __ARM_COMPUTE_NEL2NORMALIZEKERNEL_H__
-#define __ARM_COMPUTE_NEL2NORMALIZEKERNEL_H__
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Interface for performing a L2 normalize on a given axis given the square sum of it in this axis */
-class NEL2NormalizeKernel : public INEKernel
-{
-public:
-    /** Default constructor */
-    NEL2NormalizeKernel();
-    /** Prevent instances of this class from being copied (As this class contains pointers) */
-    NEL2NormalizeKernel(const NEL2NormalizeKernel &) = delete;
-    /** Prevent instances of this class from being copied (As this class contains pointers) */
-    NEL2NormalizeKernel &operator=(const NEL2NormalizeKernel &) = delete;
-    /** Allow instances of this class to be moved */
-    NEL2NormalizeKernel(NEL2NormalizeKernel &&) = default;
-    /** Allow instances of this class to be moved */
-    NEL2NormalizeKernel &operator=(NEL2NormalizeKernel &&) = default;
-    /** Default destructor */
-    ~NEL2NormalizeKernel() = default;
-    /** Set the input and output tensors.
-     *
-     * @param[in]  input   Source tensor. Data types supported: F32.
-     * @param[in]  sum     Sum values tensor. Data types supported: same as @p input.
-     * @param[out] output  Destination tensor. Data types supported: same as @p input.
-     * @param[in]  axis    Dimension along which to reduce. Supported reduction axis : 0
-     * @param[in]  epsilon Lower bound value for the normalization.
-     */
-    void configure(const ITensor *input, const ITensor *sum, ITensor *output, unsigned int axis, float epsilon);
-
-    // Inherited methods overridden:
-    void run(const Window &window, const ThreadInfo &info) override;
-
-private:
-    const ITensor *_input;
-    const ITensor *_sum;
-    ITensor       *_output;
-    unsigned int   _axis;
-    float          _epsilon;
-};
-} // namespace arm_compute
-#endif /*__ARM_COMPUTE_NEL2NORMALIZEKERNEL_H__ */
diff --git a/arm_compute/core/NEON/kernels/NEL2NormalizeLayerKernel.h b/arm_compute/core/NEON/kernels/NEL2NormalizeLayerKernel.h
new file mode 100644
index 0000000000..7aa5116b68
--- /dev/null
+++ b/arm_compute/core/NEON/kernels/NEL2NormalizeLayerKernel.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NEL2NORMALIZEKERNEL_H__
+#define __ARM_COMPUTE_NEL2NORMALIZEKERNEL_H__
+
+#include "arm_compute/core/NEON/INEKernel.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Interface for performing a L2 normalize on a given axis given the square sum of it in this axis */
+class NEL2NormalizeLayerKernel : public INEKernel
+{
+public:
+    /** Default constructor */
+    NEL2NormalizeLayerKernel();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NEL2NormalizeLayerKernel(const NEL2NormalizeLayerKernel &) = delete;
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NEL2NormalizeLayerKernel &operator=(const NEL2NormalizeLayerKernel &) = delete;
+    /** Allow instances of this class to be moved */
+    NEL2NormalizeLayerKernel(NEL2NormalizeLayerKernel &&) = default;
+    /** Allow instances of this class to be moved */
+    NEL2NormalizeLayerKernel &operator=(NEL2NormalizeLayerKernel &&) = default;
+    /** Default destructor */
+    ~NEL2NormalizeLayerKernel() = default;
+    /** Set the input and output tensors.
+     *
+     * @param[in]  input   Source tensor. Data types supported: F32.
+     * @param[in]  sum     Sum values tensor. Data types supported: same as @p input.
+     * @param[out] output  Destination tensor. Data types supported: same as @p input.
+     * @param[in]  axis    Dimension along which to reduce. Supported reduction axis : 0
+     * @param[in]  epsilon Lower bound value for the normalization.
+     */
+    void configure(const ITensor *input, const ITensor *sum, ITensor *output, unsigned int axis, float epsilon);
+
+    // Inherited methods overridden:
+    void run(const Window &window, const ThreadInfo &info) override;
+
+private:
+    const ITensor *_input;
+    const ITensor *_sum;
+    ITensor       *_output;
+    unsigned int   _axis;
+    float          _epsilon;
+};
+} // namespace arm_compute
+#endif /*__ARM_COMPUTE_NEL2NORMALIZEKERNEL_H__ */
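[Usage sketch, not part of the patch: the renamed L2-normalize kernel, assuming `sum_sq` already holds the square sum along axis 0 (for example from a reduction kernel). All tensor names are illustrative.]

    NEL2NormalizeLayerKernel l2_kernel;
    l2_kernel.configure(&input, &sum_sq, &output, /* axis */ 0, /* epsilon */ 1e-12f);
    NEScheduler::get().schedule(&l2_kernel, Window::DimY);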
diff --git a/arm_compute/graph/nodes/L2NormalizeLayer.h b/arm_compute/graph/nodes/L2NormalizeLayer.h
index fc2bbc2d19..a423306bd2 100644
--- a/arm_compute/graph/nodes/L2NormalizeLayer.h
+++ b/arm_compute/graph/nodes/L2NormalizeLayer.h
@@ -33,7 +33,7 @@ namespace arm_compute
 {
 namespace graph
 {
-/** L2Normalize layer node */
+/** L2NormalizeLayer layer node */
 class L2NormalizeLayer final : public INode
 {
 public:
diff --git a/arm_compute/runtime/CL/CLFunctions.h b/arm_compute/runtime/CL/CLFunctions.h
index 9a20769ca1..f6ecef7a51 100644
--- a/arm_compute/runtime/CL/CLFunctions.h
+++ b/arm_compute/runtime/CL/CLFunctions.h
@@ -42,9 +42,9 @@
 #include "arm_compute/runtime/CL/functions/CLColorConvert.h"
 #include "arm_compute/runtime/CL/functions/CLConvolution.h"
 #include "arm_compute/runtime/CL/functions/CLConvolutionLayer.h"
-#include "arm_compute/runtime/CL/functions/CLDepthConcatenate.h"
-#include "arm_compute/runtime/CL/functions/CLDepthConvert.h"
-#include "arm_compute/runtime/CL/functions/CLDepthwiseConvolution.h"
+#include "arm_compute/runtime/CL/functions/CLDepthConcatenateLayer.h"
+#include "arm_compute/runtime/CL/functions/CLDepthConvertLayer.h"
+#include "arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h"
 #include "arm_compute/runtime/CL/functions/CLDepthwiseSeparableConvolutionLayer.h"
 #include "arm_compute/runtime/CL/functions/CLDequantizationLayer.h"
 #include "arm_compute/runtime/CL/functions/CLDerivative.h"
@@ -72,7 +72,7 @@
 #include "arm_compute/runtime/CL/functions/CLHarrisCorners.h"
 #include "arm_compute/runtime/CL/functions/CLHistogram.h"
 #include "arm_compute/runtime/CL/functions/CLIntegralImage.h"
-#include "arm_compute/runtime/CL/functions/CLL2Normalize.h"
+#include "arm_compute/runtime/CL/functions/CLL2NormalizeLayer.h"
 #include "arm_compute/runtime/CL/functions/CLLaplacianPyramid.h"
 #include "arm_compute/runtime/CL/functions/CLLaplacianReconstruct.h"
 #include "arm_compute/runtime/CL/functions/CLLocallyConnectedLayer.h"
diff --git a/arm_compute/runtime/CL/functions/CLDepthConcatenate.h b/arm_compute/runtime/CL/functions/CLDepthConcatenate.h
deleted file mode 100644
index 77997f6bd1..0000000000
--- a/arm_compute/runtime/CL/functions/CLDepthConcatenate.h
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- * Copyright (c) 2017 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef __ARM_COMPUTE_CLDEPTHCONCATENATE_H__
-#define __ARM_COMPUTE_CLDEPTHCONCATENATE_H__
-
-#include "arm_compute/core/CL/OpenCL.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/core/Window.h"
-#include "arm_compute/runtime/IFunction.h"
-
-#include "arm_compute/core/CL/kernels/CLDepthConcatenateKernel.h"
-#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h"
-
-#include <memory>
-#include <vector>
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Basic function to execute concatenate tensors along z axis. This function calls the following kernels:
- *
- * -# @ref CLFillBorderKernel (executed if input's lowest two dimensions are smaller than respective output's dimensions)
- * -# @ref CLDepthConcatenateKernel
- *
- */
-class CLDepthConcatenate : public IFunction
-{
-public:
-    /** Default constructor */
-    CLDepthConcatenate();
-    /** Initialise the kernel's inputs vector and output.
-     *
-     * @param[in,out] inputs_vector The vectors containing all the tensors to concatenate. Data types supported: QS8/QS16/F16/F32.
-     * @param[out]    output        Output tensor. Data types supported: Same as @p input.
-     */
-    void configure(std::vector<ICLTensor *> inputs_vector, ICLTensor *output);
-
-    // Inherited methods overridden:
-    void run() override;
-
-private:
-    std::vector<ICLTensor *>                    _inputs_vector;
-    std::unique_ptr<CLDepthConcatenateKernel[]> _concat_kernels_vector;
-    std::unique_ptr<CLFillBorderKernel[]>       _border_handlers_vector;
-    unsigned int                                _num_inputs;
-};
-}
-#endif /* __ARM_COMPUTE_CLDEPTHCONCATENATE_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLDepthConcatenateLayer.h b/arm_compute/runtime/CL/functions/CLDepthConcatenateLayer.h
new file mode 100644
index 0000000000..00b3b66c97
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLDepthConcatenateLayer.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLDEPTHCONCATENATE_H__
+#define __ARM_COMPUTE_CLDEPTHCONCATENATE_H__
+
+#include "arm_compute/core/CL/OpenCL.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/core/Window.h"
+#include "arm_compute/runtime/IFunction.h"
+
+#include "arm_compute/core/CL/kernels/CLDepthConcatenateLayerKernel.h"
+#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h"
+
+#include <memory>
+#include <vector>
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Basic function to execute concatenate tensors along z axis. This function calls the following kernels:
+ *
+ * -# @ref CLFillBorderKernel (executed if input's lowest two dimensions are smaller than respective output's dimensions)
+ * -# @ref CLDepthConcatenateLayerKernel
+ *
+ */
+class CLDepthConcatenateLayer : public IFunction
+{
+public:
+    /** Default constructor */
+    CLDepthConcatenateLayer();
+    /** Initialise the kernel's inputs vector and output.
+     *
+     * @param[in,out] inputs_vector The vectors containing all the tensors to concatenate. Data types supported: QS8/QS16/F16/F32.
+     * @param[out]    output        Output tensor. Data types supported: Same as @p input.
+     */
+    void configure(std::vector<ICLTensor *> inputs_vector, ICLTensor *output);
+
+    // Inherited methods overridden:
+    void run() override;
+
+private:
+    std::vector<ICLTensor *>                         _inputs_vector;
+    std::unique_ptr<CLDepthConcatenateLayerKernel[]> _concat_kernels_vector;
+    std::unique_ptr<CLFillBorderKernel[]>            _border_handlers_vector;
+    unsigned int                                     _num_inputs;
+};
+}
+#endif /* __ARM_COMPUTE_CLDEPTHCONCATENATE_H__ */
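[Usage sketch, not part of the patch: the renamed function keeps the same configure/run flow. `tensor_a`, `tensor_b` and `out` are illustrative, pre-allocated CLTensors; the GLES Compute and NEON variants mirror this interface.]

    std::vector<ICLTensor *> inputs = { &tensor_a, &tensor_b };
    CLDepthConcatenateLayer concat;
    concat.configure(inputs, &out);
    // ... fill tensor_a and tensor_b ...
    concat.run();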
diff --git a/arm_compute/runtime/CL/functions/CLDepthConvert.h b/arm_compute/runtime/CL/functions/CLDepthConvert.h
deleted file mode 100644
index 9a4c63dd6d..0000000000
--- a/arm_compute/runtime/CL/functions/CLDepthConvert.h
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * Copyright (c) 2016, 2017 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef __ARM_COMPUTE_CLDEPTHCONVERT_H__
-#define __ARM_COMPUTE_CLDEPTHCONVERT_H__
-
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
-
-#include <cstdint>
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Basic function to run @ref CLDepthConvertKernel. */
-class CLDepthConvert : public ICLSimpleFunction
-{
-public:
-    /** Initialize the function's source, destination
-     *
-     * Input data type must be different than output data type.
-     *
-     * Valid conversions Input -> Output :
-     *
-     *  - QS8 -> F32
-     *  - QS16 -> F32
-     *  - U8 -> U16, S16, U32, S32
-     *  - U16 -> U8, U32, S32
-     *  - S16 -> U8, U32, S32
-     *  - U32 -> U8, U16, S16
-     *  - S32 -> U8, U16, S16
-     *  - F32 -> QS8, QS16
-     *
-     * @param[in]  input  The input tensor to convert. Data types supported: QS8/U8/U16/S16/QS16/U32/S32/F32.
-     * @param[out] output The output tensor. Data types supported: QS8/U8/U16/S16/QS16/U32/S32/F32.
-     * @param[in]  policy Conversion policy.
-     * @param[in]  shift  Value for down/up conversions. Must be 0 <= shift < 8.
-     *                    It is not used on fixed point conversion.
-     */
-    void configure(const ICLTensor *input, ICLTensor *output, ConvertPolicy policy, uint32_t shift);
-};
-}
-#endif /*__ARM_COMPUTE_CLDEPTHCONVERT_H__*/
diff --git a/arm_compute/runtime/CL/functions/CLDepthConvertLayer.h b/arm_compute/runtime/CL/functions/CLDepthConvertLayer.h
new file mode 100644
index 0000000000..c84dc15508
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLDepthConvertLayer.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLDEPTHCONVERT_H__
+#define __ARM_COMPUTE_CLDEPTHCONVERT_H__
+
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Basic function to run @ref CLDepthConvertLayerKernel. */
+class CLDepthConvertLayer : public ICLSimpleFunction
+{
+public:
+    /** Initialize the function's source, destination
+     *
+     * Input data type must be different than output data type.
+     *
+     * Valid conversions Input -> Output :
+     *
+     *  - QS8 -> F32
+     *  - QS16 -> F32
+     *  - U8 -> U16, S16, U32, S32
+     *  - U16 -> U8, U32, S32
+     *  - S16 -> U8, U32, S32
+     *  - U32 -> U8, U16, S16
+     *  - S32 -> U8, U16, S16
+     *  - F32 -> QS8, QS16
+     *
+     * @param[in]  input  The input tensor to convert. Data types supported: QS8/U8/U16/S16/QS16/U32/S32/F32.
+     * @param[out] output The output tensor. Data types supported: QS8/U8/U16/S16/QS16/U32/S32/F32.
+     * @param[in]  policy Conversion policy.
+     * @param[in]  shift  Value for down/up conversions. Must be 0 <= shift < 8.
+     *                    It is not used on fixed point conversion.
+     */
+    void configure(const ICLTensor *input, ICLTensor *output, ConvertPolicy policy, uint32_t shift);
+};
+}
+#endif /*__ARM_COMPUTE_CLDEPTHCONVERT_H__*/
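[Usage sketch, not part of the patch: a saturating U8 -> U16 up-conversion with the renamed CL function. `src_u8` and `dst_u16` are illustrative, pre-allocated tensors; note that `shift` is mandatory here, unlike the NEON kernel above.]

    CLDepthConvertLayer convert;
    convert.configure(&src_u8, &dst_u16, ConvertPolicy::SATURATE, /* shift */ 0);
    convert.run();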
diff --git a/arm_compute/runtime/CL/functions/CLDepthwiseConvolution.h b/arm_compute/runtime/CL/functions/CLDepthwiseConvolution.h
deleted file mode 100644
index 40eb8523fb..0000000000
--- a/arm_compute/runtime/CL/functions/CLDepthwiseConvolution.h
+++ /dev/null
@@ -1,110 +0,0 @@
-/*
- * Copyright (c) 2017 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef __ARM_COMPUTE_CLDEPTHWISECONVOLUTION_H__
-#define __ARM_COMPUTE_CLDEPTHWISECONVOLUTION_H__
-
-#include "arm_compute/core/CL/kernels/CLDepthwiseConvolution3x3Kernel.h"
-#include "arm_compute/core/CL/kernels/CLDepthwiseIm2ColKernel.h"
-#include "arm_compute/core/CL/kernels/CLDepthwiseVectorToTensorKernel.h"
-#include "arm_compute/core/CL/kernels/CLDepthwiseWeightsReshapeKernel.h"
-#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/CL/CLTensor.h"
-#include "arm_compute/runtime/IFunction.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Basic function to execute a depthwise convolution for kernel size 3x3xC. This function calls the following OpenCL kernels:
- *
- * -# @ref CLDepthwiseConvolution3x3Kernel
- * -# @ref CLFillBorderKernel (if pad_x or pad_y > 0)
- *
- */
-class CLDepthwiseConvolution3x3 : public IFunction
-{
-public:
-    /** Default constructor */
-    CLDepthwiseConvolution3x3();
-    /** Initialize the function's source, destination, conv and border_size.
-     *
-     * @param[in, out] input     Source tensor. Data type supported: QASYMM8/F32. (Written to only for border filling).
-     * @param[in]      weights   Weights tensor. A 3D tensor with shape [3, 3, IFM]. Data type supported: Same as @p input.
-     * @param[in]      biases    (Optional) Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
-     *                           Data type supported: Same as @p input.
-     * @param[out]     output    Destination tensor. Data type supported: same as @p input.
-     * @param[in]      conv_info Padding and stride information to use for the convolution.
-     */
-    void configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info);
-
-    // Inherited methods overriden:
-    void run() override;
-
-private:
-    CLDepthwiseConvolution3x3Kernel _kernel;
-    CLFillBorderKernel              _border_handler;
-};
-
-/** Basic function to execute a generic depthwise convolution. This function calls the following OpenCL kernels:
- *
- * -# @ref CLDepthwiseIm2ColKernel
- * -# @ref CLGEMMMatrixVectorMultiplyKernel
- * -# @ref CLDepthwiseWeightsReshapeKernel
- * -# @ref CLFillBorderKernel (if pad_x or pad_y > 0)
- *
- */
-class CLDepthwiseConvolution : public IFunction
-{
-public:
-    /** Default constructor */
-    CLDepthwiseConvolution();
-    /** Initialize the function's source, destination, weights and convolution information.
-     *
-     * @param[in, out] input     Source tensor. Data type supported: F32. (Written to only for border filling).
-     * @param[in]      weights   Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM]. Data type supported: Same as @p input.
-     * @param[in]      biases    (Optional) Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
-     *                           Data type supported: Same as @p input.
-     * @param[out]     output    Destination tensor. Data type supported: same as @p input.
-     * @param[in]      conv_info Padding and stride information to use for the convolution.
-     */
-    void configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info);
-
-    // Inherited methods overriden:
-    void run() override;
-
-private:
-    CLDepthwiseIm2ColKernel          _im2col_kernel;
-    CLDepthwiseWeightsReshapeKernel  _weights_reshape_kernel;
-    CLGEMMMatrixVectorMultiplyKernel _v2mm_kernel;
-    CLDepthwiseVectorToTensorKernel  _vector_to_tensor_kernel;
-    CLFillBorderKernel               _v2mm_input_fill_border;
-    CLFillBorderKernel               _v2mm_weights_fill_border;
-    CLTensor                         _input_reshaped;
-    CLTensor                         _weights_reshaped;
-    CLTensor                         _v2mm_output;
-};
-}
-#endif /*__ARM_COMPUTE_CLDEPTHWISECONVOLUTION_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h
new file mode 100644
index 0000000000..f7899415d2
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h
@@ -0,0 +1,110 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLDEPTHWISECONVOLUTION_H__
+#define __ARM_COMPUTE_CLDEPTHWISECONVOLUTION_H__
+
+#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3Kernel.h"
+#include "arm_compute/core/CL/kernels/CLDepthwiseIm2ColKernel.h"
+#include "arm_compute/core/CL/kernels/CLDepthwiseVectorToTensorKernel.h"
+#include "arm_compute/core/CL/kernels/CLDepthwiseWeightsReshapeKernel.h"
+#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h"
+#include "arm_compute/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/IFunction.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Basic function to execute a depthwise convolution for kernel size 3x3xC. This function calls the following OpenCL kernels:
+ *
+ * -# @ref CLDepthwiseConvolutionLayer3x3Kernel
+ * -# @ref CLFillBorderKernel (if pad_x or pad_y > 0)
+ *
+ */
+class CLDepthwiseConvolutionLayer3x3 : public IFunction
+{
+public:
+    /** Default constructor */
+    CLDepthwiseConvolutionLayer3x3();
+    /** Initialize the function's source, destination, conv and border_size.
+     *
+     * @param[in, out] input     Source tensor. Data type supported: QASYMM8/F32. (Written to only for border filling).
+     * @param[in]      weights   Weights tensor. A 3D tensor with shape [3, 3, IFM]. Data type supported: Same as @p input.
+     * @param[in]      biases    (Optional) Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
+     *                           Data type supported: Same as @p input.
+     * @param[out]     output    Destination tensor. Data type supported: same as @p input.
+     * @param[in]      conv_info Padding and stride information to use for the convolution.
+     */
+    void configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info);
+
+    // Inherited methods overriden:
+    void run() override;
+
+private:
+    CLDepthwiseConvolutionLayer3x3Kernel _kernel;
+    CLFillBorderKernel                   _border_handler;
+};
+
+/** Basic function to execute a generic depthwise convolution. This function calls the following OpenCL kernels:
+ *
+ * -# @ref CLDepthwiseIm2ColKernel
+ * -# @ref CLGEMMMatrixVectorMultiplyKernel
+ * -# @ref CLDepthwiseWeightsReshapeKernel
+ * -# @ref CLFillBorderKernel (if pad_x or pad_y > 0)
+ *
+ */
+class CLDepthwiseConvolutionLayer : public IFunction
+{
+public:
+    /** Default constructor */
+    CLDepthwiseConvolutionLayer();
+    /** Initialize the function's source, destination, weights and convolution information.
+     *
+     * @param[in, out] input     Source tensor. Data type supported: F32. (Written to only for border filling).
+     * @param[in]      weights   Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM]. Data type supported: Same as @p input.
+     * @param[in]      biases    (Optional) Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
+     *                           Data type supported: Same as @p input.
+     * @param[out]     output    Destination tensor. Data type supported: same as @p input.
+     * @param[in]      conv_info Padding and stride information to use for the convolution.
+     */
+    void configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info);
+
+    // Inherited methods overriden:
+    void run() override;
+
+private:
+    CLDepthwiseIm2ColKernel          _im2col_kernel;
+    CLDepthwiseWeightsReshapeKernel  _weights_reshape_kernel;
+    CLGEMMMatrixVectorMultiplyKernel _v2mm_kernel;
+    CLDepthwiseVectorToTensorKernel  _vector_to_tensor_kernel;
+    CLFillBorderKernel               _v2mm_input_fill_border;
+    CLFillBorderKernel               _v2mm_weights_fill_border;
+    CLTensor                         _input_reshaped;
+    CLTensor                         _weights_reshaped;
+    CLTensor                         _v2mm_output;
+};
+}
+#endif /*__ARM_COMPUTE_CLDEPTHWISECONVOLUTION_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLDepthwiseSeparableConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLDepthwiseSeparableConvolutionLayer.h
index a38446293b..27cee5ed3b 100644
--- a/arm_compute/runtime/CL/functions/CLDepthwiseSeparableConvolutionLayer.h
+++ b/arm_compute/runtime/CL/functions/CLDepthwiseSeparableConvolutionLayer.h
@@ -27,7 +27,7 @@
 #include "arm_compute/core/Types.h"
 #include "arm_compute/runtime/CL/CLTensor.h"
 #include "arm_compute/runtime/CL/ICLSimpleFunction.h"
-#include "arm_compute/runtime/CL/functions/CLDepthwiseConvolution.h"
+#include "arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h"
 #include "arm_compute/runtime/CL/functions/CLDirectConvolutionLayer.h"
 #include "arm_compute/runtime/IFunction.h"
 
@@ -39,7 +39,7 @@ class ICLTensor;
 
 /** Basic function to execute depthwise convolution. This function calls the following OpenCL kernels and function:
  *
- * -# @ref CLDepthwiseConvolution
+ * -# @ref CLDepthwiseConvolutionLayer
  * -# @ref CLDirectConvolutionLayer
  *
  */
@@ -72,8 +72,8 @@ public:
     void run() override;
 
 private:
-    CLDepthwiseConvolution   _depthwise_conv;
-    CLDirectConvolutionLayer _pointwise_conv;
+    CLDepthwiseConvolutionLayer _depthwise_conv;
+    CLDirectConvolutionLayer    _pointwise_conv;
 };
 }
 #endif /*__ARM_COMPUTE_CL_DEPTHWISE_SEPARABLE_CONVOLUTION_H__ */
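[Usage sketch, not part of the patch: the renamed function-level 3x3 fast path. Tensor names are illustrative and pre-allocated; passing nullptr for the biases skips them, per the documentation above.]

    CLDepthwiseConvolutionLayer3x3 dwc;
    dwc.configure(&input, &weights, /* biases */ nullptr, &output, PadStrideInfo(1, 1, 1, 1));
    dwc.run();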
diff --git a/arm_compute/runtime/CL/functions/CLL2Normalize.h b/arm_compute/runtime/CL/functions/CLL2Normalize.h
deleted file mode 100644
index 20af54eda2..0000000000
--- a/arm_compute/runtime/CL/functions/CLL2Normalize.h
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
- * Copyright (c) 2017 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef __ARM_COMPUTE_CLL2NORMALIZE_H__
-#define __ARM_COMPUTE_CLL2NORMALIZE_H__
-
-#include "arm_compute/core/CL/kernels/CLL2NormalizeKernel.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/CL/CLMemoryGroup.h"
-#include "arm_compute/runtime/CL/CLTensor.h"
-#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
-#include "arm_compute/runtime/CL/functions/CLReductionOperation.h"
-#include "arm_compute/runtime/IMemoryManager.h"
-
-#include <cstdint>
-#include <memory>
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Perform reduction operation.
- */
-class CLL2Normalize : public IFunction
-{
-public:
-    /** Constructor */
-    CLL2Normalize(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
-
-    /** Set the input and output tensors.
-     *
-     * @param[in]  input   Source tensor. Data types supported: QS8, QS16, F32.
-     * @param[out] output  Destination tensor. Data types supported: Same as @p input.
-     * @param[in]  axis    Axis along which to reduce. Supported reduction axis : 0
-     * @param[in]  epsilon Lower bound value for the normalization.
-     */
-    void configure(ICLTensor *input, ICLTensor *output, unsigned int axis, float epsilon = 1e-12);
-
-    // Inherited methods overridden:
-    void run() override;
-
-private:
-    CLMemoryGroup        _memory_group;
-    CLReductionOperation _reduce_func;
-    CLL2NormalizeKernel  _normalize_kernel;
-    CLTensor             _sumsq;
-};
-}
-#endif /*__ARM_COMPUTE_CLL2NORMALIZE_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLL2NormalizeLayer.h b/arm_compute/runtime/CL/functions/CLL2NormalizeLayer.h
new file mode 100644
index 0000000000..8aea7a641b
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLL2NormalizeLayer.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLL2NORMALIZE_H__
+#define __ARM_COMPUTE_CLL2NORMALIZE_H__
+
+#include "arm_compute/core/CL/kernels/CLL2NormalizeLayerKernel.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/CLMemoryGroup.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
+#include "arm_compute/runtime/CL/functions/CLReductionOperation.h"
+#include "arm_compute/runtime/IMemoryManager.h"
+
+#include <cstdint>
+#include <memory>
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Perform reduction operation.
+ */
+class CLL2NormalizeLayer : public IFunction
+{
+public:
+    /** Constructor */
+    CLL2NormalizeLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+
+    /** Set the input and output tensors.
+     *
+     * @param[in]  input   Source tensor. Data types supported: QS8, QS16, F32.
+     * @param[out] output  Destination tensor. Data types supported: Same as @p input.
+     * @param[in]  axis    Axis along which to reduce. Supported reduction axis : 0
+     * @param[in]  epsilon Lower bound value for the normalization.
+     */
+    void configure(ICLTensor *input, ICLTensor *output, unsigned int axis, float epsilon = 1e-12);
+
+    // Inherited methods overridden:
+    void run() override;
+
+private:
+    CLMemoryGroup            _memory_group;
+    CLReductionOperation     _reduce_func;
+    CLL2NormalizeLayerKernel _normalize_kernel;
+    CLTensor                 _sumsq;
+};
+}
+#endif /*__ARM_COMPUTE_CLL2NORMALIZE_H__ */
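[Usage sketch, not part of the patch: the renamed CL function, optionally wired to a memory manager so the internal square-sum tensor can share memory. `mm` is an illustrative std::shared_ptr<IMemoryManager>; tensors are pre-allocated.]

    CLL2NormalizeLayer l2_norm(mm);
    l2_norm.configure(&input, &output, /* axis */ 0); // epsilon defaults to 1e-12
    l2_norm.run();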
* @@ -85,7 +85,7 @@ private: CLPyramid _tmp_pyr; std::unique_ptr _addf; std::unique_ptr _scalef; - CLDepthConvert _depthf; + CLDepthConvertLayer _depthf; }; } #endif /*__ARM_COMPUTE_CLLAPLACIANRECONSTRUCT_H__ */ diff --git a/arm_compute/runtime/GLES_COMPUTE/GCFunctions.h b/arm_compute/runtime/GLES_COMPUTE/GCFunctions.h index 8a345c5fab..e76d4efb27 100644 --- a/arm_compute/runtime/GLES_COMPUTE/GCFunctions.h +++ b/arm_compute/runtime/GLES_COMPUTE/GCFunctions.h @@ -28,7 +28,7 @@ #include "arm_compute/runtime/GLES_COMPUTE/functions/GCAbsoluteDifference.h" #include "arm_compute/runtime/GLES_COMPUTE/functions/GCActivationLayer.h" #include "arm_compute/runtime/GLES_COMPUTE/functions/GCBatchNormalizationLayer.h" -#include "arm_compute/runtime/GLES_COMPUTE/functions/GCDepthConcatenate.h" +#include "arm_compute/runtime/GLES_COMPUTE/functions/GCDepthConcatenateLayer.h" #include "arm_compute/runtime/GLES_COMPUTE/functions/GCDirectConvolutionLayer.h" #include "arm_compute/runtime/GLES_COMPUTE/functions/GCDropoutLayer.h" #include "arm_compute/runtime/GLES_COMPUTE/functions/GCFillBorder.h" diff --git a/arm_compute/runtime/GLES_COMPUTE/functions/GCDepthConcatenate.h b/arm_compute/runtime/GLES_COMPUTE/functions/GCDepthConcatenate.h deleted file mode 100644 index 801dc0e111..0000000000 --- a/arm_compute/runtime/GLES_COMPUTE/functions/GCDepthConcatenate.h +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Copyright (c) 2017 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef __ARM_COMPUTE_GCDEPTHCONCATENATE_H__ -#define __ARM_COMPUTE_GCDEPTHCONCATENATE_H__ - -#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h" -#include "arm_compute/core/GLES_COMPUTE/kernels/GCDepthConcatenateKernel.h" -#include "arm_compute/core/GLES_COMPUTE/kernels/GCFillBorderKernel.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/IFunction.h" - -#include -#include - -namespace arm_compute -{ -class IGCTensor; - -/** Basic function to execute concatenate tensors along z axis. This function calls the following kernels: - * - * -# @ref GCFillBorderKernel (executed if input's lowest two dimensions are smaller than respective output's dimensions) - * -# @ref GCDepthConcatenateKernel - * - */ -class GCDepthConcatenate : public IFunction -{ -public: - /** Default constructor */ - GCDepthConcatenate(); - /** Initialise the kernel's inputs vector and output. - * - * @param[in,out] inputs_vector The vectors containing all the tensors to concatenate. 
Data types supported: F16/F32. - * @param[out] output Output tensor. Data types supported: Same as @p input. - */ - void configure(std::vector<IGCTensor *> inputs_vector, IGCTensor *output); - - // Inherited methods overridden: - void run() override; - -private: - std::unique_ptr<GCDepthConcatenateKernel[]> _concat_kernels_vector; - std::unique_ptr<GCFillBorderKernel[]> _border_handlers_vector; - unsigned int _num_inputs; -}; -} -#endif /* __ARM_COMPUTE_GCDEPTHCONCATENATE_H__ */ diff --git a/arm_compute/runtime/GLES_COMPUTE/functions/GCDepthConcatenateLayer.h b/arm_compute/runtime/GLES_COMPUTE/functions/GCDepthConcatenateLayer.h new file mode 100644 index 0000000000..1151399f92 --- /dev/null +++ b/arm_compute/runtime/GLES_COMPUTE/functions/GCDepthConcatenateLayer.h @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_GCDEPTHCONCATENATE_H__ +#define __ARM_COMPUTE_GCDEPTHCONCATENATE_H__ + +#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h" +#include "arm_compute/core/GLES_COMPUTE/kernels/GCDepthConcatenateLayerKernel.h" +#include "arm_compute/core/GLES_COMPUTE/kernels/GCFillBorderKernel.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/IFunction.h" + +#include <memory> +#include <vector> + +namespace arm_compute +{ +class IGCTensor; + +/** Basic function to execute concatenate tensors along z axis. This function calls the following kernels: + * + * -# @ref GCFillBorderKernel (executed if input's lowest two dimensions are smaller than respective output's dimensions) + * -# @ref GCDepthConcatenateLayerKernel + * + */ +class GCDepthConcatenateLayer : public IFunction +{ +public: + /** Default constructor */ + GCDepthConcatenateLayer(); + /** Initialise the kernel's inputs vector and output. + * + * @param[in,out] inputs_vector The vectors containing all the tensors to concatenate. Data types supported: F16/F32. + * @param[out] output Output tensor. Data types supported: Same as @p input.
+ */ + void configure(std::vector<IGCTensor *> inputs_vector, IGCTensor *output); + + // Inherited methods overridden: + void run() override; + +private: + std::unique_ptr<GCDepthConcatenateLayerKernel[]> _concat_kernels_vector; + std::unique_ptr<GCFillBorderKernel[]> _border_handlers_vector; + unsigned int _num_inputs; +}; +} +#endif /* __ARM_COMPUTE_GCDEPTHCONCATENATE_H__ */ diff --git a/arm_compute/runtime/NEON/NEFunctions.h b/arm_compute/runtime/NEON/NEFunctions.h index 2e8c084371..08852cf368 100644 --- a/arm_compute/runtime/NEON/NEFunctions.h +++ b/arm_compute/runtime/NEON/NEFunctions.h @@ -45,9 +45,9 @@ #include "arm_compute/runtime/NEON/functions/NEConvolutionLayer.h" #include "arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h" #include "arm_compute/runtime/NEON/functions/NEDeconvolutionLayerUpsample.h" -#include "arm_compute/runtime/NEON/functions/NEDepthConcatenate.h" -#include "arm_compute/runtime/NEON/functions/NEDepthConvert.h" -#include "arm_compute/runtime/NEON/functions/NEDepthwiseConvolution.h" +#include "arm_compute/runtime/NEON/functions/NEDepthConcatenateLayer.h" +#include "arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h" +#include "arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h" #include "arm_compute/runtime/NEON/functions/NEDepthwiseSeparableConvolutionLayer.h" #include "arm_compute/runtime/NEON/functions/NEDequantizationLayer.h" #include "arm_compute/runtime/NEON/functions/NEDerivative.h" @@ -77,7 +77,7 @@ #include "arm_compute/runtime/NEON/functions/NEHistogram.h" #include "arm_compute/runtime/NEON/functions/NEIm2Col.h" #include "arm_compute/runtime/NEON/functions/NEIntegralImage.h" -#include "arm_compute/runtime/NEON/functions/NEL2Normalize.h" +#include "arm_compute/runtime/NEON/functions/NEL2NormalizeLayer.h" #include "arm_compute/runtime/NEON/functions/NELaplacianPyramid.h" #include "arm_compute/runtime/NEON/functions/NELaplacianReconstruct.h" #include "arm_compute/runtime/NEON/functions/NELocallyConnectedLayer.h" diff --git a/arm_compute/runtime/NEON/functions/NEDepthConcatenate.h b/arm_compute/runtime/NEON/functions/NEDepthConcatenate.h deleted file mode 100644 index cc65099575..0000000000 --- a/arm_compute/runtime/NEON/functions/NEDepthConcatenate.h +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Copyright (c) 2017 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE.
- */ -#ifndef __ARM_COMPUTE_NEDEPTHCONCATENATE_H__ -#define __ARM_COMPUTE_NEDEPTHCONCATENATE_H__ - -#include "arm_compute/runtime/IFunction.h" - -#include "arm_compute/core/NEON/kernels/NEDepthConcatenateKernel.h" -#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" - -#include <memory> -#include <vector> - -namespace arm_compute -{ -class ITensor; - -/** Basic function to execute concatenate tensors along z axis. This function calls the following kernels: - * - * -# @ref NEFillBorderKernel (executed if input's lowest two dimensions are smaller than respective output's dimensions) - * -# @ref NEDepthConcatenateKernel - * - */ -class NEDepthConcatenate : public IFunction -{ -public: - /** Default constructor */ - NEDepthConcatenate(); - /** Initialise the kernel's inputs vector and output. - * - * @param[in,out] inputs_vector The vectors containing all the tensors to concatenate. Data types supported: QS8/QS16/F16/F32. - * @param[out] output Output tensor. Data types supported: Same as @p inputs_vector. - */ - void configure(std::vector<ITensor *> inputs_vector, ITensor *output); - - // Inherited methods overridden: - void run() override; - -private: - std::vector<ITensor *> _inputs_vector; - std::unique_ptr<NEDepthConcatenateKernel[]> _concat_kernels_vector; - std::unique_ptr<NEFillBorderKernel[]> _border_handlers_vector; - unsigned int _num_inputs; -}; -} -#endif /* __ARM_COMPUTE_NEDEPTHCONCATENATE_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEDepthConcatenateLayer.h b/arm_compute/runtime/NEON/functions/NEDepthConcatenateLayer.h new file mode 100644 index 0000000000..5b63b70634 --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEDepthConcatenateLayer.h @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEDEPTHCONCATENATE_H__ +#define __ARM_COMPUTE_NEDEPTHCONCATENATE_H__ + +#include "arm_compute/runtime/IFunction.h" + +#include "arm_compute/core/NEON/kernels/NEDepthConcatenateLayerKernel.h" +#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" + +#include <memory> +#include <vector> + +namespace arm_compute +{ +class ITensor; + +/** Basic function to execute concatenate tensors along z axis.
This function calls the following kernels: + * + * -# @ref NEFillBorderKernel (executed if input's lowest two dimensions are smaller than respective output's dimensions) + * -# @ref NEDepthConcatenateLayerKernel + * + */ +class NEDepthConcatenateLayer : public IFunction +{ +public: + /** Default constructor */ + NEDepthConcatenateLayer(); + /** Initialise the kernel's inputs vector and output. + * + * @param[in,out] inputs_vector The vectors containing all the tensors to concatenate. Data types supported: QS8/QS16/F16/F32. + * @param[out] output Output tensor. Data types supported: Same as @p inputs_vector. + */ + void configure(std::vector<ITensor *> inputs_vector, ITensor *output); + + // Inherited methods overridden: + void run() override; + +private: + std::vector<ITensor *> _inputs_vector; + std::unique_ptr<NEDepthConcatenateLayerKernel[]> _concat_kernels_vector; + std::unique_ptr<NEFillBorderKernel[]> _border_handlers_vector; + unsigned int _num_inputs; +}; +} +#endif /* __ARM_COMPUTE_NEDEPTHCONCATENATE_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEDepthConvert.h b/arm_compute/runtime/NEON/functions/NEDepthConvert.h deleted file mode 100644 index 37f7293fb3..0000000000 --- a/arm_compute/runtime/NEON/functions/NEDepthConvert.h +++ /dev/null @@ -1,69 +0,0 @@ -/* - * Copyright (c) 2016, 2017 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef __ARM_COMPUTE_NEDEPTHCONVERT_H__ -#define __ARM_COMPUTE_NEDEPTHCONVERT_H__ - -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/INESimpleFunction.h" - -#include <cstdint> - -namespace arm_compute -{ -class ITensor; - -/**Basic function to run @ref NEDepthConvertKernel */ -class NEDepthConvert : public INESimpleFunction -{ -public: - /* Contructor */ - NEDepthConvert() = default; - /** Prevent instances of this class from being copied (As this class contains pointers)*/ - NEDepthConvert(const NEDepthConvert &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers)*/ - const NEDepthConvert &operator=(const NEDepthConvert &) = delete; - /** Initialize the function's source, destination - * - * Valid conversions Input -> Output : - * QS8 -> QS8, F32 - * U8 -> U16, S16, S32 - * U16 -> U8, U32 - * S16 -> U8, S32 - * QS16 -> QS16, F32 - * F32 -> QS8, QS16 - * - * @warning In case of in-place fixed point position conversion make sure that configure has been called - * before the updated tensor is used in other functions, as the TensorInfo of the tensor will be - * altered.
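A usage sketch for NEDepthConcatenateLayer as declared above; the shapes are illustrative, the output depth equals the sum of the input depths, and any x/y gap between an input and the output must be even so the fill-border kernel can pad symmetrically:

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/runtime/NEON/functions/NEDepthConcatenateLayer.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    int main()
    {
        // Two 16x16 inputs with 8 and 24 channels; the output holds 8 + 24 = 32 channels
        Tensor in0, in1, out;
        in0.allocator()->init(TensorInfo(TensorShape(16U, 16U, 8U), 1, DataType::F32));
        in1.allocator()->init(TensorInfo(TensorShape(16U, 16U, 24U), 1, DataType::F32));
        out.allocator()->init(TensorInfo(TensorShape(16U, 16U, 32U), 1, DataType::F32));

        NEDepthConcatenateLayer concat;
        concat.configure({ &in0, &in1 }, &out);

        in0.allocator()->allocate();
        in1.allocator()->allocate();
        out.allocator()->allocate();
        // ... fill in0 and in1 ...

        concat.run();
        return 0;
    }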
In-place is only supported for QS8 -> QS8, QS16 -> QS16. - * - * @param[in, out] input The input tensor to convert (Written in case of in-place computation). Data types supported: U8/QS8/U16/S16/F32. - * @param[out] output The output tensor. Can be null in case of in-place computation. Data types supported: U8/QS8/U16/S16/U32/S32/F32. - * @param[in] policy Conversion policy. - * @param[in] shift (Optional) Value for down/up conversions. Must be 0 <= shift < 8. - * In case of fixed point position conversion, it specifies the new fixed point position, if operation is in-place. - */ - void configure(ITensor *input, ITensor *output, ConvertPolicy policy, uint32_t shift = 0); -}; -} -#endif /*__ARM_COMPUTE_NEDEPTHCONVERT_H__*/ diff --git a/arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h b/arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h new file mode 100644 index 0000000000..b235e87b4a --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEDEPTHCONVERT_H__ +#define __ARM_COMPUTE_NEDEPTHCONVERT_H__ + +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/NEON/INESimpleFunction.h" + +#include <cstdint> + +namespace arm_compute +{ +class ITensor; + +/** Basic function to run @ref NEDepthConvertLayerKernel */ +class NEDepthConvertLayer : public INESimpleFunction +{ +public: + /* Constructor */ + NEDepthConvertLayer() = default; + /** Prevent instances of this class from being copied (As this class contains pointers)*/ + NEDepthConvertLayer(const NEDepthConvertLayer &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers)*/ + const NEDepthConvertLayer &operator=(const NEDepthConvertLayer &) = delete; + /** Initialize the function's source, destination + * + * Valid conversions Input -> Output : + * QS8 -> QS8, F32 + * U8 -> U16, S16, S32 + * U16 -> U8, U32 + * S16 -> U8, S32 + * QS16 -> QS16, F32 + * F32 -> QS8, QS16 + * + * @warning In case of in-place fixed point position conversion make sure that configure has been called + * before the updated tensor is used in other functions, as the TensorInfo of the tensor will be + * altered. In-place is only supported for QS8 -> QS8, QS16 -> QS16. + * + * @param[in, out] input The input tensor to convert (Written in case of in-place computation).
Data types supported: U8/QS8/U16/S16/F32. + * @param[out] output The output tensor. Can be null in case of in-place computation. Data types supported: U8/QS8/U16/S16/U32/S32/F32. + * @param[in] policy Conversion policy. + * @param[in] shift (Optional) Value for down/up conversions. Must be 0 <= shift < 8. + * In case of fixed point position conversion, it specifies the new fixed point position, if operation is in-place. + */ + void configure(ITensor *input, ITensor *output, ConvertPolicy policy, uint32_t shift = 0); +}; +} +#endif /*__ARM_COMPUTE_NEDEPTHCONVERT_H__*/ diff --git a/arm_compute/runtime/NEON/functions/NEDepthwiseConvolution.h b/arm_compute/runtime/NEON/functions/NEDepthwiseConvolution.h deleted file mode 100644 index f2c209cd80..0000000000 --- a/arm_compute/runtime/NEON/functions/NEDepthwiseConvolution.h +++ /dev/null @@ -1,113 +0,0 @@ -/* - * Copyright (c) 2017 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef __ARM_COMPUTE_NEDEPTHWISECONVOLUTION_H__ -#define __ARM_COMPUTE_NEDEPTHWISECONVOLUTION_H__ - -#include "arm_compute/core/NEON/kernels/NEDepthwiseConvolution3x3Kernel.h" -#include "arm_compute/core/NEON/kernels/NEDepthwiseIm2ColKernel.h" -#include "arm_compute/core/NEON/kernels/NEDepthwiseVectorToTensorKernel.h" -#include "arm_compute/core/NEON/kernels/NEDepthwiseWeightsReshapeKernel.h" -#include "arm_compute/core/NEON/kernels/NEDirectConvolutionLayerBiasAccumulateKernel.h" -#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" -#include "arm_compute/core/NEON/kernels/NEGEMMMatrixVectorMultiplyKernel.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/IFunction.h" -#include "arm_compute/runtime/IMemoryManager.h" -#include "arm_compute/runtime/MemoryGroup.h" -#include "arm_compute/runtime/Tensor.h" - -namespace arm_compute -{ -class ITensor; - -/** Basic function to execute a depthwise convolution for kernel size 3x3xC. This function calls the following NEON kernels: - * - * -# @ref NEDepthwiseConvolution3x3 - * -# @ref NEFillBorderKernel (if pad_x or pad_y > 0) - * - */ -class NEDepthwiseConvolution3x3 : public IFunction -{ -public: - /** Default constructor */ - NEDepthwiseConvolution3x3(); - /** Initialize the function's source, destination, kernels and border_size. - * - * @param[in, out] input Source tensor. Data type supported: F32. (Written to only for border filling). - * @param[in] weights Weights tensor. These are 3D tensors with shape [3, 3, IFM]. 
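A usage sketch for NEDepthConvertLayer as declared above; the U8 -> S16 up-conversion and the shift value are illustrative picks from the documented table:

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    int main()
    {
        Tensor u8_in, s16_out;
        u8_in.allocator()->init(TensorInfo(TensorShape(640U, 480U), 1, DataType::U8));
        s16_out.allocator()->init(TensorInfo(TensorShape(640U, 480U), 1, DataType::S16));

        // U8 -> S16 is an up-conversion; shift = 0 leaves the values unscaled
        NEDepthConvertLayer convert;
        convert.configure(&u8_in, &s16_out, ConvertPolicy::SATURATE, 0);

        u8_in.allocator()->allocate();
        s16_out.allocator()->allocate();
        // ... fill u8_in ...

        convert.run();
        return 0;
    }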
Data type supported: Same as @p input. - * @param[in] biases (Optional) Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed. - * Data type supported: Same as @p input. - * @param[out] output Destination tensor. Data type supported: same as @p input. - * @param[in] conv_info Padding and stride information to use for the convolution. - */ - void configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info); - - // Inherited methods overriden: - void run() override; - -private: - NEDepthwiseConvolution3x3Kernel _kernel; - NEDirectConvolutionLayerBiasAccumulateKernel _bias_kernel; - NEFillBorderKernel _border_handler; - bool _has_bias; -}; - -/** Basic function to execute a generic depthwise convolution. This function calls the following OpenCL kernels: - * - * -# @ref NEDepthwiseIm2ColKernel - * -# @ref NEDepthwiseWeightsReshapeKernel - * -# @ref NEGEMMMatrixVectorMultiplyKernel - * -# @ref NEFillBorderKernel (if pad_x or pad_y > 0) - * - */ -class NEDepthwiseConvolution : public IFunction -{ -public: - /** Default constructor */ - NEDepthwiseConvolution(); - /** Initialize the function's source, destination, weights and convolution information. - * - * @param[in, out] input Source tensor. Data type supported: F32. (Written to only for border filling). - * @param[out] output Destination tensor. Data type supported: same as @p input. - * @param[in] weights Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM]. Data type supported: Same as @p input. - * @param[in] biases (Optional) Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed. - * Data type supported: Same as @p input. - * @param[in] conv_info Padding and stride information to use for the convolution. - */ - void configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info); - - // Inherited methods overriden: - void run() override; - -private: - NEDepthwiseIm2ColKernel _im2col_kernel; - NEDepthwiseWeightsReshapeKernel _weights_reshape_kernel; - NEGEMMMatrixVectorMultiplyKernel _v2mm_kernel; - NEDepthwiseVectorToTensorKernel _vector_to_tensor_kernel; - Tensor _input_reshaped; - Tensor _weights_reshaped; - Tensor _v2mm_output; -}; -} -#endif /* __ARM_COMPUTE_NEDEPTHWISECONVOLUTION_H__ */ \ No newline at end of file diff --git a/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h new file mode 100644 index 0000000000..0da16ab2a9 --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h @@ -0,0 +1,113 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEDEPTHWISECONVOLUTION_H__ +#define __ARM_COMPUTE_NEDEPTHWISECONVOLUTION_H__ + +#include "arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.h" +#include "arm_compute/core/NEON/kernels/NEDepthwiseIm2ColKernel.h" +#include "arm_compute/core/NEON/kernels/NEDepthwiseVectorToTensorKernel.h" +#include "arm_compute/core/NEON/kernels/NEDepthwiseWeightsReshapeKernel.h" +#include "arm_compute/core/NEON/kernels/NEDirectConvolutionLayerBiasAccumulateKernel.h" +#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" +#include "arm_compute/core/NEON/kernels/NEGEMMMatrixVectorMultiplyKernel.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/IFunction.h" +#include "arm_compute/runtime/IMemoryManager.h" +#include "arm_compute/runtime/MemoryGroup.h" +#include "arm_compute/runtime/Tensor.h" + +namespace arm_compute +{ +class ITensor; + +/** Basic function to execute a depthwise convolution for kernel size 3x3xC. This function calls the following NEON kernels: + * + * -# @ref NEDepthwiseConvolutionLayer3x3Kernel + * -# @ref NEFillBorderKernel (if pad_x or pad_y > 0) + * + */ +class NEDepthwiseConvolutionLayer3x3 : public IFunction +{ +public: + /** Default constructor */ + NEDepthwiseConvolutionLayer3x3(); + /** Initialize the function's source, destination, kernels and border_size. + * + * @param[in, out] input Source tensor. Data type supported: F32. (Written to only for border filling). + * @param[in] weights Weights tensor. These are 3D tensors with shape [3, 3, IFM]. Data type supported: Same as @p input. + * @param[in] biases (Optional) Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed. + * Data type supported: Same as @p input. + * @param[out] output Destination tensor. Data type supported: same as @p input. + * @param[in] conv_info Padding and stride information to use for the convolution. + */ + void configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info); + + // Inherited methods overridden: + void run() override; + +private: + NEDepthwiseConvolutionLayer3x3Kernel _kernel; + NEDirectConvolutionLayerBiasAccumulateKernel _bias_kernel; + NEFillBorderKernel _border_handler; + bool _has_bias; +}; + +/** Basic function to execute a generic depthwise convolution. This function calls the following NEON kernels: + * + * -# @ref NEDepthwiseIm2ColKernel + * -# @ref NEDepthwiseWeightsReshapeKernel + * -# @ref NEGEMMMatrixVectorMultiplyKernel + * -# @ref NEFillBorderKernel (if pad_x or pad_y > 0) + * + */ +class NEDepthwiseConvolutionLayer : public IFunction +{ +public: + /** Default constructor */ + NEDepthwiseConvolutionLayer(); + /** Initialize the function's source, destination, weights and convolution information. + * + * @param[in, out] input Source tensor. Data type supported: F32. (Written to only for border filling). + * @param[out] output Destination tensor. Data type supported: same as @p input. + * @param[in] weights Weights tensor.
These are 3D tensors with shape [kernel_x, kernel_y, IFM]. Data type supported: Same as @p input. + * @param[in] biases (Optional) Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed. + * Data type supported: Same as @p input. + * @param[in] conv_info Padding and stride information to use for the convolution. + */ + void configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info); + + // Inherited methods overridden: + void run() override; + +private: + NEDepthwiseIm2ColKernel _im2col_kernel; + NEDepthwiseWeightsReshapeKernel _weights_reshape_kernel; + NEGEMMMatrixVectorMultiplyKernel _v2mm_kernel; + NEDepthwiseVectorToTensorKernel _vector_to_tensor_kernel; + Tensor _input_reshaped; + Tensor _weights_reshaped; + Tensor _v2mm_output; +}; +} +#endif /* __ARM_COMPUTE_NEDEPTHWISECONVOLUTION_H__ */ \ No newline at end of file diff --git a/arm_compute/runtime/NEON/functions/NEDepthwiseSeparableConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEDepthwiseSeparableConvolutionLayer.h index 3f4c1389f0..0562c07515 100644 --- a/arm_compute/runtime/NEON/functions/NEDepthwiseSeparableConvolutionLayer.h +++ b/arm_compute/runtime/NEON/functions/NEDepthwiseSeparableConvolutionLayer.h @@ -27,7 +27,7 @@ #include "arm_compute/core/Types.h" #include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/NEON/INESimpleFunction.h" -#include "arm_compute/runtime/NEON/functions/NEDepthwiseConvolution.h" +#include "arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h" #include "arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h" #include "arm_compute/runtime/Tensor.h" @@ -39,7 +39,7 @@ class ITensor; /** Basic function to execute depthwise convolution. This function calls the following NEON kernels and function: * - * -# @ref NEDepthwiseConvolution + * -# @ref NEDepthwiseConvolutionLayer * -# @ref NEDirectConvolutionLayer * */ @@ -72,8 +72,8 @@ public: void run() override; private: - NEDepthwiseConvolution _depthwise_conv; - NEDirectConvolutionLayer _pointwise_conv; + NEDepthwiseConvolutionLayer _depthwise_conv; + NEDirectConvolutionLayer _pointwise_conv; }; } #endif /*__ARM_COMPUTE_NEON_DEPTHWISE_SEPARABLE_CONVOLUTION_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEL2Normalize.h b/arm_compute/runtime/NEON/functions/NEL2Normalize.h deleted file mode 100644 index 95d5186c13..0000000000 --- a/arm_compute/runtime/NEON/functions/NEL2Normalize.h +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Copyright (c) 2017 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
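A usage sketch for the 3x3 variant declared above (the generic NEDepthwiseConvolutionLayer shares the same configure() signature); the dimensions, stride and padding are illustrative:

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    int main()
    {
        // 32x32 input with 16 channels, 3x3 depthwise kernel, stride 1, pad 1 (same-size output)
        Tensor src, weights, bias, dst;
        src.allocator()->init(TensorInfo(TensorShape(32U, 32U, 16U), 1, DataType::F32));
        weights.allocator()->init(TensorInfo(TensorShape(3U, 3U, 16U), 1, DataType::F32));
        bias.allocator()->init(TensorInfo(TensorShape(16U), 1, DataType::F32));
        dst.allocator()->init(TensorInfo(TensorShape(32U, 32U, 16U), 1, DataType::F32));

        NEDepthwiseConvolutionLayer3x3 dwc;
        dwc.configure(&src, &weights, &bias, &dst, PadStrideInfo(1, 1, 1, 1));

        for(auto *t : { &src, &weights, &bias, &dst })
        {
            t->allocator()->allocate();
        }
        // ... fill src, weights and bias ...

        dwc.run();
        return 0;
    }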
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef __ARM_COMPUTE_NEL2NORMALIZE_H__ -#define __ARM_COMPUTE_NEL2NORMALIZE_H__ - -#include "arm_compute/core/NEON/kernels/NEL2NormalizeKernel.h" -#include "arm_compute/runtime/IFunction.h" -#include "arm_compute/runtime/IMemoryManager.h" -#include "arm_compute/runtime/MemoryGroup.h" -#include "arm_compute/runtime/NEON/functions/NEReductionOperation.h" -#include "arm_compute/runtime/Tensor.h" - -#include - -namespace arm_compute -{ -class ITensor; - -/** Basic function to perform a L2 normalization on a given axis. - * - * This function runs the following kernels: - * -# @ref NEReductionOperation - * -# @ref NEL2NormalizeKernel - */ -class NEL2Normalize : public IFunction -{ -public: - /** Constructor */ - NEL2Normalize(std::shared_ptr memory_manager = nullptr); - /** Set the input and output tensors. - * - * @param[in, out] input Source tensor. Data types supported: F32. (Written to only for border_size != 0) - * @param[out] output Destination tensor. Data types supported: same as @p input. - * @param[in] axis Dimension along which to reduce. Supported reduction axis : 0 - * @param[in] epsilon Lower bound value for the normalization. - */ - void configure(ITensor *input, ITensor *output, unsigned int axis, float epsilon = 1e-12); - - // Inherited methods overridden: - void run() override; - -private: - MemoryGroup _memory_group; - NEReductionOperation _reduce_func; - NEL2NormalizeKernel _normalize_kernel; - Tensor _sumsq; -}; -} -#endif /* __ARM_COMPUTE_NEL2NORMALIZE_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEL2NormalizeLayer.h b/arm_compute/runtime/NEON/functions/NEL2NormalizeLayer.h new file mode 100644 index 0000000000..100e239406 --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEL2NormalizeLayer.h @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_NEL2NORMALIZE_H__ +#define __ARM_COMPUTE_NEL2NORMALIZE_H__ + +#include "arm_compute/core/NEON/kernels/NEL2NormalizeLayerKernel.h" +#include "arm_compute/runtime/IFunction.h" +#include "arm_compute/runtime/IMemoryManager.h" +#include "arm_compute/runtime/MemoryGroup.h" +#include "arm_compute/runtime/NEON/functions/NEReductionOperation.h" +#include "arm_compute/runtime/Tensor.h" + +#include <memory> + +namespace arm_compute +{ +class ITensor; + +/** Basic function to perform a L2 normalization on a given axis. + * + * This function runs the following kernels: + * -# @ref NEReductionOperation + * -# @ref NEL2NormalizeLayerKernel + */ +class NEL2NormalizeLayer : public IFunction +{ +public: + /** Constructor */ + NEL2NormalizeLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr); + /** Set the input and output tensors. + * + * @param[in, out] input Source tensor. Data types supported: F32. (Written to only for border_size != 0) + * @param[out] output Destination tensor. Data types supported: same as @p input. + * @param[in] axis Dimension along which to reduce. Supported reduction axis: 0 + * @param[in] epsilon Lower bound value for the normalization. + */ + void configure(ITensor *input, ITensor *output, unsigned int axis, float epsilon = 1e-12); + + // Inherited methods overridden: + void run() override; + +private: + MemoryGroup _memory_group; + NEReductionOperation _reduce_func; + NEL2NormalizeLayerKernel _normalize_kernel; + Tensor _sumsq; +}; +} +#endif /* __ARM_COMPUTE_NEL2NORMALIZE_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NELaplacianPyramid.h b/arm_compute/runtime/NEON/functions/NELaplacianPyramid.h index 991ae7c293..baa4b7b1a5 100644 --- a/arm_compute/runtime/NEON/functions/NELaplacianPyramid.h +++ b/arm_compute/runtime/NEON/functions/NELaplacianPyramid.h @@ -27,7 +27,7 @@ #include "arm_compute/core/Types.h" #include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h" -#include "arm_compute/runtime/NEON/functions/NEDepthConvert.h" +#include "arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h" #include "arm_compute/runtime/NEON/functions/NEGaussian5x5.h" #include "arm_compute/runtime/NEON/functions/NEGaussianPyramid.h" #include "arm_compute/runtime/Pyramid.h" @@ -79,7 +79,7 @@ private: std::unique_ptr<NEArithmeticSubtraction[]> _subf; Pyramid _gauss_pyr; Pyramid _conv_pyr; - NEDepthConvert _depth_function; + NEDepthConvertLayer _depth_function; }; } #endif /*__ARM_COMPUTE_NELAPLACIANPYRAMID_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NELaplacianReconstruct.h b/arm_compute/runtime/NEON/functions/NELaplacianReconstruct.h index 4139733499..3d423607a3 100644 --- a/arm_compute/runtime/NEON/functions/NELaplacianReconstruct.h +++ b/arm_compute/runtime/NEON/functions/NELaplacianReconstruct.h @@ -27,7 +27,7 @@ #include "arm_compute/core/Types.h" #include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/NEON/functions/NEArithmeticAddition.h" -#include "arm_compute/runtime/NEON/functions/NEDepthConvert.h" +#include "arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h" #include "arm_compute/runtime/NEON/functions/NEScale.h" #include "arm_compute/runtime/Pyramid.h" @@ -43,7 +43,7 @@ using IImage = ITensor; * * -# @ref NEArithmeticAddition * -# @ref NEScale - * -# @ref NEDepthConvert + * -# @ref NEDepthConvertLayer * * This function reconstructs the original image from a Laplacian Image Pyramid.
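The NEON L2-normalize function mirrors the CL sketch shown earlier; since the optional memory manager defaults to nullptr, a bare construction is enough (shape, axis and epsilon again illustrative):

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/runtime/NEON/functions/NEL2NormalizeLayer.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    int main()
    {
        Tensor src, dst;
        src.allocator()->init(TensorInfo(TensorShape(128U, 32U), 1, DataType::F32));
        dst.allocator()->init(TensorInfo(TensorShape(128U, 32U), 1, DataType::F32));

        NEL2NormalizeLayer l2_norm; // No memory manager: internal tensors use their own allocations
        l2_norm.configure(&src, &dst, 0, 1e-6f);

        src.allocator()->allocate();
        dst.allocator()->allocate();
        // ... fill src ...

        l2_norm.run(); // Work is dispatched through the NEON scheduler internally
        return 0;
    }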
* @@ -85,7 +85,7 @@ private: Pyramid _tmp_pyr; std::unique_ptr _addf; std::unique_ptr _scalef; - NEDepthConvert _depthf; + NEDepthConvertLayer _depthf; }; } #endif /*__ARM_COMPUTE_NELAPLACIANRECONSTRUCT_H__ */ diff --git a/docs/00_introduction.dox b/docs/00_introduction.dox index cc12897278..9d478e051a 100644 --- a/docs/00_introduction.dox +++ b/docs/00_introduction.dox @@ -175,21 +175,21 @@ v17.09 Public major release - @ref arm_compute::NEGEMMAssemblyBaseKernel @ref arm_compute::NEGEMMAArch64Kernel - @ref arm_compute::NEDequantizationLayerKernel / @ref arm_compute::NEDequantizationLayer - @ref arm_compute::NEFloorKernel / @ref arm_compute::NEFloor - - @ref arm_compute::NEL2NormalizeKernel / @ref arm_compute::NEL2Normalize + - @ref arm_compute::NEL2NormalizeLayerKernel / @ref arm_compute::NEL2NormalizeLayer - @ref arm_compute::NEQuantizationLayerKernel @ref arm_compute::NEMinMaxLayerKernel / @ref arm_compute::NEQuantizationLayer - @ref arm_compute::NEROIPoolingLayerKernel / @ref arm_compute::NEROIPoolingLayer - @ref arm_compute::NEReductionOperationKernel / @ref arm_compute::NEReductionOperation - @ref arm_compute::NEReshapeLayerKernel / @ref arm_compute::NEReshapeLayer - New OpenCL kernels / functions: - - @ref arm_compute::CLDepthwiseConvolution3x3Kernel @ref arm_compute::CLDepthwiseIm2ColKernel @ref arm_compute::CLDepthwiseVectorToTensorKernel @ref arm_compute::CLDepthwiseWeightsReshapeKernel / @ref arm_compute::CLDepthwiseConvolution3x3 @ref arm_compute::CLDepthwiseConvolution @ref arm_compute::CLDepthwiseSeparableConvolutionLayer + - @ref arm_compute::CLDepthwiseConvolutionLayer3x3Kernel @ref arm_compute::CLDepthwiseIm2ColKernel @ref arm_compute::CLDepthwiseVectorToTensorKernel @ref arm_compute::CLDepthwiseWeightsReshapeKernel / @ref arm_compute::CLDepthwiseConvolutionLayer3x3 @ref arm_compute::CLDepthwiseConvolutionLayer @ref arm_compute::CLDepthwiseSeparableConvolutionLayer - @ref arm_compute::CLDequantizationLayerKernel / @ref arm_compute::CLDequantizationLayer - @ref arm_compute::CLDirectConvolutionLayerKernel / @ref arm_compute::CLDirectConvolutionLayer - @ref arm_compute::CLFlattenLayer - @ref arm_compute::CLFloorKernel / @ref arm_compute::CLFloor - @ref arm_compute::CLGEMMTranspose1xW - @ref arm_compute::CLGEMMMatrixVectorMultiplyKernel - - @ref arm_compute::CLL2NormalizeKernel / @ref arm_compute::CLL2Normalize + - @ref arm_compute::CLL2NormalizeLayerKernel / @ref arm_compute::CLL2NormalizeLayer - @ref arm_compute::CLQuantizationLayerKernel @ref arm_compute::CLMinMaxLayerKernel / @ref arm_compute::CLQuantizationLayer - @ref arm_compute::CLROIPoolingLayerKernel / @ref arm_compute::CLROIPoolingLayer - @ref arm_compute::CLReductionOperationKernel / @ref arm_compute::CLReductionOperation @@ -206,7 +206,7 @@ v17.06 Public major release - User can specify his own scheduler by implementing the @ref arm_compute::IScheduler interface. 
- New OpenCL kernels / functions: - @ref arm_compute::CLBatchNormalizationLayerKernel / @ref arm_compute::CLBatchNormalizationLayer - - @ref arm_compute::CLDepthConcatenateKernel / @ref arm_compute::CLDepthConcatenate + - @ref arm_compute::CLDepthConcatenateLayerKernel / @ref arm_compute::CLDepthConcatenateLayer - @ref arm_compute::CLHOGOrientationBinningKernel @ref arm_compute::CLHOGBlockNormalizationKernel, @ref arm_compute::CLHOGDetectorKernel / @ref arm_compute::CLHOGDescriptor @ref arm_compute::CLHOGDetector @ref arm_compute::CLHOGGradient @ref arm_compute::CLHOGMultiDetection - @ref arm_compute::CLLocallyConnectedMatrixMultiplyKernel / @ref arm_compute::CLLocallyConnectedLayer - @ref arm_compute::CLWeightsReshapeKernel / @ref arm_compute::CLConvolutionLayerReshapeWeights @@ -214,7 +214,7 @@ v17.06 Public major release - @ref arm_compute::CPPDetectionWindowNonMaximaSuppressionKernel - New NEON kernels / functions: - @ref arm_compute::NEBatchNormalizationLayerKernel / @ref arm_compute::NEBatchNormalizationLayer - - @ref arm_compute::NEDepthConcatenateKernel / @ref arm_compute::NEDepthConcatenate + - @ref arm_compute::NEDepthConcatenateLayerKernel / @ref arm_compute::NEDepthConcatenateLayer - @ref arm_compute::NEDirectConvolutionLayerKernel / @ref arm_compute::NEDirectConvolutionLayer - @ref arm_compute::NELocallyConnectedMatrixMultiplyKernel / @ref arm_compute::NELocallyConnectedLayer - @ref arm_compute::NEWeightsReshapeKernel / @ref arm_compute::NEConvolutionLayerReshapeWeights diff --git a/src/core/CL/kernels/CLDepthConcatenateKernel.cpp b/src/core/CL/kernels/CLDepthConcatenateKernel.cpp deleted file mode 100644 index edfbf829ed..0000000000 --- a/src/core/CL/kernels/CLDepthConcatenateKernel.cpp +++ /dev/null @@ -1,139 +0,0 @@ -/* - * Copyright (c) 2017 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#include "arm_compute/core/CL/kernels/CLDepthConcatenateKernel.h" - -#include "arm_compute/core/CL/CLHelpers.h" -#include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/CL/OpenCL.h" -#include "arm_compute/core/Error.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/IAccessWindow.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Utils.h" -#include "arm_compute/core/Validate.h" -#include "arm_compute/core/Window.h" - -#include "support/ToolchainSupport.h" - -#include - -using namespace arm_compute; - -CLDepthConcatenateKernel::CLDepthConcatenateKernel() - : _input(nullptr), _output(nullptr), _top_bottom(0), _left_right(0), _depth_offset(0) -{ -} - -BorderSize CLDepthConcatenateKernel::border_size() const -{ - return BorderSize(_top_bottom, _left_right); -} - -void CLDepthConcatenateKernel::configure(const ICLTensor *input, unsigned int depth_offset, ICLTensor *output) -{ - static std::map> configs_map = - { - { 1, { "uchar", 16 } }, - { 2, { "ushort", 8 } }, - { 4, { "uint", 4 } }, - { 8, { "ulong", 2 } }, - }; - - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QS16, DataType::F16, DataType::F32); - ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); - ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT_POSITION(input, output); - ARM_COMPUTE_ERROR_ON(input->info()->dimension(2) + depth_offset > output->info()->dimension(2)); - ARM_COMPUTE_ERROR_ON(input->info()->dimension(0) > output->info()->dimension(0)); - ARM_COMPUTE_ERROR_ON(input->info()->dimension(1) > output->info()->dimension(1)); - ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(3, input, output); - ARM_COMPUTE_ERROR_ON(configs_map.find(input->info()->element_size()) == configs_map.end()); - - // The gaps between the two lowest dimensions of input and output need to be divisible by 2 - // Otherwise it is not clear how the padding should be added onto the input tensor - ARM_COMPUTE_ERROR_ON((output->info()->dimension(0) - input->info()->dimension(0)) % 2); - ARM_COMPUTE_ERROR_ON((output->info()->dimension(1) - input->info()->dimension(1)) % 2); - - _input = input; - _output = output; - _depth_offset = depth_offset; - - // Add build options - auto config = configs_map.find(static_cast(input->info()->element_size())); - std::set build_opts; - build_opts.emplace(("-DDATA_TYPE=" + config->second.first)); - build_opts.emplace(("-DVEC_SIZE=" + support::cpp11::to_string(config->second.second))); - - // Create kernel - _kernel = static_cast(CLKernelLibrary::get().create_kernel("concatenate_depth", build_opts)); - - // Configure kernel window - _left_right = (output->info()->dimension(0) - input->info()->dimension(0)) / 2; - _top_bottom = (output->info()->dimension(1) - input->info()->dimension(1)) / 2; - - const unsigned int num_elems_processed_per_iteration = 16 / input->info()->element_size(); - const unsigned int num_elems_read_per_iteration = 16 / input->info()->element_size(); - const unsigned int num_rows_read_per_iteration = 1; - - // The window needs to be based on input as we copy all the depths of input - Window win = calculate_max_window(*output->info(), Steps(num_elems_processed_per_iteration)); - win.set(Window::DimZ, Window::Dimension(0, input->info()->tensor_shape().z(), 1)); - - AccessWindowRectangle input_access(input->info(), -_left_right, -_top_bottom, num_elems_read_per_iteration, num_rows_read_per_iteration); - AccessWindowHorizontal output_access(output->info(), 0, 
num_elems_processed_per_iteration); - update_window_and_padding(win, input_access, output_access); - output_access.set_valid_region(win, ValidRegion(Coordinates(0, 0), output->info()->tensor_shape())); - - ICLKernel::configure(win); -} - -void CLDepthConcatenateKernel::run(const Window &window, cl::CommandQueue &queue) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window); - - Window slice = window.first_slice_window_3D(); - - const int offset_to_first_elements_in_bytes = _depth_offset * _output->info()->strides_in_bytes()[2]; - - unsigned int idx = 2 * num_arguments_per_3D_tensor(); // Skip the input and output parameters - const cl_int3 offsets = - { - { - static_cast(_left_right), - static_cast(_top_bottom), - static_cast(offset_to_first_elements_in_bytes), - } - }; - _kernel.setArg(idx, offsets); - - do - { - unsigned int idx = 0; - add_3D_tensor_argument(idx, _input, slice); - add_3D_tensor_argument(idx, _output, slice); - enqueue(queue, *this, slice); - } - while(window.slide_window_slice_3D(slice)); -} diff --git a/src/core/CL/kernels/CLDepthConcatenateLayerKernel.cpp b/src/core/CL/kernels/CLDepthConcatenateLayerKernel.cpp new file mode 100644 index 0000000000..0275d4fd83 --- /dev/null +++ b/src/core/CL/kernels/CLDepthConcatenateLayerKernel.cpp @@ -0,0 +1,139 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/core/CL/kernels/CLDepthConcatenateLayerKernel.h" + +#include "arm_compute/core/CL/CLHelpers.h" +#include "arm_compute/core/CL/CLKernelLibrary.h" +#include "arm_compute/core/CL/ICLTensor.h" +#include "arm_compute/core/CL/OpenCL.h" +#include "arm_compute/core/Error.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/IAccessWindow.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/Utils.h" +#include "arm_compute/core/Validate.h" +#include "arm_compute/core/Window.h" + +#include "support/ToolchainSupport.h" + +#include + +using namespace arm_compute; + +CLDepthConcatenateLayerKernel::CLDepthConcatenateLayerKernel() + : _input(nullptr), _output(nullptr), _top_bottom(0), _left_right(0), _depth_offset(0) +{ +} + +BorderSize CLDepthConcatenateLayerKernel::border_size() const +{ + return BorderSize(_top_bottom, _left_right); +} + +void CLDepthConcatenateLayerKernel::configure(const ICLTensor *input, unsigned int depth_offset, ICLTensor *output) +{ + static std::map> configs_map = + { + { 1, { "uchar", 16 } }, + { 2, { "ushort", 8 } }, + { 4, { "uint", 4 } }, + { 8, { "ulong", 2 } }, + }; + + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QS16, DataType::F16, DataType::F32); + ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); + ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT_POSITION(input, output); + ARM_COMPUTE_ERROR_ON(input->info()->dimension(2) + depth_offset > output->info()->dimension(2)); + ARM_COMPUTE_ERROR_ON(input->info()->dimension(0) > output->info()->dimension(0)); + ARM_COMPUTE_ERROR_ON(input->info()->dimension(1) > output->info()->dimension(1)); + ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(3, input, output); + ARM_COMPUTE_ERROR_ON(configs_map.find(input->info()->element_size()) == configs_map.end()); + + // The gaps between the two lowest dimensions of input and output need to be divisible by 2 + // Otherwise it is not clear how the padding should be added onto the input tensor + ARM_COMPUTE_ERROR_ON((output->info()->dimension(0) - input->info()->dimension(0)) % 2); + ARM_COMPUTE_ERROR_ON((output->info()->dimension(1) - input->info()->dimension(1)) % 2); + + _input = input; + _output = output; + _depth_offset = depth_offset; + + // Add build options + auto config = configs_map.find(static_cast(input->info()->element_size())); + std::set build_opts; + build_opts.emplace(("-DDATA_TYPE=" + config->second.first)); + build_opts.emplace(("-DVEC_SIZE=" + support::cpp11::to_string(config->second.second))); + + // Create kernel + _kernel = static_cast(CLKernelLibrary::get().create_kernel("concatenate_depth", build_opts)); + + // Configure kernel window + _left_right = (output->info()->dimension(0) - input->info()->dimension(0)) / 2; + _top_bottom = (output->info()->dimension(1) - input->info()->dimension(1)) / 2; + + const unsigned int num_elems_processed_per_iteration = 16 / input->info()->element_size(); + const unsigned int num_elems_read_per_iteration = 16 / input->info()->element_size(); + const unsigned int num_rows_read_per_iteration = 1; + + // The window needs to be based on input as we copy all the depths of input + Window win = calculate_max_window(*output->info(), Steps(num_elems_processed_per_iteration)); + win.set(Window::DimZ, Window::Dimension(0, input->info()->tensor_shape().z(), 1)); + + AccessWindowRectangle input_access(input->info(), -_left_right, -_top_bottom, num_elems_read_per_iteration, num_rows_read_per_iteration); + AccessWindowHorizontal 
output_access(output->info(), 0, num_elems_processed_per_iteration); + update_window_and_padding(win, input_access, output_access); + output_access.set_valid_region(win, ValidRegion(Coordinates(0, 0), output->info()->tensor_shape())); + + ICLKernel::configure(win); +} + +void CLDepthConcatenateLayerKernel::run(const Window &window, cl::CommandQueue &queue) +{ + ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); + ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window); + + Window slice = window.first_slice_window_3D(); + + const int offset_to_first_elements_in_bytes = _depth_offset * _output->info()->strides_in_bytes()[2]; + + unsigned int idx = 2 * num_arguments_per_3D_tensor(); // Skip the input and output parameters + const cl_int3 offsets = + { + { + static_cast(_left_right), + static_cast(_top_bottom), + static_cast(offset_to_first_elements_in_bytes), + } + }; + _kernel.setArg(idx, offsets); + + do + { + unsigned int idx = 0; + add_3D_tensor_argument(idx, _input, slice); + add_3D_tensor_argument(idx, _output, slice); + enqueue(queue, *this, slice); + } + while(window.slide_window_slice_3D(slice)); +} diff --git a/src/core/CL/kernels/CLDepthConvertKernel.cpp b/src/core/CL/kernels/CLDepthConvertKernel.cpp deleted file mode 100644 index b2132073d5..0000000000 --- a/src/core/CL/kernels/CLDepthConvertKernel.cpp +++ /dev/null @@ -1,117 +0,0 @@ -/* - * Copyright (c) 2016, 2017 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#include "arm_compute/core/CL/kernels/CLDepthConvertKernel.h" - -#include "arm_compute/core/CL/CLHelpers.h" -#include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/CL/OpenCL.h" -#include "arm_compute/core/Error.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Utils.h" -#include "arm_compute/core/Validate.h" - -#include -#include -#include - -using namespace arm_compute; - -void CLDepthConvertKernel::configure(const ICLTensor *input, ICLTensor *output, ConvertPolicy policy, uint32_t shift) -{ - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::U8, DataType::S16, DataType::QS16, - DataType::U16, DataType::U32, DataType::S32, DataType::F32); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::QS8, DataType::U8, DataType::S16, DataType::QS16, - DataType::U16, DataType::U32, DataType::S32, DataType::F32); - ARM_COMPUTE_ERROR_ON(input == output); - ARM_COMPUTE_ERROR_ON_MSG(input->info()->data_type() == output->info()->data_type(), "Input and output data types must be different"); - ARM_COMPUTE_ERROR_ON(shift >= 8); - - // Check if convertion is supported - ARM_COMPUTE_ERROR_ON_MSG(input->info()->data_type() == DataType::QS8 && output->info()->data_type() != DataType::F32, - "Only data types supported [in] QS8 -> [out] F32"); - ARM_COMPUTE_ERROR_ON_MSG(input->info()->data_type() == DataType::QS16 && (output->info()->data_type() != DataType::F32), - "Only data types supported [in] QS16 -> [out] F32"); - ARM_COMPUTE_ERROR_ON_MSG(input->info()->data_type() == DataType::F32 && ((output->info()->data_type() != DataType::QS8) && output->info()->data_type() != DataType::QS16), - "Only data types supported [in] F32 -> [out] QS8, QS16"); - ARM_COMPUTE_ERROR_ON_MSG(input->info()->data_type() == DataType::U8 && (output->info()->data_type() != DataType::U16 && output->info()->data_type() != DataType::S16 - && output->info()->data_type() != DataType::U32 && output->info()->data_type() != DataType::S32), - "Only data types supported [in] U8 -> [out] U16, S16, U32, S32"); - - ARM_COMPUTE_ERROR_ON_MSG(input->info()->data_type() == DataType::U16 && (output->info()->data_type() != DataType::U8 && output->info()->data_type() != DataType::U32 - && output->info()->data_type() != DataType::S32), - "Only data types supported [in] U16 -> [out] U8, U32, S32"); - - ARM_COMPUTE_ERROR_ON_MSG(input->info()->data_type() == DataType::S16 && (output->info()->data_type() != DataType::U8 && output->info()->data_type() != DataType::U32 - && output->info()->data_type() != DataType::S32), - "Only data types supported [in] S16 -> [out] U8, U32, S32"); - - ARM_COMPUTE_ERROR_ON_MSG(input->info()->data_type() == DataType::U32 && (output->info()->data_type() != DataType::U8 && output->info()->data_type() != DataType::U16 - && output->info()->data_type() != DataType::S16), - "Only data types supported [in] U32 -> [out] U8, U16, S16"); - - ARM_COMPUTE_ERROR_ON_MSG(input->info()->data_type() == DataType::S32 && (output->info()->data_type() != DataType::U8 && output->info()->data_type() != DataType::U16 - && output->info()->data_type() != DataType::S16), - "Only data types supported [in] S32 -> [out] U8, U16, S16"); - - // Auto initialize output shape if not initialized (We can only auto-configure the shape, datatype must be given) - set_shape_if_empty(*output->info(), input->info()->tensor_shape()); - - ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(input, output); - - // Get data sizes - const size_t 
input_size = data_size_from_type(input->info()->data_type()); - const size_t output_size = data_size_from_type(output->info()->data_type()); - - // Construct kernel name and build options - std::string kernel_name = "convert_depth"; - std::set build_opts; - if(input_size > output_size) - { - kernel_name += "_down"; - // Down conversions from float always SATURATE as out-of-bounds conversion from float->integer is implementation defined - build_opts.insert(((policy == ConvertPolicy::WRAP) && !is_data_type_float(input->info()->data_type())) ? "-DWRAP" : "-DSATURATE"); - } - else - { - kernel_name += "_up"; - } - build_opts.emplace("-DDATA_TYPE_IN=" + get_cl_type_from_data_type(input->info()->data_type())); - build_opts.emplace("-DDATA_TYPE_OUT=" + get_cl_type_from_data_type(output->info()->data_type())); - if(is_data_type_fixed_point(input->info()->data_type()) || is_data_type_fixed_point(output->info()->data_type())) - { - build_opts.emplace("-DFIXED_POINT_POSITION=" + support::cpp11::to_string(input->info()->fixed_point_position())); - } - - // Create kernel - _kernel = static_cast(CLKernelLibrary::get().create_kernel(kernel_name, build_opts)); - - // Set shift arg - unsigned int idx = 2 * num_arguments_per_2D_tensor(); //Skip the input and output parameters - _kernel.setArg(idx++, shift); - - // Configure kernel - constexpr unsigned int num_elems_processed_per_iteration = 16; - ICLSimple2DKernel::configure(input, output, num_elems_processed_per_iteration); -} diff --git a/src/core/CL/kernels/CLDepthConvertLayerKernel.cpp b/src/core/CL/kernels/CLDepthConvertLayerKernel.cpp new file mode 100644 index 0000000000..83908a1469 --- /dev/null +++ b/src/core/CL/kernels/CLDepthConvertLayerKernel.cpp @@ -0,0 +1,117 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
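
The chain of ARM_COMPUTE_ERROR_ON_MSG checks in configure() encodes a fixed table of legal conversions. The same table expressed as a standalone predicate (the enum and helper name below are illustrative, not library API):

    #include <set>
    #include <utility>

    enum class DT { U8, U16, S16, U32, S32, QS8, QS16, F32 };

    // QS8/QS16 only widen to F32, F32 only narrows to QS8/QS16, and the
    // integer types convert between the widths listed in the checks above.
    bool conversion_supported(DT in, DT out)
    {
        static const std::set<std::pair<DT, DT>> valid =
        {
            { DT::QS8, DT::F32 }, { DT::QS16, DT::F32 },
            { DT::F32, DT::QS8 }, { DT::F32, DT::QS16 },
            { DT::U8, DT::U16 }, { DT::U8, DT::S16 }, { DT::U8, DT::U32 }, { DT::U8, DT::S32 },
            { DT::U16, DT::U8 }, { DT::U16, DT::U32 }, { DT::U16, DT::S32 },
            { DT::S16, DT::U8 }, { DT::S16, DT::U32 }, { DT::S16, DT::S32 },
            { DT::U32, DT::U8 }, { DT::U32, DT::U16 }, { DT::U32, DT::S16 },
            { DT::S32, DT::U8 }, { DT::S32, DT::U16 }, { DT::S32, DT::S16 },
        };
        return valid.count({ in, out }) != 0;
    }
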
+ */ +#include "arm_compute/core/CL/kernels/CLDepthConvertLayerKernel.h" + +#include "arm_compute/core/CL/CLHelpers.h" +#include "arm_compute/core/CL/CLKernelLibrary.h" +#include "arm_compute/core/CL/ICLTensor.h" +#include "arm_compute/core/CL/OpenCL.h" +#include "arm_compute/core/Error.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/Utils.h" +#include "arm_compute/core/Validate.h" + +#include <cstddef> +#include <set> +#include <string> + +using namespace arm_compute; + +void CLDepthConvertLayerKernel::configure(const ICLTensor *input, ICLTensor *output, ConvertPolicy policy, uint32_t shift) +{ + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::U8, DataType::S16, DataType::QS16, + DataType::U16, DataType::U32, DataType::S32, DataType::F32); + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::QS8, DataType::U8, DataType::S16, DataType::QS16, + DataType::U16, DataType::U32, DataType::S32, DataType::F32); + ARM_COMPUTE_ERROR_ON(input == output); + ARM_COMPUTE_ERROR_ON_MSG(input->info()->data_type() == output->info()->data_type(), "Input and output data types must be different"); + ARM_COMPUTE_ERROR_ON(shift >= 8); + + // Check if conversion is supported + ARM_COMPUTE_ERROR_ON_MSG(input->info()->data_type() == DataType::QS8 && output->info()->data_type() != DataType::F32, + "Only data types supported [in] QS8 -> [out] F32"); + ARM_COMPUTE_ERROR_ON_MSG(input->info()->data_type() == DataType::QS16 && (output->info()->data_type() != DataType::F32), + "Only data types supported [in] QS16 -> [out] F32"); + ARM_COMPUTE_ERROR_ON_MSG(input->info()->data_type() == DataType::F32 && ((output->info()->data_type() != DataType::QS8) && output->info()->data_type() != DataType::QS16), + "Only data types supported [in] F32 -> [out] QS8, QS16"); + ARM_COMPUTE_ERROR_ON_MSG(input->info()->data_type() == DataType::U8 && (output->info()->data_type() != DataType::U16 && output->info()->data_type() != DataType::S16 + && output->info()->data_type() != DataType::U32 && output->info()->data_type() != DataType::S32), + "Only data types supported [in] U8 -> [out] U16, S16, U32, S32"); + + ARM_COMPUTE_ERROR_ON_MSG(input->info()->data_type() == DataType::U16 && (output->info()->data_type() != DataType::U8 && output->info()->data_type() != DataType::U32 + && output->info()->data_type() != DataType::S32), + "Only data types supported [in] U16 -> [out] U8, U32, S32"); + + ARM_COMPUTE_ERROR_ON_MSG(input->info()->data_type() == DataType::S16 && (output->info()->data_type() != DataType::U8 && output->info()->data_type() != DataType::U32 + && output->info()->data_type() != DataType::S32), + "Only data types supported [in] S16 -> [out] U8, U32, S32"); + + ARM_COMPUTE_ERROR_ON_MSG(input->info()->data_type() == DataType::U32 && (output->info()->data_type() != DataType::U8 && output->info()->data_type() != DataType::U16 + && output->info()->data_type() != DataType::S16), + "Only data types supported [in] U32 -> [out] U8, U16, S16"); + + ARM_COMPUTE_ERROR_ON_MSG(input->info()->data_type() == DataType::S32 && (output->info()->data_type() != DataType::U8 && output->info()->data_type() != DataType::U16 + && output->info()->data_type() != DataType::S16), + "Only data types supported [in] S32 -> [out] U8, U16, S16"); + + // Auto initialize output shape if not initialized (We can only auto-configure the shape, datatype must be given) + set_shape_if_empty(*output->info(), input->info()->tensor_shape()); + + ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(input, output); + + // Get data sizes + const
size_t input_size = data_size_from_type(input->info()->data_type()); + const size_t output_size = data_size_from_type(output->info()->data_type()); + + // Construct kernel name and build options + std::string kernel_name = "convert_depth"; + std::set<std::string> build_opts; + if(input_size > output_size) + { + kernel_name += "_down"; + // Down conversions from float always SATURATE as out-of-bounds conversion from float->integer is implementation defined + build_opts.insert(((policy == ConvertPolicy::WRAP) && !is_data_type_float(input->info()->data_type())) ? "-DWRAP" : "-DSATURATE"); + } + else + { + kernel_name += "_up"; + } + build_opts.emplace("-DDATA_TYPE_IN=" + get_cl_type_from_data_type(input->info()->data_type())); + build_opts.emplace("-DDATA_TYPE_OUT=" + get_cl_type_from_data_type(output->info()->data_type())); + if(is_data_type_fixed_point(input->info()->data_type()) || is_data_type_fixed_point(output->info()->data_type())) + { + build_opts.emplace("-DFIXED_POINT_POSITION=" + support::cpp11::to_string(input->info()->fixed_point_position())); + } + + // Create kernel + _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name, build_opts)); + + // Set shift arg + unsigned int idx = 2 * num_arguments_per_2D_tensor(); //Skip the input and output parameters + _kernel.setArg(idx++, shift); + + // Configure kernel + constexpr unsigned int num_elems_processed_per_iteration = 16; + ICLSimple2DKernel::configure(input, output, num_elems_processed_per_iteration); +} diff --git a/src/core/CL/kernels/CLDepthwiseConvolution3x3Kernel.cpp b/src/core/CL/kernels/CLDepthwiseConvolution3x3Kernel.cpp deleted file mode 100644 index e86c55fbc0..0000000000 --- a/src/core/CL/kernels/CLDepthwiseConvolution3x3Kernel.cpp +++ /dev/null @@ -1,219 +0,0 @@ -/* - * Copyright (c) 2017 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE.
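
The kernel-name selection above is driven purely by element sizes: widening picks convert_depth_up, narrowing picks convert_depth_down and forces saturation when the source is a float type, since float-to-integer overflow is implementation defined in OpenCL. A standalone sketch of that branch, with hypothetical parameter names:

    #include <cstddef>
    #include <set>
    #include <string>

    std::string select_convert_kernel(std::size_t input_size, std::size_t output_size,
                                      bool wrap_policy, bool input_is_float,
                                      std::set<std::string> &build_opts)
    {
        std::string kernel_name = "convert_depth";
        if(input_size > output_size)
        {
            kernel_name += "_down";
            // WRAP is only honoured for integer sources; float sources must saturate
            build_opts.insert((wrap_policy && !input_is_float) ? "-DWRAP" : "-DSATURATE");
        }
        else
        {
            kernel_name += "_up";
        }
        return kernel_name;
    }
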
- */ -#include "arm_compute/core/CL/kernels/CLDepthwiseConvolution3x3Kernel.h" - -#include "arm_compute/core/AccessWindowStatic.h" -#include "arm_compute/core/CL/CLHelpers.h" -#include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/ICLKernel.h" -#include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/Error.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/core/Utils.h" -#include "arm_compute/core/utils/quantization/AsymmHelpers.h" - -using namespace arm_compute; - -namespace -{ -/** Calculates expected output shape dimension - * - * @param[in] Input shape - * - * @return Expected output shape - */ -TensorShape get_output_shape(TensorShape input_shape, TensorShape weights_shape, PadStrideInfo conv_info) -{ - unsigned int output_width = 0; - unsigned int output_height = 0; - - std::tie(output_width, output_height) = scaled_dimensions(input_shape.x(), input_shape.y(), weights_shape.x(), weights_shape.y(), conv_info); - - TensorShape output_shape = input_shape; - output_shape.set(0, output_width); - output_shape.set(1, output_height); - - return output_shape; -} -} // namespace - -CLDepthwiseConvolution3x3Kernel::CLDepthwiseConvolution3x3Kernel() - : _border_size(0), _input(), _output(), _weights(), _biases(), _conv_stride_x(0), _conv_stride_y(0), _conv_pad_left(0), _conv_pad_top(0) -{ -} - -BorderSize CLDepthwiseConvolution3x3Kernel::border_size() const -{ - return _border_size; -} - -void CLDepthwiseConvolution3x3Kernel::configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info) -{ - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F32); - ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights); - ARM_COMPUTE_ERROR_ON(weights->info()->dimension(0) != 3 || weights->info()->dimension(1) != 3); - - if(biases != nullptr) - { - if(is_data_type_quantized_asymmetric(weights->info()->data_type())) - { - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(biases, 1, DataType::S32); - } - else - { - ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(weights, biases); - } - ARM_COMPUTE_ERROR_ON(biases->info()->dimension(0) != weights->info()->dimension(2)); - ARM_COMPUTE_ERROR_ON(biases->info()->num_dimensions() > 1); - } - - // Get convolved dimensions - TensorShape output_shape = get_output_shape(input->info()->tensor_shape(), weights->info()->tensor_shape(), conv_info); - - // Output auto inizialitation if not yet initialized - auto_init_if_empty(*output->info(), - output_shape, - 1, - input->info()->data_type(), - input->info()->fixed_point_position(), - input->info()->quantization_info()); - - ARM_COMPUTE_ERROR_ON_MISMATCHING_DIMENSIONS(output->info()->tensor_shape(), output_shape); - - _input = input; - _output = output; - _weights = weights; - _biases = biases; - _conv_stride_x = conv_info.stride().first; - _conv_stride_y = conv_info.stride().second; - _conv_pad_left = conv_info.pad_left(); - _conv_pad_top = conv_info.pad_top(); - _border_size = BorderSize(_conv_pad_top, conv_info.pad_right(), conv_info.pad_bottom(), _conv_pad_left); - - // Set build options - ARM_COMPUTE_ERROR_ON(_conv_stride_x < 1 || _conv_stride_x > 3); - CLBuildOptions build_opts; - build_opts.add_option("-DCONV_STRIDE_X=" + support::cpp11::to_string(_conv_stride_x)); - build_opts.add_option_if(_biases != nullptr, "-DHAS_BIAS"); - - // Create kernel - std::string kernel_name = 
is_data_type_quantized_asymmetric(_input->info()->data_type()) ? "depthwise_convolution_3x3_quantized" : "depthwise_convolution_3x3"; - _kernel = static_cast(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options())); - - // Set static arguments - if(is_data_type_quantized_asymmetric(_input->info()->data_type())) - { - float multiplier = _input->info()->quantization_info().scale * _weights->info()->quantization_info().scale / _output->info()->quantization_info().scale; - int output_multiplier = 0; - int output_shift = 0; - quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift); - - unsigned int idx = 3 * num_arguments_per_3D_tensor() + ((_biases != nullptr) ? num_arguments_per_1D_tensor() : 0); - - _kernel.setArg(idx++, -_input->info()->quantization_info().offset); - _kernel.setArg(idx++, -_weights->info()->quantization_info().offset); - _kernel.setArg(idx++, _output->info()->quantization_info().offset); - _kernel.setArg(idx++, output_multiplier); - _kernel.setArg(idx++, output_shift); - } - - // Configure the local work size for Bifrost with a value obtained - // via exhaustive autotuning for the MobileNets tensor shapes. - const GPUTarget gpu_target = get_arch_from_target(get_target()); - if(gpu_target == GPUTarget::BIFROST) - { - const size_t width = input->info()->dimension(0); - if(width >= 56) // 56 or 112 - { - _lws_hint = cl::NDRange(8, 5, 2); - } - else if(width >= 14) // 14 or 28 - { - _lws_hint = cl::NDRange(1, 5, 2); - } - else // 7 - { - _lws_hint = cl::NDRange(1, 1, 2); - } - } - - // Configure kernel window - const unsigned int num_elems_processed_per_iteration = 2; - const unsigned int num_elems_written_per_iteration = 2; - const unsigned int num_elems_read_per_iteration = 3 + _conv_stride_x; - const unsigned int num_rows_read_per_iteration = 3; - - Window win = calculate_max_window(*output->info(), Steps(num_elems_processed_per_iteration)); - - AccessWindowRectangle input_access(input->info(), -border_size().left, -border_size().top, num_elems_read_per_iteration, num_rows_read_per_iteration, _conv_stride_x, _conv_stride_y); - AccessWindowHorizontal output_access(output->info(), 0, num_elems_written_per_iteration); - AccessWindowStatic weights_access(weights->info(), 0, 0, weights->info()->dimension(0), weights->info()->dimension(1)); - - update_window_and_padding(win, input_access, weights_access, output_access); - - output_access.set_valid_region(win, ValidRegion(Coordinates(), output->info()->tensor_shape())); - - ICLKernel::configure(win); -} - -void CLDepthwiseConvolution3x3Kernel::run(const Window &window, cl::CommandQueue &queue) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window); - - // Create input window and adjust - Window win_in = window; - win_in.adjust(Window::DimX, -_conv_pad_left, true); - win_in.adjust(Window::DimY, -_conv_pad_top, true); - win_in.set_dimension_step(Window::DimX, window.x().step() * _conv_stride_x); - win_in.set_dimension_step(Window::DimY, window.y().step() * _conv_stride_y); - - Window slice_in = win_in.first_slice_window_3D(); - Window slice_out = window.first_slice_window_3D(); - Window slice_weights = window.first_slice_window_3D(); - slice_weights.set_dimension_step(Window::DimX, 0); - slice_weights.set_dimension_step(Window::DimY, 0); - - // Set biases - if(_biases != nullptr) - { - unsigned int idx = 3 * num_arguments_per_3D_tensor(); - Window slice_biases; - 
slice_biases.use_tensor_dimensions(_biases->info()->tensor_shape()); - add_1D_tensor_argument(idx, _biases, slice_biases); - } - - do - { - unsigned int idx = 0; - add_3D_tensor_argument(idx, _input, slice_in); - add_3D_tensor_argument(idx, _output, slice_out); - add_3D_tensor_argument(idx, _weights, slice_weights); - - enqueue(queue, *this, slice_out, _lws_hint); - } - while(window.slide_window_slice_3D(slice_out) && win_in.slide_window_slice_3D(slice_in)); -} diff --git a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3Kernel.cpp b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3Kernel.cpp new file mode 100644 index 0000000000..003f1f8330 --- /dev/null +++ b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3Kernel.cpp @@ -0,0 +1,219 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
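
get_output_shape() delegates the spatial arithmetic to scaled_dimensions(). For the plain floor-rounded case that reduces to the textbook convolution formula, sketched standalone below (an assumption about the rounding mode; it also assumes the kernel fits, i.e. k_w <= in_w + 2 * pad_x):

    #include <utility>

    std::pair<unsigned int, unsigned int> convolved_size(unsigned int in_w, unsigned int in_h,
                                                         unsigned int k_w, unsigned int k_h,
                                                         unsigned int stride_x, unsigned int stride_y,
                                                         unsigned int pad_x, unsigned int pad_y)
    {
        const unsigned int out_w = (in_w + 2 * pad_x - k_w) / stride_x + 1; // integer division floors
        const unsigned int out_h = (in_h + 2 * pad_y - k_h) / stride_y + 1;
        return std::make_pair(out_w, out_h);
    }

For example, a 112x112 input with a 3x3 kernel, stride 2 and padding 1 yields 56x56, matching the MobileNets shapes the Bifrost local work sizes are tuned for.
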
+ */ +#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3Kernel.h" + +#include "arm_compute/core/AccessWindowStatic.h" +#include "arm_compute/core/CL/CLHelpers.h" +#include "arm_compute/core/CL/CLKernelLibrary.h" +#include "arm_compute/core/CL/ICLKernel.h" +#include "arm_compute/core/CL/ICLTensor.h" +#include "arm_compute/core/Error.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/core/Utils.h" +#include "arm_compute/core/utils/quantization/AsymmHelpers.h" + +using namespace arm_compute; + +namespace +{ +/** Calculates the expected output shape dimensions + * + * @param[in] input_shape Input tensor shape + * @param[in] weights_shape Weights tensor shape + * @param[in] conv_info Convolution pad and stride information + * + * @return Expected output shape + */ +TensorShape get_output_shape(TensorShape input_shape, TensorShape weights_shape, PadStrideInfo conv_info) +{ + unsigned int output_width = 0; + unsigned int output_height = 0; + + std::tie(output_width, output_height) = scaled_dimensions(input_shape.x(), input_shape.y(), weights_shape.x(), weights_shape.y(), conv_info); + + TensorShape output_shape = input_shape; + output_shape.set(0, output_width); + output_shape.set(1, output_height); + + return output_shape; +} +} // namespace + +CLDepthwiseConvolutionLayer3x3Kernel::CLDepthwiseConvolutionLayer3x3Kernel() + : _border_size(0), _input(), _output(), _weights(), _biases(), _conv_stride_x(0), _conv_stride_y(0), _conv_pad_left(0), _conv_pad_top(0) +{ +} + +BorderSize CLDepthwiseConvolutionLayer3x3Kernel::border_size() const +{ + return _border_size; +} + +void CLDepthwiseConvolutionLayer3x3Kernel::configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info) +{ + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F32); + ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights); + ARM_COMPUTE_ERROR_ON(weights->info()->dimension(0) != 3 || weights->info()->dimension(1) != 3); + + if(biases != nullptr) + { + if(is_data_type_quantized_asymmetric(weights->info()->data_type())) + { + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(biases, 1, DataType::S32); + } + else + { + ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(weights, biases); + } + ARM_COMPUTE_ERROR_ON(biases->info()->dimension(0) != weights->info()->dimension(2)); + ARM_COMPUTE_ERROR_ON(biases->info()->num_dimensions() > 1); + } + + // Get convolved dimensions + TensorShape output_shape = get_output_shape(input->info()->tensor_shape(), weights->info()->tensor_shape(), conv_info); + + // Output auto initialization if not yet initialized + auto_init_if_empty(*output->info(), + output_shape, + 1, + input->info()->data_type(), + input->info()->fixed_point_position(), + input->info()->quantization_info()); + + ARM_COMPUTE_ERROR_ON_MISMATCHING_DIMENSIONS(output->info()->tensor_shape(), output_shape); + + _input = input; + _output = output; + _weights = weights; + _biases = biases; + _conv_stride_x = conv_info.stride().first; + _conv_stride_y = conv_info.stride().second; + _conv_pad_left = conv_info.pad_left(); + _conv_pad_top = conv_info.pad_top(); + _border_size = BorderSize(_conv_pad_top, conv_info.pad_right(), conv_info.pad_bottom(), _conv_pad_left); + + // Set build options + ARM_COMPUTE_ERROR_ON(_conv_stride_x < 1 || _conv_stride_x > 3); + CLBuildOptions build_opts; + build_opts.add_option("-DCONV_STRIDE_X=" + support::cpp11::to_string(_conv_stride_x)); + build_opts.add_option_if(_biases != nullptr, "-DHAS_BIAS"); + + // Create kernel + std::string kernel_name = is_data_type_quantized_asymmetric(_input->info()->data_type()) ? "depthwise_convolution_3x3_quantized" : "depthwise_convolution_3x3"; + _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options())); + + // Set static arguments + if(is_data_type_quantized_asymmetric(_input->info()->data_type())) + { + float multiplier = _input->info()->quantization_info().scale * _weights->info()->quantization_info().scale / _output->info()->quantization_info().scale; + int output_multiplier = 0; + int output_shift = 0; + quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift); + + unsigned int idx = 3 * num_arguments_per_3D_tensor() + ((_biases != nullptr) ? num_arguments_per_1D_tensor() : 0); + + _kernel.setArg(idx++, -_input->info()->quantization_info().offset); + _kernel.setArg(idx++, -_weights->info()->quantization_info().offset); + _kernel.setArg(idx++, _output->info()->quantization_info().offset); + _kernel.setArg(idx++, output_multiplier); + _kernel.setArg(idx++, output_shift); + } + + // Configure the local work size for Bifrost with a value obtained + // via exhaustive autotuning for the MobileNets tensor shapes. + const GPUTarget gpu_target = get_arch_from_target(get_target()); + if(gpu_target == GPUTarget::BIFROST) + { + const size_t width = input->info()->dimension(0); + if(width >= 56) // 56 or 112 + { + _lws_hint = cl::NDRange(8, 5, 2); + } + else if(width >= 14) // 14 or 28 + { + _lws_hint = cl::NDRange(1, 5, 2); + } + else // 7 + { + _lws_hint = cl::NDRange(1, 1, 2); + } + } + + // Configure kernel window + const unsigned int num_elems_processed_per_iteration = 2; + const unsigned int num_elems_written_per_iteration = 2; + const unsigned int num_elems_read_per_iteration = 3 + _conv_stride_x; + const unsigned int num_rows_read_per_iteration = 3; + + Window win = calculate_max_window(*output->info(), Steps(num_elems_processed_per_iteration)); + + AccessWindowRectangle input_access(input->info(), -border_size().left, -border_size().top, num_elems_read_per_iteration, num_rows_read_per_iteration, _conv_stride_x, _conv_stride_y); + AccessWindowHorizontal output_access(output->info(), 0, num_elems_written_per_iteration); + AccessWindowStatic weights_access(weights->info(), 0, 0, weights->info()->dimension(0), weights->info()->dimension(1)); + + update_window_and_padding(win, input_access, weights_access, output_access); + + output_access.set_valid_region(win, ValidRegion(Coordinates(), output->info()->tensor_shape())); + + ICLKernel::configure(win); +} + +void CLDepthwiseConvolutionLayer3x3Kernel::run(const Window &window, cl::CommandQueue &queue) +{ + ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); + ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window); + + // Create input window and adjust + Window win_in = window; + win_in.adjust(Window::DimX, -_conv_pad_left, true); + win_in.adjust(Window::DimY, -_conv_pad_top, true); + win_in.set_dimension_step(Window::DimX, window.x().step() * _conv_stride_x); + win_in.set_dimension_step(Window::DimY, window.y().step() * _conv_stride_y); + + Window slice_in = win_in.first_slice_window_3D(); + Window slice_out = window.first_slice_window_3D(); + Window slice_weights = window.first_slice_window_3D(); + slice_weights.set_dimension_step(Window::DimX, 0); + slice_weights.set_dimension_step(Window::DimY, 0); + + // Set biases + if(_biases != nullptr) + { + unsigned int idx = 3 * num_arguments_per_3D_tensor(); + Window slice_biases; +
slice_biases.use_tensor_dimensions(_biases->info()->tensor_shape()); + add_1D_tensor_argument(idx, _biases, slice_biases); + } + + do + { + unsigned int idx = 0; + add_3D_tensor_argument(idx, _input, slice_in); + add_3D_tensor_argument(idx, _output, slice_out); + add_3D_tensor_argument(idx, _weights, slice_weights); + + enqueue(queue, *this, slice_out, _lws_hint); + } + while(window.slide_window_slice_3D(slice_out) && win_in.slide_window_slice_3D(slice_in)); +} diff --git a/src/core/CL/kernels/CLL2NormalizeKernel.cpp b/src/core/CL/kernels/CLL2NormalizeKernel.cpp deleted file mode 100644 index 3e0758c980..0000000000 --- a/src/core/CL/kernels/CLL2NormalizeKernel.cpp +++ /dev/null @@ -1,110 +0,0 @@ -/* - * Copyright (c) 2017 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
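
For the quantized path configured above, calculate_quantized_multiplier_less_than_one() turns the real-valued rescale factor into an integer multiplier plus a right shift. A standalone version of the usual decomposition (this follows the well-known gemmlowp scheme and is an assumption about the library's internals):

    #include <cmath>
    #include <cstdint>

    void quantize_multiplier(double multiplier, int32_t *quantized_multiplier, int *right_shift)
    {
        // multiplier = significand * 2^exponent, with significand in [0.5, 1)
        int exponent = 0;
        const double significand = std::frexp(multiplier, &exponent);
        *right_shift = -exponent; // >= 0 whenever multiplier < 1
        int64_t q = static_cast<int64_t>(std::round(significand * (1LL << 31)));
        if(q == (1LL << 31)) // rounding can push the significand up to exactly 1.0
        {
            q /= 2;
            --*right_shift;
        }
        *quantized_multiplier = static_cast<int32_t>(q);
    }
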
- */ -#include "arm_compute/core/CL/kernels/CLL2NormalizeKernel.h" - -#include "arm_compute/core/CL/CLHelpers.h" -#include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/FixedPoint.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Utils.h" -#include "arm_compute/core/Validate.h" -#include "arm_compute/core/Window.h" - -#include "support/ToolchainSupport.h" - -using namespace arm_compute; - -CLL2NormalizeKernel::CLL2NormalizeKernel() - : _input(nullptr), _sum(nullptr), _output(nullptr), _axis(0), _epsilon(1e-12) -{ -} - -void CLL2NormalizeKernel::configure(const ICLTensor *input, const ICLTensor *sum, ICLTensor *output, unsigned int axis, float epsilon) -{ - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32); - ARM_COMPUTE_ERROR_ON_NULLPTR(output); - - // Sum and output tensor auto initialization if not yet initialized - auto_init_if_empty(*output->info(), input->info()->tensor_shape(), 1, input->info()->data_type(), input->info()->fixed_point_position()); - - ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); - ARM_COMPUTE_ERROR_ON_MSG(axis >= TensorShape::num_max_dimensions, "Reduction axis greater than max number of dimensions"); - ARM_COMPUTE_ERROR_ON_MSG(axis > 0, "Unsupported reduction axis, Supported axis is 0"); - ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(input, output); - - _input = input; - _sum = sum; - _output = output; - _axis = axis; - _epsilon = epsilon; - - const unsigned int num_elems_processed_per_iteration = 16; - - // Set build options - std::set build_opts; - build_opts.emplace(("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type()))); - build_opts.emplace(("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration))); - - // Create kernel - _kernel = static_cast(CLKernelLibrary::get().create_kernel("l2_normalize", build_opts)); - - // Set epsilon argument - unsigned int idx = num_arguments_per_1D_tensor() * 3; - _kernel.setArg(idx, _epsilon); - - // Configure kernel window - Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration)); - - AccessWindowHorizontal input_access(input->info(), 0, num_elems_processed_per_iteration); - AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration); - - update_window_and_padding(win, input_access, output_access); - output_access.set_valid_region(win, input->info()->valid_region()); - - ICLKernel::configure(win); -} - -void CLL2NormalizeKernel::run(const Window &window, cl::CommandQueue &queue) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window); - - Window window_sum(window); - window_sum.set(Window::DimX, Window::Dimension(0, 0, 0)); - - Window in_slice = window.first_slice_window_1D(); - Window sum_slice = window_sum.first_slice_window_1D(); - - do - { - unsigned int idx = 0; - add_1D_tensor_argument(idx, _input, in_slice); - add_1D_tensor_argument(idx, _sum, sum_slice); - add_1D_tensor_argument(idx, _output, in_slice); - enqueue(queue, *this, in_slice); - } - while(window.slide_window_slice_1D(in_slice) && window.slide_window_slice_1D(sum_slice)); -} diff --git a/src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp b/src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp new file mode 100644 index 0000000000..36e351e048 --- /dev/null +++ b/src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp @@ -0,0 +1,110 @@ +/* + * Copyright 
(c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/core/CL/kernels/CLL2NormalizeLayerKernel.h" + +#include "arm_compute/core/CL/CLHelpers.h" +#include "arm_compute/core/CL/CLKernelLibrary.h" +#include "arm_compute/core/CL/ICLTensor.h" +#include "arm_compute/core/FixedPoint.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/Utils.h" +#include "arm_compute/core/Validate.h" +#include "arm_compute/core/Window.h" + +#include "support/ToolchainSupport.h" + +using namespace arm_compute; + +CLL2NormalizeLayerKernel::CLL2NormalizeLayerKernel() + : _input(nullptr), _sum(nullptr), _output(nullptr), _axis(0), _epsilon(1e-12) +{ +} + +void CLL2NormalizeLayerKernel::configure(const ICLTensor *input, const ICLTensor *sum, ICLTensor *output, unsigned int axis, float epsilon) +{ + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32); + ARM_COMPUTE_ERROR_ON_NULLPTR(output); + + // Sum and output tensor auto initialization if not yet initialized + auto_init_if_empty(*output->info(), input->info()->tensor_shape(), 1, input->info()->data_type(), input->info()->fixed_point_position()); + + ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); + ARM_COMPUTE_ERROR_ON_MSG(axis >= TensorShape::num_max_dimensions, "Reduction axis greater than max number of dimensions"); + ARM_COMPUTE_ERROR_ON_MSG(axis > 0, "Unsupported reduction axis, Supported axis is 0"); + ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(input, output); + + _input = input; + _sum = sum; + _output = output; + _axis = axis; + _epsilon = epsilon; + + const unsigned int num_elems_processed_per_iteration = 16; + + // Set build options + std::set<std::string> build_opts; + build_opts.emplace(("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type()))); + build_opts.emplace(("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration))); + + // Create kernel + _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("l2_normalize", build_opts)); + + // Set epsilon argument + unsigned int idx = num_arguments_per_1D_tensor() * 3; + _kernel.setArg(idx, _epsilon); + + // Configure kernel window + Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration)); + + AccessWindowHorizontal input_access(input->info(), 0, num_elems_processed_per_iteration); + AccessWindowHorizontal output_access(output->info(), 0,
num_elems_processed_per_iteration); + + update_window_and_padding(win, input_access, output_access); + output_access.set_valid_region(win, input->info()->valid_region()); + + ICLKernel::configure(win); +} + +void CLL2NormalizeLayerKernel::run(const Window &window, cl::CommandQueue &queue) +{ + ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); + ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window); + + Window window_sum(window); + window_sum.set(Window::DimX, Window::Dimension(0, 0, 0)); + + Window in_slice = window.first_slice_window_1D(); + Window sum_slice = window_sum.first_slice_window_1D(); + + do + { + unsigned int idx = 0; + add_1D_tensor_argument(idx, _input, in_slice); + add_1D_tensor_argument(idx, _sum, sum_slice); + add_1D_tensor_argument(idx, _output, in_slice); + enqueue(queue, *this, in_slice); + } + while(window.slide_window_slice_1D(in_slice) && window.slide_window_slice_1D(sum_slice)); +} diff --git a/src/core/GLES_COMPUTE/kernels/GCDepthConcatenateKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCDepthConcatenateKernel.cpp deleted file mode 100644 index b90a8e7b89..0000000000 --- a/src/core/GLES_COMPUTE/kernels/GCDepthConcatenateKernel.cpp +++ /dev/null @@ -1,143 +0,0 @@ -/* - * Copyright (c) 2017 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
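
What the l2_normalize kernel computes per X row, in scalar form: each element is divided by the square root of the (clamped) sum of squares that the preceding reduction stage provides through the sum tensor. A minimal sketch under that assumption:

    #include <algorithm>
    #include <cmath>
    #include <cstddef>
    #include <vector>

    void l2_normalize_row(const std::vector<float> &in, float sum_of_squares, float epsilon,
                          std::vector<float> &out)
    {
        // epsilon keeps the norm finite for all-zero rows
        const float inv_norm = 1.0f / std::sqrt(std::max(sum_of_squares, epsilon));
        out.resize(in.size());
        for(std::size_t i = 0; i < in.size(); ++i)
        {
            out[i] = in[i] * inv_norm;
        }
    }
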
- */ -#include "arm_compute/core/GLES_COMPUTE/kernels/GCDepthConcatenateKernel.h" - -#include "arm_compute/core/Error.h" -#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h" -#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h" -#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h" -#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/Utils.h" -#include "arm_compute/core/Validate.h" -#include "arm_compute/core/Window.h" - -#include "support/ToolchainSupport.h" - -using namespace arm_compute; - -GCDepthConcatenateKernel::GCDepthConcatenateKernel() - : _input(nullptr), _output(nullptr), _top_bottom(0), _left_right(0) -{ -} - -BorderSize GCDepthConcatenateKernel::border_size() const -{ - return BorderSize(_top_bottom, _left_right); -} - -void GCDepthConcatenateKernel::configure(const IGCTensor *input, unsigned int depth_offset, IGCTensor *output) -{ - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32); - ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); - ARM_COMPUTE_ERROR_ON(input->info()->dimension(2) + depth_offset > output->info()->dimension(2)); - ARM_COMPUTE_ERROR_ON(input->info()->dimension(0) > output->info()->dimension(0)); - ARM_COMPUTE_ERROR_ON(input->info()->dimension(1) > output->info()->dimension(1)); - ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(3, input, output); - - // The gaps between the two lowest dimensions of input and output need to be divisible by 2 - // Otherwise it is not clear how the padding should be added onto the input tensor - ARM_COMPUTE_ERROR_ON((output->info()->dimension(0) - input->info()->dimension(0)) % 2); - ARM_COMPUTE_ERROR_ON((output->info()->dimension(1) - input->info()->dimension(1)) % 2); - - _input = input; - _output = output; - - // Add build options - std::set build_opts; - std::string dt_name = (input->info()->data_type() == DataType::F32) ? 
"DATA_TYPE_FP32" : "DATA_TYPE_FP16"; - build_opts.emplace(("#define " + dt_name)); - build_opts.emplace("#define LOCAL_SIZE_X " + support::cpp11::to_string(1)); - build_opts.emplace("#define LOCAL_SIZE_Y " + support::cpp11::to_string(1)); - build_opts.emplace("#define LOCAL_SIZE_Z " + support::cpp11::to_string(1)); - - // Configure kernel window - _left_right = (output->info()->dimension(0) - input->info()->dimension(0)) / 2; - _top_bottom = (output->info()->dimension(1) - input->info()->dimension(1)) / 2; - - const int offset_to_first_elements_in_bytes = depth_offset * output->info()->strides_in_bytes()[2]; - - build_opts.emplace("#define OFFSETS_X " + support::cpp11::to_string(_left_right)); - build_opts.emplace("#define OFFSETS_Y " + support::cpp11::to_string(_top_bottom)); - build_opts.emplace("#define OFFSETS_Z " + support::cpp11::to_string(offset_to_first_elements_in_bytes)); - - // Create kernel - _kernel = static_cast(GCKernelLibrary::get().create_kernel("concatenate_depth", build_opts)); - - unsigned int num_elems_processed_per_iteration = 1; - unsigned int num_elems_read_per_iteration = 1; - if(input->info()->data_type() == DataType::F32) - { - num_elems_processed_per_iteration = 1; - num_elems_read_per_iteration = 1; - } - else if(input->info()->data_type() == DataType::F16) - { - num_elems_processed_per_iteration = 4; - num_elems_read_per_iteration = 4; - } - const unsigned int num_rows_read_per_iteration = 1; - - // The window needs to be based on input as we copy all the depths of input - Window win = calculate_max_window(*output->info(), Steps(num_elems_processed_per_iteration)); - win.set(Window::DimZ, Window::Dimension(0, input->info()->tensor_shape().z(), 1)); - - AccessWindowRectangle input_access(input->info(), -_left_right, -_top_bottom, num_elems_read_per_iteration, num_rows_read_per_iteration); - AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration); - update_window_and_padding(win, input_access, output_access); - output_access.set_valid_region(win, ValidRegion(Coordinates(0, 0), output->info()->tensor_shape())); - - IGCKernel::configure(win); -} - -void GCDepthConcatenateKernel::run(const Window &window) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IGCKernel::window(), window); - - _kernel.use(); - - Window slice = window.first_slice_window_3D(); - - do - { - if(_input->info()->data_type() == DataType::F32) - { - unsigned int idx = 0; - add_3D_tensor_argument(idx, _input, 1, slice); - add_3D_tensor_argument(idx, _output, 2, slice); - } - else if(_input->info()->data_type() == DataType::F16) - { - unsigned int idx = 0; - add_3D_tensor_argument(idx, _input, BufferParam(1, 3), slice); - add_3D_tensor_argument(idx, _output, BufferParam(2, 3), slice); - } - - _kernel.update_shader_params(); - - enqueue(*this, slice); - } - while(window.slide_window_slice_3D(slice)); -} diff --git a/src/core/GLES_COMPUTE/kernels/GCDepthConcatenateLayerKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCDepthConcatenateLayerKernel.cpp new file mode 100644 index 0000000000..a6111782fd --- /dev/null +++ b/src/core/GLES_COMPUTE/kernels/GCDepthConcatenateLayerKernel.cpp @@ -0,0 +1,143 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/core/GLES_COMPUTE/kernels/GCDepthConcatenateLayerKernel.h" + +#include "arm_compute/core/Error.h" +#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h" +#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h" +#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h" +#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/Utils.h" +#include "arm_compute/core/Validate.h" +#include "arm_compute/core/Window.h" + +#include "support/ToolchainSupport.h" + +using namespace arm_compute; + +GCDepthConcatenateLayerKernel::GCDepthConcatenateLayerKernel() + : _input(nullptr), _output(nullptr), _top_bottom(0), _left_right(0) +{ +} + +BorderSize GCDepthConcatenateLayerKernel::border_size() const +{ + return BorderSize(_top_bottom, _left_right); +} + +void GCDepthConcatenateLayerKernel::configure(const IGCTensor *input, unsigned int depth_offset, IGCTensor *output) +{ + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32); + ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); + ARM_COMPUTE_ERROR_ON(input->info()->dimension(2) + depth_offset > output->info()->dimension(2)); + ARM_COMPUTE_ERROR_ON(input->info()->dimension(0) > output->info()->dimension(0)); + ARM_COMPUTE_ERROR_ON(input->info()->dimension(1) > output->info()->dimension(1)); + ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(3, input, output); + + // The gaps between the two lowest dimensions of input and output need to be divisible by 2 + // Otherwise it is not clear how the padding should be added onto the input tensor + ARM_COMPUTE_ERROR_ON((output->info()->dimension(0) - input->info()->dimension(0)) % 2); + ARM_COMPUTE_ERROR_ON((output->info()->dimension(1) - input->info()->dimension(1)) % 2); + + _input = input; + _output = output; + + // Add build options + std::set<std::string> build_opts; + std::string dt_name = (input->info()->data_type() == DataType::F32) ? "DATA_TYPE_FP32" : "DATA_TYPE_FP16"; + build_opts.emplace(("#define " + dt_name)); + build_opts.emplace("#define LOCAL_SIZE_X " + support::cpp11::to_string(1)); + build_opts.emplace("#define LOCAL_SIZE_Y " + support::cpp11::to_string(1)); + build_opts.emplace("#define LOCAL_SIZE_Z " + support::cpp11::to_string(1)); + + // Configure kernel window + _left_right = (output->info()->dimension(0) - input->info()->dimension(0)) / 2; + _top_bottom = (output->info()->dimension(1) - input->info()->dimension(1)) / 2; + + const int offset_to_first_elements_in_bytes = depth_offset * output->info()->strides_in_bytes()[2]; + + build_opts.emplace("#define OFFSETS_X " + support::cpp11::to_string(_left_right)); + build_opts.emplace("#define OFFSETS_Y " + support::cpp11::to_string(_top_bottom)); + build_opts.emplace("#define OFFSETS_Z " + support::cpp11::to_string(offset_to_first_elements_in_bytes)); + + // Create kernel + _kernel = static_cast<GCKernel>(GCKernelLibrary::get().create_kernel("concatenate_depth", build_opts)); + + unsigned int num_elems_processed_per_iteration = 1; + unsigned int num_elems_read_per_iteration = 1; + if(input->info()->data_type() == DataType::F32) + { + num_elems_processed_per_iteration = 1; + num_elems_read_per_iteration = 1; + } + else if(input->info()->data_type() == DataType::F16) + { + num_elems_processed_per_iteration = 4; + num_elems_read_per_iteration = 4; + } + const unsigned int num_rows_read_per_iteration = 1; + + // The window needs to be based on input as we copy all the depths of input + Window win = calculate_max_window(*output->info(), Steps(num_elems_processed_per_iteration)); + win.set(Window::DimZ, Window::Dimension(0, input->info()->tensor_shape().z(), 1)); + + AccessWindowRectangle input_access(input->info(), -_left_right, -_top_bottom, num_elems_read_per_iteration, num_rows_read_per_iteration); + AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration); + update_window_and_padding(win, input_access, output_access); + output_access.set_valid_region(win, ValidRegion(Coordinates(0, 0), output->info()->tensor_shape())); + + IGCKernel::configure(win); +} + +void GCDepthConcatenateLayerKernel::run(const Window &window) +{ + ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); + ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IGCKernel::window(), window); + + _kernel.use(); + + Window slice = window.first_slice_window_3D(); + + do + { + if(_input->info()->data_type() == DataType::F32) + { + unsigned int idx = 0; + add_3D_tensor_argument(idx, _input, 1, slice); + add_3D_tensor_argument(idx, _output, 2, slice); + } + else if(_input->info()->data_type() == DataType::F16) + { + unsigned int idx = 0; + add_3D_tensor_argument(idx, _input, BufferParam(1, 3), slice); + add_3D_tensor_argument(idx, _output, BufferParam(2, 3), slice); + } + + _kernel.update_shader_params(); + + enqueue(*this, slice); + } + while(window.slide_window_slice_3D(slice)); +} diff --git a/src/core/NEON/kernels/NEDepthConcatenateKernel.cpp b/src/core/NEON/kernels/NEDepthConcatenateKernel.cpp deleted file mode 100644 index 7a62b0cb03..0000000000 --- a/src/core/NEON/kernels/NEDepthConcatenateKernel.cpp +++ /dev/null @@ -1,170 +0,0 @@ -/* - * Copyright (c) 2017 ARM Limited.
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/core/NEON/kernels/NEDepthConcatenateKernel.h" - -#include "arm_compute/core/Error.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/IAccessWindow.h" -#include "arm_compute/core/ITensor.h" -#include "arm_compute/core/NEON/NEFixedPoint.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Utils.h" -#include "arm_compute/core/Validate.h" -#include "arm_compute/core/Window.h" - -#include -#include - -using namespace arm_compute; - -namespace -{ -// Overloads of 128-bit vector loads -uint8x16_t loadq(const uint8_t *ptr) -{ - return vld1q_u8(ptr); -} -uint16x8_t loadq(const uint16_t *ptr) -{ - return vld1q_u16(ptr); -} -uint32x4_t loadq(const uint32_t *ptr) -{ - return vld1q_u32(ptr); -} -// Overloads of 128-bit vector stores -void storeq(uint8_t *ptr, uint8x16_t val) -{ - return vst1q_u8(ptr, val); -} -void storeq(uint16_t *ptr, uint16x8_t val) -{ - return vst1q_u16(ptr, val); -} -void storeq(uint32_t *ptr, uint32x4_t val) -{ - return vst1q_u32(ptr, val); -} - -template -void depth_concat(const ITensor *in, ITensor *out, std::pair start_xy, int depth_offset, const Window &window) -{ - const int start_x = start_xy.first; - const int start_y = start_xy.second; - - // Offset input - const int input_offset_to_first_elements_in_bytes = in->info()->offset_first_element_in_bytes() - start_x * in->info()->strides_in_bytes()[0] - start_y * in->info()->strides_in_bytes()[1]; - uint8_t *input_ptr = in->buffer() + input_offset_to_first_elements_in_bytes; - - // Offset output - const unsigned int output_offset_to_first_elements_in_bytes = out->info()->offset_first_element_in_bytes() + depth_offset * out->info()->strides_in_bytes()[2]; - uint8_t *output_ptr = out->buffer() + output_offset_to_first_elements_in_bytes; - - Iterator input(in, window); - Iterator output(out, window); - - execute_window_loop(window, [&](const Coordinates & id) - { - const auto in_ptr = reinterpret_cast(input_ptr + input.offset()); - const auto out_ptr = reinterpret_cast(output_ptr + output.offset()); - - storeq(out_ptr, loadq(in_ptr)); - }, - input, output); -} -} // namespace - -NEDepthConcatenateKernel::NEDepthConcatenateKernel() - : _func(nullptr), _input(nullptr), _output(nullptr), _top_bottom(0), _left_right(0), _depth_offset(0) -{ -} - -BorderSize NEDepthConcatenateKernel::border_size() const -{ - return BorderSize(_top_bottom, _left_right); -} - 
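
The loadq/storeq overloads in the anonymous namespace let one templated depth_concat() move 128 bits per step regardless of element width. Distilled to its simplest concrete form for uint8_t (NEON-only; n assumed to be a multiple of 16):

    #include <arm_neon.h>
    #include <cstdint>

    void copy_row_u8(const uint8_t *src, uint8_t *dst, int n)
    {
        for(int i = 0; i < n; i += 16)
        {
            vst1q_u8(dst + i, vld1q_u8(src + i)); // one 128-bit load and store per iteration
        }
    }
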
-void NEDepthConcatenateKernel::configure(const ITensor *input, unsigned int depth_offset, ITensor *output) -{ - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QS16, DataType::F16, DataType::F32); - ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); - ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT_POSITION(input, output); - ARM_COMPUTE_ERROR_ON(input->info()->dimension(2) + depth_offset > output->info()->dimension(2)); - ARM_COMPUTE_ERROR_ON(input->info()->dimension(0) > output->info()->dimension(0)); - ARM_COMPUTE_ERROR_ON(input->info()->dimension(1) > output->info()->dimension(1)); - ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(3, input, output); - - // The gaps between the two lowest dimensions of input and output need to be divisible by 2 - // Otherwise it is not clear how the padding should be added onto the input tensor - ARM_COMPUTE_ERROR_ON((output->info()->dimension(0) - input->info()->dimension(0)) % 2); - ARM_COMPUTE_ERROR_ON((output->info()->dimension(1) - input->info()->dimension(1)) % 2); - - _func = nullptr; - _input = input; - _output = output; - _depth_offset = depth_offset; - _left_right = (output->info()->dimension(0) - input->info()->dimension(0)) / 2; - _top_bottom = (output->info()->dimension(1) - input->info()->dimension(1)) / 2; - - switch(input->info()->data_type()) - { - case DataType::QS8: - _func = &depth_concat; - break; - case DataType::QS16: - case DataType::F16: - _func = &depth_concat; - break; - case DataType::F32: - _func = &depth_concat; - break; - default: - ARM_COMPUTE_ERROR("Unsupported data type."); - } - - const unsigned int num_elems_processed_per_iteration = 16 / input->info()->element_size(); - const unsigned int num_elems_read_per_iteration = 16 / input->info()->element_size(); - const unsigned int num_rows_read_per_iteration = 1; - - // The window needs to be based on input as we copy all the depths of input - Window win = calculate_max_window(*output->info(), Steps(num_elems_processed_per_iteration)); - win.set(Window::DimZ, Window::Dimension(0, input->info()->tensor_shape().z(), 1)); - - AccessWindowRectangle input_access(input->info(), -_left_right, -_top_bottom, num_elems_read_per_iteration, num_rows_read_per_iteration); - AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration); - update_window_and_padding(win, input_access, output_access); - output_access.set_valid_region(win, ValidRegion(Coordinates(0, 0), output->info()->tensor_shape())); - - INEKernel::configure(win); -} - -void NEDepthConcatenateKernel::run(const Window &window, const ThreadInfo &info) -{ - ARM_COMPUTE_UNUSED(info); - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); - ARM_COMPUTE_ERROR_ON(_func == nullptr); - - (*_func)(_input, _output, std::make_pair(_left_right, _top_bottom), _depth_offset, window); -} diff --git a/src/core/NEON/kernels/NEDepthConcatenateLayerKernel.cpp b/src/core/NEON/kernels/NEDepthConcatenateLayerKernel.cpp new file mode 100644 index 0000000000..01b0f10f70 --- /dev/null +++ b/src/core/NEON/kernels/NEDepthConcatenateLayerKernel.cpp @@ -0,0 +1,170 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/core/NEON/kernels/NEDepthConcatenateLayerKernel.h" + +#include "arm_compute/core/Error.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/IAccessWindow.h" +#include "arm_compute/core/ITensor.h" +#include "arm_compute/core/NEON/NEFixedPoint.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/Utils.h" +#include "arm_compute/core/Validate.h" +#include "arm_compute/core/Window.h" + +#include +#include + +using namespace arm_compute; + +namespace +{ +// Overloads of 128-bit vector loads +uint8x16_t loadq(const uint8_t *ptr) +{ + return vld1q_u8(ptr); +} +uint16x8_t loadq(const uint16_t *ptr) +{ + return vld1q_u16(ptr); +} +uint32x4_t loadq(const uint32_t *ptr) +{ + return vld1q_u32(ptr); +} +// Overloads of 128-bit vector stores +void storeq(uint8_t *ptr, uint8x16_t val) +{ + return vst1q_u8(ptr, val); +} +void storeq(uint16_t *ptr, uint16x8_t val) +{ + return vst1q_u16(ptr, val); +} +void storeq(uint32_t *ptr, uint32x4_t val) +{ + return vst1q_u32(ptr, val); +} + +template +void depth_concat(const ITensor *in, ITensor *out, std::pair start_xy, int depth_offset, const Window &window) +{ + const int start_x = start_xy.first; + const int start_y = start_xy.second; + + // Offset input + const int input_offset_to_first_elements_in_bytes = in->info()->offset_first_element_in_bytes() - start_x * in->info()->strides_in_bytes()[0] - start_y * in->info()->strides_in_bytes()[1]; + uint8_t *input_ptr = in->buffer() + input_offset_to_first_elements_in_bytes; + + // Offset output + const unsigned int output_offset_to_first_elements_in_bytes = out->info()->offset_first_element_in_bytes() + depth_offset * out->info()->strides_in_bytes()[2]; + uint8_t *output_ptr = out->buffer() + output_offset_to_first_elements_in_bytes; + + Iterator input(in, window); + Iterator output(out, window); + + execute_window_loop(window, [&](const Coordinates & id) + { + const auto in_ptr = reinterpret_cast(input_ptr + input.offset()); + const auto out_ptr = reinterpret_cast(output_ptr + output.offset()); + + storeq(out_ptr, loadq(in_ptr)); + }, + input, output); +} +} // namespace + +NEDepthConcatenateLayerKernel::NEDepthConcatenateLayerKernel() + : _func(nullptr), _input(nullptr), _output(nullptr), _top_bottom(0), _left_right(0), _depth_offset(0) +{ +} + +BorderSize NEDepthConcatenateLayerKernel::border_size() const +{ + return BorderSize(_top_bottom, 
+
+NEDepthConcatenateLayerKernel::NEDepthConcatenateLayerKernel()
+    : _func(nullptr), _input(nullptr), _output(nullptr), _top_bottom(0), _left_right(0), _depth_offset(0)
+{
+}
+
+BorderSize NEDepthConcatenateLayerKernel::border_size() const
+{
+    return BorderSize(_top_bottom, _left_right);
+}
+
+void NEDepthConcatenateLayerKernel::configure(const ITensor *input, unsigned int depth_offset, ITensor *output)
+{
+    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QS16, DataType::F16, DataType::F32);
+    ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
+    ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT_POSITION(input, output);
+    ARM_COMPUTE_ERROR_ON(input->info()->dimension(2) + depth_offset > output->info()->dimension(2));
+    ARM_COMPUTE_ERROR_ON(input->info()->dimension(0) > output->info()->dimension(0));
+    ARM_COMPUTE_ERROR_ON(input->info()->dimension(1) > output->info()->dimension(1));
+    ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(3, input, output);
+
+    // The gaps between the two lowest dimensions of input and output need to be divisible by 2
+    // Otherwise it is not clear how the padding should be added onto the input tensor
+    ARM_COMPUTE_ERROR_ON((output->info()->dimension(0) - input->info()->dimension(0)) % 2);
+    ARM_COMPUTE_ERROR_ON((output->info()->dimension(1) - input->info()->dimension(1)) % 2);
+
+    _func         = nullptr;
+    _input        = input;
+    _output       = output;
+    _depth_offset = depth_offset;
+    _left_right   = (output->info()->dimension(0) - input->info()->dimension(0)) / 2;
+    _top_bottom   = (output->info()->dimension(1) - input->info()->dimension(1)) / 2;
+
+    switch(input->info()->data_type())
+    {
+        case DataType::QS8:
+            _func = &depth_concat<uint8_t>;
+            break;
+        case DataType::QS16:
+        case DataType::F16:
+            _func = &depth_concat<uint16_t>;
+            break;
+        case DataType::F32:
+            _func = &depth_concat<uint32_t>;
+            break;
+        default:
+            ARM_COMPUTE_ERROR("Unsupported data type.");
+    }
+
+    const unsigned int num_elems_processed_per_iteration = 16 / input->info()->element_size();
+    const unsigned int num_elems_read_per_iteration      = 16 / input->info()->element_size();
+    const unsigned int num_rows_read_per_iteration       = 1;
+
+    // The window needs to be based on input as we copy all the depths of input
+    Window win = calculate_max_window(*output->info(), Steps(num_elems_processed_per_iteration));
+    win.set(Window::DimZ, Window::Dimension(0, input->info()->tensor_shape().z(), 1));
+
+    AccessWindowRectangle  input_access(input->info(), -_left_right, -_top_bottom, num_elems_read_per_iteration, num_rows_read_per_iteration);
+    AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration);
+    update_window_and_padding(win, input_access, output_access);
+    output_access.set_valid_region(win, ValidRegion(Coordinates(0, 0), output->info()->tensor_shape()));
+
+    INEKernel::configure(win);
+}
+
+void NEDepthConcatenateLayerKernel::run(const Window &window, const ThreadInfo &info)
+{
+    ARM_COMPUTE_UNUSED(info);
+    ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
+    ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);
+    ARM_COMPUTE_ERROR_ON(_func == nullptr);
+
+    (*_func)(_input, _output, std::make_pair(_left_right, _top_bottom), _depth_offset, window);
+}
diff --git a/src/core/NEON/kernels/NEDepthConvertKernel.cpp b/src/core/NEON/kernels/NEDepthConvertKernel.cpp
deleted file mode 100644
index d97a20be65..0000000000
--- a/src/core/NEON/kernels/NEDepthConvertKernel.cpp
+++ /dev/null
@@ -1,524 +0,0 @@
-/*
- * Copyright (c) 2016, 2017 ARM Limited.
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/core/NEON/kernels/NEDepthConvertKernel.h" - -#include "arm_compute/core/Error.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/ITensor.h" -#include "arm_compute/core/NEON/NEFixedPoint.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Validate.h" - -#include - -using namespace arm_compute; - -namespace arm_compute -{ -class Coordinates; -} // namespace arm_compute - -NEDepthConvertKernel::NEDepthConvertKernel() - : _input(nullptr), _output(nullptr), _policy(), _shift(0), _fixed_point_position_input(0), _fixed_point_position_output(0) -{ -} - -void NEDepthConvertKernel::configure(ITensor *input, ITensor *output, ConvertPolicy policy, uint32_t shift) -{ - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::QS8, DataType::S16, DataType::U16, DataType::QS16, DataType::F32); - - _input = input; - _output = input; - _policy = policy; - _shift = shift; - - if(output != nullptr) - { - // Auto initialize output shape if not initialized (We can only auto-configure the shape, datatype must be given) - set_shape_if_empty(*output->info(), input->info()->tensor_shape()); - - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::QS8, DataType::S16, DataType::U16, DataType::QS16, DataType::U32, DataType::S32, DataType::F32); - ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(input, output); - - // Set output - _output = output; - } - - // Set initial fixed point position of input and output - _fixed_point_position_input = input->info()->fixed_point_position(); - _fixed_point_position_output = _output->info()->fixed_point_position(); - - // Set the fixed point position to the output tensor if needed - if(is_data_type_fixed_point(input->info()->data_type()) && is_data_type_fixed_point(_output->info()->data_type())) - { - // If in-place set the fixed point position of the output tensor to be equal to shift - _fixed_point_position_output = (_input == _output) ? 
static_cast(_shift) : _fixed_point_position_output; - // Set fixed point position to output tensor - _output->info()->set_fixed_point_position(_fixed_point_position_output); - } - - ARM_COMPUTE_ERROR_ON(shift >= 8 && (!is_data_type_fixed_point(input->info()->data_type()) && !is_data_type_fixed_point(output->info()->data_type()))); - ARM_COMPUTE_ERROR_ON(input == output && (data_size_from_type(input->info()->data_type()) != data_size_from_type(output->info()->data_type()))); - - ARM_COMPUTE_ERROR_ON_MSG(input->info()->data_type() == DataType::U8 && (output->info()->data_type() != DataType::S16 && output->info()->data_type() != DataType::U16 - && output->info()->data_type() != DataType::S32), - "Only data_types supported [in] U8 -> [out] U16, S16, S32"); - - ARM_COMPUTE_ERROR_ON_MSG(input->info()->data_type() == DataType::QS8 && (output->info()->data_type() != DataType::QS8 && output->info()->data_type() != DataType::F32), - "Only data_types supported [in] QS8 -> [out] QS8, F32"); - - ARM_COMPUTE_ERROR_ON_MSG(input->info()->data_type() == DataType::U16 && (output->info()->data_type() != DataType::U8 && output->info()->data_type() != DataType::U32), - "Only data_types supported [in] U16 -> [out] U8, U32"); - - ARM_COMPUTE_ERROR_ON_MSG(input->info()->data_type() == DataType::S16 && (output->info()->data_type() != DataType::U8 && output->info()->data_type() != DataType::S32), - "Only data_types supported [in] S16 -> [out] U8, S32"); - - ARM_COMPUTE_ERROR_ON_MSG(input->info()->data_type() == DataType::QS16 && (output->info()->data_type() != DataType::QS16 && output->info()->data_type() != DataType::F32), - "Only data_types supported [in] QS16 -> [out] QS16, F32"); - - ARM_COMPUTE_ERROR_ON_MSG(input->info()->data_type() == DataType::F32 && (output->info()->data_type() != DataType::QS8 && output->info()->data_type() != DataType::QS16), - "Only data_types supported [in] F32 -> [out] QS8, QS16"); - - constexpr unsigned int num_elems_processed_per_iteration = 16; - - // Configure kernel window - Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration)); - - AccessWindowHorizontal input_access(input->info(), 0, num_elems_processed_per_iteration); - if(output != nullptr) - { - AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration); - update_window_and_padding(win, input_access, output_access); - output_access.set_valid_region(win, input->info()->valid_region()); - } - else - { - // In-place computation - update_window_and_padding(win, input_access); - } - ICPPKernel::configure(win); -} - -void NEDepthConvertKernel::run(const Window &window, const ThreadInfo &info) -{ - ARM_COMPUTE_UNUSED(info); - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window); - ARM_COMPUTE_ERROR_ON(nullptr == _input); - ARM_COMPUTE_ERROR_ON(nullptr == _output); - ARM_COMPUTE_ERROR_ON(_input == _output); - - Iterator input(_input, window); - Iterator output(_output, window); - - bool in_place = (_input == _output); - - switch(_input->info()->data_type()) - { - case DataType::U8: - { - const int16x8_t b = vdupq_n_s16(_shift); - - switch(_output->info()->data_type()) - { - case DataType::S16: - { - /* Up-conversion U8 -> S16 */ - execute_window_loop(window, [&](const Coordinates & id) - { - const uint8x16_t texels_u8 = vld1q_u8(input.ptr()); - - const int16x8x2_t texels = - { - { - vshlq_s16(vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(texels_u8))), b), - 
vshlq_s16(vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(texels_u8))), b) - } - }; - - vst1q_s16(reinterpret_cast(output.ptr()), texels.val[0]); - vst1q_s16(reinterpret_cast(output.ptr()) + 8, texels.val[1]); - }, - input, output); - break; - } - case DataType::S32: - { - /* Up-conversion U8 -> S32 */ - execute_window_loop(window, [&](const Coordinates & id) - { - const uint8x16_t texels_u8 = vld1q_u8(input.ptr()); - - const int16x8x2_t texels = - { - { - vshlq_s16(vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(texels_u8))), b), - vshlq_s16(vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(texels_u8))), b) - } - }; - - vst1q_s32(reinterpret_cast(output.ptr()), vmovl_s16(vget_low_s16(texels.val[0]))); - vst1q_s32(reinterpret_cast(output.ptr()) + 4, vmovl_s16(vget_high_s16(texels.val[0]))); - vst1q_s32(reinterpret_cast(output.ptr()) + 8, vmovl_s16(vget_low_s16(texels.val[1]))); - vst1q_s32(reinterpret_cast(output.ptr()) + 12, vmovl_s16(vget_high_s16(texels.val[1]))); - }, - input, output); - break; - } - case DataType::U16: - { - /* Up-conversion U8 -> U16 */ - execute_window_loop(window, [&](const Coordinates & id) - { - const uint8x16_t texels_u8 = vld1q_u8(input.ptr()); - - const uint16x8x2_t texels = - { - { - vshlq_u16(vmovl_u8(vget_low_u8(texels_u8)), b), - vshlq_u16(vmovl_u8(vget_high_u8(texels_u8)), b) - } - }; - - vst1q_u16(reinterpret_cast(output.ptr()), texels.val[0]); - vst1q_u16(reinterpret_cast(output.ptr()) + 8, texels.val[1]); - }, - input, output); - break; - } - default: - ARM_COMPUTE_ERROR("Output data type not supported"); - } - break; - } - case DataType::QS8: - { - switch(_output->info()->data_type()) - { - case DataType::QS8: - { - const int relative_shift = _fixed_point_position_output - _fixed_point_position_input; - /* Fixed point position conversion QS8 -> QS8 */ - if(relative_shift != 0 || !in_place) - { - const auto relative_shift_vec = vdupq_n_qs8(relative_shift); - execute_window_loop(window, [&](const Coordinates & id) - { - const qint8x16_t texels_qs8 = vld1q_qs8(reinterpret_cast(input.ptr())); - vst1q_qs8(reinterpret_cast(output.ptr()), vqrshlq_s8(texels_qs8, relative_shift_vec)); - }, - input, output); - } - break; - } - case DataType::F32: - { - /* Up-conversion QS8 -> F32 */ - execute_window_loop(window, [&](const Coordinates & id) - { - const qint8x16_t texels_qs8 = vld1q_qs8(reinterpret_cast(input.ptr())); - - float32x4x2_t texels_low = vcvt_f32_qs8(vget_low_s8(texels_qs8), _fixed_point_position_input); - float32x4x2_t texels_high = vcvt_f32_qs8(vget_high_s8(texels_qs8), _fixed_point_position_input); - - vst1q_f32(reinterpret_cast(output.ptr()), texels_low.val[0]); - vst1q_f32(reinterpret_cast(output.ptr()) + 4, texels_low.val[1]); - vst1q_f32(reinterpret_cast(output.ptr()) + 8, texels_high.val[0]); - vst1q_f32(reinterpret_cast(output.ptr()) + 12, texels_high.val[1]); - }, - input, output); - break; - } - default: - ARM_COMPUTE_ERROR("Output data type not supported"); - } - break; - } - case DataType::S16: - { - switch(_output->info()->data_type()) - { - case DataType::U8: - { - const int16x8_t b = vdupq_n_s16(-static_cast(_shift)); - - /* Down-conversion S16 -> U8 */ - if(ConvertPolicy::SATURATE == _policy) - { - execute_window_loop(window, [&](const Coordinates & id) - { - const int16x8x2_t texels = - { - { - vqshlq_s16(vld1q_s16(reinterpret_cast(input.ptr())), b), - vqshlq_s16(vld1q_s16(reinterpret_cast(input.ptr()) + 8), b) - } - }; - - vst1q_u8(output.ptr(), vcombine_u8(vqmovun_s16(texels.val[0]), vqmovun_s16(texels.val[1]))); - }, - input, output); - } - else 
- { - execute_window_loop(window, [&](const Coordinates & id) - { - const int16x8x2_t texels = - { - { - vshlq_s16(vld1q_s16(reinterpret_cast(input.ptr())), b), - vshlq_s16(vld1q_s16(reinterpret_cast(input.ptr()) + 8), b) - } - }; - - vst1q_u8(output.ptr(), vcombine_u8(vmovn_u16(vreinterpretq_u16_s16(texels.val[0])), - vmovn_u16(vreinterpretq_u16_s16(texels.val[1])))); - }, - input, output); - } - break; - } - case DataType::S32: - { - const int32x4_t b = vdupq_n_s32(_shift); - - /* Up-conversion S16 -> S32 */ - execute_window_loop(window, [&](const Coordinates & id) - { - const int16x8x2_t texels = - { - { - vld1q_s16(reinterpret_cast(input.ptr())), - vld1q_s16(reinterpret_cast(input.ptr()) + 8) - } - }; - - const int32x4x4_t texels_s32 = - { - { - vshlq_s32(vmovl_s16(vget_low_s16(texels.val[0])), b), - vshlq_s32(vmovl_s16(vget_high_s16(texels.val[0])), b), - vshlq_s32(vmovl_s16(vget_low_s16(texels.val[1])), b), - vshlq_s32(vmovl_s16(vget_high_s16(texels.val[1])), b) - } - }; - - vst1q_s32(reinterpret_cast(output.ptr()), texels_s32.val[0]); - vst1q_s32(reinterpret_cast(output.ptr()) + 4, texels_s32.val[1]); - vst1q_s32(reinterpret_cast(output.ptr()) + 8, texels_s32.val[2]); - vst1q_s32(reinterpret_cast(output.ptr()) + 12, texels_s32.val[3]); - }, - input, output); - break; - } - default: - ARM_COMPUTE_ERROR("Output data type not supported"); - } - break; - } - case DataType::U16: - { - switch(_output->info()->data_type()) - { - case DataType::U8: - { - const int16x8_t b = vdupq_n_s16(-static_cast(_shift)); - - /* Down-conversion U16 -> U8 */ - if(ConvertPolicy::SATURATE == _policy) - { - execute_window_loop(window, [&](const Coordinates & id) - { - const uint16x8x2_t texels = - { - { - vqshlq_u16(vld1q_u16(reinterpret_cast(input.ptr())), b), - vqshlq_u16(vld1q_u16(reinterpret_cast(input.ptr()) + 8), b) - } - }; - - vst1q_u8(output.ptr(), vcombine_u8(vqmovn_u16(texels.val[0]), vqmovn_u16(texels.val[1]))); - }, - input, output); - } - else - { - execute_window_loop(window, [&](const Coordinates & id) - { - const uint16x8x2_t texels = - { - { - vshlq_u16(vld1q_u16(reinterpret_cast(input.ptr())), b), - vshlq_u16(vld1q_u16(reinterpret_cast(input.ptr()) + 8), b) - } - }; - - vst1q_u8(output.ptr(), vcombine_u8(vmovn_u16(texels.val[0]), vmovn_u16(texels.val[1]))); - }, - input, output); - } - break; - } - case DataType::U32: - { - const int32x4_t b = vdupq_n_s32(_shift); - - /* Up-conversion U16 -> U32 */ - execute_window_loop(window, [&](const Coordinates & id) - { - const uint16x8x2_t texels = - { - { - vld1q_u16(reinterpret_cast(input.ptr())), - vld1q_u16(reinterpret_cast(input.ptr()) + 8) - } - }; - - vst1q_u32(reinterpret_cast(output.ptr()), vshlq_u32(vmovl_u16(vget_low_u16(texels.val[0])), b)); - vst1q_u32(reinterpret_cast(output.ptr()) + 4, vshlq_u32(vmovl_u16(vget_high_u16(texels.val[0])), b)); - vst1q_u32(reinterpret_cast(output.ptr()) + 8, vshlq_u32(vmovl_u16(vget_low_u16(texels.val[1])), b)); - vst1q_u32(reinterpret_cast(output.ptr()) + 12, vshlq_u32(vmovl_u16(vget_high_u16(texels.val[1])), b)); - }, - input, output); - break; - } - default: - ARM_COMPUTE_ERROR("Output data type not supported"); - } - break; - } - case DataType::QS16: - { - switch(_output->info()->data_type()) - { - case DataType::QS16: - { - const int relative_shift = _fixed_point_position_output - _fixed_point_position_input; - /* Fixed point position conversion QS16 -> QS16 */ - if(relative_shift != 0 || !in_place) - { - const auto relative_shift_vec = vdupq_n_qs16(relative_shift); - execute_window_loop(window, 
[&](const Coordinates & id) - { - const qint16x8x2_t texels_qs16 = - { - { - vld1q_qs16(reinterpret_cast(input.ptr())), - vld1q_qs16(reinterpret_cast(input.ptr()) + 8) - } - }; - vst1q_qs16(reinterpret_cast(output.ptr()), vqrshlq_s16(texels_qs16.val[0], relative_shift_vec)); - vst1q_qs16(reinterpret_cast(output.ptr()) + 8, vqrshlq_s16(texels_qs16.val[1], relative_shift_vec)); - }, - input, output); - } - break; - } - case DataType::F32: - { - /* Up-conversion QS16 -> F32 */ - execute_window_loop(window, [&](const Coordinates & id) - { - const int16x8x2_t texels_qs16 = - { - { - vld1q_s16(reinterpret_cast(input.ptr())), - vld1q_s16(reinterpret_cast(input.ptr()) + 8) - } - }; - - vst1q_f32(reinterpret_cast(output.ptr()), vcvt_f32_qs16(vget_low_s16(texels_qs16.val[0]), _fixed_point_position_input)); - vst1q_f32(reinterpret_cast(output.ptr()) + 4, vcvt_f32_qs16(vget_high_s16(texels_qs16.val[0]), _fixed_point_position_input)); - vst1q_f32(reinterpret_cast(output.ptr()) + 8, vcvt_f32_qs16(vget_low_s16(texels_qs16.val[1]), _fixed_point_position_input)); - vst1q_f32(reinterpret_cast(output.ptr()) + 12, vcvt_f32_qs16(vget_high_s16(texels_qs16.val[1]), _fixed_point_position_input)); - }, - input, output); - break; - } - default: - ARM_COMPUTE_ERROR("Output data type not supported"); - } - break; - } - case DataType::F32: - { - switch(_output->info()->data_type()) - { - case DataType::QS8: - { - /* Down-conversion F32 -> QS8 */ - execute_window_loop(window, [&](const Coordinates & id) - { - const float32x4x4_t texels_f32 = - { - { - vld1q_f32(reinterpret_cast(input.ptr())), - vld1q_f32(reinterpret_cast(input.ptr()) + 4), - vld1q_f32(reinterpret_cast(input.ptr()) + 8), - vld1q_f32(reinterpret_cast(input.ptr()) + 12) - } - }; - - const qint8x16_t texels_s8 = vqcvtq_qs8_f32(texels_f32, _fixed_point_position_output); - - vst1q_s8(reinterpret_cast(output.ptr()), texels_s8); - }, - input, output); - break; - } - case DataType::QS16: - { - /* Down-conversion F32 -> QS16 */ - execute_window_loop(window, [&](const Coordinates & id) - { - const float32x4x2_t texels_f32_1 = - { - { - vld1q_f32(reinterpret_cast(input.ptr())), - vld1q_f32(reinterpret_cast(input.ptr()) + 4), - } - }; - const float32x4x2_t texels_f32_2 = - { - { - vld1q_f32(reinterpret_cast(input.ptr()) + 8), - vld1q_f32(reinterpret_cast(input.ptr()) + 12) - } - }; - - vst1q_s16(reinterpret_cast(output.ptr()), vqcvtq_qs16_f32(texels_f32_1, _fixed_point_position_output)); - vst1q_s16(reinterpret_cast(output.ptr()) + 8, vqcvtq_qs16_f32(texels_f32_2, _fixed_point_position_output)); - }, - input, output); - break; - } - default: - ARM_COMPUTE_ERROR("Output data type not supported"); - } - break; - } - default: - ARM_COMPUTE_ERROR("Not supported"); - } -} diff --git a/src/core/NEON/kernels/NEDepthConvertLayerKernel.cpp b/src/core/NEON/kernels/NEDepthConvertLayerKernel.cpp new file mode 100644 index 0000000000..c29cb57513 --- /dev/null +++ b/src/core/NEON/kernels/NEDepthConvertLayerKernel.cpp @@ -0,0 +1,524 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
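For reference while reading the shift-based conversion paths deleted above and re-added below, their scalar equivalents, with the ConvertPolicy made explicit (plain C++ sketch; function names are illustrative, not library API):

#include <algorithm>
#include <cstdint>

// Up-conversion U8 -> S16 with left shift (vmovl_u8 + vshlq_s16 in the kernel).
inline int16_t u8_to_s16(uint8_t v, unsigned int shift)
{
    return static_cast<int16_t>(static_cast<int16_t>(v) << shift);
}

// Down-conversion S16 -> U8 with right shift: SATURATE clamps to [0, 255]
// (vqshlq_s16 + vqmovun_s16), WRAP keeps the low byte (vshlq_s16 + vmovn_u16).
inline uint8_t s16_to_u8(int16_t v, unsigned int shift, bool saturate)
{
    const int32_t shifted = static_cast<int32_t>(v) >> shift; // arithmetic shift, as vshlq with a negative count
    if(saturate)
    {
        return static_cast<uint8_t>(std::min<int32_t>(std::max<int32_t>(shifted, 0), 255));
    }
    return static_cast<uint8_t>(shifted); // wrap-around truncation
}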
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/core/NEON/kernels/NEDepthConvertLayerKernel.h" + +#include "arm_compute/core/Error.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/ITensor.h" +#include "arm_compute/core/NEON/NEFixedPoint.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/Validate.h" + +#include + +using namespace arm_compute; + +namespace arm_compute +{ +class Coordinates; +} // namespace arm_compute + +NEDepthConvertLayerKernel::NEDepthConvertLayerKernel() + : _input(nullptr), _output(nullptr), _policy(), _shift(0), _fixed_point_position_input(0), _fixed_point_position_output(0) +{ +} + +void NEDepthConvertLayerKernel::configure(ITensor *input, ITensor *output, ConvertPolicy policy, uint32_t shift) +{ + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::QS8, DataType::S16, DataType::U16, DataType::QS16, DataType::F32); + + _input = input; + _output = input; + _policy = policy; + _shift = shift; + + if(output != nullptr) + { + // Auto initialize output shape if not initialized (We can only auto-configure the shape, datatype must be given) + set_shape_if_empty(*output->info(), input->info()->tensor_shape()); + + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::QS8, DataType::S16, DataType::U16, DataType::QS16, DataType::U32, DataType::S32, DataType::F32); + ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(input, output); + + // Set output + _output = output; + } + + // Set initial fixed point position of input and output + _fixed_point_position_input = input->info()->fixed_point_position(); + _fixed_point_position_output = _output->info()->fixed_point_position(); + + // Set the fixed point position to the output tensor if needed + if(is_data_type_fixed_point(input->info()->data_type()) && is_data_type_fixed_point(_output->info()->data_type())) + { + // If in-place set the fixed point position of the output tensor to be equal to shift + _fixed_point_position_output = (_input == _output) ? 
static_cast(_shift) : _fixed_point_position_output; + // Set fixed point position to output tensor + _output->info()->set_fixed_point_position(_fixed_point_position_output); + } + + ARM_COMPUTE_ERROR_ON(shift >= 8 && (!is_data_type_fixed_point(input->info()->data_type()) && !is_data_type_fixed_point(output->info()->data_type()))); + ARM_COMPUTE_ERROR_ON(input == output && (data_size_from_type(input->info()->data_type()) != data_size_from_type(output->info()->data_type()))); + + ARM_COMPUTE_ERROR_ON_MSG(input->info()->data_type() == DataType::U8 && (output->info()->data_type() != DataType::S16 && output->info()->data_type() != DataType::U16 + && output->info()->data_type() != DataType::S32), + "Only data_types supported [in] U8 -> [out] U16, S16, S32"); + + ARM_COMPUTE_ERROR_ON_MSG(input->info()->data_type() == DataType::QS8 && (output->info()->data_type() != DataType::QS8 && output->info()->data_type() != DataType::F32), + "Only data_types supported [in] QS8 -> [out] QS8, F32"); + + ARM_COMPUTE_ERROR_ON_MSG(input->info()->data_type() == DataType::U16 && (output->info()->data_type() != DataType::U8 && output->info()->data_type() != DataType::U32), + "Only data_types supported [in] U16 -> [out] U8, U32"); + + ARM_COMPUTE_ERROR_ON_MSG(input->info()->data_type() == DataType::S16 && (output->info()->data_type() != DataType::U8 && output->info()->data_type() != DataType::S32), + "Only data_types supported [in] S16 -> [out] U8, S32"); + + ARM_COMPUTE_ERROR_ON_MSG(input->info()->data_type() == DataType::QS16 && (output->info()->data_type() != DataType::QS16 && output->info()->data_type() != DataType::F32), + "Only data_types supported [in] QS16 -> [out] QS16, F32"); + + ARM_COMPUTE_ERROR_ON_MSG(input->info()->data_type() == DataType::F32 && (output->info()->data_type() != DataType::QS8 && output->info()->data_type() != DataType::QS16), + "Only data_types supported [in] F32 -> [out] QS8, QS16"); + + constexpr unsigned int num_elems_processed_per_iteration = 16; + + // Configure kernel window + Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration)); + + AccessWindowHorizontal input_access(input->info(), 0, num_elems_processed_per_iteration); + if(output != nullptr) + { + AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration); + update_window_and_padding(win, input_access, output_access); + output_access.set_valid_region(win, input->info()->valid_region()); + } + else + { + // In-place computation + update_window_and_padding(win, input_access); + } + ICPPKernel::configure(win); +} + +void NEDepthConvertLayerKernel::run(const Window &window, const ThreadInfo &info) +{ + ARM_COMPUTE_UNUSED(info); + ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); + ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window); + ARM_COMPUTE_ERROR_ON(nullptr == _input); + ARM_COMPUTE_ERROR_ON(nullptr == _output); + ARM_COMPUTE_ERROR_ON(_input == _output); + + Iterator input(_input, window); + Iterator output(_output, window); + + bool in_place = (_input == _output); + + switch(_input->info()->data_type()) + { + case DataType::U8: + { + const int16x8_t b = vdupq_n_s16(_shift); + + switch(_output->info()->data_type()) + { + case DataType::S16: + { + /* Up-conversion U8 -> S16 */ + execute_window_loop(window, [&](const Coordinates & id) + { + const uint8x16_t texels_u8 = vld1q_u8(input.ptr()); + + const int16x8x2_t texels = + { + { + vshlq_s16(vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(texels_u8))), b), + 
vshlq_s16(vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(texels_u8))), b) + } + }; + + vst1q_s16(reinterpret_cast(output.ptr()), texels.val[0]); + vst1q_s16(reinterpret_cast(output.ptr()) + 8, texels.val[1]); + }, + input, output); + break; + } + case DataType::S32: + { + /* Up-conversion U8 -> S32 */ + execute_window_loop(window, [&](const Coordinates & id) + { + const uint8x16_t texels_u8 = vld1q_u8(input.ptr()); + + const int16x8x2_t texels = + { + { + vshlq_s16(vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(texels_u8))), b), + vshlq_s16(vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(texels_u8))), b) + } + }; + + vst1q_s32(reinterpret_cast(output.ptr()), vmovl_s16(vget_low_s16(texels.val[0]))); + vst1q_s32(reinterpret_cast(output.ptr()) + 4, vmovl_s16(vget_high_s16(texels.val[0]))); + vst1q_s32(reinterpret_cast(output.ptr()) + 8, vmovl_s16(vget_low_s16(texels.val[1]))); + vst1q_s32(reinterpret_cast(output.ptr()) + 12, vmovl_s16(vget_high_s16(texels.val[1]))); + }, + input, output); + break; + } + case DataType::U16: + { + /* Up-conversion U8 -> U16 */ + execute_window_loop(window, [&](const Coordinates & id) + { + const uint8x16_t texels_u8 = vld1q_u8(input.ptr()); + + const uint16x8x2_t texels = + { + { + vshlq_u16(vmovl_u8(vget_low_u8(texels_u8)), b), + vshlq_u16(vmovl_u8(vget_high_u8(texels_u8)), b) + } + }; + + vst1q_u16(reinterpret_cast(output.ptr()), texels.val[0]); + vst1q_u16(reinterpret_cast(output.ptr()) + 8, texels.val[1]); + }, + input, output); + break; + } + default: + ARM_COMPUTE_ERROR("Output data type not supported"); + } + break; + } + case DataType::QS8: + { + switch(_output->info()->data_type()) + { + case DataType::QS8: + { + const int relative_shift = _fixed_point_position_output - _fixed_point_position_input; + /* Fixed point position conversion QS8 -> QS8 */ + if(relative_shift != 0 || !in_place) + { + const auto relative_shift_vec = vdupq_n_qs8(relative_shift); + execute_window_loop(window, [&](const Coordinates & id) + { + const qint8x16_t texels_qs8 = vld1q_qs8(reinterpret_cast(input.ptr())); + vst1q_qs8(reinterpret_cast(output.ptr()), vqrshlq_s8(texels_qs8, relative_shift_vec)); + }, + input, output); + } + break; + } + case DataType::F32: + { + /* Up-conversion QS8 -> F32 */ + execute_window_loop(window, [&](const Coordinates & id) + { + const qint8x16_t texels_qs8 = vld1q_qs8(reinterpret_cast(input.ptr())); + + float32x4x2_t texels_low = vcvt_f32_qs8(vget_low_s8(texels_qs8), _fixed_point_position_input); + float32x4x2_t texels_high = vcvt_f32_qs8(vget_high_s8(texels_qs8), _fixed_point_position_input); + + vst1q_f32(reinterpret_cast(output.ptr()), texels_low.val[0]); + vst1q_f32(reinterpret_cast(output.ptr()) + 4, texels_low.val[1]); + vst1q_f32(reinterpret_cast(output.ptr()) + 8, texels_high.val[0]); + vst1q_f32(reinterpret_cast(output.ptr()) + 12, texels_high.val[1]); + }, + input, output); + break; + } + default: + ARM_COMPUTE_ERROR("Output data type not supported"); + } + break; + } + case DataType::S16: + { + switch(_output->info()->data_type()) + { + case DataType::U8: + { + const int16x8_t b = vdupq_n_s16(-static_cast(_shift)); + + /* Down-conversion S16 -> U8 */ + if(ConvertPolicy::SATURATE == _policy) + { + execute_window_loop(window, [&](const Coordinates & id) + { + const int16x8x2_t texels = + { + { + vqshlq_s16(vld1q_s16(reinterpret_cast(input.ptr())), b), + vqshlq_s16(vld1q_s16(reinterpret_cast(input.ptr()) + 8), b) + } + }; + + vst1q_u8(output.ptr(), vcombine_u8(vqmovun_s16(texels.val[0]), vqmovun_s16(texels.val[1]))); + }, + input, output); + } + else 
+ { + execute_window_loop(window, [&](const Coordinates & id) + { + const int16x8x2_t texels = + { + { + vshlq_s16(vld1q_s16(reinterpret_cast(input.ptr())), b), + vshlq_s16(vld1q_s16(reinterpret_cast(input.ptr()) + 8), b) + } + }; + + vst1q_u8(output.ptr(), vcombine_u8(vmovn_u16(vreinterpretq_u16_s16(texels.val[0])), + vmovn_u16(vreinterpretq_u16_s16(texels.val[1])))); + }, + input, output); + } + break; + } + case DataType::S32: + { + const int32x4_t b = vdupq_n_s32(_shift); + + /* Up-conversion S16 -> S32 */ + execute_window_loop(window, [&](const Coordinates & id) + { + const int16x8x2_t texels = + { + { + vld1q_s16(reinterpret_cast(input.ptr())), + vld1q_s16(reinterpret_cast(input.ptr()) + 8) + } + }; + + const int32x4x4_t texels_s32 = + { + { + vshlq_s32(vmovl_s16(vget_low_s16(texels.val[0])), b), + vshlq_s32(vmovl_s16(vget_high_s16(texels.val[0])), b), + vshlq_s32(vmovl_s16(vget_low_s16(texels.val[1])), b), + vshlq_s32(vmovl_s16(vget_high_s16(texels.val[1])), b) + } + }; + + vst1q_s32(reinterpret_cast(output.ptr()), texels_s32.val[0]); + vst1q_s32(reinterpret_cast(output.ptr()) + 4, texels_s32.val[1]); + vst1q_s32(reinterpret_cast(output.ptr()) + 8, texels_s32.val[2]); + vst1q_s32(reinterpret_cast(output.ptr()) + 12, texels_s32.val[3]); + }, + input, output); + break; + } + default: + ARM_COMPUTE_ERROR("Output data type not supported"); + } + break; + } + case DataType::U16: + { + switch(_output->info()->data_type()) + { + case DataType::U8: + { + const int16x8_t b = vdupq_n_s16(-static_cast(_shift)); + + /* Down-conversion U16 -> U8 */ + if(ConvertPolicy::SATURATE == _policy) + { + execute_window_loop(window, [&](const Coordinates & id) + { + const uint16x8x2_t texels = + { + { + vqshlq_u16(vld1q_u16(reinterpret_cast(input.ptr())), b), + vqshlq_u16(vld1q_u16(reinterpret_cast(input.ptr()) + 8), b) + } + }; + + vst1q_u8(output.ptr(), vcombine_u8(vqmovn_u16(texels.val[0]), vqmovn_u16(texels.val[1]))); + }, + input, output); + } + else + { + execute_window_loop(window, [&](const Coordinates & id) + { + const uint16x8x2_t texels = + { + { + vshlq_u16(vld1q_u16(reinterpret_cast(input.ptr())), b), + vshlq_u16(vld1q_u16(reinterpret_cast(input.ptr()) + 8), b) + } + }; + + vst1q_u8(output.ptr(), vcombine_u8(vmovn_u16(texels.val[0]), vmovn_u16(texels.val[1]))); + }, + input, output); + } + break; + } + case DataType::U32: + { + const int32x4_t b = vdupq_n_s32(_shift); + + /* Up-conversion U16 -> U32 */ + execute_window_loop(window, [&](const Coordinates & id) + { + const uint16x8x2_t texels = + { + { + vld1q_u16(reinterpret_cast(input.ptr())), + vld1q_u16(reinterpret_cast(input.ptr()) + 8) + } + }; + + vst1q_u32(reinterpret_cast(output.ptr()), vshlq_u32(vmovl_u16(vget_low_u16(texels.val[0])), b)); + vst1q_u32(reinterpret_cast(output.ptr()) + 4, vshlq_u32(vmovl_u16(vget_high_u16(texels.val[0])), b)); + vst1q_u32(reinterpret_cast(output.ptr()) + 8, vshlq_u32(vmovl_u16(vget_low_u16(texels.val[1])), b)); + vst1q_u32(reinterpret_cast(output.ptr()) + 12, vshlq_u32(vmovl_u16(vget_high_u16(texels.val[1])), b)); + }, + input, output); + break; + } + default: + ARM_COMPUTE_ERROR("Output data type not supported"); + } + break; + } + case DataType::QS16: + { + switch(_output->info()->data_type()) + { + case DataType::QS16: + { + const int relative_shift = _fixed_point_position_output - _fixed_point_position_input; + /* Fixed point position conversion QS16 -> QS16 */ + if(relative_shift != 0 || !in_place) + { + const auto relative_shift_vec = vdupq_n_qs16(relative_shift); + execute_window_loop(window, 
[&](const Coordinates & id) + { + const qint16x8x2_t texels_qs16 = + { + { + vld1q_qs16(reinterpret_cast(input.ptr())), + vld1q_qs16(reinterpret_cast(input.ptr()) + 8) + } + }; + vst1q_qs16(reinterpret_cast(output.ptr()), vqrshlq_s16(texels_qs16.val[0], relative_shift_vec)); + vst1q_qs16(reinterpret_cast(output.ptr()) + 8, vqrshlq_s16(texels_qs16.val[1], relative_shift_vec)); + }, + input, output); + } + break; + } + case DataType::F32: + { + /* Up-conversion QS16 -> F32 */ + execute_window_loop(window, [&](const Coordinates & id) + { + const int16x8x2_t texels_qs16 = + { + { + vld1q_s16(reinterpret_cast(input.ptr())), + vld1q_s16(reinterpret_cast(input.ptr()) + 8) + } + }; + + vst1q_f32(reinterpret_cast(output.ptr()), vcvt_f32_qs16(vget_low_s16(texels_qs16.val[0]), _fixed_point_position_input)); + vst1q_f32(reinterpret_cast(output.ptr()) + 4, vcvt_f32_qs16(vget_high_s16(texels_qs16.val[0]), _fixed_point_position_input)); + vst1q_f32(reinterpret_cast(output.ptr()) + 8, vcvt_f32_qs16(vget_low_s16(texels_qs16.val[1]), _fixed_point_position_input)); + vst1q_f32(reinterpret_cast(output.ptr()) + 12, vcvt_f32_qs16(vget_high_s16(texels_qs16.val[1]), _fixed_point_position_input)); + }, + input, output); + break; + } + default: + ARM_COMPUTE_ERROR("Output data type not supported"); + } + break; + } + case DataType::F32: + { + switch(_output->info()->data_type()) + { + case DataType::QS8: + { + /* Down-conversion F32 -> QS8 */ + execute_window_loop(window, [&](const Coordinates & id) + { + const float32x4x4_t texels_f32 = + { + { + vld1q_f32(reinterpret_cast(input.ptr())), + vld1q_f32(reinterpret_cast(input.ptr()) + 4), + vld1q_f32(reinterpret_cast(input.ptr()) + 8), + vld1q_f32(reinterpret_cast(input.ptr()) + 12) + } + }; + + const qint8x16_t texels_s8 = vqcvtq_qs8_f32(texels_f32, _fixed_point_position_output); + + vst1q_s8(reinterpret_cast(output.ptr()), texels_s8); + }, + input, output); + break; + } + case DataType::QS16: + { + /* Down-conversion F32 -> QS16 */ + execute_window_loop(window, [&](const Coordinates & id) + { + const float32x4x2_t texels_f32_1 = + { + { + vld1q_f32(reinterpret_cast(input.ptr())), + vld1q_f32(reinterpret_cast(input.ptr()) + 4), + } + }; + const float32x4x2_t texels_f32_2 = + { + { + vld1q_f32(reinterpret_cast(input.ptr()) + 8), + vld1q_f32(reinterpret_cast(input.ptr()) + 12) + } + }; + + vst1q_s16(reinterpret_cast(output.ptr()), vqcvtq_qs16_f32(texels_f32_1, _fixed_point_position_output)); + vst1q_s16(reinterpret_cast(output.ptr()) + 8, vqcvtq_qs16_f32(texels_f32_2, _fixed_point_position_output)); + }, + input, output); + break; + } + default: + ARM_COMPUTE_ERROR("Output data type not supported"); + } + break; + } + default: + ARM_COMPUTE_ERROR("Not supported"); + } +} diff --git a/src/core/NEON/kernels/NEDepthwiseConvolution3x3Kernel.cpp b/src/core/NEON/kernels/NEDepthwiseConvolution3x3Kernel.cpp deleted file mode 100644 index 5c4bd34e05..0000000000 --- a/src/core/NEON/kernels/NEDepthwiseConvolution3x3Kernel.cpp +++ /dev/null @@ -1,186 +0,0 @@ -/* - * Copyright (c) 2017 ARM Limited. 
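The QS8/QS16 paths above are fixed-point: a raw value q with fixed-point position fp represents q / 2^fp. A scalar sketch of the two fixed-point operations the kernel uses (illustrative names, not library API):

#include <algorithm>
#include <cstdint>

// QS8 -> F32 (vcvt_f32_qs8 in the kernel): divide by 2^fp.
inline float qs8_to_f32(int8_t q, int fp)
{
    return static_cast<float>(q) / static_cast<float>(1 << fp);
}

// QS8 -> QS8 at a new fixed-point position (vqrshlq_s8 by fp_out - fp_in):
// rounding shift with saturation to the int8 range.
inline int8_t qs8_requantize(int8_t q, int fp_in, int fp_out)
{
    const int shift = fp_out - fp_in;
    int32_t   v     = q;
    if(shift >= 0)
    {
        v <<= shift;
    }
    else
    {
        v = (v + (1 << (-shift - 1))) >> -shift; // round to nearest, as VRSHL does
    }
    return static_cast<int8_t>(std::min(std::max(v, -128), 127)); // saturate
}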
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/core/NEON/kernels/NEDepthwiseConvolution3x3Kernel.h" -#include "arm_compute/core/NEON/kernels/convolution/NEDirectConvolutionDetail.h" - -#include "arm_compute/core/AccessWindowStatic.h" -#include "arm_compute/core/AccessWindowTranspose.h" -#include "arm_compute/core/Coordinates.h" -#include "arm_compute/core/Error.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/ITensor.h" -#include "arm_compute/core/NEON/INEKernel.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/TensorShape.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/core/Validate.h" -#include "arm_compute/core/Window.h" - -using namespace arm_compute; -using namespace arm_compute::detail; - -NEDepthwiseConvolution3x3Kernel::NEDepthwiseConvolution3x3Kernel() - : _border_size(0), _input(), _output(), _weights(), _conv_info() -{ -} - -BorderSize NEDepthwiseConvolution3x3Kernel::border_size() const -{ - return _border_size; -} - -void NEDepthwiseConvolution3x3Kernel::configure(const ITensor *input, const ITensor *weights, ITensor *output, const PadStrideInfo &conv_info) -{ - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32); - ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output, weights); - ARM_COMPUTE_ERROR_ON(weights->info()->dimension(0) != 3 || weights->info()->dimension(1) != 3); - - std::pair expected_output = scaled_dimensions(input->info()->tensor_shape().x(), input->info()->tensor_shape().y(), - weights->info()->tensor_shape().x(), weights->info()->tensor_shape().y(), - conv_info); - - ARM_COMPUTE_UNUSED(expected_output); - ARM_COMPUTE_ERROR_ON(expected_output.first != output->info()->tensor_shape().x()); - ARM_COMPUTE_ERROR_ON(expected_output.second != output->info()->tensor_shape().y()); - - _input = input; - _output = output; - _weights = weights; - _conv_info = conv_info; - const unsigned int conv_stride_x = conv_info.stride().first; - const unsigned int conv_pad_x = conv_info.pad().first; - const unsigned int conv_pad_y = conv_info.pad().second; - - ARM_COMPUTE_ERROR_ON(conv_stride_x < 1 || conv_stride_x > 3); - - const unsigned int num_elems_written_per_iteration = 16 >> conv_stride_x; - _border_size = BorderSize(conv_pad_y, conv_pad_x); - - // Configure kernel window - Window win = calculate_max_window(*output->info(), Steps(num_elems_written_per_iteration)); - - AccessWindowStatic 
input_access(input->info(), -conv_pad_x, -conv_pad_y, input->info()->dimension(0) + _border_size.right, input->info()->dimension(1) + _border_size.bottom); - AccessWindowStatic weights_access(weights->info(), 0, 0, weights->info()->dimension(0), weights->info()->dimension(1)); - AccessWindowHorizontal output_access(output->info(), 0, num_elems_written_per_iteration); - - update_window_and_padding(win, input_access, weights_access, output_access); - output_access.set_valid_region(win, ValidRegion(Coordinates(), output->info()->tensor_shape())); - - INEKernel::configure(win); -} - -template -class convolver_3x3 -{ -public: - static void convolve(const Window &window, unsigned int num_elems_written_per_iteration, - const ITensor *input, const ITensor *weights, ITensor *output, const PadStrideInfo &conv_info) - { - const int input_stride_x = input->info()->strides_in_bytes().x(); - const int input_stride_y = input->info()->strides_in_bytes().y(); - const int output_stride_y = output->info()->strides_in_bytes().y(); - const int kernel_stride_y = weights->info()->strides_in_bytes().y(); - const int kernel_stride_z = weights->info()->strides_in_bytes().z(); - const int output_w = output->info()->dimension(0); - const int output_h = output->info()->dimension(1); - const int delta_input = get_input_num_elems_processed(num_elems_written_per_iteration); - const unsigned int conv_stride_y = std::get<1>(conv_info.stride()); - const unsigned int conv_pad_x = std::get<0>(conv_info.pad()); - const unsigned int conv_pad_y = std::get<1>(conv_info.pad()); - - // setup output window for the iterator - Window window_out = window; - window_out.set(Window::DimX, Window::Dimension(0, output->info()->dimension(Window::DimX), output->info()->dimension(Window::DimX))); - window_out.set(Window::DimY, Window::Dimension(0, output->info()->dimension(Window::DimY), output->info()->dimension(Window::DimY))); - - // setup input window for the iterator - Window window_in = window; - // we just want execute_window_loop to iterate over the dimensions > 2, so we set the first 2 dimensions to 0 - window_in.set(Window::DimX, Window::Dimension(0, 0, 0)); - window_in.set(Window::DimY, Window::Dimension(0, 0, 0)); - - Window window_k = calculate_max_window(*weights->info(), Steps(1u)); - - Iterator in(input, window_in); - Iterator out(output, window_out); - Iterator w(weights, window_k); - - const uint8_t *weights_ptr = w.ptr(); - - execute_window_loop(window_out, [&](const Coordinates & id) - { - const uint8_t *input_ptr = in.ptr() - conv_pad_x * input_stride_x - conv_pad_y * input_stride_y; - int ih = 0; - int oh = 0; - - const uint8_t *ptr_weights_base = weights_ptr + id.z() * kernel_stride_z; - const auto ptr_weights_r0 = reinterpret_cast(ptr_weights_base); - const auto ptr_weights_r1 = reinterpret_cast(ptr_weights_base + kernel_stride_y); - const auto ptr_weights_r2 = reinterpret_cast(ptr_weights_base + kernel_stride_y * 2); - const auto vw_r0 = load_matrix_row(ptr_weights_r0); - const auto vw_r1 = load_matrix_row(ptr_weights_r1); - const auto vw_r2 = load_matrix_row(ptr_weights_r2); - - for(ih = 0, oh = 0; oh < output_h; ++oh, ih += conv_stride_y) - { - auto in_top = reinterpret_cast(input_ptr + (ih + 0) * input_stride_y); - auto in_mid = reinterpret_cast(input_ptr + (ih + 1) * input_stride_y); - auto in_low = reinterpret_cast(input_ptr + (ih + 2) * input_stride_y); - auto p_out = reinterpret_cast(out.ptr() + oh * output_stride_y); - - for(int ow = 0; ow < output_w; ow += num_elems_written_per_iteration, - in_top += 
delta_input, in_mid += delta_input, in_low += delta_input, p_out += num_elems_written_per_iteration) - { - auto vres = convolve_3x3(in_top, in_mid, in_low, vw_r0, vw_r1, vw_r2, 0); - store_results(p_out, vres); - } - } - }, - in, out); - } -}; - -void NEDepthwiseConvolution3x3Kernel::run(const Window &window, const ThreadInfo &info) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_UNUSED(info); - - const unsigned int conv_stride_x = _conv_info.stride().first; - const unsigned int num_elems_written_per_iteration = 16 >> conv_stride_x; - - switch(conv_stride_x) - { - case 1: - convolver_3x3<1>::convolve(window, num_elems_written_per_iteration, _input, _weights, _output, _conv_info); - break; - case 2: - convolver_3x3<2>::convolve(window, num_elems_written_per_iteration, _input, _weights, _output, _conv_info); - break; - case 3: - convolver_3x3<3>::convolve(window, num_elems_written_per_iteration, _input, _weights, _output, _conv_info); - break; - default: - ARM_COMPUTE_ERROR("Not implemented"); - } -} diff --git a/src/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.cpp b/src/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.cpp new file mode 100644 index 0000000000..02962e0492 --- /dev/null +++ b/src/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.cpp @@ -0,0 +1,186 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
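As a reviewing aid for the kernel deleted above and re-added below: per channel it computes a 3x3 correlation over a strided window. The scalar equivalent of one output element (no border handling; names are illustrative):

// out(x, y) = sum over (i, j) in [0, 3) of in(x*stride + i, y*stride + j) * w(j, i),
// computed independently for every channel (depthwise).
inline float depthwise_3x3_at(const float *in, int in_w, const float w[3][3],
                              int x, int y, int stride)
{
    float acc = 0.f;
    for(int j = 0; j < 3; ++j) // the kernel's in_top / in_mid / in_low rows
    {
        for(int i = 0; i < 3; ++i)
        {
            acc += in[(y * stride + j) * in_w + (x * stride + i)] * w[j][i];
        }
    }
    return acc;
}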
+ */ +#include "arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.h" +#include "arm_compute/core/NEON/kernels/convolution/NEDirectConvolutionDetail.h" + +#include "arm_compute/core/AccessWindowStatic.h" +#include "arm_compute/core/AccessWindowTranspose.h" +#include "arm_compute/core/Coordinates.h" +#include "arm_compute/core/Error.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/ITensor.h" +#include "arm_compute/core/NEON/INEKernel.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/TensorShape.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/core/Validate.h" +#include "arm_compute/core/Window.h" + +using namespace arm_compute; +using namespace arm_compute::detail; + +NEDepthwiseConvolutionLayer3x3Kernel::NEDepthwiseConvolutionLayer3x3Kernel() + : _border_size(0), _input(), _output(), _weights(), _conv_info() +{ +} + +BorderSize NEDepthwiseConvolutionLayer3x3Kernel::border_size() const +{ + return _border_size; +} + +void NEDepthwiseConvolutionLayer3x3Kernel::configure(const ITensor *input, const ITensor *weights, ITensor *output, const PadStrideInfo &conv_info) +{ + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32); + ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output, weights); + ARM_COMPUTE_ERROR_ON(weights->info()->dimension(0) != 3 || weights->info()->dimension(1) != 3); + + std::pair expected_output = scaled_dimensions(input->info()->tensor_shape().x(), input->info()->tensor_shape().y(), + weights->info()->tensor_shape().x(), weights->info()->tensor_shape().y(), + conv_info); + + ARM_COMPUTE_UNUSED(expected_output); + ARM_COMPUTE_ERROR_ON(expected_output.first != output->info()->tensor_shape().x()); + ARM_COMPUTE_ERROR_ON(expected_output.second != output->info()->tensor_shape().y()); + + _input = input; + _output = output; + _weights = weights; + _conv_info = conv_info; + const unsigned int conv_stride_x = conv_info.stride().first; + const unsigned int conv_pad_x = conv_info.pad().first; + const unsigned int conv_pad_y = conv_info.pad().second; + + ARM_COMPUTE_ERROR_ON(conv_stride_x < 1 || conv_stride_x > 3); + + const unsigned int num_elems_written_per_iteration = 16 >> conv_stride_x; + _border_size = BorderSize(conv_pad_y, conv_pad_x); + + // Configure kernel window + Window win = calculate_max_window(*output->info(), Steps(num_elems_written_per_iteration)); + + AccessWindowStatic input_access(input->info(), -conv_pad_x, -conv_pad_y, input->info()->dimension(0) + _border_size.right, input->info()->dimension(1) + _border_size.bottom); + AccessWindowStatic weights_access(weights->info(), 0, 0, weights->info()->dimension(0), weights->info()->dimension(1)); + AccessWindowHorizontal output_access(output->info(), 0, num_elems_written_per_iteration); + + update_window_and_padding(win, input_access, weights_access, output_access); + output_access.set_valid_region(win, ValidRegion(Coordinates(), output->info()->tensor_shape())); + + INEKernel::configure(win); +} + +template +class convolver_3x3 +{ +public: + static void convolve(const Window &window, unsigned int num_elems_written_per_iteration, + const ITensor *input, const ITensor *weights, ITensor *output, const PadStrideInfo &conv_info) + { + const int input_stride_x = input->info()->strides_in_bytes().x(); + const int input_stride_y = input->info()->strides_in_bytes().y(); + const int output_stride_y = output->info()->strides_in_bytes().y(); + const int kernel_stride_y = weights->info()->strides_in_bytes().y(); + const int 
kernel_stride_z = weights->info()->strides_in_bytes().z(); + const int output_w = output->info()->dimension(0); + const int output_h = output->info()->dimension(1); + const int delta_input = get_input_num_elems_processed(num_elems_written_per_iteration); + const unsigned int conv_stride_y = std::get<1>(conv_info.stride()); + const unsigned int conv_pad_x = std::get<0>(conv_info.pad()); + const unsigned int conv_pad_y = std::get<1>(conv_info.pad()); + + // setup output window for the iterator + Window window_out = window; + window_out.set(Window::DimX, Window::Dimension(0, output->info()->dimension(Window::DimX), output->info()->dimension(Window::DimX))); + window_out.set(Window::DimY, Window::Dimension(0, output->info()->dimension(Window::DimY), output->info()->dimension(Window::DimY))); + + // setup input window for the iterator + Window window_in = window; + // we just want execute_window_loop to iterate over the dimensions > 2, so we set the first 2 dimensions to 0 + window_in.set(Window::DimX, Window::Dimension(0, 0, 0)); + window_in.set(Window::DimY, Window::Dimension(0, 0, 0)); + + Window window_k = calculate_max_window(*weights->info(), Steps(1u)); + + Iterator in(input, window_in); + Iterator out(output, window_out); + Iterator w(weights, window_k); + + const uint8_t *weights_ptr = w.ptr(); + + execute_window_loop(window_out, [&](const Coordinates & id) + { + const uint8_t *input_ptr = in.ptr() - conv_pad_x * input_stride_x - conv_pad_y * input_stride_y; + int ih = 0; + int oh = 0; + + const uint8_t *ptr_weights_base = weights_ptr + id.z() * kernel_stride_z; + const auto ptr_weights_r0 = reinterpret_cast(ptr_weights_base); + const auto ptr_weights_r1 = reinterpret_cast(ptr_weights_base + kernel_stride_y); + const auto ptr_weights_r2 = reinterpret_cast(ptr_weights_base + kernel_stride_y * 2); + const auto vw_r0 = load_matrix_row(ptr_weights_r0); + const auto vw_r1 = load_matrix_row(ptr_weights_r1); + const auto vw_r2 = load_matrix_row(ptr_weights_r2); + + for(ih = 0, oh = 0; oh < output_h; ++oh, ih += conv_stride_y) + { + auto in_top = reinterpret_cast(input_ptr + (ih + 0) * input_stride_y); + auto in_mid = reinterpret_cast(input_ptr + (ih + 1) * input_stride_y); + auto in_low = reinterpret_cast(input_ptr + (ih + 2) * input_stride_y); + auto p_out = reinterpret_cast(out.ptr() + oh * output_stride_y); + + for(int ow = 0; ow < output_w; ow += num_elems_written_per_iteration, + in_top += delta_input, in_mid += delta_input, in_low += delta_input, p_out += num_elems_written_per_iteration) + { + auto vres = convolve_3x3(in_top, in_mid, in_low, vw_r0, vw_r1, vw_r2, 0); + store_results(p_out, vres); + } + } + }, + in, out); + } +}; + +void NEDepthwiseConvolutionLayer3x3Kernel::run(const Window &window, const ThreadInfo &info) +{ + ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); + ARM_COMPUTE_UNUSED(info); + + const unsigned int conv_stride_x = _conv_info.stride().first; + const unsigned int num_elems_written_per_iteration = 16 >> conv_stride_x; + + switch(conv_stride_x) + { + case 1: + convolver_3x3<1>::convolve(window, num_elems_written_per_iteration, _input, _weights, _output, _conv_info); + break; + case 2: + convolver_3x3<2>::convolve(window, num_elems_written_per_iteration, _input, _weights, _output, _conv_info); + break; + case 3: + convolver_3x3<3>::convolve(window, num_elems_written_per_iteration, _input, _weights, _output, _conv_info); + break; + default: + ARM_COMPUTE_ERROR("Not implemented"); + } +} diff --git a/src/core/NEON/kernels/NEL2NormalizeKernel.cpp 
b/src/core/NEON/kernels/NEL2NormalizeKernel.cpp
deleted file mode 100644
index 12c532afd5..0000000000
--- a/src/core/NEON/kernels/NEL2NormalizeKernel.cpp
+++ /dev/null
@@ -1,126 +0,0 @@
-/*
- * Copyright (c) 2017 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/core/NEON/kernels/NEL2NormalizeKernel.h"
-
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/ITensor.h"
-#include "arm_compute/core/NEON/NEMath.h"
-#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Utils.h"
-#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/Window.h"
-
-#include <arm_neon.h>
-#include <cmath>
-
-using namespace arm_compute;
-
-namespace
-{
-void l2_normalize_X(const ITensor *in, const ITensor *sum, ITensor *out, float epsilon, const Window &window)
-{
-    Window window_sum(window);
-    window_sum.set(Window::DimX, Window::Dimension(0, 0, 0));
-
-    Window in_slice  = window.first_slice_window_1D();
-    Window sum_slice = window_sum.first_slice_window_1D();
-
-    do
-    {
-        Iterator input_it(in, in_slice);
-        Iterator sum_it(sum, sum_slice);
-        Iterator output_it(out, in_slice);
-
-        const float       sum_value           = *reinterpret_cast<const float *>(sum_it.ptr());
-        const float32x4_t vec_normalize_value = vdupq_n_f32(1.f / std::sqrt(std::max(sum_value, epsilon)));
-
-        execute_window_loop(in_slice, [&](const Coordinates & id)
-        {
-            const auto in_ptr  = reinterpret_cast<const float *>(input_it.ptr());
-            const auto out_ptr = reinterpret_cast<float *>(output_it.ptr());
-
-            vst1q_f32(out_ptr, vmulq_f32(vld1q_f32(in_ptr), vec_normalize_value));
-        },
-        input_it, output_it);
-    }
-    while(window.slide_window_slice_1D(in_slice) && window.slide_window_slice_1D(sum_slice));
-}
-} // namespace
-
-NEL2NormalizeKernel::NEL2NormalizeKernel()
-    : _input(nullptr), _sum(nullptr), _output(nullptr), _axis(0), _epsilon(1e-12)
-{
-}
-
-void NEL2NormalizeKernel::configure(const ITensor *input, const ITensor *sum, ITensor *output, unsigned int axis, float epsilon)
-{
-    ARM_COMPUTE_ERROR_ON_NULLPTR(input, sum, output);
-    ARM_COMPUTE_ERROR_ON_MSG(axis >= TensorShape::num_max_dimensions, "Normalization axis greater than max number of dimensions");
-    ARM_COMPUTE_ERROR_ON_MSG(axis > 0, "Unsupported normalization axis, Supported axis is 0");
-
-    // Output auto initialization if not yet initialized
-    auto_init_if_empty(*output->info(), input->info()->tensor_shape(), 1, input->info()->data_type(), input->info()->fixed_point_position());
-
-    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32);
-    ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output, sum);
-    ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(input, output);
-
-    unsigned int num_elems_processed_per_iteration     = 16 / data_size_from_type(input->info()->data_type());
-    unsigned int num_elems_processed_per_iteration_sum = (axis == 0) ? 1 : num_elems_processed_per_iteration;
-
-    _input   = input;
-    _sum     = sum;
-    _output  = output;
-    _axis    = axis;
-    _epsilon = epsilon;
-
-    // Configure kernel window
-    Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration));
-    AccessWindowHorizontal input_access(input->info(), 0, num_elems_processed_per_iteration);
-    AccessWindowHorizontal sum_access(sum->info(), 0, num_elems_processed_per_iteration_sum);
-    AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration);
-
-    update_window_and_padding(win, input_access, sum_access, output_access);
-
-    output_access.set_valid_region(win, input->info()->valid_region());
-
-    INEKernel::configure(win);
-}
-
-void NEL2NormalizeKernel::run(const Window &window, const ThreadInfo &info)
-{
-    ARM_COMPUTE_UNUSED(info);
-    ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
-    ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);
-
-    switch(_axis)
-    {
-        case 0:
-            l2_normalize_X(_input, _sum, _output, _epsilon, window);
-            break;
-        default:
-            ARM_COMPUTE_ERROR("Unsupported normalization axis");
-    }
-}
diff --git a/src/core/NEON/kernels/NEL2NormalizeLayerKernel.cpp b/src/core/NEON/kernels/NEL2NormalizeLayerKernel.cpp
new file mode 100644
index 0000000000..3bf1d9400e
--- /dev/null
+++ b/src/core/NEON/kernels/NEL2NormalizeLayerKernel.cpp
@@ -0,0 +1,126 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/NEON/kernels/NEL2NormalizeLayerKernel.h"
+
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/ITensor.h"
+#include "arm_compute/core/NEON/NEMath.h"
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/core/Utils.h"
+#include "arm_compute/core/Validate.h"
+#include "arm_compute/core/Window.h"
+
+#include <arm_neon.h>
+#include <cmath>
+
+using namespace arm_compute;
+
+namespace
+{
+void l2_normalize_X(const ITensor *in, const ITensor *sum, ITensor *out, float epsilon, const Window &window)
+{
+    Window window_sum(window);
+    window_sum.set(Window::DimX, Window::Dimension(0, 0, 0));
+
+    Window in_slice  = window.first_slice_window_1D();
+    Window sum_slice = window_sum.first_slice_window_1D();
+
+    do
+    {
+        Iterator input_it(in, in_slice);
+        Iterator sum_it(sum, sum_slice);
+        Iterator output_it(out, in_slice);
+
+        const float       sum_value           = *reinterpret_cast<const float *>(sum_it.ptr());
+        const float32x4_t vec_normalize_value = vdupq_n_f32(1.f / std::sqrt(std::max(sum_value, epsilon)));
+
+        execute_window_loop(in_slice, [&](const Coordinates & id)
+        {
+            const auto in_ptr  = reinterpret_cast<const float *>(input_it.ptr());
+            const auto out_ptr = reinterpret_cast<float *>(output_it.ptr());
+
+            vst1q_f32(out_ptr, vmulq_f32(vld1q_f32(in_ptr), vec_normalize_value));
+        },
+        input_it, output_it);
+    }
+    while(window.slide_window_slice_1D(in_slice) && window.slide_window_slice_1D(sum_slice));
+}
+} // namespace
+
+NEL2NormalizeLayerKernel::NEL2NormalizeLayerKernel()
+    : _input(nullptr), _sum(nullptr), _output(nullptr), _axis(0), _epsilon(1e-12)
+{
+}
+
+void NEL2NormalizeLayerKernel::configure(const ITensor *input, const ITensor *sum, ITensor *output, unsigned int axis, float epsilon)
+{
+    ARM_COMPUTE_ERROR_ON_NULLPTR(input, sum, output);
+    ARM_COMPUTE_ERROR_ON_MSG(axis >= TensorShape::num_max_dimensions, "Normalization axis greater than max number of dimensions");
+    ARM_COMPUTE_ERROR_ON_MSG(axis > 0, "Unsupported normalization axis, Supported axis is 0");
+
+    // Output auto initialization if not yet initialized
+    auto_init_if_empty(*output->info(), input->info()->tensor_shape(), 1, input->info()->data_type(), input->info()->fixed_point_position());
+
+    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32);
+    ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output, sum);
+    ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(input, output);
+
+    unsigned int num_elems_processed_per_iteration     = 16 / data_size_from_type(input->info()->data_type());
+    unsigned int num_elems_processed_per_iteration_sum = (axis == 0) ? 1 : num_elems_processed_per_iteration;
+
+    _input   = input;
+    _sum     = sum;
+    _output  = output;
+    _axis    = axis;
+    _epsilon = epsilon;
+
+    // Configure kernel window
+    Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration));
+    AccessWindowHorizontal input_access(input->info(), 0, num_elems_processed_per_iteration);
+    AccessWindowHorizontal sum_access(sum->info(), 0, num_elems_processed_per_iteration_sum);
+    AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration);
+
+    update_window_and_padding(win, input_access, sum_access, output_access);
+
+    output_access.set_valid_region(win, input->info()->valid_region());
+
+    INEKernel::configure(win);
+}
+
+void NEL2NormalizeLayerKernel::run(const Window &window, const ThreadInfo &info)
+{
+    ARM_COMPUTE_UNUSED(info);
+    ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
+    ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);
+
+    switch(_axis)
+    {
+        case 0:
+            l2_normalize_X(_input, _sum, _output, _epsilon, window);
+            break;
+        default:
+            ARM_COMPUTE_ERROR("Unsupported normalization axis");
+    }
+}
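
For readers tracing the rename: the kernel body is unchanged, and what l2_normalize_X computes per 1-D slice is simply out[i] = in[i] / sqrt(max(sum_sq, epsilon)), four lanes at a time. A scalar reference, for illustration only:

#include <algorithm>
#include <cmath>
#include <cstddef>

// Scalar equivalent of the vectorized loop above; "sum_sq" is the
// precomputed sum of squares for the slice being normalized.
void l2_normalize_x_reference(const float *in, float *out, std::size_t len, float sum_sq, float epsilon)
{
    const float norm_factor = 1.f / std::sqrt(std::max(sum_sq, epsilon));
    for(std::size_t i = 0; i < len; ++i)
    {
        out[i] = in[i] * norm_factor;
    }
}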
diff --git a/src/graph/operations/CLSimpleOperations.cpp b/src/graph/operations/CLSimpleOperations.cpp
index 647f88f0e2..8f2bf23ce3 100644
--- a/src/graph/operations/CLSimpleOperations.cpp
+++ b/src/graph/operations/CLSimpleOperations.cpp
@@ -106,7 +106,7 @@ REGISTER_SIMPLE_OPERATION(CLBatchNormalizationLayerOperation, OPENCL, OperationT
     return std::move(batch_norm);
 }
 
-/* DepthConvert Layer */
+/* DepthConvertLayer Layer */
 REGISTER_SIMPLE_OPERATION(CLDepthConvertLayerOperation, OPENCL, OperationType::DepthConvertLayer)
 {
     ARM_COMPUTE_ERROR_ON(ctx.num_inputs() != 1);
@@ -121,7 +121,7 @@ REGISTER_SIMPLE_OPERATION(CLDepthConvertLayerOperation, OPENCL, OperationType::D
     const auto shift = ctx.parameter<uint32_t>("shift");
 
     // Create and configure function
-    auto depthconvert = arm_compute::support::cpp14::make_unique<arm_compute::CLDepthConvert>();
+    auto depthconvert = arm_compute::support::cpp14::make_unique<arm_compute::CLDepthConvertLayer>();
     depthconvert->configure(in, out, conv_policy, shift);
 
     // Log info
@@ -156,13 +156,13 @@ REGISTER_SIMPLE_OPERATION(CLDepthwiseConvolutionOperation, OPENCL, OperationType
     bool run_3x3_opt = opt3x3 && weights->info()->dimension(0) == 3;
     if(run_3x3_opt)
     {
-        auto depwthwise_conv = arm_compute::support::cpp14::make_unique<arm_compute::CLDepthwiseConvolution3x3>();
+        auto depwthwise_conv = arm_compute::support::cpp14::make_unique<arm_compute::CLDepthwiseConvolutionLayer3x3>();
         depwthwise_conv->configure(in, weights, biases, out, conv_info);
         func = std::move(depwthwise_conv);
     }
     else
     {
-        auto depwthwise_conv = arm_compute::support::cpp14::make_unique<arm_compute::CLDepthwiseConvolution>();
+        auto depwthwise_conv = arm_compute::support::cpp14::make_unique<arm_compute::CLDepthwiseConvolutionLayer>();
         depwthwise_conv->configure(in, weights, biases, out, conv_info);
         func = std::move(depwthwise_conv);
     }
@@ -313,7 +313,7 @@ REGISTER_SIMPLE_OPERATION(CLL2NormalizeLayerOperation, OPENCL, OperationType::L2
     const auto epsilon = ctx.parameter<float>("epsilon");
 
     // Create and configure function
-    auto l2_norm = arm_compute::support::cpp14::make_unique<arm_compute::CLL2Normalize>();
+    auto l2_norm = arm_compute::support::cpp14::make_unique<arm_compute::CLL2NormalizeLayer>();
     l2_norm->configure(in, out, axis, epsilon);
 
     // Log info
diff --git a/src/graph/operations/NESimpleOperations.cpp b/src/graph/operations/NESimpleOperations.cpp
index f234341cec..bb99e8da4b 100644
--- a/src/graph/operations/NESimpleOperations.cpp
+++ b/src/graph/operations/NESimpleOperations.cpp
@@ -106,7 +106,7 @@ REGISTER_SIMPLE_OPERATION(NEBatchNormalizationLayerOperation, NEON, OperationTyp
     return std::move(batch_norm);
 }
 
-/* DepthConvert Layer */
+/* DepthConvertLayer Layer */
 REGISTER_SIMPLE_OPERATION(NEDepthConvertLayerOperation, NEON, OperationType::DepthConvertLayer)
 {
     ARM_COMPUTE_ERROR_ON(ctx.num_inputs() != 1);
@@ -121,7 +121,7 @@ REGISTER_SIMPLE_OPERATION(NEDepthConvertLayerOperation, NEON, OperationType::Dep
     const auto shift = ctx.parameter<uint32_t>("shift");
 
     // Create and configure function
-    auto depthconvert = arm_compute::support::cpp14::make_unique<arm_compute::NEDepthConvert>();
+    auto depthconvert = arm_compute::support::cpp14::make_unique<arm_compute::NEDepthConvertLayer>();
     depthconvert->configure(in, out, conv_policy, shift);
 
     // Log info
@@ -156,13 +156,13 @@ REGISTER_SIMPLE_OPERATION(NEDepthwiseConvolutionOperation, NEON, OperationType::
     bool run_3x3_opt = opt3x3 && weights->info()->dimension(0) == 3;
     if(run_3x3_opt)
     {
-        auto depwthwise_conv = arm_compute::support::cpp14::make_unique<arm_compute::NEDepthwiseConvolution3x3>();
+        auto depwthwise_conv = arm_compute::support::cpp14::make_unique<arm_compute::NEDepthwiseConvolutionLayer3x3>();
         depwthwise_conv->configure(in, weights, biases, out, conv_info);
         func = std::move(depwthwise_conv);
    }
     else
     {
-        auto depwthwise_conv = arm_compute::support::cpp14::make_unique<arm_compute::NEDepthwiseConvolution>();
+        auto depwthwise_conv = arm_compute::support::cpp14::make_unique<arm_compute::NEDepthwiseConvolutionLayer>();
         depwthwise_conv->configure(in, weights, biases, out, conv_info);
         func = std::move(depwthwise_conv);
     }
@@ -313,7 +313,7 @@ REGISTER_SIMPLE_OPERATION(NEL2NormalizeLayerOperation, NEON, OperationType::L2No
     const auto epsilon = ctx.parameter<float>("epsilon");
 
     // Create and configure function
-    auto l2_norm = arm_compute::support::cpp14::make_unique<arm_compute::NEL2Normalize>();
+    auto l2_norm = arm_compute::support::cpp14::make_unique<arm_compute::NEL2NormalizeLayer>();
     l2_norm->configure(in, out, axis, epsilon);
 
     // Log info
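
Outside the graph layer, call sites pick up the same renames with unchanged configure() signatures. A hedged usage sketch; the NEON signature is assumed to mirror the CL one shown later in this patch, and tensor initialization and allocation follow the usual library setup and are elided:

#include "arm_compute/runtime/NEON/functions/NEL2NormalizeLayer.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

// Configure and run L2 normalization along axis 0 under the new name.
void l2_normalize_example(Tensor &input, Tensor &output)
{
    NEL2NormalizeLayer l2_norm;
    l2_norm.configure(&input, &output, 0 /* axis */, 1e-12f /* epsilon */);
    l2_norm.run();
}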
diff --git a/src/runtime/CL/functions/CLDepthConcatenate.cpp b/src/runtime/CL/functions/CLDepthConcatenate.cpp
deleted file mode 100644
index 89e44ca98e..0000000000
--- a/src/runtime/CL/functions/CLDepthConcatenate.cpp
+++ /dev/null
@@ -1,78 +0,0 @@
-/*
- * Copyright (c) 2017 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/runtime/CL/functions/CLDepthConcatenate.h"
-
-#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/PixelValue.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/CL/CLScheduler.h"
-#include "support/ToolchainSupport.h"
-
-using namespace arm_compute;
-
-CLDepthConcatenate::CLDepthConcatenate() // NOLINT
-    : _inputs_vector(),
-      _concat_kernels_vector(),
-      _border_handlers_vector(),
-      _num_inputs(0)
-{
-}
-
-void CLDepthConcatenate::configure(std::vector<ICLTensor *> inputs_vector, ICLTensor *output) // NOLINT
-{
-    ARM_COMPUTE_ERROR_ON(inputs_vector.size() < 2);
-
-    _num_inputs = inputs_vector.size();
-
-    unsigned int depth_offset = 0;
-
-    _concat_kernels_vector  = arm_compute::support::cpp14::make_unique<CLDepthConcatenateKernel[]>(_num_inputs);
-    _border_handlers_vector = arm_compute::support::cpp14::make_unique<CLFillBorderKernel[]>(_num_inputs);
-
-    TensorShape output_shape = calculate_depth_concatenate_shape(inputs_vector);
-
-    // Output auto inizialitation if not yet initialized
-    auto_init_if_empty(*output->info(), output_shape, 1, inputs_vector[0]->info()->data_type(), inputs_vector[0]->info()->fixed_point_position());
-
-    for(unsigned int i = 0; i < _num_inputs; i++)
-    {
-        _concat_kernels_vector[i].configure(inputs_vector.at(i), depth_offset, output);
-        _border_handlers_vector[i].configure(inputs_vector.at(i), _concat_kernels_vector[i].border_size(), BorderMode::CONSTANT, PixelValue(0));
-
-        depth_offset += inputs_vector.at(i)->info()->dimension(2);
-    }
-}
-
-void CLDepthConcatenate::run()
-{
-    cl::CommandQueue q = CLScheduler::get().queue();
-
-    for(unsigned i = 0; i < _num_inputs; i++)
-    {
-        CLScheduler::get().enqueue(_border_handlers_vector[i], false);
-        CLScheduler::get().enqueue(_concat_kernels_vector[i], true);
-    }
-}
diff --git a/src/runtime/CL/functions/CLDepthConcatenateLayer.cpp b/src/runtime/CL/functions/CLDepthConcatenateLayer.cpp
new file mode 100644
index 0000000000..05b5d54cf7
--- /dev/null
+++ b/src/runtime/CL/functions/CLDepthConcatenateLayer.cpp
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/CL/functions/CLDepthConcatenateLayer.h"
+
+#include "arm_compute/core/CL/ICLTensor.h"
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/PixelValue.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "support/ToolchainSupport.h"
+
+using namespace arm_compute;
+
+CLDepthConcatenateLayer::CLDepthConcatenateLayer() // NOLINT
+    : _inputs_vector(),
+      _concat_kernels_vector(),
+      _border_handlers_vector(),
+      _num_inputs(0)
+{
+}
+
+void CLDepthConcatenateLayer::configure(std::vector<ICLTensor *> inputs_vector, ICLTensor *output) // NOLINT
+{
+    ARM_COMPUTE_ERROR_ON(inputs_vector.size() < 2);
+
+    _num_inputs = inputs_vector.size();
+
+    unsigned int depth_offset = 0;
+
+    _concat_kernels_vector  = arm_compute::support::cpp14::make_unique<CLDepthConcatenateLayerKernel[]>(_num_inputs);
+    _border_handlers_vector = arm_compute::support::cpp14::make_unique<CLFillBorderKernel[]>(_num_inputs);
+
+    TensorShape output_shape = calculate_depth_concatenate_shape(inputs_vector);
+
+    // Output auto inizialitation if not yet initialized
+    auto_init_if_empty(*output->info(), output_shape, 1, inputs_vector[0]->info()->data_type(), inputs_vector[0]->info()->fixed_point_position());
+
+    for(unsigned int i = 0; i < _num_inputs; i++)
+    {
+        _concat_kernels_vector[i].configure(inputs_vector.at(i), depth_offset, output);
+        _border_handlers_vector[i].configure(inputs_vector.at(i), _concat_kernels_vector[i].border_size(), BorderMode::CONSTANT, PixelValue(0));
+
+        depth_offset += inputs_vector.at(i)->info()->dimension(2);
+    }
+}
+
+void CLDepthConcatenateLayer::run()
+{
+    cl::CommandQueue q = CLScheduler::get().queue();
+
+    for(unsigned i = 0; i < _num_inputs; i++)
+    {
+        CLScheduler::get().enqueue(_border_handlers_vector[i], false);
+        CLScheduler::get().enqueue(_concat_kernels_vector[i], true);
+    }
+}
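
The configure() loop above is the whole shape story for depth concatenation: each input lands at a running depth_offset and the output depth is the sum of the inputs' dimension(2). A minimal, library-independent sketch of that bookkeeping:

#include <cstddef>
#include <vector>

// Mirror of the depth_offset accumulation in configure() above: returns the
// offset at which each input is written, and the total output depth.
std::vector<std::size_t> depth_offsets(const std::vector<std::size_t> &input_depths, std::size_t &total_depth)
{
    std::vector<std::size_t> offsets;
    std::size_t depth_offset = 0;
    for(const std::size_t d : input_depths)
    {
        offsets.push_back(depth_offset);
        depth_offset += d;
    }
    total_depth = depth_offset;
    return offsets;
}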
diff --git a/src/runtime/CL/functions/CLDepthConvert.cpp b/src/runtime/CL/functions/CLDepthConvert.cpp
deleted file mode 100644
index b64d05b8b1..0000000000
--- a/src/runtime/CL/functions/CLDepthConvert.cpp
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- * Copyright (c) 2016, 2017 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/runtime/CL/functions/CLDepthConvert.h"
-
-#include "arm_compute/core/CL/kernels/CLDepthConvertKernel.h"
-#include "support/ToolchainSupport.h"
-
-#include <utility>
-
-using namespace arm_compute;
-
-void CLDepthConvert::configure(const ICLTensor *input, ICLTensor *output, ConvertPolicy policy, uint32_t shift)
-{
-    auto k = arm_compute::support::cpp14::make_unique<CLDepthConvertKernel>();
-    k->configure(input, output, policy, shift);
-    _kernel = std::move(k);
-}
diff --git a/src/runtime/CL/functions/CLDepthConvertLayer.cpp b/src/runtime/CL/functions/CLDepthConvertLayer.cpp
new file mode 100644
index 0000000000..b448465909
--- /dev/null
+++ b/src/runtime/CL/functions/CLDepthConvertLayer.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/CL/functions/CLDepthConvertLayer.h"
+
+#include "arm_compute/core/CL/kernels/CLDepthConvertLayerKernel.h"
+#include "support/ToolchainSupport.h"
+
+#include <utility>
+
+using namespace arm_compute;
+
+void CLDepthConvertLayer::configure(const ICLTensor *input, ICLTensor *output, ConvertPolicy policy, uint32_t shift)
+{
+    auto k = arm_compute::support::cpp14::make_unique<CLDepthConvertLayerKernel>();
+    k->configure(input, output, policy, shift);
+    _kernel = std::move(k);
+}
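
CLDepthConvertLayer stays a thin simple-function wrapper: configure() only instantiates and configures the renamed kernel. A hedged usage sketch; tensors are assumed to be already initialized and allocated, and U8-to-U16 widening is one of the conversions this function handles:

#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/functions/CLDepthConvertLayer.h"

using namespace arm_compute;

// Widen unsigned 8-bit data to 16 bits with saturation and no shift.
void depth_convert_example(CLTensor &src_u8, CLTensor &dst_u16)
{
    CLDepthConvertLayer convert;
    convert.configure(&src_u8, &dst_u16, ConvertPolicy::SATURATE, 0 /* shift */);
    convert.run();
}

diff --git a/src/runtime/CL/functions/CLDepthwiseConvolution.cpp b/src/runtime/CL/functions/CLDepthwiseConvolution.cpp
deleted file mode 100644
index 81149508dd..0000000000
--- a/src/runtime/CL/functions/CLDepthwiseConvolution.cpp
+++ /dev/null
@@ -1,138 +0,0 @@
-/*
- * Copyright (c) 2017 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.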
IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/runtime/CL/functions/CLDepthwiseConvolution.h"
-
-#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/PixelValue.h"
-#include "arm_compute/runtime/CL/CLScheduler.h"
-#include "support/ToolchainSupport.h"
-
-using namespace arm_compute;
-
-CLDepthwiseConvolution3x3::CLDepthwiseConvolution3x3()
-    : _kernel(), _border_handler()
-{
-}
-
-void CLDepthwiseConvolution3x3::configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info)
-{
-    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F32);
-    ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights);
-
-    _kernel.set_target(CLScheduler::get().target());
-    _kernel.configure(input, weights, biases, output, conv_info);
-
-    // Configure border handler
-    PixelValue &&zero_value(0.f);
-    if(is_data_type_quantized_asymmetric(input->info()->data_type()))
-    {
-        zero_value = PixelValue(static_cast<uint8_t>(input->info()->quantization_info().offset));
-    }
-    _border_handler.configure(input, _kernel.border_size(), BorderMode::CONSTANT, zero_value);
-}
-
-void CLDepthwiseConvolution3x3::run()
-{
-    CLScheduler::get().enqueue(_border_handler);
-    CLScheduler::get().enqueue(_kernel);
-}
-
-CLDepthwiseConvolution::CLDepthwiseConvolution()
-    : _im2col_kernel(), _weights_reshape_kernel(), _v2mm_kernel(), _vector_to_tensor_kernel(), _v2mm_input_fill_border(), _v2mm_weights_fill_border(), _input_reshaped(), _weights_reshaped(),
-      _v2mm_output()
-{
-}
-
-void CLDepthwiseConvolution::configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info)
-{
-    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32);
-    ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights);
-    ARM_COMPUTE_ERROR_ON(input->info()->dimension(2) != weights->info()->dimension(2));
-
-    const size_t weights_w = weights->info()->dimension(0);
-    const size_t weights_h = weights->info()->dimension(1);
-    const size_t weights_z = weights->info()->dimension(2);
-
-    const bool      has_bias   = (biases != nullptr);
-    const GPUTarget gpu_target = CLScheduler::get().target();
-
-    unsigned int conv_w = 0;
-    unsigned int conv_h = 0;
-    std::tie(conv_w, conv_h) = scaled_dimensions(input->info()->dimension(0), input->info()->dimension(1), weights_w, weights_h, conv_info);
-
-    // Set up intermediate tensors
-    const size_t patch_size = weights_w * weights_h + ((has_bias) ? 1 : 0);
-    const size_t conv_size  = conv_w * conv_h;
-
-    // Im2Col configuration
-    TensorShape shape_im2col = input->info()->tensor_shape();
-    shape_im2col.set(0, patch_size);
-    shape_im2col.set(1, conv_size);
-    shape_im2col.set(2, weights_z);
-    const TensorInfo info_im2col(shape_im2col, 1, input->info()->data_type(), input->info()->fixed_point_position());
-    _input_reshaped.allocator()->init(info_im2col);
-    _im2col_kernel.set_target(gpu_target);
-    _im2col_kernel.configure(input, &_input_reshaped, Size2D(weights_w, weights_h), conv_info, has_bias);
-
-    // Weights reshape configuration
-    const TensorShape shape_weights_reshape(patch_size, weights_z);
-    const TensorInfo  info_weights_reshape(shape_weights_reshape, 1, weights->info()->data_type(), weights->info()->fixed_point_position());
-    _weights_reshaped.allocator()->init(info_weights_reshape);
-    _weights_reshape_kernel.configure(weights, &_weights_reshaped, biases);
-
-    // GEMV configuration
-    TensorShape shape_v2mm_out = input->info()->tensor_shape();
-    shape_v2mm_out.set(0, conv_size * weights_z);
-    shape_v2mm_out.set(1, 1);
-    shape_v2mm_out.set(2, 1);
-    const TensorInfo info_v2mm_out(shape_v2mm_out, 1, input->info()->data_type(), input->info()->fixed_point_position());
-    _v2mm_output.allocator()->init(info_v2mm_out);
-    _v2mm_kernel.set_target(gpu_target);
-    _v2mm_kernel.configure(&_input_reshaped, &_weights_reshaped, &_v2mm_output);
-    _vector_to_tensor_kernel.configure(&_v2mm_output, output, conv_w, conv_h);
-
-    BorderSize border_size = _v2mm_kernel.border_size();
-    _v2mm_input_fill_border.configure(&_input_reshaped, border_size, BorderMode::CONSTANT, PixelValue(0));
-
-    border_size.bottom = 0;
-    _v2mm_weights_fill_border.configure(&_weights_reshaped, border_size, BorderMode::CONSTANT, PixelValue(0));
-
-    // Allocate intermediate tensors
-    _input_reshaped.allocator()->allocate();
-    _weights_reshaped.allocator()->allocate();
-    _v2mm_output.allocator()->allocate();
-}
-
-void CLDepthwiseConvolution::run()
-{
-    CLScheduler::get().enqueue(_im2col_kernel);
-
-    CLScheduler::get().enqueue(_weights_reshape_kernel);
-
-    CLScheduler::get().enqueue(_v2mm_input_fill_border);
-    CLScheduler::get().enqueue(_v2mm_weights_fill_border);
-    CLScheduler::get().enqueue(_v2mm_kernel);
-
-    CLScheduler::get().enqueue(_vector_to_tensor_kernel);
-}
diff --git a/src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp b/src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp
new file mode 100644
index 0000000000..02273fe08b
--- /dev/null
+++ b/src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp
@@ -0,0 +1,138 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h"
+
+#include "arm_compute/core/CL/ICLTensor.h"
+#include "arm_compute/core/PixelValue.h"
+#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "support/ToolchainSupport.h"
+
+using namespace arm_compute;
+
+CLDepthwiseConvolutionLayer3x3::CLDepthwiseConvolutionLayer3x3()
+    : _kernel(), _border_handler()
+{
+}
+
+void CLDepthwiseConvolutionLayer3x3::configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info)
+{
+    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F32);
+    ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights);
+
+    _kernel.set_target(CLScheduler::get().target());
+    _kernel.configure(input, weights, biases, output, conv_info);
+
+    // Configure border handler
+    PixelValue &&zero_value(0.f);
+    if(is_data_type_quantized_asymmetric(input->info()->data_type()))
+    {
+        zero_value = PixelValue(static_cast<uint8_t>(input->info()->quantization_info().offset));
+    }
+    _border_handler.configure(input, _kernel.border_size(), BorderMode::CONSTANT, zero_value);
+}
+
+void CLDepthwiseConvolutionLayer3x3::run()
+{
+    CLScheduler::get().enqueue(_border_handler);
+    CLScheduler::get().enqueue(_kernel);
+}
+
+CLDepthwiseConvolutionLayer::CLDepthwiseConvolutionLayer()
+    : _im2col_kernel(), _weights_reshape_kernel(), _v2mm_kernel(), _vector_to_tensor_kernel(), _v2mm_input_fill_border(), _v2mm_weights_fill_border(), _input_reshaped(), _weights_reshaped(),
+      _v2mm_output()
+{
+}
+
+void CLDepthwiseConvolutionLayer::configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info)
+{
+    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32);
+    ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights);
+    ARM_COMPUTE_ERROR_ON(input->info()->dimension(2) != weights->info()->dimension(2));
+
+    const size_t weights_w = weights->info()->dimension(0);
+    const size_t weights_h = weights->info()->dimension(1);
+    const size_t weights_z = weights->info()->dimension(2);
+
+    const bool      has_bias   = (biases != nullptr);
+    const GPUTarget gpu_target = CLScheduler::get().target();
+
+    unsigned int conv_w = 0;
+    unsigned int conv_h = 0;
+    std::tie(conv_w, conv_h) = scaled_dimensions(input->info()->dimension(0), input->info()->dimension(1), weights_w, weights_h, conv_info);
+
+    // Set up intermediate tensors
+    const size_t patch_size = weights_w * weights_h + ((has_bias) ? 1 : 0);
+    const size_t conv_size  = conv_w * conv_h;
+
+    // Im2Col configuration
+    TensorShape shape_im2col = input->info()->tensor_shape();
+    shape_im2col.set(0, patch_size);
+    shape_im2col.set(1, conv_size);
+    shape_im2col.set(2, weights_z);
+    const TensorInfo info_im2col(shape_im2col, 1, input->info()->data_type(), input->info()->fixed_point_position());
+    _input_reshaped.allocator()->init(info_im2col);
+    _im2col_kernel.set_target(gpu_target);
+    _im2col_kernel.configure(input, &_input_reshaped, Size2D(weights_w, weights_h), conv_info, has_bias);
+
+    // Weights reshape configuration
+    const TensorShape shape_weights_reshape(patch_size, weights_z);
+    const TensorInfo  info_weights_reshape(shape_weights_reshape, 1, weights->info()->data_type(), weights->info()->fixed_point_position());
+    _weights_reshaped.allocator()->init(info_weights_reshape);
+    _weights_reshape_kernel.configure(weights, &_weights_reshaped, biases);
+
+    // GEMV configuration
+    TensorShape shape_v2mm_out = input->info()->tensor_shape();
+    shape_v2mm_out.set(0, conv_size * weights_z);
+    shape_v2mm_out.set(1, 1);
+    shape_v2mm_out.set(2, 1);
+    const TensorInfo info_v2mm_out(shape_v2mm_out, 1, input->info()->data_type(), input->info()->fixed_point_position());
+    _v2mm_output.allocator()->init(info_v2mm_out);
+    _v2mm_kernel.set_target(gpu_target);
+    _v2mm_kernel.configure(&_input_reshaped, &_weights_reshaped, &_v2mm_output);
+    _vector_to_tensor_kernel.configure(&_v2mm_output, output, conv_w, conv_h);
+
+    BorderSize border_size = _v2mm_kernel.border_size();
+    _v2mm_input_fill_border.configure(&_input_reshaped, border_size, BorderMode::CONSTANT, PixelValue(0));
+
+    border_size.bottom = 0;
+    _v2mm_weights_fill_border.configure(&_weights_reshaped, border_size, BorderMode::CONSTANT, PixelValue(0));
+
+    // Allocate intermediate tensors
+    _input_reshaped.allocator()->allocate();
+    _weights_reshaped.allocator()->allocate();
+    _v2mm_output.allocator()->allocate();
+}
+
+void CLDepthwiseConvolutionLayer::run()
+{
+    CLScheduler::get().enqueue(_im2col_kernel);
+
+    CLScheduler::get().enqueue(_weights_reshape_kernel);
+
+    CLScheduler::get().enqueue(_v2mm_input_fill_border);
+    CLScheduler::get().enqueue(_v2mm_weights_fill_border);
+    CLScheduler::get().enqueue(_v2mm_kernel);
+
+    CLScheduler::get().enqueue(_vector_to_tensor_kernel);
+}
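
The generic path above stages three intermediates whose sizes follow directly from the weights and the convolved output dimensions. A standalone sketch of that geometry; the names are hypothetical, but the formulas are the ones used in configure() above:

#include <cstddef>

struct DepthwiseShapes
{
    std::size_t patch_size; // weights_w * weights_h, plus 1 when a bias is appended
    std::size_t conv_size;  // number of output positions per depth plane
    std::size_t v2mm_len;   // length of the flattened GEMV output
};

DepthwiseShapes depthwise_shapes(std::size_t weights_w, std::size_t weights_h, std::size_t weights_z,
                                 std::size_t conv_w, std::size_t conv_h, bool has_bias)
{
    DepthwiseShapes s{};
    s.patch_size = weights_w * weights_h + (has_bias ? 1 : 0);
    s.conv_size  = conv_w * conv_h;
    s.v2mm_len   = s.conv_size * weights_z;
    return s;
}

diff --git a/src/runtime/CL/functions/CLL2Normalize.cpp b/src/runtime/CL/functions/CLL2Normalize.cpp
deleted file mode 100644
index 99be8cae4c..0000000000
--- a/src/runtime/CL/functions/CLL2Normalize.cpp
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Copyright (c) 2017 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.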
IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/runtime/CL/functions/CLL2Normalize.h"
-
-#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/CL/kernels/CLL2NormalizeKernel.h"
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/PixelValue.h"
-#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Validate.h"
-#include "arm_compute/runtime/CL/CLScheduler.h"
-#include "support/ToolchainSupport.h"
-
-using namespace arm_compute;
-
-CLL2Normalize::CLL2Normalize(std::shared_ptr<IMemoryManager> memory_manager)
-    : _memory_group(std::move(memory_manager)), _reduce_func(), _normalize_kernel(), _sumsq()
-{
-}
-
-void CLL2Normalize::configure(ICLTensor *input, ICLTensor *output, unsigned int axis, float epsilon)
-{
-    // Manage intermediate buffers
-    _memory_group.manage(&_sumsq);
-
-    // Configure kernels
-    _reduce_func.configure(input, &_sumsq, axis, ReductionOperation::SUM_SQUARE);
-    _normalize_kernel.configure(input, &_sumsq, output, axis, epsilon);
-
-    // Allocate intermediate tensor
-    _sumsq.allocator()->allocate();
-}
-
-void CLL2Normalize::run()
-{
-    _memory_group.acquire();
-
-    _reduce_func.run();
-    CLScheduler::get().enqueue(_normalize_kernel, true);
-
-    _memory_group.release();
-}
diff --git a/src/runtime/CL/functions/CLL2NormalizeLayer.cpp b/src/runtime/CL/functions/CLL2NormalizeLayer.cpp
new file mode 100644
index 0000000000..d1bb65f1c9
--- /dev/null
+++ b/src/runtime/CL/functions/CLL2NormalizeLayer.cpp
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/CL/functions/CLL2NormalizeLayer.h"
+
+#include "arm_compute/core/CL/ICLTensor.h"
+#include "arm_compute/core/CL/kernels/CLL2NormalizeLayerKernel.h"
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/PixelValue.h"
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/core/Validate.h"
+#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "support/ToolchainSupport.h"
+
+using namespace arm_compute;
+
+CLL2NormalizeLayer::CLL2NormalizeLayer(std::shared_ptr<IMemoryManager> memory_manager)
+    : _memory_group(std::move(memory_manager)), _reduce_func(), _normalize_kernel(), _sumsq()
+{
+}
+
+void CLL2NormalizeLayer::configure(ICLTensor *input, ICLTensor *output, unsigned int axis, float epsilon)
+{
+    // Manage intermediate buffers
+    _memory_group.manage(&_sumsq);
+
+    // Configure kernels
+    _reduce_func.configure(input, &_sumsq, axis, ReductionOperation::SUM_SQUARE);
+    _normalize_kernel.configure(input, &_sumsq, output, axis, epsilon);
+
+    // Allocate intermediate tensor
+    _sumsq.allocator()->allocate();
+}
+
+void CLL2NormalizeLayer::run()
+{
+    _memory_group.acquire();
+
+    _reduce_func.run();
+    CLScheduler::get().enqueue(_normalize_kernel, true);
+
+    _memory_group.release();
+}
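
CLL2NormalizeLayer also shows the library's memory-group discipline unchanged by the rename: manage() the intermediate before configuring its consumer, allocate() once configuration is done, and bracket run() with acquire()/release(). A toy stand-in that merely asserts this ordering; it is purely illustrative and not the library class:

#include <cassert>

// Hypothetical stand-in for a managed lifetime; real memory groups also
// recycle backing memory across functions, which this sketch ignores.
class LifetimeCheck
{
public:
    void manage()   { assert(!_allocated); _managed = true; } // before configure()
    void allocate() { assert(_managed); _allocated = true; }  // after configure()
    void acquire()  { assert(_allocated); _acquired = true; } // entering run()
    void release()  { assert(_acquired); _acquired = false; } // leaving run()

private:
    bool _managed{ false };
    bool _allocated{ false };
    bool _acquired{ false };
};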
diff --git a/src/runtime/CL/functions/CLLaplacianPyramid.cpp b/src/runtime/CL/functions/CLLaplacianPyramid.cpp
index a395487103..7e5278f380 100644
--- a/src/runtime/CL/functions/CLLaplacianPyramid.cpp
+++ b/src/runtime/CL/functions/CLLaplacianPyramid.cpp
@@ -29,7 +29,7 @@
 #include "arm_compute/core/Validate.h"
 #include "arm_compute/runtime/CL/CLTensor.h"
 #include "arm_compute/runtime/CL/functions/CLArithmeticSubtraction.h"
-#include "arm_compute/runtime/CL/functions/CLDepthConvert.h"
+#include "arm_compute/runtime/CL/functions/CLDepthConvertLayer.h"
 #include "arm_compute/runtime/CL/functions/CLGaussian5x5.h"
 #include "arm_compute/runtime/CL/functions/CLGaussianPyramid.h"
 #include "support/ToolchainSupport.h"
diff --git a/src/runtime/GLES_COMPUTE/functions/GCDepthConcatenate.cpp b/src/runtime/GLES_COMPUTE/functions/GCDepthConcatenate.cpp
deleted file mode 100755
index ed756cf261..0000000000
--- a/src/runtime/GLES_COMPUTE/functions/GCDepthConcatenate.cpp
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
- * Copyright (c) 2017 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/runtime/GLES_COMPUTE/functions/GCDepthConcatenate.h"
-
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h"
-#include "arm_compute/core/PixelValue.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/GLES_COMPUTE/GCScheduler.h"
-#include "support/ToolchainSupport.h"
-
-using namespace arm_compute;
-
-GCDepthConcatenate::GCDepthConcatenate() //NOLINT
-    : _concat_kernels_vector(),
-      _border_handlers_vector(),
-      _num_inputs(0)
-{
-}
-
-void GCDepthConcatenate::configure(std::vector<IGCTensor *> inputs_vector, IGCTensor *output) //NOLINT
-{
-    ARM_COMPUTE_ERROR_ON(inputs_vector.size() < 2);
-
-    _num_inputs = inputs_vector.size();
-
-    unsigned int depth_offset = 0;
-
-    _concat_kernels_vector  = arm_compute::support::cpp14::make_unique<GCDepthConcatenateKernel[]>(_num_inputs);
-    _border_handlers_vector = arm_compute::support::cpp14::make_unique<GCFillBorderKernel[]>(_num_inputs);
-
-    for(unsigned int i = 0; i < _num_inputs; i++)
-    {
-        _concat_kernels_vector[i].configure(inputs_vector.at(i), depth_offset, output);
-        _border_handlers_vector[i].configure(inputs_vector.at(i), _concat_kernels_vector[i].border_size(), BorderMode::CONSTANT, PixelValue(0));
-
-        depth_offset += inputs_vector.at(i)->info()->dimension(2);
-    }
-}
-
-void GCDepthConcatenate::run()
-{
-    for(unsigned i = 0; i < _num_inputs; i++)
-    {
-        GCScheduler::get().enqueue(_border_handlers_vector[i], false);
-        GCScheduler::get().enqueue(_concat_kernels_vector[i], true);
-    }
-}
diff --git a/src/runtime/GLES_COMPUTE/functions/GCDepthConcatenateLayer.cpp b/src/runtime/GLES_COMPUTE/functions/GCDepthConcatenateLayer.cpp
new file mode 100755
index 0000000000..ee0b121695
--- /dev/null
+++ b/src/runtime/GLES_COMPUTE/functions/GCDepthConcatenateLayer.cpp
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/GLES_COMPUTE/functions/GCDepthConcatenateLayer.h"
+
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h"
+#include "arm_compute/core/PixelValue.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/GLES_COMPUTE/GCScheduler.h"
+#include "support/ToolchainSupport.h"
+
+using namespace arm_compute;
+
+GCDepthConcatenateLayer::GCDepthConcatenateLayer() //NOLINT
+    : _concat_kernels_vector(),
+      _border_handlers_vector(),
+      _num_inputs(0)
+{
+}
+
+void GCDepthConcatenateLayer::configure(std::vector<IGCTensor *> inputs_vector, IGCTensor *output) //NOLINT
+{
+    ARM_COMPUTE_ERROR_ON(inputs_vector.size() < 2);
+
+    _num_inputs = inputs_vector.size();
+
+    unsigned int depth_offset = 0;
+
+    _concat_kernels_vector  = arm_compute::support::cpp14::make_unique<GCDepthConcatenateLayerKernel[]>(_num_inputs);
+    _border_handlers_vector = arm_compute::support::cpp14::make_unique<GCFillBorderKernel[]>(_num_inputs);
+
+    for(unsigned int i = 0; i < _num_inputs; i++)
+    {
+        _concat_kernels_vector[i].configure(inputs_vector.at(i), depth_offset, output);
+        _border_handlers_vector[i].configure(inputs_vector.at(i), _concat_kernels_vector[i].border_size(), BorderMode::CONSTANT, PixelValue(0));
+
+        depth_offset += inputs_vector.at(i)->info()->dimension(2);
+    }
+}
+
+void GCDepthConcatenateLayer::run()
+{
+    for(unsigned i = 0; i < _num_inputs; i++)
+    {
+        GCScheduler::get().enqueue(_border_handlers_vector[i], false);
+        GCScheduler::get().enqueue(_concat_kernels_vector[i], true);
+    }
+}
diff --git a/src/runtime/NEON/functions/NEDepthConcatenate.cpp b/src/runtime/NEON/functions/NEDepthConcatenate.cpp
deleted file mode 100644
index f8ad2abe61..0000000000
--- a/src/runtime/NEON/functions/NEDepthConcatenate.cpp
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
- * Copyright (c) 2017 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/runtime/NEON/functions/NEDepthConcatenate.h"
-
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/ITensor.h"
-#include "arm_compute/core/PixelValue.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/NEON/NEScheduler.h"
-#include "support/ToolchainSupport.h"
-
-using namespace arm_compute;
-
-NEDepthConcatenate::NEDepthConcatenate() // NOLINT
-    : _inputs_vector(),
-      _concat_kernels_vector(),
-      _border_handlers_vector(),
-      _num_inputs(0)
-{
-}
-
-void NEDepthConcatenate::configure(std::vector<ITensor *> inputs_vector, ITensor *output) // NOLINT
-{
-    ARM_COMPUTE_ERROR_ON(inputs_vector.size() < 2);
-
-    _num_inputs             = inputs_vector.size();
-    _concat_kernels_vector  = arm_compute::support::cpp14::make_unique<NEDepthConcatenateKernel[]>(_num_inputs);
-    _border_handlers_vector = arm_compute::support::cpp14::make_unique<NEFillBorderKernel[]>(_num_inputs);
-
-    TensorShape output_shape = calculate_depth_concatenate_shape(inputs_vector);
-
-    // Output auto inizialitation if not yet initialized
-    auto_init_if_empty(*output->info(), output_shape, 1, inputs_vector[0]->info()->data_type(), inputs_vector[0]->info()->fixed_point_position());
-
-    unsigned int depth_offset = 0;
-    for(unsigned int i = 0; i < _num_inputs; ++i)
-    {
-        _concat_kernels_vector[i].configure(inputs_vector.at(i), depth_offset, output);
-        _border_handlers_vector[i].configure(inputs_vector.at(i), _concat_kernels_vector[i].border_size(), BorderMode::CONSTANT, PixelValue(static_cast<float>(0.f)));
-
-        depth_offset += inputs_vector.at(i)->info()->dimension(2);
-    }
-}
-
-void NEDepthConcatenate::run()
-{
-    for(unsigned i = 0; i < _num_inputs; ++i)
-    {
-        NEScheduler::get().schedule(&_border_handlers_vector[i], Window::DimX);
-        NEScheduler::get().schedule(&_concat_kernels_vector[i], Window::DimX);
-    }
-}
diff --git a/src/runtime/NEON/functions/NEDepthConcatenateLayer.cpp b/src/runtime/NEON/functions/NEDepthConcatenateLayer.cpp
new file mode 100644
index 0000000000..437c9417ce
--- /dev/null
+++ b/src/runtime/NEON/functions/NEDepthConcatenateLayer.cpp
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/NEON/functions/NEDepthConcatenateLayer.h"
+
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/ITensor.h"
+#include "arm_compute/core/PixelValue.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/NEON/NEScheduler.h"
+#include "support/ToolchainSupport.h"
+
+using namespace arm_compute;
+
+NEDepthConcatenateLayer::NEDepthConcatenateLayer() // NOLINT
+    : _inputs_vector(),
+      _concat_kernels_vector(),
+      _border_handlers_vector(),
+      _num_inputs(0)
+{
+}
+
+void NEDepthConcatenateLayer::configure(std::vector<ITensor *> inputs_vector, ITensor *output) // NOLINT
+{
+    ARM_COMPUTE_ERROR_ON(inputs_vector.size() < 2);
+
+    _num_inputs             = inputs_vector.size();
+    _concat_kernels_vector  = arm_compute::support::cpp14::make_unique<NEDepthConcatenateLayerKernel[]>(_num_inputs);
+    _border_handlers_vector = arm_compute::support::cpp14::make_unique<NEFillBorderKernel[]>(_num_inputs);
+
+    TensorShape output_shape = calculate_depth_concatenate_shape(inputs_vector);
+
+    // Output auto inizialitation if not yet initialized
+    auto_init_if_empty(*output->info(), output_shape, 1, inputs_vector[0]->info()->data_type(), inputs_vector[0]->info()->fixed_point_position());
+
+    unsigned int depth_offset = 0;
+    for(unsigned int i = 0; i < _num_inputs; ++i)
+    {
+        _concat_kernels_vector[i].configure(inputs_vector.at(i), depth_offset, output);
+        _border_handlers_vector[i].configure(inputs_vector.at(i), _concat_kernels_vector[i].border_size(), BorderMode::CONSTANT, PixelValue(static_cast<float>(0.f)));
+
+        depth_offset += inputs_vector.at(i)->info()->dimension(2);
+    }
+}
+
+void NEDepthConcatenateLayer::run()
+{
+    for(unsigned i = 0; i < _num_inputs; ++i)
+    {
+        NEScheduler::get().schedule(&_border_handlers_vector[i], Window::DimX);
+        NEScheduler::get().schedule(&_concat_kernels_vector[i], Window::DimX);
+    }
+}
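
Usage of the renamed NEON concatenation function, as a hedged sketch; tensor creation, shapes, and allocation are elided, and the inputs are assumed to share width, height, and data type, as configure() above requires:

#include "arm_compute/runtime/NEON/functions/NEDepthConcatenateLayer.h"
#include "arm_compute/runtime/Tensor.h"

#include <vector>

using namespace arm_compute;

// Stack two feature maps along the depth axis.
void depth_concat_example(Tensor &a, Tensor &b, Tensor &out)
{
    std::vector<ITensor *> inputs{ &a, &b };
    NEDepthConcatenateLayer concat;
    concat.configure(inputs, &out);
    concat.run();
}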
- */ -#include "arm_compute/runtime/NEON/functions/NEDepthConvert.h" - -#include "arm_compute/core/NEON/kernels/NEDepthConvertKernel.h" -#include "support/ToolchainSupport.h" - -#include - -using namespace arm_compute; - -void NEDepthConvert::configure(ITensor *input, ITensor *output, ConvertPolicy policy, uint32_t shift) -{ - auto k = arm_compute::support::cpp14::make_unique(); - k->configure(input, output, policy, shift); - _kernel = std::move(k); -} diff --git a/src/runtime/NEON/functions/NEDepthConvertLayer.cpp b/src/runtime/NEON/functions/NEDepthConvertLayer.cpp new file mode 100644 index 0000000000..9a75404fcd --- /dev/null +++ b/src/runtime/NEON/functions/NEDepthConvertLayer.cpp @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h" + +#include "arm_compute/core/NEON/kernels/NEDepthConvertLayerKernel.h" +#include "support/ToolchainSupport.h" + +#include + +using namespace arm_compute; + +void NEDepthConvertLayer::configure(ITensor *input, ITensor *output, ConvertPolicy policy, uint32_t shift) +{ + auto k = arm_compute::support::cpp14::make_unique(); + k->configure(input, output, policy, shift); + _kernel = std::move(k); +} diff --git a/src/runtime/NEON/functions/NEDepthwiseConvolution.cpp b/src/runtime/NEON/functions/NEDepthwiseConvolution.cpp deleted file mode 100644 index e12bc07464..0000000000 --- a/src/runtime/NEON/functions/NEDepthwiseConvolution.cpp +++ /dev/null @@ -1,126 +0,0 @@ -/* - * Copyright (c) 2017 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/runtime/NEON/functions/NEDepthwiseConvolution.h" - -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/ITensor.h" -#include "arm_compute/core/PixelValue.h" -#include "arm_compute/runtime/NEON/NEScheduler.h" -#include "support/ToolchainSupport.h" - -using namespace arm_compute; - -NEDepthwiseConvolution3x3::NEDepthwiseConvolution3x3() - : _kernel(), _bias_kernel(), _border_handler(), _has_bias(false) -{ -} - -void NEDepthwiseConvolution3x3::configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info) -{ - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32); - ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output, weights); - - // Call convolution kernel - _kernel.configure(input, weights, output, conv_info); - _border_handler.configure(input, _kernel.border_size(), BorderMode::CONSTANT, PixelValue(static_cast(0.f))); - if(biases != nullptr) - { - _bias_kernel.configure(output, biases); - _has_bias = true; - } -} - -void NEDepthwiseConvolution3x3::run() -{ - NEScheduler::get().schedule(&_border_handler, Window::DimX); - NEScheduler::get().schedule(&_kernel, Window::DimX); - if(_has_bias) - { - NEScheduler::get().schedule(&_bias_kernel, Window::DimX); - } -} - -NEDepthwiseConvolution::NEDepthwiseConvolution() - : _im2col_kernel(), _weights_reshape_kernel(), _v2mm_kernel(), _vector_to_tensor_kernel(), _input_reshaped(), _weights_reshaped(), _v2mm_output() -{ -} - -void NEDepthwiseConvolution::configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info) -{ - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32); - ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights); - ARM_COMPUTE_ERROR_ON(input->info()->dimension(2) != weights->info()->dimension(2)); - - const size_t weights_w = weights->info()->dimension(0); - const size_t weights_h = weights->info()->dimension(1); - const size_t weights_z = weights->info()->dimension(2); - - bool has_bias = (biases != nullptr); - - unsigned int conv_w = 0; - unsigned int conv_h = 0; - std::tie(conv_w, conv_h) = scaled_dimensions(input->info()->dimension(0), input->info()->dimension(1), weights_w, weights_h, conv_info); - - // Set up intermediate tensors - const size_t patch_size = weights_w * weights_h + ((has_bias) ? 
1 : 0); - const size_t conv_size = conv_w * conv_h; - - // Im2Col configuration - TensorShape shape_im2col = input->info()->tensor_shape(); - shape_im2col.set(0, patch_size); - shape_im2col.set(1, conv_size); - shape_im2col.set(2, weights_z); - const TensorInfo info_im2col(shape_im2col, 1, input->info()->data_type(), input->info()->fixed_point_position()); - _input_reshaped.allocator()->init(info_im2col); - _im2col_kernel.configure(input, &_input_reshaped, Size2D(weights_w, weights_h), conv_info, has_bias); - - // Weights reshape configuration - const TensorShape shape_weights_reshape(patch_size, weights_z); - const TensorInfo info_weights_reshape(shape_weights_reshape, 1, weights->info()->data_type(), weights->info()->fixed_point_position()); - _weights_reshaped.allocator()->init(info_weights_reshape); - _weights_reshape_kernel.configure(weights, &_weights_reshaped, biases); - - // GEMV configuration - TensorShape shape_v2mm_out = input->info()->tensor_shape(); - shape_v2mm_out.set(0, conv_size * weights_z); - shape_v2mm_out.set(1, 1); - shape_v2mm_out.set(2, 1); - const TensorInfo info_v2mm_out(shape_v2mm_out, 1, input->info()->data_type(), input->info()->fixed_point_position()); - _v2mm_output.allocator()->init(info_v2mm_out); - _v2mm_kernel.configure(&_input_reshaped, &_weights_reshaped, &_v2mm_output); - _vector_to_tensor_kernel.configure(&_v2mm_output, output, conv_w, conv_h); - - // Allocate intermediate tensors - _input_reshaped.allocator()->allocate(); - _weights_reshaped.allocator()->allocate(); - _v2mm_output.allocator()->allocate(); -} - -void NEDepthwiseConvolution::run() -{ - NEScheduler::get().schedule(&_im2col_kernel, Window::DimX); - NEScheduler::get().schedule(&_weights_reshape_kernel, Window::DimX); - NEScheduler::get().schedule(&_v2mm_kernel, Window::DimX); - NEScheduler::get().schedule(&_vector_to_tensor_kernel, Window::DimX); -} \ No newline at end of file diff --git a/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp b/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp new file mode 100644 index 0000000000..b890c6f5d5 --- /dev/null +++ b/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp @@ -0,0 +1,126 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */
+#include "arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h"
+
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/ITensor.h"
+#include "arm_compute/core/PixelValue.h"
+#include "arm_compute/runtime/NEON/NEScheduler.h"
+#include "support/ToolchainSupport.h"
+
+using namespace arm_compute;
+
+NEDepthwiseConvolutionLayer3x3::NEDepthwiseConvolutionLayer3x3()
+    : _kernel(), _bias_kernel(), _border_handler(), _has_bias(false)
+{
+}
+
+void NEDepthwiseConvolutionLayer3x3::configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info)
+{
+    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32);
+    ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output, weights);
+
+    // Call convolution kernel
+    _kernel.configure(input, weights, output, conv_info);
+    _border_handler.configure(input, _kernel.border_size(), BorderMode::CONSTANT, PixelValue(static_cast<float>(0.f)));
+    if(biases != nullptr)
+    {
+        _bias_kernel.configure(output, biases);
+        _has_bias = true;
+    }
+}
+
+void NEDepthwiseConvolutionLayer3x3::run()
+{
+    NEScheduler::get().schedule(&_border_handler, Window::DimX);
+    NEScheduler::get().schedule(&_kernel, Window::DimX);
+    if(_has_bias)
+    {
+        NEScheduler::get().schedule(&_bias_kernel, Window::DimX);
+    }
+}
+
+NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayer()
+    : _im2col_kernel(), _weights_reshape_kernel(), _v2mm_kernel(), _vector_to_tensor_kernel(), _input_reshaped(), _weights_reshaped(), _v2mm_output()
+{
+}
+
+void NEDepthwiseConvolutionLayer::configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info)
+{
+    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32);
+    ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights);
+    ARM_COMPUTE_ERROR_ON(input->info()->dimension(2) != weights->info()->dimension(2));
+
+    const size_t weights_w = weights->info()->dimension(0);
+    const size_t weights_h = weights->info()->dimension(1);
+    const size_t weights_z = weights->info()->dimension(2);
+
+    bool has_bias = (biases != nullptr);
+
+    unsigned int conv_w = 0;
+    unsigned int conv_h = 0;
+    std::tie(conv_w, conv_h) = scaled_dimensions(input->info()->dimension(0), input->info()->dimension(1), weights_w, weights_h, conv_info);
+
+    // Set up intermediate tensors
+    const size_t patch_size = weights_w * weights_h + ((has_bias) ?
1 : 0); + const size_t conv_size = conv_w * conv_h; + + // Im2Col configuration + TensorShape shape_im2col = input->info()->tensor_shape(); + shape_im2col.set(0, patch_size); + shape_im2col.set(1, conv_size); + shape_im2col.set(2, weights_z); + const TensorInfo info_im2col(shape_im2col, 1, input->info()->data_type(), input->info()->fixed_point_position()); + _input_reshaped.allocator()->init(info_im2col); + _im2col_kernel.configure(input, &_input_reshaped, Size2D(weights_w, weights_h), conv_info, has_bias); + + // Weights reshape configuration + const TensorShape shape_weights_reshape(patch_size, weights_z); + const TensorInfo info_weights_reshape(shape_weights_reshape, 1, weights->info()->data_type(), weights->info()->fixed_point_position()); + _weights_reshaped.allocator()->init(info_weights_reshape); + _weights_reshape_kernel.configure(weights, &_weights_reshaped, biases); + + // GEMV configuration + TensorShape shape_v2mm_out = input->info()->tensor_shape(); + shape_v2mm_out.set(0, conv_size * weights_z); + shape_v2mm_out.set(1, 1); + shape_v2mm_out.set(2, 1); + const TensorInfo info_v2mm_out(shape_v2mm_out, 1, input->info()->data_type(), input->info()->fixed_point_position()); + _v2mm_output.allocator()->init(info_v2mm_out); + _v2mm_kernel.configure(&_input_reshaped, &_weights_reshaped, &_v2mm_output); + _vector_to_tensor_kernel.configure(&_v2mm_output, output, conv_w, conv_h); + + // Allocate intermediate tensors + _input_reshaped.allocator()->allocate(); + _weights_reshaped.allocator()->allocate(); + _v2mm_output.allocator()->allocate(); +} + +void NEDepthwiseConvolutionLayer::run() +{ + NEScheduler::get().schedule(&_im2col_kernel, Window::DimX); + NEScheduler::get().schedule(&_weights_reshape_kernel, Window::DimX); + NEScheduler::get().schedule(&_v2mm_kernel, Window::DimX); + NEScheduler::get().schedule(&_vector_to_tensor_kernel, Window::DimX); +} \ No newline at end of file diff --git a/src/runtime/NEON/functions/NEL2Normalize.cpp b/src/runtime/NEON/functions/NEL2Normalize.cpp deleted file mode 100644 index 349a781b0b..0000000000 --- a/src/runtime/NEON/functions/NEL2Normalize.cpp +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Copyright (c) 2017 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#include "arm_compute/runtime/NEON/functions/NEL2Normalize.h" - -#include "arm_compute/core/Helpers.h" -#include "arm_compute/runtime/NEON/NEScheduler.h" - -using namespace arm_compute; - -NEL2Normalize::NEL2Normalize(std::shared_ptr memory_manager) - : _memory_group(std::move(memory_manager)), _reduce_func(), _normalize_kernel(), _sumsq() -{ -} - -void NEL2Normalize::configure(ITensor *input, ITensor *output, unsigned int axis, float epsilon) -{ - // Manage intermediate buffers - _memory_group.manage(&_sumsq); - - // Configure Kernels - _reduce_func.configure(input, &_sumsq, axis, ReductionOperation::SUM_SQUARE); - _normalize_kernel.configure(input, &_sumsq, output, axis, epsilon); - - // Allocate intermediate tensors - _sumsq.allocator()->allocate(); -} - -void NEL2Normalize::run() -{ - _memory_group.acquire(); - - _reduce_func.run(); - NEScheduler::get().schedule(&_normalize_kernel, Window::DimY); - - _memory_group.release(); -} diff --git a/src/runtime/NEON/functions/NEL2NormalizeLayer.cpp b/src/runtime/NEON/functions/NEL2NormalizeLayer.cpp new file mode 100644 index 0000000000..fa62483146 --- /dev/null +++ b/src/runtime/NEON/functions/NEL2NormalizeLayer.cpp @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */
+#include "arm_compute/runtime/NEON/functions/NEL2NormalizeLayer.h"
+
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/runtime/NEON/NEScheduler.h"
+
+using namespace arm_compute;
+
+NEL2NormalizeLayer::NEL2NormalizeLayer(std::shared_ptr<IMemoryManager> memory_manager)
+    : _memory_group(std::move(memory_manager)), _reduce_func(), _normalize_kernel(), _sumsq()
+{
+}
+
+void NEL2NormalizeLayer::configure(ITensor *input, ITensor *output, unsigned int axis, float epsilon)
+{
+    // Manage intermediate buffers
+    _memory_group.manage(&_sumsq);
+
+    // Configure Kernels
+    _reduce_func.configure(input, &_sumsq, axis, ReductionOperation::SUM_SQUARE);
+    _normalize_kernel.configure(input, &_sumsq, output, axis, epsilon);
+
+    // Allocate intermediate tensors
+    _sumsq.allocator()->allocate();
+}
+
+void NEL2NormalizeLayer::run()
+{
+    _memory_group.acquire();
+
+    _reduce_func.run();
+    NEScheduler::get().schedule(&_normalize_kernel, Window::DimY);
+
+    _memory_group.release();
+}
diff --git a/src/runtime/NEON/functions/NELaplacianPyramid.cpp b/src/runtime/NEON/functions/NELaplacianPyramid.cpp
index a680f1f11d..0e149d4176 100644
--- a/src/runtime/NEON/functions/NELaplacianPyramid.cpp
+++ b/src/runtime/NEON/functions/NELaplacianPyramid.cpp
@@ -28,7 +28,7 @@
 #include "arm_compute/core/TensorInfo.h"
 #include "arm_compute/core/Validate.h"
 #include "arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h"
-#include "arm_compute/runtime/NEON/functions/NEDepthConvert.h"
+#include "arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h"
 #include "arm_compute/runtime/NEON/functions/NEGaussian5x5.h"
 #include "arm_compute/runtime/NEON/functions/NEGaussianPyramid.h"
 #include "arm_compute/runtime/Tensor.h"
diff --git a/tests/benchmark/CL/DepthwiseConvolution.cpp b/tests/benchmark/CL/DepthwiseConvolution.cpp
deleted file mode 100644
index 40412da6f9..0000000000
--- a/tests/benchmark/CL/DepthwiseConvolution.cpp
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
- * Copyright (c) 2017 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */ -#include "arm_compute/core/TensorShape.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/CL/CLTensor.h" -#include "arm_compute/runtime/CL/CLTensorAllocator.h" -#include "arm_compute/runtime/CL/functions/CLDepthwiseConvolution.h" -#include "tests/CL/CLAccessor.h" -#include "tests/benchmark/fixtures/DepthwiseConvolutionFixture.h" -#include "tests/datasets/MobileNetDepthwiseConvolutionDataset.h" -#include "tests/framework/Macros.h" -#include "tests/framework/datasets/Datasets.h" -#include "utils/TypePrinter.h" - -namespace arm_compute -{ -namespace test -{ -const auto data_types = framework::dataset::make("DataType", { DataType::F32 }); -using CLDepthwiseConvolutionFixture = DepthwiseConvolutionFixture; - -TEST_SUITE(CL) - -REGISTER_FIXTURE_DATA_TEST_CASE(MobileNetDepthwiseConvolution, CLDepthwiseConvolutionFixture, framework::DatasetMode::ALL, - framework::dataset::combine(framework::dataset::combine(datasets::MobileNetDepthwiseConvolutionDataset(), data_types), - framework::dataset::make("Batches", { 1 }))); - -TEST_SUITE_END() -} // namespace test -} // namespace arm_compute diff --git a/tests/benchmark/CL/DepthwiseConvolutionLayer.cpp b/tests/benchmark/CL/DepthwiseConvolutionLayer.cpp new file mode 100644 index 0000000000..be6fba0a90 --- /dev/null +++ b/tests/benchmark/CL/DepthwiseConvolutionLayer.cpp @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/core/TensorShape.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/CLTensor.h" +#include "arm_compute/runtime/CL/CLTensorAllocator.h" +#include "arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h" +#include "tests/CL/CLAccessor.h" +#include "tests/benchmark/fixtures/DepthwiseConvolutionLayerFixture.h" +#include "tests/datasets/MobileNetDepthwiseConvolutionLayerDataset.h" +#include "tests/framework/Macros.h" +#include "tests/framework/datasets/Datasets.h" +#include "utils/TypePrinter.h" + +namespace arm_compute +{ +namespace test +{ +const auto data_types = framework::dataset::make("DataType", { DataType::F32 }); +using CLDepthwiseConvolutionLayerFixture = DepthwiseConvolutionLayerFixture; + +TEST_SUITE(CL) + +REGISTER_FIXTURE_DATA_TEST_CASE(MobileNetDepthwiseConvolutionLayer, CLDepthwiseConvolutionLayerFixture, framework::DatasetMode::ALL, + framework::dataset::combine(framework::dataset::combine(datasets::MobileNetDepthwiseConvolutionLayerDataset(), data_types), + framework::dataset::make("Batches", { 1 }))); + +TEST_SUITE_END() +} // namespace test +} // namespace arm_compute diff --git a/tests/benchmark/CL/SYSTEM/MobileNet.cpp b/tests/benchmark/CL/SYSTEM/MobileNet.cpp index c745a0acab..4712bc0c80 100644 --- a/tests/benchmark/CL/SYSTEM/MobileNet.cpp +++ b/tests/benchmark/CL/SYSTEM/MobileNet.cpp @@ -27,7 +27,7 @@ #include "arm_compute/runtime/CL/CLTensorAllocator.h" #include "arm_compute/runtime/CL/functions/CLActivationLayer.h" #include "arm_compute/runtime/CL/functions/CLConvolutionLayer.h" -#include "arm_compute/runtime/CL/functions/CLDepthwiseConvolution.h" +#include "arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h" #include "arm_compute/runtime/CL/functions/CLDirectConvolutionLayer.h" #include "arm_compute/runtime/CL/functions/CLPoolingLayer.h" #include "arm_compute/runtime/CL/functions/CLReshapeLayer.h" @@ -46,7 +46,7 @@ using CLMobileNetFixture = MobileNetFixture; diff --git a/tests/benchmark/CL/SYSTEM/MobileNetV1.cpp b/tests/benchmark/CL/SYSTEM/MobileNetV1.cpp index 66be3231cf..851148a860 100644 --- a/tests/benchmark/CL/SYSTEM/MobileNetV1.cpp +++ b/tests/benchmark/CL/SYSTEM/MobileNetV1.cpp @@ -28,7 +28,7 @@ #include "arm_compute/runtime/CL/functions/CLActivationLayer.h" #include "arm_compute/runtime/CL/functions/CLBatchNormalizationLayer.h" #include "arm_compute/runtime/CL/functions/CLConvolutionLayer.h" -#include "arm_compute/runtime/CL/functions/CLDepthwiseConvolution.h" +#include "arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h" #include "arm_compute/runtime/CL/functions/CLDirectConvolutionLayer.h" #include "arm_compute/runtime/CL/functions/CLPoolingLayer.h" #include "arm_compute/runtime/CL/functions/CLReshapeLayer.h" @@ -49,7 +49,7 @@ using CLMobileNetV1_224_Fixture = MobileNetV1Fixture -class DepthwiseConvolutionFixture : public framework::Fixture -{ -public: - template - void setup(TensorShape src_shape, TensorShape weights_shape, TensorShape biases_shape, TensorShape dst_shape, PadStrideInfo info, DataType data_type, int batches) - { - // Set batched in source and destination shapes - const unsigned int fixed_point_position = 4; - src_shape.set(3 /* batch */, batches); - dst_shape.set(3 /* batch */, batches); - - // Create tensors - src = create_tensor(src_shape, data_type, 1, fixed_point_position); - weights = create_tensor(weights_shape, data_type, 1, fixed_point_position); - biases = create_tensor(biases_shape, data_type, 1, fixed_point_position); - dst = 
create_tensor(dst_shape, data_type, 1, fixed_point_position); - - // Create and configure function - depth_conv.configure(&src, &weights, &biases, &dst, info); - - // Allocate tensors - src.allocator()->allocate(); - weights.allocator()->allocate(); - biases.allocator()->allocate(); - dst.allocator()->allocate(); - - // Fill tensors - library->fill_tensor_uniform(Accessor(src), 0); - library->fill_tensor_uniform(Accessor(weights), 1); - } - - void run() - { - depth_conv.run(); - } - - void teardown() - { - src.allocator()->free(); - weights.allocator()->free(); - biases.allocator()->free(); - dst.allocator()->free(); - } - -private: - TensorType src{}; - TensorType weights{}; - TensorType biases{}; - TensorType dst{}; - Function depth_conv{}; -}; -} // namespace test -} // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_DEPTHWISECONVOLUTIONFIXTURE */ diff --git a/tests/benchmark/fixtures/DepthwiseConvolutionLayerFixture.h b/tests/benchmark/fixtures/DepthwiseConvolutionLayerFixture.h new file mode 100644 index 0000000000..9a49d5613a --- /dev/null +++ b/tests/benchmark/fixtures/DepthwiseConvolutionLayerFixture.h @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_TEST_DEPTHWISECONVOLUTIONFIXTURE +#define ARM_COMPUTE_TEST_DEPTHWISECONVOLUTIONFIXTURE + +#include "arm_compute/core/TensorShape.h" +#include "arm_compute/core/Types.h" +#include "tests/Globals.h" +#include "tests/Utils.h" +#include "tests/framework/Fixture.h" + +namespace arm_compute +{ +namespace test +{ +/** Fixture that can be used for NEON and CL */ +template +class DepthwiseConvolutionLayerFixture : public framework::Fixture +{ +public: + template + void setup(TensorShape src_shape, TensorShape weights_shape, TensorShape biases_shape, TensorShape dst_shape, PadStrideInfo info, DataType data_type, int batches) + { + // Set batched in source and destination shapes + const unsigned int fixed_point_position = 4; + src_shape.set(3 /* batch */, batches); + dst_shape.set(3 /* batch */, batches); + + // Create tensors + src = create_tensor(src_shape, data_type, 1, fixed_point_position); + weights = create_tensor(weights_shape, data_type, 1, fixed_point_position); + biases = create_tensor(biases_shape, data_type, 1, fixed_point_position); + dst = create_tensor(dst_shape, data_type, 1, fixed_point_position); + + // Create and configure function + depth_conv.configure(&src, &weights, &biases, &dst, info); + + // Allocate tensors + src.allocator()->allocate(); + weights.allocator()->allocate(); + biases.allocator()->allocate(); + dst.allocator()->allocate(); + + // Fill tensors + library->fill_tensor_uniform(Accessor(src), 0); + library->fill_tensor_uniform(Accessor(weights), 1); + } + + void run() + { + depth_conv.run(); + } + + void teardown() + { + src.allocator()->free(); + weights.allocator()->free(); + biases.allocator()->free(); + dst.allocator()->free(); + } + +private: + TensorType src{}; + TensorType weights{}; + TensorType biases{}; + TensorType dst{}; + Function depth_conv{}; +}; +} // namespace test +} // namespace arm_compute +#endif /* ARM_COMPUTE_TEST_DEPTHWISECONVOLUTIONFIXTURE */ diff --git a/tests/benchmark/fixtures/MobileNetFixture.h b/tests/benchmark/fixtures/MobileNetFixture.h index 6c1ee300c1..660205c7ec 100644 --- a/tests/benchmark/fixtures/MobileNetFixture.h +++ b/tests/benchmark/fixtures/MobileNetFixture.h @@ -38,7 +38,7 @@ template class MobileNetFixture : public framework::Fixture @@ -69,7 +69,7 @@ private: ActivationLayerFunction, ConvolutionLayerFunction, DirectConvolutionLayerFunction, - DepthwiseConvolutionFunction, + DepthwiseConvolutionLayerFunction, ReshapeFunction, PoolingLayerFunction> network{}; diff --git a/tests/datasets/DepthwiseConvolutionDataset.h b/tests/datasets/DepthwiseConvolutionDataset.h deleted file mode 100644 index 2c8347fc8c..0000000000 --- a/tests/datasets/DepthwiseConvolutionDataset.h +++ /dev/null @@ -1,187 +0,0 @@ -/* - * Copyright (c) 2017 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_TEST_DEPTHWISE_CONVOLUTION_DATASET -#define ARM_COMPUTE_TEST_DEPTHWISE_CONVOLUTION_DATASET - -#include "utils/TypePrinter.h" - -#include "arm_compute/core/TensorShape.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -namespace test -{ -namespace datasets -{ -class DepthwiseConvolutionDataset -{ -public: - using type = std::tuple; - - struct iterator - { - iterator(std::vector::const_iterator src_it, - std::vector::const_iterator weights_it, - std::vector::const_iterator biases_it, - std::vector::const_iterator dst_it, - std::vector::const_iterator infos_it) - : _src_it{ std::move(src_it) }, - _weights_it{ std::move(weights_it) }, - _biases_it{ std::move(biases_it) }, - _dst_it{ std::move(dst_it) }, - _infos_it{ std::move(infos_it) } - { - } - - std::string description() const - { - std::stringstream description; - description << "In=" << *_src_it << ":"; - description << "Weights=" << *_weights_it << ":"; - description << "Biases=" << *_biases_it << ":"; - description << "Out=" << *_dst_it << ":"; - description << "Info=" << *_infos_it; - return description.str(); - } - - DepthwiseConvolutionDataset::type operator*() const - { - return std::make_tuple(*_src_it, *_weights_it, *_biases_it, *_dst_it, *_infos_it); - } - - iterator &operator++() - { - ++_src_it; - ++_weights_it; - ++_biases_it; - ++_dst_it; - ++_infos_it; - - return *this; - } - - private: - std::vector::const_iterator _src_it; - std::vector::const_iterator _weights_it; - std::vector::const_iterator _biases_it; - std::vector::const_iterator _dst_it; - std::vector::const_iterator _infos_it; - }; - - iterator begin() const - { - return iterator(_src_shapes.begin(), _weight_shapes.begin(), _biases_shapes.begin(), _dst_shapes.begin(), _infos.begin()); - } - - int size() const - { - return std::min(_src_shapes.size(), std::min(_weight_shapes.size(), std::min(_biases_shapes.size(), std::min(_dst_shapes.size(), _infos.size())))); - } - - void add_config(TensorShape src, TensorShape weights, TensorShape biases, TensorShape dst, PadStrideInfo info) - { - _src_shapes.emplace_back(std::move(src)); - _weight_shapes.emplace_back(std::move(weights)); - _biases_shapes.emplace_back(std::move(biases)); - _dst_shapes.emplace_back(std::move(dst)); - _infos.emplace_back(std::move(info)); - } - -protected: - DepthwiseConvolutionDataset() = default; - DepthwiseConvolutionDataset(DepthwiseConvolutionDataset &&) = default; - -private: - std::vector _src_shapes{}; - std::vector _weight_shapes{}; - std::vector _biases_shapes{}; - std::vector _dst_shapes{}; - std::vector _infos{}; -}; -class SmallDepthwiseConvolutionDataset final : public DepthwiseConvolutionDataset -{ -public: - SmallDepthwiseConvolutionDataset() - { - add_config(TensorShape(7U, 7U, 3U), TensorShape(3U, 3U, 3U), TensorShape(3U), TensorShape(5U, 5U, 3U), PadStrideInfo(1, 1, 0, 0)); - add_config(TensorShape(23U, 27U, 5U), TensorShape(3U, 5U, 5U), TensorShape(5U), TensorShape(11U, 23U, 5U), PadStrideInfo(2, 1, 0, 0)); - add_config(TensorShape(33U, 27U, 7U), 
TensorShape(7U, 3U, 7U), TensorShape(7U), TensorShape(10U, 13U, 7U), PadStrideInfo(3, 2, 1, 0)); - add_config(TensorShape(33U, 27U, 11U), TensorShape(3U, 3U, 11U), TensorShape(11U), TensorShape(31U, 14U, 11U), PadStrideInfo(1, 2, 0, 1)); - add_config(TensorShape(17U, 31U, 2U), TensorShape(5U, 9U, 2U), TensorShape(2U), TensorShape(15U, 13U, 2U), PadStrideInfo(1, 2, 1, 1)); - add_config(TensorShape(23U, 27U, 5U), TensorShape(11U, 3U, 5U), TensorShape(5U), TensorShape(13U, 13U, 5U), PadStrideInfo(1, 2, 0, 0)); - add_config(TensorShape(17U, 31U, 2U, 3U), TensorShape(5U, 9U, 2U), TensorShape(2U), TensorShape(15U, 13U, 2U, 3U), PadStrideInfo(1, 2, 1, 1)); - // Asymmetric padding - add_config(TensorShape(33U, 27U, 7U), TensorShape(5U, 7U, 7U), TensorShape(7U), TensorShape(11U, 12U, 7U), PadStrideInfo(3, 2, 1, 1, 2, 0, DimensionRoundingType::FLOOR)); - add_config(TensorShape(33U, 27U, 7U), TensorShape(5U, 7U, 7U), TensorShape(7U), TensorShape(11U, 12U, 7U), PadStrideInfo(3, 2, 1, 1, 0, 2, DimensionRoundingType::FLOOR)); - add_config(TensorShape(33U, 27U, 7U), TensorShape(5U, 7U, 7U), TensorShape(7U), TensorShape(11U, 12U, 7U), PadStrideInfo(3, 2, 2, 1, 2, 0, DimensionRoundingType::FLOOR)); - add_config(TensorShape(33U, 27U, 7U), TensorShape(5U, 7U, 7U), TensorShape(7U), TensorShape(11U, 12U, 7U), PadStrideInfo(3, 2, 1, 3, 0, 2, DimensionRoundingType::FLOOR)); - add_config(TensorShape(33U, 27U, 7U), TensorShape(5U, 7U, 7U), TensorShape(7U), TensorShape(10U, 11U, 7U), PadStrideInfo(3, 2, 1, 0, 1, 0, DimensionRoundingType::FLOOR)); - add_config(TensorShape(33U, 27U, 7U), TensorShape(5U, 7U, 7U), TensorShape(7U), TensorShape(10U, 11U, 7U), PadStrideInfo(3, 2, 0, 1, 0, 1, DimensionRoundingType::FLOOR)); - } -}; - -class LargeDepthwiseConvolutionDataset final : public DepthwiseConvolutionDataset -{ -public: - LargeDepthwiseConvolutionDataset() - { - add_config(TensorShape(233U, 277U, 55U), TensorShape(3U, 3U, 55U), TensorShape(55U), TensorShape(116U, 275U, 55U), PadStrideInfo(2, 1, 0, 0)); - add_config(TensorShape(333U, 277U, 77U), TensorShape(3U, 3U, 77U), TensorShape(77U), TensorShape(111U, 138U, 77U), PadStrideInfo(3, 2, 1, 0)); - add_config(TensorShape(177U, 311U, 22U), TensorShape(3U, 3U, 22U), TensorShape(22U), TensorShape(177U, 156U, 22U), PadStrideInfo(1, 2, 1, 1)); - add_config(TensorShape(233U, 277U, 55U), TensorShape(3U, 3U, 55U), TensorShape(55U), TensorShape(231U, 138U, 55U), PadStrideInfo(1, 2, 0, 0)); - add_config(TensorShape(333U, 277U, 77U), TensorShape(3U, 3U, 77U), TensorShape(77U), TensorShape(166U, 93U, 77U), PadStrideInfo(2, 3, 0, 1)); - add_config(TensorShape(177U, 311U, 22U), TensorShape(3U, 3U, 22U), TensorShape(22U), TensorShape(89U, 311U, 22U), PadStrideInfo(2, 1, 1, 1)); - } -}; - -class SmallDepthwiseConvolutionDataset3x3 final : public DepthwiseConvolutionDataset -{ -public: - SmallDepthwiseConvolutionDataset3x3() - { - add_config(TensorShape(7U, 7U, 3U, 2U), TensorShape(3U, 3U, 3U), TensorShape(3U), TensorShape(5U, 5U, 3U, 2U), PadStrideInfo(1, 1, 0, 0)); - add_config(TensorShape(33U, 27U, 11U), TensorShape(3U, 3U, 11U), TensorShape(11U), TensorShape(11U, 14U, 11U), PadStrideInfo(3, 2, 1, 1)); - add_config(TensorShape(21U, 31U, 9U, 4U), TensorShape(3U, 3U, 9U), TensorShape(9U), TensorShape(21U, 15U, 9U, 4U), PadStrideInfo(1, 2, 1, 0)); - add_config(TensorShape(33U, 27U, 11U, 3U), TensorShape(3U, 3U, 11U), TensorShape(11U), TensorShape(31U, 14U, 11U, 3U), PadStrideInfo(1, 2, 0, 1)); - } -}; - -class LargeDepthwiseConvolutionDataset3x3 final : public 
DepthwiseConvolutionDataset -{ -public: - LargeDepthwiseConvolutionDataset3x3() - { - add_config(TensorShape(233U, 277U, 55U, 3U), TensorShape(3U, 3U, 55U), TensorShape(55U), TensorShape(116U, 275U, 55U, 3U), PadStrideInfo(2, 1, 0, 0)); - add_config(TensorShape(333U, 277U, 77U), TensorShape(3U, 3U, 77U), TensorShape(77U), TensorShape(111U, 138U, 77U), PadStrideInfo(3, 2, 1, 0)); - add_config(TensorShape(177U, 311U, 22U), TensorShape(3U, 3U, 22U), TensorShape(22U), TensorShape(177U, 156U, 22U), PadStrideInfo(1, 2, 1, 1)); - add_config(TensorShape(233U, 277U, 55U), TensorShape(3U, 3U, 55U), TensorShape(55U), TensorShape(231U, 138U, 55U), PadStrideInfo(1, 2, 0, 0)); - add_config(TensorShape(333U, 277U, 77U, 5U), TensorShape(3U, 3U, 77U), TensorShape(77U), TensorShape(166U, 93U, 77U, 5U), PadStrideInfo(2, 3, 0, 1)); - add_config(TensorShape(177U, 311U, 22U), TensorShape(3U, 3U, 22U), TensorShape(22U), TensorShape(89U, 311U, 22U), PadStrideInfo(2, 1, 1, 1)); - } -}; -} // namespace datasets -} // namespace test -} // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_DEPTHWISE_CONVOLUTION_DATASET */ diff --git a/tests/datasets/DepthwiseConvolutionLayerDataset.h b/tests/datasets/DepthwiseConvolutionLayerDataset.h new file mode 100644 index 0000000000..a2caba9b2c --- /dev/null +++ b/tests/datasets/DepthwiseConvolutionLayerDataset.h @@ -0,0 +1,187 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_TEST_DEPTHWISE_CONVOLUTION_DATASET +#define ARM_COMPUTE_TEST_DEPTHWISE_CONVOLUTION_DATASET + +#include "utils/TypePrinter.h" + +#include "arm_compute/core/TensorShape.h" +#include "arm_compute/core/Types.h" + +namespace arm_compute +{ +namespace test +{ +namespace datasets +{ +class DepthwiseConvolutionLayerDataset +{ +public: + using type = std::tuple; + + struct iterator + { + iterator(std::vector::const_iterator src_it, + std::vector::const_iterator weights_it, + std::vector::const_iterator biases_it, + std::vector::const_iterator dst_it, + std::vector::const_iterator infos_it) + : _src_it{ std::move(src_it) }, + _weights_it{ std::move(weights_it) }, + _biases_it{ std::move(biases_it) }, + _dst_it{ std::move(dst_it) }, + _infos_it{ std::move(infos_it) } + { + } + + std::string description() const + { + std::stringstream description; + description << "In=" << *_src_it << ":"; + description << "Weights=" << *_weights_it << ":"; + description << "Biases=" << *_biases_it << ":"; + description << "Out=" << *_dst_it << ":"; + description << "Info=" << *_infos_it; + return description.str(); + } + + DepthwiseConvolutionLayerDataset::type operator*() const + { + return std::make_tuple(*_src_it, *_weights_it, *_biases_it, *_dst_it, *_infos_it); + } + + iterator &operator++() + { + ++_src_it; + ++_weights_it; + ++_biases_it; + ++_dst_it; + ++_infos_it; + + return *this; + } + + private: + std::vector::const_iterator _src_it; + std::vector::const_iterator _weights_it; + std::vector::const_iterator _biases_it; + std::vector::const_iterator _dst_it; + std::vector::const_iterator _infos_it; + }; + + iterator begin() const + { + return iterator(_src_shapes.begin(), _weight_shapes.begin(), _biases_shapes.begin(), _dst_shapes.begin(), _infos.begin()); + } + + int size() const + { + return std::min(_src_shapes.size(), std::min(_weight_shapes.size(), std::min(_biases_shapes.size(), std::min(_dst_shapes.size(), _infos.size())))); + } + + void add_config(TensorShape src, TensorShape weights, TensorShape biases, TensorShape dst, PadStrideInfo info) + { + _src_shapes.emplace_back(std::move(src)); + _weight_shapes.emplace_back(std::move(weights)); + _biases_shapes.emplace_back(std::move(biases)); + _dst_shapes.emplace_back(std::move(dst)); + _infos.emplace_back(std::move(info)); + } + +protected: + DepthwiseConvolutionLayerDataset() = default; + DepthwiseConvolutionLayerDataset(DepthwiseConvolutionLayerDataset &&) = default; + +private: + std::vector _src_shapes{}; + std::vector _weight_shapes{}; + std::vector _biases_shapes{}; + std::vector _dst_shapes{}; + std::vector _infos{}; +}; +class SmallDepthwiseConvolutionLayerDataset final : public DepthwiseConvolutionLayerDataset +{ +public: + SmallDepthwiseConvolutionLayerDataset() + { + add_config(TensorShape(7U, 7U, 3U), TensorShape(3U, 3U, 3U), TensorShape(3U), TensorShape(5U, 5U, 3U), PadStrideInfo(1, 1, 0, 0)); + add_config(TensorShape(23U, 27U, 5U), TensorShape(3U, 5U, 5U), TensorShape(5U), TensorShape(11U, 23U, 5U), PadStrideInfo(2, 1, 0, 0)); + add_config(TensorShape(33U, 27U, 7U), TensorShape(7U, 3U, 7U), TensorShape(7U), TensorShape(10U, 13U, 7U), PadStrideInfo(3, 2, 1, 0)); + add_config(TensorShape(33U, 27U, 11U), TensorShape(3U, 3U, 11U), TensorShape(11U), TensorShape(31U, 14U, 11U), PadStrideInfo(1, 2, 0, 1)); + add_config(TensorShape(17U, 31U, 2U), TensorShape(5U, 9U, 2U), TensorShape(2U), TensorShape(15U, 13U, 2U), PadStrideInfo(1, 2, 1, 1)); + add_config(TensorShape(23U, 27U, 5U), TensorShape(11U, 3U, 5U), TensorShape(5U), 
TensorShape(13U, 13U, 5U), PadStrideInfo(1, 2, 0, 0)); + add_config(TensorShape(17U, 31U, 2U, 3U), TensorShape(5U, 9U, 2U), TensorShape(2U), TensorShape(15U, 13U, 2U, 3U), PadStrideInfo(1, 2, 1, 1)); + // Asymmetric padding + add_config(TensorShape(33U, 27U, 7U), TensorShape(5U, 7U, 7U), TensorShape(7U), TensorShape(11U, 12U, 7U), PadStrideInfo(3, 2, 1, 1, 2, 0, DimensionRoundingType::FLOOR)); + add_config(TensorShape(33U, 27U, 7U), TensorShape(5U, 7U, 7U), TensorShape(7U), TensorShape(11U, 12U, 7U), PadStrideInfo(3, 2, 1, 1, 0, 2, DimensionRoundingType::FLOOR)); + add_config(TensorShape(33U, 27U, 7U), TensorShape(5U, 7U, 7U), TensorShape(7U), TensorShape(11U, 12U, 7U), PadStrideInfo(3, 2, 2, 1, 2, 0, DimensionRoundingType::FLOOR)); + add_config(TensorShape(33U, 27U, 7U), TensorShape(5U, 7U, 7U), TensorShape(7U), TensorShape(11U, 12U, 7U), PadStrideInfo(3, 2, 1, 3, 0, 2, DimensionRoundingType::FLOOR)); + add_config(TensorShape(33U, 27U, 7U), TensorShape(5U, 7U, 7U), TensorShape(7U), TensorShape(10U, 11U, 7U), PadStrideInfo(3, 2, 1, 0, 1, 0, DimensionRoundingType::FLOOR)); + add_config(TensorShape(33U, 27U, 7U), TensorShape(5U, 7U, 7U), TensorShape(7U), TensorShape(10U, 11U, 7U), PadStrideInfo(3, 2, 0, 1, 0, 1, DimensionRoundingType::FLOOR)); + } +}; + +class LargeDepthwiseConvolutionLayerDataset final : public DepthwiseConvolutionLayerDataset +{ +public: + LargeDepthwiseConvolutionLayerDataset() + { + add_config(TensorShape(233U, 277U, 55U), TensorShape(3U, 3U, 55U), TensorShape(55U), TensorShape(116U, 275U, 55U), PadStrideInfo(2, 1, 0, 0)); + add_config(TensorShape(333U, 277U, 77U), TensorShape(3U, 3U, 77U), TensorShape(77U), TensorShape(111U, 138U, 77U), PadStrideInfo(3, 2, 1, 0)); + add_config(TensorShape(177U, 311U, 22U), TensorShape(3U, 3U, 22U), TensorShape(22U), TensorShape(177U, 156U, 22U), PadStrideInfo(1, 2, 1, 1)); + add_config(TensorShape(233U, 277U, 55U), TensorShape(3U, 3U, 55U), TensorShape(55U), TensorShape(231U, 138U, 55U), PadStrideInfo(1, 2, 0, 0)); + add_config(TensorShape(333U, 277U, 77U), TensorShape(3U, 3U, 77U), TensorShape(77U), TensorShape(166U, 93U, 77U), PadStrideInfo(2, 3, 0, 1)); + add_config(TensorShape(177U, 311U, 22U), TensorShape(3U, 3U, 22U), TensorShape(22U), TensorShape(89U, 311U, 22U), PadStrideInfo(2, 1, 1, 1)); + } +}; + +class SmallDepthwiseConvolutionLayerDataset3x3 final : public DepthwiseConvolutionLayerDataset +{ +public: + SmallDepthwiseConvolutionLayerDataset3x3() + { + add_config(TensorShape(7U, 7U, 3U, 2U), TensorShape(3U, 3U, 3U), TensorShape(3U), TensorShape(5U, 5U, 3U, 2U), PadStrideInfo(1, 1, 0, 0)); + add_config(TensorShape(33U, 27U, 11U), TensorShape(3U, 3U, 11U), TensorShape(11U), TensorShape(11U, 14U, 11U), PadStrideInfo(3, 2, 1, 1)); + add_config(TensorShape(21U, 31U, 9U, 4U), TensorShape(3U, 3U, 9U), TensorShape(9U), TensorShape(21U, 15U, 9U, 4U), PadStrideInfo(1, 2, 1, 0)); + add_config(TensorShape(33U, 27U, 11U, 3U), TensorShape(3U, 3U, 11U), TensorShape(11U), TensorShape(31U, 14U, 11U, 3U), PadStrideInfo(1, 2, 0, 1)); + } +}; + +class LargeDepthwiseConvolutionLayerDataset3x3 final : public DepthwiseConvolutionLayerDataset +{ +public: + LargeDepthwiseConvolutionLayerDataset3x3() + { + add_config(TensorShape(233U, 277U, 55U, 3U), TensorShape(3U, 3U, 55U), TensorShape(55U), TensorShape(116U, 275U, 55U, 3U), PadStrideInfo(2, 1, 0, 0)); + add_config(TensorShape(333U, 277U, 77U), TensorShape(3U, 3U, 77U), TensorShape(77U), TensorShape(111U, 138U, 77U), PadStrideInfo(3, 2, 1, 0)); + add_config(TensorShape(177U, 311U, 22U), 
TensorShape(3U, 3U, 22U), TensorShape(22U), TensorShape(177U, 156U, 22U), PadStrideInfo(1, 2, 1, 1)); + add_config(TensorShape(233U, 277U, 55U), TensorShape(3U, 3U, 55U), TensorShape(55U), TensorShape(231U, 138U, 55U), PadStrideInfo(1, 2, 0, 0)); + add_config(TensorShape(333U, 277U, 77U, 5U), TensorShape(3U, 3U, 77U), TensorShape(77U), TensorShape(166U, 93U, 77U, 5U), PadStrideInfo(2, 3, 0, 1)); + add_config(TensorShape(177U, 311U, 22U), TensorShape(3U, 3U, 22U), TensorShape(22U), TensorShape(89U, 311U, 22U), PadStrideInfo(2, 1, 1, 1)); + } +}; +} // namespace datasets +} // namespace test +} // namespace arm_compute +#endif /* ARM_COMPUTE_TEST_DEPTHWISE_CONVOLUTION_DATASET */ diff --git a/tests/datasets/MobileNetDepthwiseConvolutionDataset.h b/tests/datasets/MobileNetDepthwiseConvolutionDataset.h deleted file mode 100644 index 918815f41e..0000000000 --- a/tests/datasets/MobileNetDepthwiseConvolutionDataset.h +++ /dev/null @@ -1,58 +0,0 @@ -/* - * Copyright (c) 2017 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_TEST_MOBILENET_DEPTHWISE_CONVOLUTION_DATASET -#define ARM_COMPUTE_TEST_MOBILENET_DEPTHWISE_CONVOLUTION_DATASET - -#include "tests/datasets/DepthwiseConvolutionDataset.h" - -#include "utils/TypePrinter.h" - -#include "arm_compute/core/TensorShape.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -namespace test -{ -namespace datasets -{ -class MobileNetDepthwiseConvolutionDataset final : public DepthwiseConvolutionDataset -{ -public: - MobileNetDepthwiseConvolutionDataset() - { - add_config(TensorShape(7U, 7U, 1024U), TensorShape(3U, 3U, 1024U), TensorShape(1024U), TensorShape(3U, 3U, 1024U), PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::FLOOR)); - add_config(TensorShape(14U, 14U, 512U), TensorShape(3U, 3U, 512U), TensorShape(512U), TensorShape(7U, 7U, 512U), PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::FLOOR)); - add_config(TensorShape(28U, 28U, 256U), TensorShape(3U, 3U, 256U), TensorShape(256U), TensorShape(14U, 14U, 256U), PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::FLOOR)); - add_config(TensorShape(28U, 28U, 256U), TensorShape(3U, 3U, 256U), TensorShape(256U), TensorShape(28U, 28U, 256U), PadStrideInfo(1, 1, 1, 1)); - add_config(TensorShape(56U, 56U, 128U), TensorShape(3U, 3U, 128U), TensorShape(128U), TensorShape(28U, 28U, 128U), PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::FLOOR)); - add_config(TensorShape(56U, 56U, 128U), TensorShape(3U, 3U, 128U), TensorShape(128U), TensorShape(56U, 56U, 128U), PadStrideInfo(1, 1, 1, 1)); - add_config(TensorShape(112U, 112U, 64U), TensorShape(3U, 3U, 64U), TensorShape(64U), TensorShape(56U, 56U, 64U), PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::FLOOR)); - add_config(TensorShape(112U, 112U, 32U), TensorShape(3U, 3U, 32U), TensorShape(32U), TensorShape(112U, 112U, 32U), PadStrideInfo(1, 1, 1, 1)); - } -}; -} // namespace datasets -} // namespace test -} // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_MOBILENET_DEPTHWISE_CONVOLUTION_DATASET */ diff --git a/tests/datasets/MobileNetDepthwiseConvolutionLayerDataset.h b/tests/datasets/MobileNetDepthwiseConvolutionLayerDataset.h new file mode 100644 index 0000000000..5531a08d8e --- /dev/null +++ b/tests/datasets/MobileNetDepthwiseConvolutionLayerDataset.h @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_TEST_MOBILENET_DEPTHWISE_CONVOLUTION_DATASET +#define ARM_COMPUTE_TEST_MOBILENET_DEPTHWISE_CONVOLUTION_DATASET + +#include "tests/datasets/DepthwiseConvolutionLayerDataset.h" + +#include "utils/TypePrinter.h" + +#include "arm_compute/core/TensorShape.h" +#include "arm_compute/core/Types.h" + +namespace arm_compute +{ +namespace test +{ +namespace datasets +{ +class MobileNetDepthwiseConvolutionLayerDataset final : public DepthwiseConvolutionLayerDataset +{ +public: + MobileNetDepthwiseConvolutionLayerDataset() + { + add_config(TensorShape(7U, 7U, 1024U), TensorShape(3U, 3U, 1024U), TensorShape(1024U), TensorShape(3U, 3U, 1024U), PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::FLOOR)); + add_config(TensorShape(14U, 14U, 512U), TensorShape(3U, 3U, 512U), TensorShape(512U), TensorShape(7U, 7U, 512U), PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::FLOOR)); + add_config(TensorShape(28U, 28U, 256U), TensorShape(3U, 3U, 256U), TensorShape(256U), TensorShape(14U, 14U, 256U), PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::FLOOR)); + add_config(TensorShape(28U, 28U, 256U), TensorShape(3U, 3U, 256U), TensorShape(256U), TensorShape(28U, 28U, 256U), PadStrideInfo(1, 1, 1, 1)); + add_config(TensorShape(56U, 56U, 128U), TensorShape(3U, 3U, 128U), TensorShape(128U), TensorShape(28U, 28U, 128U), PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::FLOOR)); + add_config(TensorShape(56U, 56U, 128U), TensorShape(3U, 3U, 128U), TensorShape(128U), TensorShape(56U, 56U, 128U), PadStrideInfo(1, 1, 1, 1)); + add_config(TensorShape(112U, 112U, 64U), TensorShape(3U, 3U, 64U), TensorShape(64U), TensorShape(56U, 56U, 64U), PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::FLOOR)); + add_config(TensorShape(112U, 112U, 32U), TensorShape(3U, 3U, 32U), TensorShape(32U), TensorShape(112U, 112U, 32U), PadStrideInfo(1, 1, 1, 1)); + } +}; +} // namespace datasets +} // namespace test +} // namespace arm_compute +#endif /* ARM_COMPUTE_TEST_MOBILENET_DEPTHWISE_CONVOLUTION_DATASET */ diff --git a/tests/datasets/ShapeDatasets.h b/tests/datasets/ShapeDatasets.h index 173ee74958..02a71aa7b5 100644 --- a/tests/datasets/ShapeDatasets.h +++ b/tests/datasets/ShapeDatasets.h @@ -269,11 +269,11 @@ public: } }; -/** Data set containing 2D tensor shapes for DepthConcatenate. */ -class DepthConcatenateShapes final : public ShapeDataset +/** Data set containing 2D tensor shapes for DepthConcatenateLayer. 
*/ +class DepthConcatenateLayerShapes final : public ShapeDataset { public: - DepthConcatenateShapes() + DepthConcatenateLayerShapes() : ShapeDataset("Shape", { TensorShape{ 322U, 243U }, diff --git a/tests/networks/MobileNetNetwork.h b/tests/networks/MobileNetNetwork.h index 74dce0e348..1bc8ad9a0c 100644 --- a/tests/networks/MobileNetNetwork.h +++ b/tests/networks/MobileNetNetwork.h @@ -47,7 +47,7 @@ template class MobileNetNetwork @@ -279,9 +279,9 @@ private: ConvolutionLayerFunction conv3x3{}; ActivationLayerFunction conv3x3_act{}; - std::array act{ {} }; - std::array conv1x1{ {} }; - std::array dwc3x3{ {} }; + std::array act{ {} }; + std::array conv1x1{ {} }; + std::array dwc3x3{ {} }; PoolingLayerFunction pool{}; ActivationLayerFunction logistic{}; ReshapeFunction reshape{}; diff --git a/tests/validation/CL/DepthConcatenateLayer.cpp b/tests/validation/CL/DepthConcatenateLayer.cpp index 19a8b369ce..02901371bb 100644 --- a/tests/validation/CL/DepthConcatenateLayer.cpp +++ b/tests/validation/CL/DepthConcatenateLayer.cpp @@ -24,7 +24,7 @@ #include "arm_compute/core/Types.h" #include "arm_compute/runtime/CL/CLTensor.h" #include "arm_compute/runtime/CL/CLTensorAllocator.h" -#include "arm_compute/runtime/CL/functions/CLDepthConcatenate.h" +#include "arm_compute/runtime/CL/functions/CLDepthConcatenateLayer.h" #include "tests/CL/CLAccessor.h" #include "tests/datasets/ShapeDatasets.h" #include "tests/framework/Asserts.h" @@ -45,7 +45,7 @@ TEST_SUITE(DepthConcatenateLayer) //TODO(COMPMID-415): Add configuration test? template -using CLDepthConcatenateLayerFixture = DepthConcatenateValidationFixture; +using CLDepthConcatenateLayerFixture = DepthConcatenateLayerValidationFixture; TEST_SUITE(Float) TEST_SUITE(FP16) @@ -70,7 +70,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthConcatenateLayerFixture, framewor // Validate output validate(CLAccessor(_target), _reference); } -FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthConcatenateLayerFixture, framework::DatasetMode::NIGHTLY, combine(datasets::DepthConcatenateShapes(), framework::dataset::make("DataType", +FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthConcatenateLayerFixture, framework::DatasetMode::NIGHTLY, combine(datasets::DepthConcatenateLayerShapes(), framework::dataset::make("DataType", DataType::F32))) { // Validate output @@ -88,7 +88,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthConcatenateLayerFixture, framewo // Validate output validate(CLAccessor(_target), _reference); } -FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthConcatenateLayerFixture, framework::DatasetMode::NIGHTLY, combine(datasets::DepthConcatenateShapes(), +FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthConcatenateLayerFixture, framework::DatasetMode::NIGHTLY, combine(datasets::DepthConcatenateLayerShapes(), framework::dataset::make("DataType", DataType::QS8))) { @@ -105,7 +105,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthConcatenateLayerFixture, framew // Validate output validate(CLAccessor(_target), _reference); } -FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthConcatenateLayerFixture, framework::DatasetMode::NIGHTLY, combine(datasets::DepthConcatenateShapes(), +FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthConcatenateLayerFixture, framework::DatasetMode::NIGHTLY, combine(datasets::DepthConcatenateLayerShapes(), framework::dataset::make("DataType", DataType::QS16))) { diff --git a/tests/validation/CL/DepthConvert.cpp b/tests/validation/CL/DepthConvert.cpp deleted file mode 100644 index 57669f0a52..0000000000 --- a/tests/validation/CL/DepthConvert.cpp +++ /dev/null @@ -1,484 +0,0 @@ -/* - * Copyright (c) 2017 ARM 
Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/CL/CLTensor.h"
-#include "arm_compute/runtime/CL/CLTensorAllocator.h"
-#include "arm_compute/runtime/CL/functions/CLDepthConvert.h"
-#include "tests/CL/CLAccessor.h"
-#include "tests/PaddingCalculator.h"
-#include "tests/datasets/ConvertPolicyDataset.h"
-#include "tests/datasets/ShapeDatasets.h"
-#include "tests/framework/Asserts.h"
-#include "tests/framework/Macros.h"
-#include "tests/framework/datasets/Datasets.h"
-#include "tests/validation/Validation.h"
-#include "tests/validation/fixtures/DepthConvertFixture.h"
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-namespace
-{
-/** Input data sets **/
-const auto DepthConvertU8toU16Dataset = combine(framework::dataset::make("DataType", DataType::U8), framework::dataset::make("DataType", DataType::U16));
-const auto DepthConvertU8toS16Dataset = combine(framework::dataset::make("DataType", DataType::U8), framework::dataset::make("DataType", DataType::S16));
-const auto DepthConvertU8toS32Dataset = combine(framework::dataset::make("DataType", DataType::U8), framework::dataset::make("DataType", DataType::S32));
-const auto DepthConvertU16toU8Dataset = combine(framework::dataset::make("DataType", DataType::U16), framework::dataset::make("DataType", DataType::U8));
-const auto DepthConvertU16toU32Dataset = combine(framework::dataset::make("DataType", DataType::U16), framework::dataset::make("DataType", DataType::U32));
-const auto DepthConvertS16toU8Dataset = combine(framework::dataset::make("DataType", DataType::S16), framework::dataset::make("DataType", DataType::U8));
-const auto DepthConvertS16toS32Dataset = combine(framework::dataset::make("DataType", DataType::S16), framework::dataset::make("DataType", DataType::S32));
-const auto DepthConvertQS8toFP32Dataset = combine(framework::dataset::make("DataType", DataType::QS8), framework::dataset::make("DataType", DataType::F32));
-const auto DepthConvertQS16toFP32Dataset = combine(framework::dataset::make("DataType", DataType::QS16), framework::dataset::make("DataType", DataType::F32));
-const auto DepthConvertFP32toQS8Dataset = combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::QS8));
-const auto DepthConvertFP32toQS16Dataset = combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::QS16));
DataType::QS16)); -const auto DepthConvertShiftDataset = framework::dataset::make("Shift", 0, 7); -const auto DepthConvertFixedPointQuantizedDataset = framework::dataset::make("FractionalBits", 1, 7); -} // namespace - -TEST_SUITE(CL) -TEST_SUITE(DepthConvert) -template -using CLDepthConvertToU16Fixture = DepthConvertValidationFixture; -template -using CLDepthConvertToS16Fixture = DepthConvertValidationFixture; -template -using CLDepthConvertToS32Fixture = DepthConvertValidationFixture; -template -using CLDepthConvertToU8Fixture = DepthConvertValidationFixture; -template -using CLDepthConvertToU32Fixture = DepthConvertValidationFixture; -template -using CLDepthConvertToFP32FixedPointFixture = DepthConvertValidationFractionalBitsFixture; -template -using CLDepthConvertToQS8FixedPointFixture = DepthConvertValidationFractionalBitsFixture; -template -using CLDepthConvertToQS16FixedPointFixture = DepthConvertValidationFractionalBitsFixture; - -TEST_SUITE(U8_to_U16) -DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertShiftDataset), - shape, policy, shift) -{ - int fixed_point_position = 0; - - // Create tensors - CLTensor src = create_tensor(shape, DataType::U8, 1, fixed_point_position); - CLTensor dst = create_tensor(shape, DataType::U16, 1, fixed_point_position); - - // Create and Configure function - CLDepthConvert depth_convert; - depth_convert.configure(&src, &dst, policy, shift); - - // Validate valid region - const ValidRegion valid_region = shape_to_valid_region(shape); - validate(dst.info()->valid_region(), valid_region); - - // Validate padding - const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding(); - validate(src.info()->padding(), padding); - validate(dst.info()->padding(), padding); -} - -FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthConvertToU16Fixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertU8toU16Dataset), - framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertShiftDataset)) -{ - // Validate output - validate(CLAccessor(_target), _reference); -} - -FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthConvertToU16Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertU8toU16Dataset), - framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertShiftDataset)) -{ - // Validate output - validate(CLAccessor(_target), _reference); -} -TEST_SUITE_END() - -TEST_SUITE(U8_to_S16) -DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertShiftDataset), - shape, policy, shift) -{ - int fixed_point_position = 0; - - // Create tensors - CLTensor src = create_tensor(shape, DataType::U8, 1, fixed_point_position); - CLTensor dst = create_tensor(shape, DataType::S16, 1, fixed_point_position); - - // Create and Configure function - CLDepthConvert depth_convert; - depth_convert.configure(&src, &dst, policy, shift); - - // Validate valid region - const ValidRegion valid_region = shape_to_valid_region(shape); - validate(dst.info()->valid_region(), valid_region); - - // Validate 
padding - const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding(); - validate(src.info()->padding(), padding); - validate(dst.info()->padding(), padding); -} - -FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthConvertToS16Fixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertU8toS16Dataset), - framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertShiftDataset)) -{ - // Validate output - validate(CLAccessor(_target), _reference); -} - -FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthConvertToS16Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertU8toS16Dataset), - framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertShiftDataset)) -{ - // Validate output - validate(CLAccessor(_target), _reference); -} -TEST_SUITE_END() -TEST_SUITE(U8_to_S32) -DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertShiftDataset), - shape, policy, shift) -{ - int fixed_point_position = 0; - - // Create tensors - CLTensor src = create_tensor(shape, DataType::U8, 1, fixed_point_position); - CLTensor dst = create_tensor(shape, DataType::S32, 1, fixed_point_position); - - // Create and Configure function - CLDepthConvert depth_convert; - depth_convert.configure(&src, &dst, policy, shift); - - // Validate valid region - const ValidRegion valid_region = shape_to_valid_region(shape); - validate(dst.info()->valid_region(), valid_region); - - // Validate padding - const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding(); - validate(src.info()->padding(), padding); - validate(dst.info()->padding(), padding); -} - -FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthConvertToS32Fixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertU8toS32Dataset), - framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertShiftDataset)) -{ - // Validate output - validate(CLAccessor(_target), _reference); -} - -FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthConvertToS32Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertU8toS32Dataset), - framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertShiftDataset)) -{ - // Validate output - validate(CLAccessor(_target), _reference); -} -TEST_SUITE_END() - -TEST_SUITE(U16_to_U8) -DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertShiftDataset), - shape, policy, shift) -{ - int fixed_point_position = 0; - - // Create tensors - CLTensor src = create_tensor(shape, DataType::U16, 1, fixed_point_position); - CLTensor dst = create_tensor(shape, DataType::U8, 1, fixed_point_position); - - // Create and Configure function - CLDepthConvert depth_convert; - depth_convert.configure(&src, &dst, policy, shift); - - // Validate valid region - const ValidRegion valid_region = shape_to_valid_region(shape); - validate(dst.info()->valid_region(), valid_region); - - // Validate padding - 
const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding(); - validate(src.info()->padding(), padding); - validate(dst.info()->padding(), padding); -} - -FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthConvertToU8Fixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertU16toU8Dataset), - framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertShiftDataset)) -{ - // Validate output - validate(CLAccessor(_target), _reference); -} -FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthConvertToU8Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertU16toU8Dataset), - framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertShiftDataset)) -{ - // Validate output - validate(CLAccessor(_target), _reference); -} -TEST_SUITE_END() - -TEST_SUITE(U16_to_U32) -DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertShiftDataset), - shape, policy, shift) -{ - int fixed_point_position = 0; - - // Create tensors - CLTensor src = create_tensor(shape, DataType::U16, 1, fixed_point_position); - CLTensor dst = create_tensor(shape, DataType::U32, 1, fixed_point_position); - - // Create and Configure function - CLDepthConvert depth_convert; - depth_convert.configure(&src, &dst, policy, shift); - - // Validate valid region - const ValidRegion valid_region = shape_to_valid_region(shape); - validate(dst.info()->valid_region(), valid_region); - - // Validate padding - const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding(); - validate(src.info()->padding(), padding); - validate(dst.info()->padding(), padding); -} - -FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthConvertToU32Fixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertU16toU32Dataset), - framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertShiftDataset)) -{ - // Validate output - validate(CLAccessor(_target), _reference); -} -FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthConvertToU32Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertU16toU32Dataset), - framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertShiftDataset)) -{ - // Validate output - validate(CLAccessor(_target), _reference); -} -TEST_SUITE_END() - -TEST_SUITE(S16_to_U8) -DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertShiftDataset), - shape, policy, shift) -{ - int fixed_point_position = 0; - - // Create tensors - CLTensor src = create_tensor(shape, DataType::S16, 1, fixed_point_position); - CLTensor dst = create_tensor(shape, DataType::U8, 1, fixed_point_position); - - // Create and Configure function - CLDepthConvert depth_convert; - depth_convert.configure(&src, &dst, policy, shift); - - // Validate valid region - const ValidRegion valid_region = shape_to_valid_region(shape); - validate(dst.info()->valid_region(), valid_region); - - // Validate padding - const 
PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding(); - validate(src.info()->padding(), padding); - validate(dst.info()->padding(), padding); -} - -FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthConvertToU8Fixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertS16toU8Dataset), - framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertShiftDataset)) -{ - // Validate output - validate(CLAccessor(_target), _reference); -} -FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthConvertToU8Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertS16toU8Dataset), - framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertShiftDataset)) -{ - // Validate output - validate(CLAccessor(_target), _reference); -} -TEST_SUITE_END() - -TEST_SUITE(S16_to_S32) -DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertShiftDataset), - shape, policy, shift) -{ - int fixed_point_position = 0; - - // Create tensors - CLTensor src = create_tensor(shape, DataType::S16, 1, fixed_point_position); - CLTensor dst = create_tensor(shape, DataType::S32, 1, fixed_point_position); - - // Create and Configure function - CLDepthConvert depth_convert; - depth_convert.configure(&src, &dst, policy, shift); - - // Validate valid region - const ValidRegion valid_region = shape_to_valid_region(shape); - validate(dst.info()->valid_region(), valid_region); - - // Validate padding - const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding(); - validate(src.info()->padding(), padding); - validate(dst.info()->padding(), padding); -} - -FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthConvertToS32Fixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertS16toS32Dataset), - framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertShiftDataset)) -{ - // Validate output - validate(CLAccessor(_target), _reference); -} -FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthConvertToS32Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertS16toS32Dataset), - framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertShiftDataset)) -{ - // Validate output - validate(CLAccessor(_target), _reference); -} -TEST_SUITE_END() - -TEST_SUITE(Quantized_to_FP32) -DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("DataType", { DataType::QS8, DataType::QS16 })), - framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertFixedPointQuantizedDataset), - shape, dt, policy, fixed_point_position) -{ - int shift = 0; - - // Create tensors - CLTensor src = create_tensor(shape, dt, 1, fixed_point_position); - CLTensor dst = create_tensor(shape, DataType::F32, 1, fixed_point_position); - - // Create and Configure function - CLDepthConvert depth_convert; - depth_convert.configure(&src, &dst, policy, shift); - - // Validate valid region - const ValidRegion valid_region = 
shape_to_valid_region(shape); - validate(dst.info()->valid_region(), valid_region); - - // Validate padding - const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding(); - validate(src.info()->padding(), padding); - validate(dst.info()->padding(), padding); -} -FIXTURE_DATA_TEST_CASE(RunSmallQS8, CLDepthConvertToFP32FixedPointFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertQS8toFP32Dataset), - framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertFixedPointQuantizedDataset)) -{ - // Validate output - validate(CLAccessor(_target), _reference); -} -FIXTURE_DATA_TEST_CASE(RunSmallQS16, CLDepthConvertToFP32FixedPointFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertQS16toFP32Dataset), - framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertFixedPointQuantizedDataset)) -{ - // Validate output - validate(CLAccessor(_target), _reference); -} -FIXTURE_DATA_TEST_CASE(RunLargeQS8, CLDepthConvertToFP32FixedPointFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertQS8toFP32Dataset), - framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertFixedPointQuantizedDataset)) -{ - // Validate output - validate(CLAccessor(_target), _reference); -} -FIXTURE_DATA_TEST_CASE(RunLargeQS16, CLDepthConvertToFP32FixedPointFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertQS16toFP32Dataset), - framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertFixedPointQuantizedDataset)) -{ - // Validate output - validate(CLAccessor(_target), _reference); -} -TEST_SUITE_END() - -TEST_SUITE(FP32_to_Quantized) -DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("DataType", { DataType::QS8, DataType::QS16 })), - framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertFixedPointQuantizedDataset), - shape, dt, policy, fixed_point_position) -{ - int shift = 0; - - // Create tensors - CLTensor src = create_tensor(shape, DataType::F32, 1, fixed_point_position); - CLTensor dst = create_tensor(shape, dt, 1, fixed_point_position); - - // Create and Configure function - CLDepthConvert depth_convert; - depth_convert.configure(&src, &dst, policy, shift); - - // Validate valid region - const ValidRegion valid_region = shape_to_valid_region(shape); - validate(dst.info()->valid_region(), valid_region); - - // Validate padding - const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding(); - validate(src.info()->padding(), padding); - validate(dst.info()->padding(), padding); -} -FIXTURE_DATA_TEST_CASE(RunSmallQS8, CLDepthConvertToQS8FixedPointFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertFP32toQS8Dataset), - framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertFixedPointQuantizedDataset)) -{ - // Validate output - validate(CLAccessor(_target), _reference); -} -FIXTURE_DATA_TEST_CASE(RunSmallQS16, CLDepthConvertToQS16FixedPointFixture, framework::DatasetMode::PRECOMMIT, 
combine(combine(combine(datasets::SmallShapes(), DepthConvertFP32toQS16Dataset), - framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertFixedPointQuantizedDataset)) -{ - // Validate output - validate(CLAccessor(_target), _reference); -} -FIXTURE_DATA_TEST_CASE(RunLargeQS8, CLDepthConvertToQS8FixedPointFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertFP32toQS8Dataset), - framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertFixedPointQuantizedDataset)) -{ - // Validate output - validate(CLAccessor(_target), _reference); -} -FIXTURE_DATA_TEST_CASE(RunLargeQS16, CLDepthConvertToQS16FixedPointFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertFP32toQS16Dataset), - framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertFixedPointQuantizedDataset)) -{ - // Validate output - validate(CLAccessor(_target), _reference); -} -TEST_SUITE_END() - -TEST_SUITE_END() -TEST_SUITE_END() -} // namespace validation -} // namespace test -} // namespace arm_compute diff --git a/tests/validation/CL/DepthConvertLayer.cpp b/tests/validation/CL/DepthConvertLayer.cpp new file mode 100644 index 0000000000..9c6cc46ca8 --- /dev/null +++ b/tests/validation/CL/DepthConvertLayer.cpp @@ -0,0 +1,492 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONCLCTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/CL/CLTensorAllocator.h"
+#include "arm_compute/runtime/CL/functions/CLDepthConvertLayer.h"
+#include "tests/CL/CLAccessor.h"
+#include "tests/PaddingCalculator.h"
+#include "tests/datasets/ConvertPolicyDataset.h"
+#include "tests/datasets/ShapeDatasets.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/fixtures/DepthConvertLayerFixture.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace
+{
+/** Input data sets **/
+const auto DepthConvertLayerU8toU16Dataset   = combine(framework::dataset::make("DataType", DataType::U8), framework::dataset::make("DataType", DataType::U16));
+const auto DepthConvertLayerU8toS16Dataset   = combine(framework::dataset::make("DataType", DataType::U8), framework::dataset::make("DataType", DataType::S16));
+const auto DepthConvertLayerU8toS32Dataset   = combine(framework::dataset::make("DataType", DataType::U8), framework::dataset::make("DataType", DataType::S32));
+const auto DepthConvertLayerU16toU8Dataset   = combine(framework::dataset::make("DataType", DataType::U16), framework::dataset::make("DataType", DataType::U8));
+const auto DepthConvertLayerU16toU32Dataset  = combine(framework::dataset::make("DataType", DataType::U16), framework::dataset::make("DataType", DataType::U32));
+const auto DepthConvertLayerS16toU8Dataset   = combine(framework::dataset::make("DataType", DataType::S16), framework::dataset::make("DataType", DataType::U8));
+const auto DepthConvertLayerS16toS32Dataset  = combine(framework::dataset::make("DataType", DataType::S16), framework::dataset::make("DataType", DataType::S32));
+const auto DepthConvertLayerQS8toFP32Dataset = combine(framework::dataset::make("DataType", DataType::QS8), framework::dataset::make("DataType", DataType::F32));
+const auto DepthConvertLayerQS16toFP32Dataset = combine(framework::dataset::make("DataType", DataType::QS16), framework::dataset::make("DataType", DataType::F32));
+const auto DepthConvertLayerFP32toQS8Dataset  = combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::QS8));
+const auto DepthConvertLayerFP32toQS16Dataset = combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::QS16));
+const auto DepthConvertLayerShiftDataset               = framework::dataset::make("Shift", 0, 7);
+const auto DepthConvertLayerFixedPointQuantizedDataset = framework::dataset::make("FractionalBits", 1, 7);
+} // namespace
+
+TEST_SUITE(CL)
+TEST_SUITE(DepthConvertLayer)
+template <typename T>
+using CLDepthConvertLayerToU16Fixture = DepthConvertLayerValidationFixture;
+template <typename T>
+using CLDepthConvertLayerToS16Fixture = DepthConvertLayerValidationFixture;
+template <typename T>
+using CLDepthConvertLayerToS32Fixture = DepthConvertLayerValidationFixture;
+template <typename T>
+using CLDepthConvertLayerToU8Fixture = DepthConvertLayerValidationFixture;
+template <typename T>
+using CLDepthConvertLayerToU32Fixture = DepthConvertLayerValidationFixture;
+template <typename T>
+using CLDepthConvertLayerToFP32FixedPointFixture = DepthConvertLayerValidationFractionalBitsFixture;
+template <typename T>
+using CLDepthConvertLayerToQS8FixedPointFixture = DepthConvertLayerValidationFractionalBitsFixture;
+template <typename T>
+using CLDepthConvertLayerToQS16FixedPointFixture = DepthConvertLayerValidationFractionalBitsFixture;
+
+TEST_SUITE(U8_to_U16)
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+               DepthConvertLayerShiftDataset),
+               shape, policy, shift)
+{
+    int fixed_point_position = 0;
+
+    // Create tensors
+    CLTensor src = create_tensor<CLTensor>(shape, DataType::U8, 1, fixed_point_position);
+    CLTensor dst = create_tensor<CLTensor>(shape, DataType::U16, 1, fixed_point_position);
+
+    // Create and Configure function
+    CLDepthConvertLayer depth_convert;
+    depth_convert.configure(&src, &dst, policy, shift);
+
+    // Validate valid region
+    const ValidRegion valid_region = shape_to_valid_region(shape);
+    validate(dst.info()->valid_region(), valid_region);
+
+    // Validate padding
+    const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding();
+    validate(src.info()->padding(), padding);
+    validate(dst.info()->padding(), padding);
+}
+
+FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthConvertLayerToU16Fixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertLayerU8toU16Dataset),
+                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                       DepthConvertLayerShiftDataset))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthConvertLayerToU16Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertLayerU8toU16Dataset),
+                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                       DepthConvertLayerShiftDataset))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+TEST_SUITE_END()
+
+TEST_SUITE(U8_to_S16)
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+               DepthConvertLayerShiftDataset),
+               shape, policy, shift)
+{
+    int fixed_point_position = 0;
+
+    // Create tensors
+    CLTensor src = create_tensor<CLTensor>(shape, DataType::U8, 1, fixed_point_position);
+    CLTensor dst = create_tensor<CLTensor>(shape, DataType::S16, 1, fixed_point_position);
+
+    // Create and Configure function
+    CLDepthConvertLayer depth_convert;
+    depth_convert.configure(&src, &dst, policy, shift);
+
+    // Validate valid region
+    const ValidRegion valid_region = shape_to_valid_region(shape);
+    validate(dst.info()->valid_region(), valid_region);
+
+    // Validate padding
+    const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding();
+    validate(src.info()->padding(), padding);
+    validate(dst.info()->padding(), padding);
+}
+
+FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthConvertLayerToS16Fixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertLayerU8toS16Dataset),
+                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                       DepthConvertLayerShiftDataset))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthConvertLayerToS16Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertLayerU8toS16Dataset),
+                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                       DepthConvertLayerShiftDataset))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+TEST_SUITE_END()
+TEST_SUITE(U8_to_S32)
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+               DepthConvertLayerShiftDataset),
+               shape, policy, shift)
+{
+    int fixed_point_position = 0;
+
+    // Create tensors
+    CLTensor src = create_tensor<CLTensor>(shape, DataType::U8, 1, fixed_point_position);
+    CLTensor dst = create_tensor<CLTensor>(shape, DataType::S32, 1, fixed_point_position);
+
+    // Create and Configure function
+    CLDepthConvertLayer depth_convert;
+    depth_convert.configure(&src, &dst, policy, shift);
+
+    // Validate valid region
+    const ValidRegion valid_region = shape_to_valid_region(shape);
+    validate(dst.info()->valid_region(), valid_region);
+
+    // Validate padding
+    const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding();
+    validate(src.info()->padding(), padding);
+    validate(dst.info()->padding(), padding);
+}
+
+FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthConvertLayerToS32Fixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertLayerU8toS32Dataset),
+                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                       DepthConvertLayerShiftDataset))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthConvertLayerToS32Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertLayerU8toS32Dataset),
+                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                       DepthConvertLayerShiftDataset))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+TEST_SUITE_END()
+
+TEST_SUITE(U16_to_U8)
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+               DepthConvertLayerShiftDataset),
+               shape, policy, shift)
+{
+    int fixed_point_position = 0;
+
+    // Create tensors
+    CLTensor src = create_tensor<CLTensor>(shape, DataType::U16, 1, fixed_point_position);
+    CLTensor dst = create_tensor<CLTensor>(shape, DataType::U8, 1, fixed_point_position);
+
+    // Create and Configure function
+    CLDepthConvertLayer depth_convert;
+    depth_convert.configure(&src, &dst, policy, shift);
+
+    // Validate valid region
+    const ValidRegion valid_region = shape_to_valid_region(shape);
+    validate(dst.info()->valid_region(), valid_region);
+
+    // Validate padding
+    const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding();
+    validate(src.info()->padding(), padding);
+    validate(dst.info()->padding(), padding);
+}
+
+FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthConvertLayerToU8Fixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertLayerU16toU8Dataset),
+                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                       DepthConvertLayerShiftDataset))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthConvertLayerToU8Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertLayerU16toU8Dataset),
+                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                       DepthConvertLayerShiftDataset))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+TEST_SUITE_END()
+
+TEST_SUITE(U16_to_U32)
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+               DepthConvertLayerShiftDataset),
+               shape, policy, shift)
+{
+    int fixed_point_position = 0;
+
+    // Create tensors
+    CLTensor src = create_tensor<CLTensor>(shape, DataType::U16, 1, fixed_point_position);
+    CLTensor dst = create_tensor<CLTensor>(shape, DataType::U32, 1, fixed_point_position);
+
+    // Create and Configure function
+    CLDepthConvertLayer depth_convert;
+    depth_convert.configure(&src, &dst, policy, shift);
+
+    // Validate valid region
+    const ValidRegion valid_region = shape_to_valid_region(shape);
+    validate(dst.info()->valid_region(), valid_region);
+
+    // Validate padding
+    const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding();
+    validate(src.info()->padding(), padding);
+    validate(dst.info()->padding(), padding);
+}
+
+FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthConvertLayerToU32Fixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertLayerU16toU32Dataset),
+                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                       DepthConvertLayerShiftDataset))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthConvertLayerToU32Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertLayerU16toU32Dataset),
+                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                       DepthConvertLayerShiftDataset))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+TEST_SUITE_END()
+
+TEST_SUITE(S16_to_U8)
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+               DepthConvertLayerShiftDataset),
+               shape, policy, shift)
+{
+    int fixed_point_position = 0;
+
+    // Create tensors
+    CLTensor src = create_tensor<CLTensor>(shape, DataType::S16, 1, fixed_point_position);
+    CLTensor dst = create_tensor<CLTensor>(shape, DataType::U8, 1, fixed_point_position);
+
+    // Create and Configure function
+    CLDepthConvertLayer depth_convert;
+    depth_convert.configure(&src, &dst, policy, shift);
+
+    // Validate valid region
+    const ValidRegion valid_region = shape_to_valid_region(shape);
+    validate(dst.info()->valid_region(), valid_region);
+
+    // Validate padding
+    const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding();
+    validate(src.info()->padding(), padding);
+    validate(dst.info()->padding(), padding);
+}
+
+FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthConvertLayerToU8Fixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertLayerS16toU8Dataset),
+                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                       DepthConvertLayerShiftDataset))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthConvertLayerToU8Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertLayerS16toU8Dataset),
+                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                       DepthConvertLayerShiftDataset))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+TEST_SUITE_END()
+
+TEST_SUITE(S16_to_S32)
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+               DepthConvertLayerShiftDataset),
+               shape, policy, shift)
+{
+    int fixed_point_position = 0;
+
+    // Create tensors
+    CLTensor src = create_tensor<CLTensor>(shape, DataType::S16, 1, fixed_point_position);
+    CLTensor dst = create_tensor<CLTensor>(shape, DataType::S32, 1, fixed_point_position);
+
+    // Create and Configure function
+    CLDepthConvertLayer depth_convert;
+    depth_convert.configure(&src, &dst, policy, shift);
+
+    // Validate valid region
+    const ValidRegion valid_region = shape_to_valid_region(shape);
+    validate(dst.info()->valid_region(), valid_region);
+
+    // Validate padding
+    const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding();
+    validate(src.info()->padding(), padding);
+    validate(dst.info()->padding(), padding);
+}
+
+FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthConvertLayerToS32Fixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertLayerS16toS32Dataset),
+                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                       DepthConvertLayerShiftDataset))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthConvertLayerToS32Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertLayerS16toS32Dataset),
+                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                       DepthConvertLayerShiftDataset))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+TEST_SUITE_END()
+
+TEST_SUITE(Quantized_to_FP32)
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("DataType", { DataType::QS8, DataType::QS16 })),
+               framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+               DepthConvertLayerFixedPointQuantizedDataset),
+               shape, dt, policy, fixed_point_position)
+{
+    int shift = 0;
+
+    // Create tensors
+    CLTensor src = create_tensor<CLTensor>(shape, dt, 1, fixed_point_position);
+    CLTensor dst = create_tensor<CLTensor>(shape, DataType::F32, 1, fixed_point_position);
+
+    // Create and Configure function
+    CLDepthConvertLayer depth_convert;
+    depth_convert.configure(&src, &dst, policy, shift);
+
+    // Validate valid region
+    const ValidRegion valid_region = shape_to_valid_region(shape);
+    validate(dst.info()->valid_region(), valid_region);
+
+    // Validate padding
+    const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding();
+    validate(src.info()->padding(), padding);
+    validate(dst.info()->padding(), padding);
+}
+FIXTURE_DATA_TEST_CASE(RunSmallQS8, CLDepthConvertLayerToFP32FixedPointFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(),
+                       DepthConvertLayerQS8toFP32Dataset),
+                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                       DepthConvertLayerFixedPointQuantizedDataset))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+FIXTURE_DATA_TEST_CASE(RunSmallQS16, CLDepthConvertLayerToFP32FixedPointFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(),
+                       DepthConvertLayerQS16toFP32Dataset),
+                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                       DepthConvertLayerFixedPointQuantizedDataset))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+FIXTURE_DATA_TEST_CASE(RunLargeQS8, CLDepthConvertLayerToFP32FixedPointFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(),
+                       DepthConvertLayerQS8toFP32Dataset),
+                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                       DepthConvertLayerFixedPointQuantizedDataset))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+FIXTURE_DATA_TEST_CASE(RunLargeQS16, CLDepthConvertLayerToFP32FixedPointFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(),
+                       DepthConvertLayerQS16toFP32Dataset),
+                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                       DepthConvertLayerFixedPointQuantizedDataset))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+TEST_SUITE_END()
+
+TEST_SUITE(FP32_to_Quantized)
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("DataType", { DataType::QS8, DataType::QS16 })),
+               framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+               DepthConvertLayerFixedPointQuantizedDataset),
+               shape, dt, policy, fixed_point_position)
+{
+    int shift = 0;
+
+    // Create tensors
+    CLTensor src = create_tensor<CLTensor>(shape, DataType::F32, 1, fixed_point_position);
+    CLTensor dst = create_tensor<CLTensor>(shape, dt, 1, fixed_point_position);
+
+    // Create and Configure function
+    CLDepthConvertLayer depth_convert;
+    depth_convert.configure(&src, &dst, policy, shift);
+
+    // Validate valid region
+    const ValidRegion valid_region = shape_to_valid_region(shape);
+    validate(dst.info()->valid_region(), valid_region);
+
+    // Validate padding
+    const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding();
+    validate(src.info()->padding(), padding);
+    validate(dst.info()->padding(), padding);
+}
+FIXTURE_DATA_TEST_CASE(RunSmallQS8, CLDepthConvertLayerToQS8FixedPointFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(),
+                       DepthConvertLayerFP32toQS8Dataset),
+                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                       DepthConvertLayerFixedPointQuantizedDataset))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+FIXTURE_DATA_TEST_CASE(RunSmallQS16, CLDepthConvertLayerToQS16FixedPointFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(),
+                       DepthConvertLayerFP32toQS16Dataset),
+                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                       DepthConvertLayerFixedPointQuantizedDataset))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+FIXTURE_DATA_TEST_CASE(RunLargeQS8, CLDepthConvertLayerToQS8FixedPointFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(),
+                       DepthConvertLayerFP32toQS8Dataset),
+                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                       DepthConvertLayerFixedPointQuantizedDataset))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+FIXTURE_DATA_TEST_CASE(RunLargeQS16, CLDepthConvertLayerToQS16FixedPointFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(),
+                       DepthConvertLayerFP32toQS16Dataset),
+                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                       DepthConvertLayerFixedPointQuantizedDataset))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+TEST_SUITE_END()
+
+TEST_SUITE_END()
+TEST_SUITE_END()
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
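A minimal usage sketch of the renamed CLDepthConvertLayer, not part of the patch. It assumes the rename keeps the configure(&src, &dst, policy, shift) signature that the configuration tests above exercise; tensor shapes are illustrative only.

    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"
    #include "arm_compute/runtime/CL/functions/CLDepthConvertLayer.h"

    using namespace arm_compute;

    void depth_convert_sketch()
    {
        CLScheduler::get().default_init();

        CLTensor src, dst;
        src.allocator()->init(TensorInfo(TensorShape(16U, 16U), 1, DataType::U8));
        dst.allocator()->init(TensorInfo(TensorShape(16U, 16U), 1, DataType::U16));

        CLDepthConvertLayer convert;
        // Up-conversion: each U8 value is shifted left by 2 bits. The
        // SATURATE/WRAP policy only changes the result for down-conversions,
        // which is why both policies are swept in the datasets above.
        convert.configure(&src, &dst, ConvertPolicy::SATURATE, 2 /* shift */);

        src.allocator()->allocate();
        dst.allocator()->allocate();

        convert.run();
        CLScheduler::get().sync();
    }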
diff --git a/tests/validation/CL/DepthwiseConvolution.cpp b/tests/validation/CL/DepthwiseConvolution.cpp
deleted file mode 100644
index ccd9c36561..0000000000
--- a/tests/validation/CL/DepthwiseConvolution.cpp
+++ /dev/null
@@ -1,114 +0,0 @@
-/*
- * Copyright (c) 2017 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/CL/CLTensor.h"
-#include "arm_compute/runtime/CL/CLTensorAllocator.h"
-#include "arm_compute/runtime/CL/functions/CLDepthwiseConvolution.h"
-#include "tests/CL/CLAccessor.h"
-#include "tests/PaddingCalculator.h"
-#include "tests/datasets/DepthwiseConvolutionDataset.h"
-#include "tests/framework/Asserts.h"
-#include "tests/framework/Macros.h"
-#include "tests/framework/datasets/Datasets.h"
-#include "tests/validation/Validation.h"
-#include "tests/validation/fixtures/DepthwiseConvolutionFixture.h"
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-namespace
-{
-constexpr RelativeTolerance<float> tolerance_f32(0.01f); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F32 */
-constexpr RelativeTolerance tolerance_qasymm8(1);        /**< Tolerance value for comparing reference's output against implementation's output for DataType::QASYMM8 */
-} // namespace
-
-TEST_SUITE(CL)
-TEST_SUITE(DepthwiseConvolutionLayer)
-
-template <typename T>
-using CLDepthwiseConvolutionFixture = DepthwiseConvolutionValidationFixture;
-
-TEST_SUITE(Generic)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionFixture, framework::DatasetMode::ALL, combine(datasets::SmallDepthwiseConvolutionDataset(), framework::dataset::make("DataType",
-                       DataType::F32)))
-{
-    validate(CLAccessor(_target), _reference, tolerance_f32);
-}
-FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionFixture, framework::DatasetMode::NIGHTLY, combine(datasets::LargeDepthwiseConvolutionDataset(), framework::dataset::make("DataType",
-                       DataType::F32)))
-{
-    validate(CLAccessor(_target), _reference, tolerance_f32);
-}
-TEST_SUITE_END()
-
-template <typename T>
-using CLDepthwiseConvolutionFixture3x3 = DepthwiseConvolutionValidationFixture;
-
-TEST_SUITE(Float)
-TEST_SUITE(FP32)
-TEST_SUITE(W3x3)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionFixture3x3, framework::DatasetMode::ALL, combine(datasets::SmallDepthwiseConvolutionDataset3x3(), framework::dataset::make("DataType",
-                       DataType::F32)))
-{
-    validate(CLAccessor(_target), _reference, tolerance_f32);
-}
-FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionFixture3x3, framework::DatasetMode::NIGHTLY, combine(datasets::LargeDepthwiseConvolutionDataset3x3(), framework::dataset::make("DataType",
-                       DataType::F32)))
-{
-    validate(CLAccessor(_target), _reference, tolerance_f32);
-}
-TEST_SUITE_END()
-TEST_SUITE_END()
-TEST_SUITE_END()
-
-template <typename T>
-using CLDepthwiseConvolutionQuantizedFixture3x3 = DepthwiseConvolutionValidationQuantizedFixture;
-
-TEST_SUITE(Quantized)
-TEST_SUITE(QASYMM8)
-TEST_SUITE(W3x3)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionQuantizedFixture3x3, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallDepthwiseConvolutionDataset3x3(),
-                       framework::dataset::make("DataType", DataType::QASYMM8)),
-                       framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255, 127) })))
-{
-    validate(CLAccessor(_target), _reference, tolerance_qasymm8);
-}
-FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionQuantizedFixture3x3, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeDepthwiseConvolutionDataset3x3(),
-                       framework::dataset::make("DataType", DataType::QASYMM8)),
-                       framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255, 127) })))
-{
-    validate(CLAccessor(_target), _reference, tolerance_qasymm8);
-}
-TEST_SUITE_END()
-TEST_SUITE_END()
-TEST_SUITE_END()
-
-TEST_SUITE_END()
-TEST_SUITE_END()
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
diff --git a/tests/validation/CL/DepthwiseConvolutionLayer.cpp b/tests/validation/CL/DepthwiseConvolutionLayer.cpp
new file mode 100644
index 0000000000..92a2773e54
--- /dev/null
+++ b/tests/validation/CL/DepthwiseConvolutionLayer.cpp
@@ -0,0 +1,117 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/CL/CLTensorAllocator.h"
+#include "arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h"
+#include "tests/CL/CLAccessor.h"
+#include "tests/PaddingCalculator.h"
+#include "tests/datasets/DepthwiseConvolutionLayerDataset.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace
+{
+constexpr RelativeTolerance<float> tolerance_f32(0.01f); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F32 */
+constexpr RelativeTolerance tolerance_qasymm8(1);        /**< Tolerance value for comparing reference's output against implementation's output for DataType::QASYMM8 */
+} // namespace
+
+TEST_SUITE(CL)
+TEST_SUITE(DepthwiseConvolutionLayer)
+
+template <typename T>
+using CLDepthwiseConvolutionLayerFixture = DepthwiseConvolutionLayerValidationFixture;
+
+TEST_SUITE(Generic)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerFixture, framework::DatasetMode::ALL, combine(datasets::SmallDepthwiseConvolutionLayerDataset(), framework::dataset::make("DataType",
+                       DataType::F32)))
+{
+    validate(CLAccessor(_target), _reference, tolerance_f32);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerFixture, framework::DatasetMode::NIGHTLY, combine(datasets::LargeDepthwiseConvolutionLayerDataset(),
+                       framework::dataset::make("DataType",
+                       DataType::F32)))
+{
+    validate(CLAccessor(_target), _reference, tolerance_f32);
+}
+TEST_SUITE_END()
+
+template <typename T>
+using CLDepthwiseConvolutionLayerFixture3x3 = DepthwiseConvolutionLayerValidationFixture;
+
+TEST_SUITE(Float)
+TEST_SUITE(FP32)
+TEST_SUITE(W3x3)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerFixture3x3, framework::DatasetMode::ALL, combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(),
+                       framework::dataset::make("DataType",
+                       DataType::F32)))
+{
+    validate(CLAccessor(_target), _reference, tolerance_f32);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerFixture3x3, framework::DatasetMode::NIGHTLY, combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(),
+                       framework::dataset::make("DataType",
+                       DataType::F32)))
+{
+    validate(CLAccessor(_target), _reference, tolerance_f32);
+}
+TEST_SUITE_END()
+TEST_SUITE_END()
+TEST_SUITE_END()
+
+template <typename T>
+using CLDepthwiseConvolutionLayerQuantizedFixture3x3 = DepthwiseConvolutionLayerValidationQuantizedFixture;
+
+TEST_SUITE(Quantized)
+TEST_SUITE(QASYMM8)
+TEST_SUITE(W3x3)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerQuantizedFixture3x3, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(),
+                       framework::dataset::make("DataType", DataType::QASYMM8)),
+                       framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255, 127) })))
+{
+    validate(CLAccessor(_target), _reference, tolerance_qasymm8);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerQuantizedFixture3x3, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(),
+                       framework::dataset::make("DataType", DataType::QASYMM8)),
+                       framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255, 127) })))
+{
+    validate(CLAccessor(_target), _reference, tolerance_qasymm8);
+}
+TEST_SUITE_END()
+TEST_SUITE_END()
+TEST_SUITE_END()
+
+TEST_SUITE_END()
+TEST_SUITE_END()
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
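A minimal usage sketch of the renamed CLDepthwiseConvolutionLayer, not part of the patch. It assumes the rename keeps the configure(input, weights, biases, output, conv_info) interface of the pre-rename function; all shapes below are illustrative.

    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"
    #include "arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h"

    using namespace arm_compute;

    void depthwise_conv_sketch()
    {
        CLScheduler::get().default_init();

        CLTensor src, weights, biases, dst;
        src.allocator()->init(TensorInfo(TensorShape(32U, 32U, 16U), 1, DataType::F32));
        // Depthwise: one 3x3 filter per input channel, no cross-channel mixing.
        weights.allocator()->init(TensorInfo(TensorShape(3U, 3U, 16U), 1, DataType::F32));
        biases.allocator()->init(TensorInfo(TensorShape(16U), 1, DataType::F32));
        // Stride 1 with padding 1 keeps the spatial dimensions unchanged.
        dst.allocator()->init(TensorInfo(TensorShape(32U, 32U, 16U), 1, DataType::F32));

        CLDepthwiseConvolutionLayer dwc;
        dwc.configure(&src, &weights, &biases, &dst, PadStrideInfo(1, 1, 1, 1));

        src.allocator()->allocate();
        weights.allocator()->allocate();
        biases.allocator()->allocate();
        dst.allocator()->allocate();

        dwc.run();
        CLScheduler::get().sync();
    }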
diff --git a/tests/validation/CL/L2Normalize.cpp b/tests/validation/CL/L2Normalize.cpp
deleted file mode 100644
index 4b0820c211..0000000000
--- a/tests/validation/CL/L2Normalize.cpp
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
- * Copyright (c) 2017 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/CL/CLTensor.h"
-#include "arm_compute/runtime/CL/CLTensorAllocator.h"
-#include "arm_compute/runtime/CL/functions/CLL2Normalize.h"
-#include "tests/CL/CLAccessor.h"
-#include "tests/PaddingCalculator.h"
-#include "tests/datasets/ShapeDatasets.h"
-#include "tests/framework/Asserts.h"
-#include "tests/framework/Macros.h"
-#include "tests/framework/datasets/Datasets.h"
-#include "tests/validation/Validation.h"
-#include "tests/validation/fixtures/L2NormalizeFixture.h"
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-namespace
-{
-/** Tolerance for float operations */
-constexpr AbsoluteTolerance<float> tolerance_f32(0.00001f);
-
-} // namespace
-
-TEST_SUITE(CL)
-TEST_SUITE(L2Normalize)
-
-template <typename T>
-using CLL2NormalizeFixture = L2NormalizeValidationFixture;
-
-TEST_SUITE(Float)
-TEST_SUITE(FP32)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLL2NormalizeFixture, framework::DatasetMode::PRECOMMIT,
-                       combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("Axis", { 0 })), framework::dataset::make("Epsilon", { 1e-12 })))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference, tolerance_f32);
-}
-FIXTURE_DATA_TEST_CASE(RunLarge, CLL2NormalizeFixture, framework::DatasetMode::NIGHTLY,
-                       combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("Axis", { 0 })), framework::dataset::make("Epsilon", { 1e-12 })))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference, tolerance_f32);
-}
-TEST_SUITE_END()
-TEST_SUITE_END()
-
-TEST_SUITE_END()
-TEST_SUITE_END()
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
diff --git a/tests/validation/CL/L2NormalizeLayer.cpp b/tests/validation/CL/L2NormalizeLayer.cpp
new file mode 100644
index 0000000000..bc2374bc68
--- /dev/null
+++ b/tests/validation/CL/L2NormalizeLayer.cpp
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/CL/CLTensorAllocator.h"
+#include "arm_compute/runtime/CL/functions/CLL2NormalizeLayer.h"
+#include "tests/CL/CLAccessor.h"
+#include "tests/PaddingCalculator.h"
+#include "tests/datasets/ShapeDatasets.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/fixtures/L2NormalizeLayerFixture.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace
+{
+/** Tolerance for float operations */
+constexpr AbsoluteTolerance<float> tolerance_f32(0.00001f);
+
+} // namespace
+
+TEST_SUITE(CL)
+TEST_SUITE(L2NormalizeLayer)
+
+template <typename T>
+using CLL2NormalizeLayerFixture = L2NormalizeLayerValidationFixture;
+
+TEST_SUITE(Float)
+TEST_SUITE(FP32)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLL2NormalizeLayerFixture, framework::DatasetMode::PRECOMMIT,
+                       combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("Axis", { 0 })), framework::dataset::make("Epsilon", { 1e-12 })))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_f32);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, CLL2NormalizeLayerFixture, framework::DatasetMode::NIGHTLY,
+                       combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("Axis", { 0 })), framework::dataset::make("Epsilon", { 1e-12 })))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_f32);
+}
+TEST_SUITE_END()
+TEST_SUITE_END()
+
+TEST_SUITE_END()
+TEST_SUITE_END()
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
- */ -#include "DepthConvert.h" - -#include "tests/validation/FixedPoint.h" -#include "tests/validation/Helpers.h" - -#include "tests/Types.h" - -namespace arm_compute -{ -namespace test -{ -namespace validation -{ -namespace reference -{ -template < typename T1, typename T2, typename std::enable_if < std::is_integral::value &&std::is_floating_point::value, int >::type > -SimpleTensor depth_convert(const SimpleTensor &src, DataType dt_out, ConvertPolicy policy, uint32_t shift) -{ - ARM_COMPUTE_UNUSED(policy); - ARM_COMPUTE_UNUSED(shift); - - using namespace fixed_point_arithmetic; - SimpleTensor result(src.shape(), dt_out); - - const int fixed_point_position = src.fixed_point_position(); - - for(int i = 0; i < src.num_elements(); ++i) - { - result[i] = static_cast(fixed_point(src[i], fixed_point_position, true)); - } - - return result; -} - -template < typename T1, typename T2, typename std::enable_if < std::is_floating_point::value &&std::is_integral::value, int >::type > -SimpleTensor depth_convert(const SimpleTensor &src, DataType dt_out, ConvertPolicy policy, uint32_t shift) -{ - ARM_COMPUTE_UNUSED(policy); - ARM_COMPUTE_UNUSED(shift); - - using namespace fixed_point_arithmetic; - SimpleTensor result(src.shape(), dt_out, 1, src.fixed_point_position()); - - const int fixed_point_position = result.fixed_point_position(); - - for(int i = 0; i < src.num_elements(); ++i) - { - result[i] = fixed_point(src[i], fixed_point_position).raw(); - } - - return result; -} - -template < typename T1, typename T2, typename std::enable_if < std::is_integral::value &&std::is_integral::value &&!std::is_same::value, int >::type > -SimpleTensor depth_convert(const SimpleTensor &src, DataType dt_out, ConvertPolicy policy, uint32_t shift) -{ - SimpleTensor result(src.shape(), dt_out); - - // Up-casting - if(src.data_type() <= dt_out) - { - for(int i = 0; i < src.num_elements(); ++i) - { - result[i] = src[i] << shift; - } - } - // Down-casting - else - { - for(int i = 0; i < src.num_elements(); ++i) - { - T1 val = src[i] >> shift; - result[i] = (policy == ConvertPolicy::SATURATE) ? saturate_cast(val) : static_cast(val); - } - } - return result; -} - -template < typename T1, typename T2, typename std::enable_if < std::is_integral::value &&std::is_integral::value &&std::is_same::value, int >::type > -SimpleTensor depth_convert(const SimpleTensor &src, DataType dt_out, ConvertPolicy policy, uint32_t shift) -{ - ARM_COMPUTE_UNUSED(policy); - - using namespace fixed_point_arithmetic; - - SimpleTensor result(src.shape(), dt_out); - - bool is_in_place = (&src == &result); - - const int fixed_point_position_in = src.fixed_point_position(); - const int fixed_point_position_out = (is_in_place) ? 
static_cast(shift) : result.fixed_point_position(); - - if(!is_in_place || (fixed_point_position_in != fixed_point_position_out)) - { - for(int i = 0; i < src.num_elements(); ++i) - { - auto x = fixed_point(src[i], fixed_point_position_in, true); - x.rescale(fixed_point_position_out); - result[i] = x.raw(); - } - } - - return result; -} - -template < typename T1, typename T2, typename std::enable_if < std::is_floating_point::value &&is_floating_point::value, int >::type > -SimpleTensor depth_convert(const SimpleTensor &src, DataType dt_out, ConvertPolicy policy, uint32_t shift) -{ - ARM_COMPUTE_UNUSED(policy); - ARM_COMPUTE_UNUSED(shift); - - SimpleTensor result(src.shape(), dt_out); - - for(int i = 0; i < src.num_elements(); ++i) - { - result[i] = static_cast(src[i]); - } - - return result; -} - -template SimpleTensor depth_convert(const SimpleTensor &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); -template SimpleTensor depth_convert(const SimpleTensor &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); -template SimpleTensor depth_convert(const SimpleTensor &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); -template SimpleTensor depth_convert(const SimpleTensor &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); -template SimpleTensor depth_convert(const SimpleTensor &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); -template SimpleTensor depth_convert(const SimpleTensor &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); -template SimpleTensor depth_convert(const SimpleTensor &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); -template SimpleTensor depth_convert(const SimpleTensor &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); -template SimpleTensor depth_convert(const SimpleTensor &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); -template SimpleTensor depth_convert(const SimpleTensor &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); -template SimpleTensor depth_convert(const SimpleTensor &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); -} // namespace reference -} // namespace validation -} // namespace test -} // namespace arm_compute diff --git a/tests/validation/CPP/DepthConvert.h b/tests/validation/CPP/DepthConvert.h deleted file mode 100644 index 1446bfda5b..0000000000 --- a/tests/validation/CPP/DepthConvert.h +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Copyright (c) 2017 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef __ARM_COMPUTE_TEST_DEPTH_CONVERT_H__ -#define __ARM_COMPUTE_TEST_DEPTH_CONVERT_H__ - -#include "tests/SimpleTensor.h" -#include "tests/validation/Helpers.h" - -namespace arm_compute -{ -namespace test -{ -namespace validation -{ -namespace reference -{ -template < typename T1, typename T2, typename std::enable_if < std::is_integral::value &&std::is_floating_point::value, int >::type = 0 > -SimpleTensor depth_convert(const SimpleTensor &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); - -template < typename T1, typename T2, typename std::enable_if < std::is_floating_point::value &&std::is_integral::value, int >::type = 0 > -SimpleTensor depth_convert(const SimpleTensor &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); - -template < typename T1, typename T2, typename std::enable_if < std::is_integral::value &&std::is_integral::value &&!std::is_same::value, int >::type = 0 > -SimpleTensor depth_convert(const SimpleTensor &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); - -template < typename T1, typename T2, typename std::enable_if < std::is_integral::value &&std::is_integral::value &&std::is_same::value, int >::type = 0 > -SimpleTensor depth_convert(const SimpleTensor &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); - -template < typename T1, typename T2, typename std::enable_if < std::is_floating_point::value &&is_floating_point::value, int >::type = 0 > -SimpleTensor depth_convert(const SimpleTensor &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); -} // namespace reference -} // namespace validation -} // namespace test -} // namespace arm_compute -#endif /* __ARM_COMPUTE_TEST_DEPTH_CONVERT_H__ */ diff --git a/tests/validation/CPP/DepthConvertLayer.cpp b/tests/validation/CPP/DepthConvertLayer.cpp new file mode 100644 index 0000000000..dd095b8912 --- /dev/null +++ b/tests/validation/CPP/DepthConvertLayer.cpp @@ -0,0 +1,157 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "DepthConvertLayer.h" + +#include "tests/validation/FixedPoint.h" +#include "tests/validation/Helpers.h" + +#include "tests/Types.h" + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +namespace reference +{ +template < typename T1, typename T2, typename std::enable_if < std::is_integral::value &&std::is_floating_point::value, int >::type > +SimpleTensor depth_convert(const SimpleTensor &src, DataType dt_out, ConvertPolicy policy, uint32_t shift) +{ + ARM_COMPUTE_UNUSED(policy); + ARM_COMPUTE_UNUSED(shift); + + using namespace fixed_point_arithmetic; + SimpleTensor result(src.shape(), dt_out); + + const int fixed_point_position = src.fixed_point_position(); + + for(int i = 0; i < src.num_elements(); ++i) + { + result[i] = static_cast(fixed_point(src[i], fixed_point_position, true)); + } + + return result; +} + +template < typename T1, typename T2, typename std::enable_if < std::is_floating_point::value &&std::is_integral::value, int >::type > +SimpleTensor depth_convert(const SimpleTensor &src, DataType dt_out, ConvertPolicy policy, uint32_t shift) +{ + ARM_COMPUTE_UNUSED(policy); + ARM_COMPUTE_UNUSED(shift); + + using namespace fixed_point_arithmetic; + SimpleTensor result(src.shape(), dt_out, 1, src.fixed_point_position()); + + const int fixed_point_position = result.fixed_point_position(); + + for(int i = 0; i < src.num_elements(); ++i) + { + result[i] = fixed_point(src[i], fixed_point_position).raw(); + } + + return result; +} + +template < typename T1, typename T2, typename std::enable_if < std::is_integral::value &&std::is_integral::value &&!std::is_same::value, int >::type > +SimpleTensor depth_convert(const SimpleTensor &src, DataType dt_out, ConvertPolicy policy, uint32_t shift) +{ + SimpleTensor result(src.shape(), dt_out); + + // Up-casting + if(src.data_type() <= dt_out) + { + for(int i = 0; i < src.num_elements(); ++i) + { + result[i] = src[i] << shift; + } + } + // Down-casting + else + { + for(int i = 0; i < src.num_elements(); ++i) + { + T1 val = src[i] >> shift; + result[i] = (policy == ConvertPolicy::SATURATE) ? saturate_cast(val) : static_cast(val); + } + } + return result; +} + +template < typename T1, typename T2, typename std::enable_if < std::is_integral::value &&std::is_integral::value &&std::is_same::value, int >::type > +SimpleTensor depth_convert(const SimpleTensor &src, DataType dt_out, ConvertPolicy policy, uint32_t shift) +{ + ARM_COMPUTE_UNUSED(policy); + + using namespace fixed_point_arithmetic; + + SimpleTensor result(src.shape(), dt_out); + + bool is_in_place = (&src == &result); + + const int fixed_point_position_in = src.fixed_point_position(); + const int fixed_point_position_out = (is_in_place) ? 
static_cast(shift) : result.fixed_point_position(); + + if(!is_in_place || (fixed_point_position_in != fixed_point_position_out)) + { + for(int i = 0; i < src.num_elements(); ++i) + { + auto x = fixed_point(src[i], fixed_point_position_in, true); + x.rescale(fixed_point_position_out); + result[i] = x.raw(); + } + } + + return result; +} + +template < typename T1, typename T2, typename std::enable_if < std::is_floating_point::value &&is_floating_point::value, int >::type > +SimpleTensor depth_convert(const SimpleTensor &src, DataType dt_out, ConvertPolicy policy, uint32_t shift) +{ + ARM_COMPUTE_UNUSED(policy); + ARM_COMPUTE_UNUSED(shift); + + SimpleTensor result(src.shape(), dt_out); + + for(int i = 0; i < src.num_elements(); ++i) + { + result[i] = static_cast(src[i]); + } + + return result; +} + +template SimpleTensor depth_convert(const SimpleTensor &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); +template SimpleTensor depth_convert(const SimpleTensor &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); +template SimpleTensor depth_convert(const SimpleTensor &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); +template SimpleTensor depth_convert(const SimpleTensor &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); +template SimpleTensor depth_convert(const SimpleTensor &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); +template SimpleTensor depth_convert(const SimpleTensor &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); +template SimpleTensor depth_convert(const SimpleTensor &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); +template SimpleTensor depth_convert(const SimpleTensor &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); +template SimpleTensor depth_convert(const SimpleTensor &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); +template SimpleTensor depth_convert(const SimpleTensor &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); +template SimpleTensor depth_convert(const SimpleTensor &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); +} // namespace reference +} // namespace validation +} // namespace test +} // namespace arm_compute diff --git a/tests/validation/CPP/DepthConvertLayer.h b/tests/validation/CPP/DepthConvertLayer.h new file mode 100644 index 0000000000..1446bfda5b --- /dev/null +++ b/tests/validation/CPP/DepthConvertLayer.h @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_TEST_DEPTH_CONVERT_H__ +#define __ARM_COMPUTE_TEST_DEPTH_CONVERT_H__ + +#include "tests/SimpleTensor.h" +#include "tests/validation/Helpers.h" + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +namespace reference +{ +template < typename T1, typename T2, typename std::enable_if < std::is_integral::value &&std::is_floating_point::value, int >::type = 0 > +SimpleTensor depth_convert(const SimpleTensor &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); + +template < typename T1, typename T2, typename std::enable_if < std::is_floating_point::value &&std::is_integral::value, int >::type = 0 > +SimpleTensor depth_convert(const SimpleTensor &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); + +template < typename T1, typename T2, typename std::enable_if < std::is_integral::value &&std::is_integral::value &&!std::is_same::value, int >::type = 0 > +SimpleTensor depth_convert(const SimpleTensor &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); + +template < typename T1, typename T2, typename std::enable_if < std::is_integral::value &&std::is_integral::value &&std::is_same::value, int >::type = 0 > +SimpleTensor depth_convert(const SimpleTensor &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); + +template < typename T1, typename T2, typename std::enable_if < std::is_floating_point::value &&is_floating_point::value, int >::type = 0 > +SimpleTensor depth_convert(const SimpleTensor &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); +} // namespace reference +} // namespace validation +} // namespace test +} // namespace arm_compute +#endif /* __ARM_COMPUTE_TEST_DEPTH_CONVERT_H__ */ diff --git a/tests/validation/CPP/DepthwiseConvolution.cpp b/tests/validation/CPP/DepthwiseConvolution.cpp deleted file mode 100644 index 229e044783..0000000000 --- a/tests/validation/CPP/DepthwiseConvolution.cpp +++ /dev/null @@ -1,195 +0,0 @@ -/* - * Copyright (c) 2017 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
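The depth_convert reference renamed above converts integer tensors by shifting: up-casts apply value << shift, while down-casts apply value >> shift and then either wrap or saturate according to the ConvertPolicy. A minimal sketch of the down-cast path, assuming a clamp-based saturating narrow similar in spirit to (but not identical to) the library's saturate_cast helper:

    #include <cstddef>
    #include <cstdint>
    #include <limits>
    #include <vector>

    // Illustrative stand-in for a saturating cast: clamp to the destination
    // type's range before narrowing.
    template <typename To, typename From>
    To saturating_narrow(From v)
    {
        const From lo = static_cast<From>(std::numeric_limits<To>::lowest());
        const From hi = static_cast<From>(std::numeric_limits<To>::max());
        return static_cast<To>(v < lo ? lo : (v > hi ? hi : v));
    }

    // Down-cast with a right shift, as in the reference's integer path:
    // SATURATE clamps into range, WRAP simply truncates.
    std::vector<uint8_t> depth_convert_down(const std::vector<int16_t> &src, uint32_t shift, bool saturate)
    {
        std::vector<uint8_t> dst(src.size());
        for(std::size_t i = 0; i < src.size(); ++i)
        {
            const int16_t v = static_cast<int16_t>(src[i] >> shift);
            dst[i] = saturate ? saturating_narrow<uint8_t>(v) : static_cast<uint8_t>(v);
        }
        return dst;
    }

For example, with shift = 1 and SATURATE an int16_t input of 600 becomes 300 after the shift and is clamped to 255 on the way to uint8_t, whereas WRAP would store 300 & 0xFF = 44.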
- */ -#include "DepthwiseConvolution.h" - -#include "ConvolutionLayer.h" -#include "Utils.h" - -#include "tests/validation/CPP/Utils.h" -#include "tests/validation/CPP/UtilsQuantizedAsymm.h" -#include "tests/validation/FixedPoint.h" -#include "tests/validation/Helpers.h" - -#include "arm_compute/core/utils/quantization/AsymmHelpers.h" - -namespace arm_compute -{ -namespace test -{ -namespace validation -{ -namespace reference -{ -/** Perform a depthwise convolution - * - * - Three dimensions tensors - * - Third dimention is number of channels - * - Depths of input tensor and filter are equals - * - Padding, stride and output shape "match" - * - */ -template -SimpleTensor depthwise_convolution(const SimpleTensor &src, const SimpleTensor &weights, const SimpleTensor &biases, const TensorShape &dst_shape, const PadStrideInfo &conv_info) -{ - // Create reference - SimpleTensor dst{ dst_shape, src.data_type(), 1, src.fixed_point_position() }; - - // Compute reference - const int filter_width = weights.shape().x(); - const int filter_height = weights.shape().y(); - const int filter_plane = filter_width * filter_height; - const int input_width = src.shape().x(); - const int input_height = src.shape().y(); - const int input_depth = src.shape().z(); - const int num_batches = src.shape().total_size() / (input_width * input_height * input_depth); - - const int filter_half_width = filter_width / 2; - const int filter_half_height = filter_height / 2; - - const int pad_left = std::min(static_cast(conv_info.pad_left()), filter_half_width); - const int pad_top = std::min(static_cast(conv_info.pad_top()), filter_half_height); - const int pad_right = std::min(static_cast(conv_info.pad_right()), filter_half_width); - const int pad_bottom = std::min(static_cast(conv_info.pad_bottom()), filter_half_height); - - const int minimum_x = -pad_left + filter_half_width; - const int minimum_y = -pad_top + filter_half_height; - const int maximum_x = input_width + pad_left - filter_half_width + pad_right - filter_half_width; - const int maximum_y = input_height + pad_top - filter_half_height + pad_bottom - filter_half_height; - - int out_pos = 0; - for(int r = 0; r < num_batches; ++r) - { - for(int z = 0; z < input_depth; ++z) - { - for(int y = minimum_y; y < minimum_y + maximum_y; y += conv_info.stride().second) - { - for(int x = minimum_x; x < minimum_x + maximum_x; x += conv_info.stride().first) - { - Coordinates coords(static_cast(x), static_cast(y), static_cast(z), static_cast(r)); - size_t filter_offset = filter_plane * z; - - T val = 0; - for(int j = y - filter_half_height; j <= static_cast(y + filter_half_height); ++j) - { - for(int i = x - filter_half_width; i <= static_cast(x + filter_half_width); ++i) - { - coords.set(0, i); - coords.set(1, j); - val += *(weights.data() + filter_offset) * tensor_elem_at(src, coords, BorderMode::CONSTANT, 0.f); - ++filter_offset; - } - } - coords.set(0, x); - coords.set(1, y); - dst[out_pos++] = saturate_cast(val + *static_cast(biases(Coordinates(z)))); - } - } - } - } - - return dst; -} - -template <> -SimpleTensor depthwise_convolution(const SimpleTensor &src, const SimpleTensor &weights, const SimpleTensor &biases, const TensorShape &dst_shape, - const PadStrideInfo &conv_info) -{ - // Create reference - SimpleTensor dst{ dst_shape, src.data_type(), 1, src.fixed_point_position(), src.quantization_info() }; - - const int input_offset = -src.quantization_info().offset; - const float input_scale = src.quantization_info().scale; - const int weights_offset = 
-weights.quantization_info().offset; - const float weights_scale = weights.quantization_info().scale; - const int output_offset = dst.quantization_info().offset; - const float output_scale = dst.quantization_info().scale; - - int output_multiplier; - int output_shift; - const float multiplier = input_scale * weights_scale / output_scale; - arm_compute::quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift); - - // Compute reference - const int filter_width = weights.shape().x(); - const int filter_height = weights.shape().y(); - const int filter_plane = filter_width * filter_height; - const int input_width = src.shape().x(); - const int input_height = src.shape().y(); - const int input_depth = src.shape().z(); - const int num_batches = src.shape().total_size() / (input_width * input_height * input_depth); - - const int filter_half_size = filter_width / 2; - const int pad_x = std::min(filter_half_size, static_cast(conv_info.pad().first)); - const int pad_y = std::min(filter_half_size, static_cast(conv_info.pad().second)); - const int minimum_x = -pad_x + filter_half_size; - const int minimum_y = -pad_y + filter_half_size; - - int out_pos = 0; - for(int r = 0; r < num_batches; ++r) - { - for(int z = 0; z < input_depth; ++z) - { - int32_t bias_val = *static_cast(biases(Coordinates(z))); - for(int y = minimum_y; y < input_height + pad_y - filter_half_size; y += conv_info.stride().second) - { - for(int x = minimum_x; x < input_width + pad_x - filter_half_size; x += conv_info.stride().first) - { - Coordinates coords(x, y, z); - int filter_offset = filter_plane * z; - - int32_t val = 0; - for(int j = y - filter_half_size; j <= (y + filter_half_size); ++j) - { - for(int i = x - filter_half_size; i <= (x + filter_half_size); ++i) - { - coords.set(0, i); - coords.set(1, j); - auto in_val = tensor_elem_at(src, coords, BorderMode::CONSTANT, 0); - uint8_t w_val = *(weights.data() + filter_offset); - val += (in_val + input_offset) * (w_val + weights_offset); - ++filter_offset; - } - } - val += bias_val; - val = asymm_rounding_divide_by_pow2(asymm_int_mult(val, output_multiplier), output_shift); - val += output_offset; - val = std::max(val, 0); - val = std::min(val, 255); - - // Store the result - dst[out_pos++] = val; - } - } - } - } - - return dst; -} - -template SimpleTensor depthwise_convolution(const SimpleTensor &src, const SimpleTensor &weights, const SimpleTensor &biases, const TensorShape &dst_shape, - const PadStrideInfo &conv_info); -} // namespace reference -} // namespace validation -} // namespace test -} // namespace arm_compute diff --git a/tests/validation/CPP/DepthwiseConvolution.h b/tests/validation/CPP/DepthwiseConvolution.h deleted file mode 100644 index df743a5b8e..0000000000 --- a/tests/validation/CPP/DepthwiseConvolution.h +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Copyright (c) 2017 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software.
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef __ARM_COMPUTE_TEST_DEPTHWISE_CONVOLUTION_H__ -#define __ARM_COMPUTE_TEST_DEPTHWISE_CONVOLUTION_H__ - -#include "tests/SimpleTensor.h" -#include "tests/validation/Helpers.h" - -namespace arm_compute -{ -namespace test -{ -namespace validation -{ -namespace reference -{ -template -SimpleTensor depthwise_convolution(const SimpleTensor &src, const SimpleTensor &weights, const SimpleTensor &biases, const TensorShape &dst_shape, const PadStrideInfo &conv_info); -} // namespace reference -} // namespace validation -} // namespace test -} // namespace arm_compute -#endif /* __ARM_COMPUTE_TEST_DEPTHWISE_SEPARABLE_CONVOLUTION_LAYER_H__ */ diff --git a/tests/validation/CPP/DepthwiseConvolutionLayer.cpp b/tests/validation/CPP/DepthwiseConvolutionLayer.cpp new file mode 100644 index 0000000000..99baa4b3c7 --- /dev/null +++ b/tests/validation/CPP/DepthwiseConvolutionLayer.cpp @@ -0,0 +1,195 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
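For orientation while reading the re-added reference below: every depthwise output element is the dot product of one 2D filter plane with a window of its own input channel, plus that channel's bias; unlike a regular convolution, channels are never mixed. A minimal single-channel sketch with unit stride and no padding (names are illustrative only):

    #include <cstddef>
    #include <vector>

    // Depthwise 2D convolution for a single channel: stride 1, no padding.
    // out(y, x) = bias + sum over (j, i) of in(y + j, x + i) * w(j, i)
    std::vector<float> depthwise_channel(const std::vector<float> &in, int in_w, int in_h,
                                         const std::vector<float> &w, int k, float bias)
    {
        const int out_w = in_w - k + 1;
        const int out_h = in_h - k + 1;
        std::vector<float> out(static_cast<std::size_t>(out_w) * out_h, bias);
        for(int y = 0; y < out_h; ++y)
            for(int x = 0; x < out_w; ++x)
                for(int j = 0; j < k; ++j)
                    for(int i = 0; i < k; ++i)
                        out[y * out_w + x] += in[(y + j) * in_w + (x + i)] * w[j * k + i];
        return out;
    }

The reference below generalises this with per-side padding clamped to the filter half-size, configurable strides, and a batch loop.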
+ */ +#include "DepthwiseConvolutionLayer.h" + +#include "ConvolutionLayer.h" +#include "Utils.h" + +#include "tests/validation/CPP/Utils.h" +#include "tests/validation/CPP/UtilsQuantizedAsymm.h" +#include "tests/validation/FixedPoint.h" +#include "tests/validation/Helpers.h" + +#include "arm_compute/core/utils/quantization/AsymmHelpers.h" + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +namespace reference +{ +/** Perform a depthwise convolution + * + * - Three dimensions tensors + * - Third dimention is number of channels + * - Depths of input tensor and filter are equals + * - Padding, stride and output shape "match" + * + */ +template +SimpleTensor depthwise_convolution(const SimpleTensor &src, const SimpleTensor &weights, const SimpleTensor &biases, const TensorShape &dst_shape, const PadStrideInfo &conv_info) +{ + // Create reference + SimpleTensor dst{ dst_shape, src.data_type(), 1, src.fixed_point_position() }; + + // Compute reference + const int filter_width = weights.shape().x(); + const int filter_height = weights.shape().y(); + const int filter_plane = filter_width * filter_height; + const int input_width = src.shape().x(); + const int input_height = src.shape().y(); + const int input_depth = src.shape().z(); + const int num_batches = src.shape().total_size() / (input_width * input_height * input_depth); + + const int filter_half_width = filter_width / 2; + const int filter_half_height = filter_height / 2; + + const int pad_left = std::min(static_cast(conv_info.pad_left()), filter_half_width); + const int pad_top = std::min(static_cast(conv_info.pad_top()), filter_half_height); + const int pad_right = std::min(static_cast(conv_info.pad_right()), filter_half_width); + const int pad_bottom = std::min(static_cast(conv_info.pad_bottom()), filter_half_height); + + const int minimum_x = -pad_left + filter_half_width; + const int minimum_y = -pad_top + filter_half_height; + const int maximum_x = input_width + pad_left - filter_half_width + pad_right - filter_half_width; + const int maximum_y = input_height + pad_top - filter_half_height + pad_bottom - filter_half_height; + + int out_pos = 0; + for(int r = 0; r < num_batches; ++r) + { + for(int z = 0; z < input_depth; ++z) + { + for(int y = minimum_y; y < minimum_y + maximum_y; y += conv_info.stride().second) + { + for(int x = minimum_x; x < minimum_x + maximum_x; x += conv_info.stride().first) + { + Coordinates coords(static_cast(x), static_cast(y), static_cast(z), static_cast(r)); + size_t filter_offset = filter_plane * z; + + T val = 0; + for(int j = y - filter_half_height; j <= static_cast(y + filter_half_height); ++j) + { + for(int i = x - filter_half_width; i <= static_cast(x + filter_half_width); ++i) + { + coords.set(0, i); + coords.set(1, j); + val += *(weights.data() + filter_offset) * tensor_elem_at(src, coords, BorderMode::CONSTANT, 0.f); + ++filter_offset; + } + } + coords.set(0, x); + coords.set(1, y); + dst[out_pos++] = saturate_cast(val + *static_cast(biases(Coordinates(z)))); + } + } + } + } + + return dst; +} + +template <> +SimpleTensor depthwise_convolution(const SimpleTensor &src, const SimpleTensor &weights, const SimpleTensor &biases, const TensorShape &dst_shape, + const PadStrideInfo &conv_info) +{ + // Create reference + SimpleTensor dst{ dst_shape, src.data_type(), 1, src.fixed_point_position(), src.quantization_info() }; + + const int input_offset = -src.quantization_info().offset; + const float input_scale = src.quantization_info().scale; + const int weights_offset = 
-weights.quantization_info().offset; + const float weights_scale = weights.quantization_info().scale; + const int output_offset = dst.quantization_info().offset; + const float output_scale = dst.quantization_info().scale; + + int output_multiplier; + int output_shift; + const float multiplier = input_scale * weights_scale / output_scale; + arm_compute::quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift); + + // Compute reference + const int filter_width = weights.shape().x(); + const int filter_height = weights.shape().y(); + const int filter_plane = filter_width * filter_height; + const int input_width = src.shape().x(); + const int input_height = src.shape().y(); + const int input_depth = src.shape().z(); + const int num_batches = src.shape().total_size() / (input_width * input_height * input_depth); + + const int filter_half_size = filter_width / 2; + const int pad_x = std::min(filter_half_size, static_cast(conv_info.pad().first)); + const int pad_y = std::min(filter_half_size, static_cast(conv_info.pad().second)); + const int minimum_x = -pad_x + filter_half_size; + const int minimum_y = -pad_y + filter_half_size; + + int out_pos = 0; + for(int r = 0; r < num_batches; ++r) + { + for(int z = 0; z < input_depth; ++z) + { + int32_t bias_val = *static_cast(biases(Coordinates(z))); + for(int y = minimum_y; y < input_height + pad_y - filter_half_size; y += conv_info.stride().second) + { + for(int x = minimum_x; x < input_width + pad_x - filter_half_size; x += conv_info.stride().first) + { + Coordinates coords(x, y, z); + int filter_offset = filter_plane * z; + + int32_t val = 0; + for(int j = y - filter_half_size; j <= (y + filter_half_size); ++j) + { + for(int i = x - filter_half_size; i <= (x + filter_half_size); ++i) + { + coords.set(0, i); + coords.set(1, j); + auto in_val = tensor_elem_at(src, coords, BorderMode::CONSTANT, 0); + uint8_t w_val = *(weights.data() + filter_offset); + val += (in_val + input_offset) * (w_val + weights_offset); + ++filter_offset; + } + } + val += bias_val; + val = asymm_rounding_divide_by_pow2(asymm_int_mult(val, output_multiplier), output_shift); + val += output_offset; + val = std::max(val, 0); + val = std::min(val, 255); + + // Store the result + dst[out_pos++] = val; + } + } + } + } + + return dst; +} + +template SimpleTensor depthwise_convolution(const SimpleTensor &src, const SimpleTensor &weights, const SimpleTensor &biases, const TensorShape &dst_shape, + const PadStrideInfo &conv_info); +} // namespace reference +} // namespace validation +} // namespace test +} // namespace arm_compute diff --git a/tests/validation/CPP/DepthwiseConvolutionLayer.h b/tests/validation/CPP/DepthwiseConvolutionLayer.h new file mode 100644 index 0000000000..df743a5b8e --- /dev/null +++ b/tests/validation/CPP/DepthwiseConvolutionLayer.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software.
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_TEST_DEPTHWISE_CONVOLUTION_H__ +#define __ARM_COMPUTE_TEST_DEPTHWISE_CONVOLUTION_H__ + +#include "tests/SimpleTensor.h" +#include "tests/validation/Helpers.h" + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +namespace reference +{ +template +SimpleTensor depthwise_convolution(const SimpleTensor &src, const SimpleTensor &weights, const SimpleTensor &biases, const TensorShape &dst_shape, const PadStrideInfo &conv_info); +} // namespace reference +} // namespace validation +} // namespace test +} // namespace arm_compute +#endif /* __ARM_COMPUTE_TEST_DEPTHWISE_SEPARABLE_CONVOLUTION_LAYER_H__ */ diff --git a/tests/validation/CPP/DepthwiseSeparableConvolutionLayer.cpp b/tests/validation/CPP/DepthwiseSeparableConvolutionLayer.cpp index 8c8e50d349..ca6c168114 100644 --- a/tests/validation/CPP/DepthwiseSeparableConvolutionLayer.cpp +++ b/tests/validation/CPP/DepthwiseSeparableConvolutionLayer.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "DepthwiseConvolution.h" +#include "DepthwiseConvolutionLayer.h" #include "DepthwiseSeparableConvolutionLayer.h" diff --git a/tests/validation/CPP/L2Normalize.cpp b/tests/validation/CPP/L2Normalize.cpp deleted file mode 100644 index 4fb4d57eb4..0000000000 --- a/tests/validation/CPP/L2Normalize.cpp +++ /dev/null @@ -1,88 +0,0 @@ -/* - * Copyright (c) 2017 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
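The QASYMM8 specialisation above accumulates (in_val + input_offset) * (w_val + weights_offset) in 32-bit integers and then requantizes: the real factor input_scale * weights_scale / output_scale is split into a normalised integer multiplier plus a right shift, which the asymm_int_mult / asymm_rounding_divide_by_pow2 pair then applies. A sketch of the idea behind that split (an assumption-laden reimplementation, not the library's calculate_quantized_multiplier_less_than_one itself):

    #include <cmath>
    #include <cstdint>

    // Split a real multiplier m in (0, 1) into a Q31 mantissa and a right
    // shift so that m ~= (quantized_multiplier / 2^31) * 2^-right_shift.
    void quantize_multiplier(double m, int32_t *quantized_multiplier, int *right_shift)
    {
        int exponent = 0;
        const double mantissa = std::frexp(m, &exponent); // m = mantissa * 2^exponent, mantissa in [0.5, 1)
        int64_t q = static_cast<int64_t>(std::llround(mantissa * (1ll << 31)));
        if(q == (1ll << 31)) // mantissa rounded up to exactly 1.0
        {
            q /= 2;
            ++exponent;
        }
        *quantized_multiplier = static_cast<int32_t>(q);
        *right_shift          = -exponent; // non-negative for m < 1
    }

At run time the requantization then reduces to one high 32x32-bit multiply followed by a rounding right shift, keeping the whole reference path in integer arithmetic.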
- */ -#include "L2Normalize.h" -#include "ReductionOperation.h" - -#include "tests/validation/Helpers.h" - -#include -#include - -namespace arm_compute -{ -namespace test -{ -namespace validation -{ -namespace reference -{ -namespace -{ -TensorShape get_output_shape(TensorShape shape, unsigned int axis) -{ - TensorShape output_shape(shape); - output_shape.set(axis, 1); - return output_shape; -} -} // namespace - -template -SimpleTensor l2_normalize(const SimpleTensor &src, unsigned int axis, float epsilon) -{ - // Create reference - SimpleTensor dst{ src.shape(), src.data_type() }; - - // Reduce across given axis - SimpleTensor sum = reduction_operation(src, get_output_shape(src.shape(), axis), axis, ReductionOperation::SUM_SQUARE); - - // Compute reference - const int elems = src.shape()[axis]; - const int upper_dims = src.shape().total_size_upper(axis + 1); - - for(int du = 0; du < upper_dims; ++du) - { - if(axis == 0) - { - const T *src_row_ptr = src.data() + du * elems; - T *dst_row_ptr = dst.data() + du * elems; - const T normalization_value = std::sqrt(std::max(sum[du], epsilon)); - std::transform(src_row_ptr, src_row_ptr + elems, dst_row_ptr, [normalization_value](T val) - { - return val / normalization_value; - }); - } - else - { - ARM_COMPUTE_ERROR("Unsupported normalization axis"); - } - } - - return dst; -} - -template SimpleTensor l2_normalize(const SimpleTensor &src, unsigned int axis, float epsilon); -} // namespace reference -} // namespace validation -} // namespace test -} // namespace arm_compute diff --git a/tests/validation/CPP/L2Normalize.h b/tests/validation/CPP/L2Normalize.h deleted file mode 100644 index 1db3ae6174..0000000000 --- a/tests/validation/CPP/L2Normalize.h +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Copyright (c) 2017 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef __ARM_COMPUTE_TEST_L2NORMALIZE_H__ -#define __ARM_COMPUTE_TEST_L2NORMALIZE_H__ - -#include "tests/SimpleTensor.h" -#include "tests/validation/Helpers.h" - -namespace arm_compute -{ -namespace test -{ -namespace validation -{ -namespace reference -{ -template -SimpleTensor l2_normalize(const SimpleTensor &src, unsigned int axis, float epsilon); -} // namespace reference -} // namespace validation -} // namespace test -} // namespace arm_compute -#endif /* __ARM_COMPUTE_TEST_L2NORMALIZE_H__ */ diff --git a/tests/validation/CPP/L2NormalizeLayer.cpp b/tests/validation/CPP/L2NormalizeLayer.cpp new file mode 100644 index 0000000000..99f4e8a6e6 --- /dev/null +++ b/tests/validation/CPP/L2NormalizeLayer.cpp @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "L2NormalizeLayer.h" +#include "ReductionOperation.h" + +#include "tests/validation/Helpers.h" + +#include +#include + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +namespace reference +{ +namespace +{ +TensorShape get_output_shape(TensorShape shape, unsigned int axis) +{ + TensorShape output_shape(shape); + output_shape.set(axis, 1); + return output_shape; +} +} // namespace + +template +SimpleTensor l2_normalize(const SimpleTensor &src, unsigned int axis, float epsilon) +{ + // Create reference + SimpleTensor dst{ src.shape(), src.data_type() }; + + // Reduce across given axis + SimpleTensor sum = reduction_operation(src, get_output_shape(src.shape(), axis), axis, ReductionOperation::SUM_SQUARE); + + // Compute reference + const int elems = src.shape()[axis]; + const int upper_dims = src.shape().total_size_upper(axis + 1); + + for(int du = 0; du < upper_dims; ++du) + { + if(axis == 0) + { + const T *src_row_ptr = src.data() + du * elems; + T *dst_row_ptr = dst.data() + du * elems; + const T normalization_value = std::sqrt(std::max(sum[du], epsilon)); + std::transform(src_row_ptr, src_row_ptr + elems, dst_row_ptr, [normalization_value](T val) + { + return val / normalization_value; + }); + } + else + { + ARM_COMPUTE_ERROR("Unsupported normalization axis"); + } + } + + return dst; +} + +template SimpleTensor l2_normalize(const SimpleTensor &src, unsigned int axis, float epsilon); +} // namespace reference +} // namespace validation +} // namespace test +} // namespace arm_compute diff --git a/tests/validation/CPP/L2NormalizeLayer.h b/tests/validation/CPP/L2NormalizeLayer.h new file mode 100644 index 0000000000..1db3ae6174 --- /dev/null +++ b/tests/validation/CPP/L2NormalizeLayer.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_TEST_L2NORMALIZE_H__ +#define __ARM_COMPUTE_TEST_L2NORMALIZE_H__ + +#include "tests/SimpleTensor.h" +#include "tests/validation/Helpers.h" + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +namespace reference +{ +template +SimpleTensor l2_normalize(const SimpleTensor &src, unsigned int axis, float epsilon); +} // namespace reference +} // namespace validation +} // namespace test +} // namespace arm_compute +#endif /* __ARM_COMPUTE_TEST_L2NORMALIZE_H__ */ diff --git a/tests/validation/GLES_COMPUTE/DepthConcatenateLayer.cpp b/tests/validation/GLES_COMPUTE/DepthConcatenateLayer.cpp index 829845dd36..7af3050c1d 100644 --- a/tests/validation/GLES_COMPUTE/DepthConcatenateLayer.cpp +++ b/tests/validation/GLES_COMPUTE/DepthConcatenateLayer.cpp @@ -24,7 +24,7 @@ #include "arm_compute/core/Types.h" #include "arm_compute/runtime/GLES_COMPUTE/GCTensor.h" #include "arm_compute/runtime/GLES_COMPUTE/GCTensorAllocator.h" -#include "arm_compute/runtime/GLES_COMPUTE/functions/GCDepthConcatenate.h" +#include "arm_compute/runtime/GLES_COMPUTE/functions/GCDepthConcatenateLayer.h" #include "tests/GLES_COMPUTE/GCAccessor.h" #include "tests/datasets/ShapeDatasets.h" #include "tests/framework/Asserts.h" @@ -45,7 +45,7 @@ TEST_SUITE(DepthConcatenateLayer) //TODO(COMPMID-415): Add configuration test? template -using GCDepthConcatenateLayerFixture = DepthConcatenateValidationFixture; +using GCDepthConcatenateLayerFixture = DepthConcatenateLayerValidationFixture; TEST_SUITE(Float) TEST_SUITE(FP16) @@ -70,7 +70,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, GCDepthConcatenateLayerFixture, framewor // Validate output validate(GCAccessor(_target), _reference); } -FIXTURE_DATA_TEST_CASE(RunLarge, GCDepthConcatenateLayerFixture, framework::DatasetMode::NIGHTLY, combine(datasets::DepthConcatenateShapes(), framework::dataset::make("DataType", +FIXTURE_DATA_TEST_CASE(RunLarge, GCDepthConcatenateLayerFixture, framework::DatasetMode::NIGHTLY, combine(datasets::DepthConcatenateLayerShapes(), framework::dataset::make("DataType", DataType::F32))) { // Validate output diff --git a/tests/validation/NEON/DepthConcatenateLayer.cpp b/tests/validation/NEON/DepthConcatenateLayer.cpp index 9a0a34f8f8..7e99ab5dc7 100644 --- a/tests/validation/NEON/DepthConcatenateLayer.cpp +++ b/tests/validation/NEON/DepthConcatenateLayer.cpp @@ -22,7 +22,7 @@ * SOFTWARE. */ #include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/functions/NEDepthConcatenate.h" +#include "arm_compute/runtime/NEON/functions/NEDepthConcatenateLayer.h" #include "arm_compute/runtime/Tensor.h" #include "arm_compute/runtime/TensorAllocator.h" #include "tests/NEON/Accessor.h" @@ -45,7 +45,7 @@ TEST_SUITE(DepthConcatenateLayer) //TODO(COMPMID-415): Add configuration test? 
template -using NEDepthConcatenateLayerFixture = DepthConcatenateValidationFixture; +using NEDepthConcatenateLayerFixture = DepthConcatenateLayerValidationFixture; TEST_SUITE(Float) #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC @@ -56,7 +56,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConcatenateLayerFixture, framework // Validate output validate(Accessor(_target), _reference); } -FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConcatenateLayerFixture, framework::DatasetMode::NIGHTLY, combine(datasets::DepthConcatenateShapes(), framework::dataset::make("DataType", +FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConcatenateLayerFixture, framework::DatasetMode::NIGHTLY, combine(datasets::DepthConcatenateLayerShapes(), framework::dataset::make("DataType", DataType::F16))) { // Validate output @@ -72,7 +72,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConcatenateLayerFixture, framewor // Validate output validate(Accessor(_target), _reference); } -FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConcatenateLayerFixture, framework::DatasetMode::NIGHTLY, combine(datasets::DepthConcatenateShapes(), framework::dataset::make("DataType", +FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConcatenateLayerFixture, framework::DatasetMode::NIGHTLY, combine(datasets::DepthConcatenateLayerShapes(), framework::dataset::make("DataType", DataType::F32))) { // Validate output @@ -90,7 +90,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConcatenateLayerFixture, framewo // Validate output validate(Accessor(_target), _reference); } -FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConcatenateLayerFixture, framework::DatasetMode::NIGHTLY, combine(datasets::DepthConcatenateShapes(), +FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConcatenateLayerFixture, framework::DatasetMode::NIGHTLY, combine(datasets::DepthConcatenateLayerShapes(), framework::dataset::make("DataType", DataType::QS8))) { @@ -107,7 +107,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConcatenateLayerFixture, framew // Validate output validate(Accessor(_target), _reference); } -FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConcatenateLayerFixture, framework::DatasetMode::NIGHTLY, combine(datasets::DepthConcatenateShapes(), +FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConcatenateLayerFixture, framework::DatasetMode::NIGHTLY, combine(datasets::DepthConcatenateLayerShapes(), framework::dataset::make("DataType", DataType::QS16))) { diff --git a/tests/validation/NEON/DepthConvert.cpp b/tests/validation/NEON/DepthConvert.cpp deleted file mode 100644 index e036cc45d1..0000000000 --- a/tests/validation/NEON/DepthConvert.cpp +++ /dev/null @@ -1,484 +0,0 @@ -/* - * Copyright (c) 2017 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/functions/NEDepthConvert.h" -#include "arm_compute/runtime/Tensor.h" -#include "arm_compute/runtime/TensorAllocator.h" -#include "tests/NEON/Accessor.h" -#include "tests/PaddingCalculator.h" -#include "tests/datasets/ConvertPolicyDataset.h" -#include "tests/datasets/ShapeDatasets.h" -#include "tests/framework/Asserts.h" -#include "tests/framework/Macros.h" -#include "tests/framework/datasets/Datasets.h" -#include "tests/validation/Validation.h" -#include "tests/validation/fixtures/DepthConvertFixture.h" - -namespace arm_compute -{ -namespace test -{ -namespace validation -{ -namespace -{ -/** Input data sets **/ -const auto DepthConvertU8toU16Dataset = combine(framework::dataset::make("DataType", DataType::U8), framework::dataset::make("DataType", DataType::U16)); -const auto DepthConvertU8toS16Dataset = combine(framework::dataset::make("DataType", DataType::U8), framework::dataset::make("DataType", DataType::S16)); -const auto DepthConvertU8toS32Dataset = combine(framework::dataset::make("DataType", DataType::U8), framework::dataset::make("DataType", DataType::S32)); -const auto DepthConvertU16toU8Dataset = combine(framework::dataset::make("DataType", DataType::U16), framework::dataset::make("DataType", DataType::U8)); -const auto DepthConvertU16toU32Dataset = combine(framework::dataset::make("DataType", DataType::U16), framework::dataset::make("DataType", DataType::U32)); -const auto DepthConvertS16toU8Dataset = combine(framework::dataset::make("DataType", DataType::S16), framework::dataset::make("DataType", DataType::U8)); -const auto DepthConvertS16toS32Dataset = combine(framework::dataset::make("DataType", DataType::S16), framework::dataset::make("DataType", DataType::S32)); -const auto DepthConvertQS8toFP32Dataset = combine(framework::dataset::make("DataType", DataType::QS8), framework::dataset::make("DataType", DataType::F32)); -const auto DepthConvertQS16toFP32Dataset = combine(framework::dataset::make("DataType", DataType::QS16), framework::dataset::make("DataType", DataType::F32)); -const auto DepthConvertFP32toQS8Dataset = combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::QS8)); -const auto DepthConvertFP32toQS16Dataset = combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::QS16)); -const auto DepthConvertShiftDataset = framework::dataset::make("Shift", 0, 7); -const auto DepthConvertFixedPointQuantizedDataset = framework::dataset::make("FractionalBits", 1, 7); -} // namespace - -TEST_SUITE(NEON) -TEST_SUITE(DepthConvert) -template -using NEDepthConvertToU16Fixture = DepthConvertValidationFixture; -template -using NEDepthConvertToS16Fixture = DepthConvertValidationFixture; -template -using NEDepthConvertToS32Fixture = DepthConvertValidationFixture; -template -using NEDepthConvertToU8Fixture = DepthConvertValidationFixture; -template -using NEDepthConvertToU32Fixture = DepthConvertValidationFixture; -template -using NEDepthConvertToFP32FixedPointFixture = DepthConvertValidationFractionalBitsFixture; -template -using NEDepthConvertToQS8FixedPointFixture = DepthConvertValidationFractionalBitsFixture; -template -using
NEDepthConvertToQS16FixedPointFixture = DepthConvertValidationFractionalBitsFixture; - -TEST_SUITE(U8_to_U16) -DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertShiftDataset), - shape, policy, shift) -{ - int fixed_point_position = 0; - - // Create tensors - Tensor src = create_tensor(shape, DataType::U8, 1, fixed_point_position); - Tensor dst = create_tensor(shape, DataType::U16, 1, fixed_point_position); - - // Create and Configure function - NEDepthConvert depth_convert; - depth_convert.configure(&src, &dst, policy, shift); - - // Validate valid region - const ValidRegion valid_region = shape_to_valid_region(shape); - validate(dst.info()->valid_region(), valid_region); - - // Validate padding - const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding(); - validate(src.info()->padding(), padding); - validate(dst.info()->padding(), padding); -} - -FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConvertToU16Fixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertU8toU16Dataset), - framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertShiftDataset)) -{ - // Validate output - validate(Accessor(_target), _reference); -} - -FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConvertToU16Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertU8toU16Dataset), - framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertShiftDataset)) -{ - // Validate output - validate(Accessor(_target), _reference); -} -TEST_SUITE_END() - -TEST_SUITE(U8_to_S16) -DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertShiftDataset), - shape, policy, shift) -{ - int fixed_point_position = 0; - - // Create tensors - Tensor src = create_tensor(shape, DataType::U8, 1, fixed_point_position); - Tensor dst = create_tensor(shape, DataType::S16, 1, fixed_point_position); - - // Create and Configure function - NEDepthConvert depth_convert; - depth_convert.configure(&src, &dst, policy, shift); - - // Validate valid region - const ValidRegion valid_region = shape_to_valid_region(shape); - validate(dst.info()->valid_region(), valid_region); - - // Validate padding - const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding(); - validate(src.info()->padding(), padding); - validate(dst.info()->padding(), padding); -} - -FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConvertToS16Fixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertU8toS16Dataset), - framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertShiftDataset)) -{ - // Validate output - validate(Accessor(_target), _reference); -} - -FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConvertToS16Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertU8toS16Dataset), - framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertShiftDataset)) -{ - // Validate output - 
validate(Accessor(_target), _reference); -} -TEST_SUITE_END() -TEST_SUITE(U8_to_S32) -DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertShiftDataset), - shape, policy, shift) -{ - int fixed_point_position = 0; - - // Create tensors - Tensor src = create_tensor(shape, DataType::U8, 1, fixed_point_position); - Tensor dst = create_tensor(shape, DataType::S32, 1, fixed_point_position); - - // Create and Configure function - NEDepthConvert depth_convert; - depth_convert.configure(&src, &dst, policy, shift); - - // Validate valid region - const ValidRegion valid_region = shape_to_valid_region(shape); - validate(dst.info()->valid_region(), valid_region); - - // Validate padding - const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding(); - validate(src.info()->padding(), padding); - validate(dst.info()->padding(), padding); -} - -FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConvertToS32Fixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertU8toS32Dataset), - framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertShiftDataset)) -{ - // Validate output - validate(Accessor(_target), _reference); -} - -FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConvertToS32Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertU8toS32Dataset), - framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertShiftDataset)) -{ - // Validate output - validate(Accessor(_target), _reference); -} -TEST_SUITE_END() - -TEST_SUITE(U16_to_U8) -DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertShiftDataset), - shape, policy, shift) -{ - int fixed_point_position = 0; - - // Create tensors - Tensor src = create_tensor(shape, DataType::U16, 1, fixed_point_position); - Tensor dst = create_tensor(shape, DataType::U8, 1, fixed_point_position); - - // Create and Configure function - NEDepthConvert depth_convert; - depth_convert.configure(&src, &dst, policy, shift); - - // Validate valid region - const ValidRegion valid_region = shape_to_valid_region(shape); - validate(dst.info()->valid_region(), valid_region); - - // Validate padding - const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding(); - validate(src.info()->padding(), padding); - validate(dst.info()->padding(), padding); -} - -FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConvertToU8Fixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertU16toU8Dataset), - framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertShiftDataset)) -{ - // Validate output - validate(Accessor(_target), _reference); -} -FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConvertToU8Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertU16toU8Dataset), - framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertShiftDataset)) -{ - // Validate output - validate(Accessor(_target), 
_reference); -} -TEST_SUITE_END() - -TEST_SUITE(U16_to_U32) -DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertShiftDataset), - shape, policy, shift) -{ - int fixed_point_position = 0; - - // Create tensors - Tensor src = create_tensor(shape, DataType::U16, 1, fixed_point_position); - Tensor dst = create_tensor(shape, DataType::U32, 1, fixed_point_position); - - // Create and Configure function - NEDepthConvert depth_convert; - depth_convert.configure(&src, &dst, policy, shift); - - // Validate valid region - const ValidRegion valid_region = shape_to_valid_region(shape); - validate(dst.info()->valid_region(), valid_region); - - // Validate padding - const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding(); - validate(src.info()->padding(), padding); - validate(dst.info()->padding(), padding); -} - -FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConvertToU32Fixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertU16toU32Dataset), - framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertShiftDataset)) -{ - // Validate output - validate(Accessor(_target), _reference); -} -FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConvertToU32Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertU16toU32Dataset), - framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertShiftDataset)) -{ - // Validate output - validate(Accessor(_target), _reference); -} -TEST_SUITE_END() - -TEST_SUITE(S16_to_U8) -DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertShiftDataset), - shape, policy, shift) -{ - int fixed_point_position = 0; - - // Create tensors - Tensor src = create_tensor(shape, DataType::S16, 1, fixed_point_position); - Tensor dst = create_tensor(shape, DataType::U8, 1, fixed_point_position); - - // Create and Configure function - NEDepthConvert depth_convert; - depth_convert.configure(&src, &dst, policy, shift); - - // Validate valid region - const ValidRegion valid_region = shape_to_valid_region(shape); - validate(dst.info()->valid_region(), valid_region); - - // Validate padding - const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding(); - validate(src.info()->padding(), padding); - validate(dst.info()->padding(), padding); -} - -FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConvertToU8Fixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertS16toU8Dataset), - framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertShiftDataset)) -{ - // Validate output - validate(Accessor(_target), _reference); -} -FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConvertToU8Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertS16toU8Dataset), - framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertShiftDataset)) -{ - // Validate output - validate(Accessor(_target), _reference); -} 
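For context, the Configuration cases in this file assert the padding that configure() computes automatically. With 16 elements processed per iteration, the required right padding is conceptually whatever rounds a row up to the next multiple of 16. A minimal standalone sketch of that arithmetic follows; ceil_to_multiple and required_right_padding are local helpers written for illustration and are not the library's PaddingCalculator API:

#include <cstddef>

// Round v up to the next multiple of m (m > 0).
constexpr std::size_t ceil_to_multiple(std::size_t v, std::size_t m)
{
    return ((v + m - 1) / m) * m;
}

// Right padding needed so a row of row_len elements can be consumed
// in whole steps of `step` elements per iteration.
constexpr std::size_t required_right_padding(std::size_t row_len, std::size_t step)
{
    return ceil_to_multiple(row_len, step) - row_len;
}

static_assert(required_right_padding(17, 16) == 15, "a 17-wide row is padded up to 32");
static_assert(required_right_padding(32, 16) == 0, "already a multiple of 16");
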
-TEST_SUITE_END() - -TEST_SUITE(S16_to_S32) -DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertShiftDataset), - shape, policy, shift) -{ - int fixed_point_position = 0; - - // Create tensors - Tensor src = create_tensor(shape, DataType::S16, 1, fixed_point_position); - Tensor dst = create_tensor(shape, DataType::S32, 1, fixed_point_position); - - // Create and Configure function - NEDepthConvert depth_convert; - depth_convert.configure(&src, &dst, policy, shift); - - // Validate valid region - const ValidRegion valid_region = shape_to_valid_region(shape); - validate(dst.info()->valid_region(), valid_region); - - // Validate padding - const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding(); - validate(src.info()->padding(), padding); - validate(dst.info()->padding(), padding); -} - -FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConvertToS32Fixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertS16toS32Dataset), - framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertShiftDataset)) -{ - // Validate output - validate(Accessor(_target), _reference); -} -FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConvertToS32Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertS16toS32Dataset), - framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertShiftDataset)) -{ - // Validate output - validate(Accessor(_target), _reference); -} -TEST_SUITE_END() - -TEST_SUITE(Quantized_to_FP32) -DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("DataType", { DataType::QS8, DataType::QS16 })), - framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertFixedPointQuantizedDataset), - shape, dt, policy, fixed_point_position) -{ - int shift = 0; - - // Create tensors - Tensor src = create_tensor(shape, dt, 1, fixed_point_position); - Tensor dst = create_tensor(shape, DataType::F32, 1, fixed_point_position); - - // Create and Configure function - NEDepthConvert depth_convert; - depth_convert.configure(&src, &dst, policy, shift); - - // Validate valid region - const ValidRegion valid_region = shape_to_valid_region(shape); - validate(dst.info()->valid_region(), valid_region); - - // Validate padding - const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding(); - validate(src.info()->padding(), padding); - validate(dst.info()->padding(), padding); -} -FIXTURE_DATA_TEST_CASE(RunSmallQS8, NEDepthConvertToFP32FixedPointFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertQS8toFP32Dataset), - framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertFixedPointQuantizedDataset)) -{ - // Validate output - validate(Accessor(_target), _reference); -} -FIXTURE_DATA_TEST_CASE(RunSmallQS16, NEDepthConvertToFP32FixedPointFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertQS16toFP32Dataset), - framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, 
ConvertPolicy::WRAP })), - DepthConvertFixedPointQuantizedDataset)) -{ - // Validate output - validate(Accessor(_target), _reference); -} -FIXTURE_DATA_TEST_CASE(RunLargeQS8, NEDepthConvertToFP32FixedPointFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertQS8toFP32Dataset), - framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertFixedPointQuantizedDataset)) -{ - // Validate output - validate(Accessor(_target), _reference); -} -FIXTURE_DATA_TEST_CASE(RunLargeQS16, NEDepthConvertToFP32FixedPointFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertQS16toFP32Dataset), - framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertFixedPointQuantizedDataset)) -{ - // Validate output - validate(Accessor(_target), _reference); -} -TEST_SUITE_END() - -TEST_SUITE(FP32_to_Quantized) -DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("DataType", { DataType::QS8, DataType::QS16 })), - framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertFixedPointQuantizedDataset), - shape, dt, policy, fixed_point_position) -{ - int shift = 0; - - // Create tensors - Tensor src = create_tensor(shape, DataType::F32, 1, fixed_point_position); - Tensor dst = create_tensor(shape, dt, 1, fixed_point_position); - - // Create and Configure function - NEDepthConvert depth_convert; - depth_convert.configure(&src, &dst, policy, shift); - - // Validate valid region - const ValidRegion valid_region = shape_to_valid_region(shape); - validate(dst.info()->valid_region(), valid_region); - - // Validate padding - const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding(); - validate(src.info()->padding(), padding); - validate(dst.info()->padding(), padding); -} -FIXTURE_DATA_TEST_CASE(RunSmallQS8, NEDepthConvertToQS8FixedPointFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertFP32toQS8Dataset), - framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertFixedPointQuantizedDataset)) -{ - // Validate output - validate(Accessor(_target), _reference); -} -FIXTURE_DATA_TEST_CASE(RunSmallQS16, NEDepthConvertToQS16FixedPointFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertFP32toQS16Dataset), - framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertFixedPointQuantizedDataset)) -{ - // Validate output - validate(Accessor(_target), _reference); -} -FIXTURE_DATA_TEST_CASE(RunLargeQS8, NEDepthConvertToQS8FixedPointFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertFP32toQS8Dataset), - framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertFixedPointQuantizedDataset)) -{ - // Validate output - validate(Accessor(_target), _reference); -} -FIXTURE_DATA_TEST_CASE(RunLargeQS16, NEDepthConvertToQS16FixedPointFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertFP32toQS16Dataset), - framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - 
DepthConvertFixedPointQuantizedDataset)) -{ - // Validate output - validate(Accessor(_target), _reference); -} -TEST_SUITE_END() - -TEST_SUITE_END() -TEST_SUITE_END() -} // namespace validation -} // namespace test -} // namespace arm_compute diff --git a/tests/validation/NEON/DepthConvertLayer.cpp b/tests/validation/NEON/DepthConvertLayer.cpp new file mode 100644 index 0000000000..a56298babc --- /dev/null +++ b/tests/validation/NEON/DepthConvertLayer.cpp @@ -0,0 +1,492 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h" +#include "arm_compute/runtime/Tensor.h" +#include "arm_compute/runtime/TensorAllocator.h" +#include "tests/NEON/Accessor.h" +#include "tests/PaddingCalculator.h" +#include "tests/datasets/ConvertPolicyDataset.h" +#include "tests/datasets/ShapeDatasets.h" +#include "tests/framework/Asserts.h" +#include "tests/framework/Macros.h" +#include "tests/framework/datasets/Datasets.h" +#include "tests/validation/Validation.h" +#include "tests/validation/fixtures/DepthConvertLayerFixture.h" + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +namespace +{ +/** Input data sets **/ +const auto DepthConvertLayerU8toU16Dataset = combine(framework::dataset::make("DataType", DataType::U8), framework::dataset::make("DataType", DataType::U16)); +const auto DepthConvertLayerU8toS16Dataset = combine(framework::dataset::make("DataType", DataType::U8), framework::dataset::make("DataType", DataType::S16)); +const auto DepthConvertLayerU8toS32Dataset = combine(framework::dataset::make("DataType", DataType::U8), framework::dataset::make("DataType", DataType::S32)); +const auto DepthConvertLayerU16toU8Dataset = combine(framework::dataset::make("DataType", DataType::U16), framework::dataset::make("DataType", DataType::U8)); +const auto DepthConvertLayerU16toU32Dataset = combine(framework::dataset::make("DataType", DataType::U16), framework::dataset::make("DataType", DataType::U32)); +const auto DepthConvertLayerS16toU8Dataset = combine(framework::dataset::make("DataType", DataType::S16), framework::dataset::make("DataType", DataType::U8)); +const auto DepthConvertLayerS16toS32Dataset = combine(framework::dataset::make("DataType", DataType::S16), framework::dataset::make("DataType", DataType::S32)); +const auto DepthConvertLayerQS8toFP32Dataset = 
combine(framework::dataset::make("DataType", DataType::QS8), framework::dataset::make("DataType", DataType::F32)); +const auto DepthConvertLayerQS16toFP32Dataset = combine(framework::dataset::make("DataType", DataType::QS16), framework::dataset::make("DataType", DataType::F32)); +const auto DepthConvertLayerFP32toQS8Dataset = combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::QS8)); +const auto DepthConvertLayerFP32toQS16Dataset = combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::QS16)); +const auto DepthConvertLayerShiftDataset = framework::dataset::make("Shift", 0, 7); +const auto DepthConvertLayerFixedPointQuantizedDataset = framework::dataset::make("FractionalBits", 1, 7); +} // namespace + +TEST_SUITE(NEON) +TEST_SUITE(DepthConvertLayer) +template +using NEDepthConvertLayerToU16Fixture = DepthConvertLayerValidationFixture; +template +using NEDepthConvertLayerToS16Fixture = DepthConvertLayerValidationFixture; +template +using NEDepthConvertLayerToS32Fixture = DepthConvertLayerValidationFixture; +template +using NEDepthConvertLayerToU8Fixture = DepthConvertLayerValidationFixture; +template +using NEDepthConvertLayerToU32Fixture = DepthConvertLayerValidationFixture; +template +using NEDepthConvertLayerToFP32FixedPointFixture = DepthConvertLayerValidationFractionalBitsFixture; +template +using NEDepthConvertLayerToQS8FixedPointFixture = DepthConvertLayerValidationFractionalBitsFixture; +template +using NEDepthConvertLayerToQS16FixedPointFixture = DepthConvertLayerValidationFractionalBitsFixture; + +TEST_SUITE(U8_to_U16) +DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), + DepthConvertLayerShiftDataset), + shape, policy, shift) +{ + int fixed_point_position = 0; + + // Create tensors + Tensor src = create_tensor(shape, DataType::U8, 1, fixed_point_position); + Tensor dst = create_tensor(shape, DataType::U16, 1, fixed_point_position); + + // Create and Configure function + NEDepthConvertLayer depth_convert; + depth_convert.configure(&src, &dst, policy, shift); + + // Validate valid region + const ValidRegion valid_region = shape_to_valid_region(shape); + validate(dst.info()->valid_region(), valid_region); + + // Validate padding + const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding(); + validate(src.info()->padding(), padding); + validate(dst.info()->padding(), padding); +} + +FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConvertLayerToU16Fixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertLayerU8toU16Dataset), + framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), + DepthConvertLayerShiftDataset)) +{ + // Validate output + validate(Accessor(_target), _reference); +} + +FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConvertLayerToU16Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertLayerU8toU16Dataset), + framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), + DepthConvertLayerShiftDataset)) +{ + // Validate output + validate(Accessor(_target), _reference); +} +TEST_SUITE_END() + +TEST_SUITE(U8_to_S16) +DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, 
combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), + DepthConvertLayerShiftDataset), + shape, policy, shift) +{ + int fixed_point_position = 0; + + // Create tensors + Tensor src = create_tensor(shape, DataType::U8, 1, fixed_point_position); + Tensor dst = create_tensor(shape, DataType::S16, 1, fixed_point_position); + + // Create and Configure function + NEDepthConvertLayer depth_convert; + depth_convert.configure(&src, &dst, policy, shift); + + // Validate valid region + const ValidRegion valid_region = shape_to_valid_region(shape); + validate(dst.info()->valid_region(), valid_region); + + // Validate padding + const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding(); + validate(src.info()->padding(), padding); + validate(dst.info()->padding(), padding); +} + +FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConvertLayerToS16Fixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertLayerU8toS16Dataset), + framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), + DepthConvertLayerShiftDataset)) +{ + // Validate output + validate(Accessor(_target), _reference); +} + +FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConvertLayerToS16Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertLayerU8toS16Dataset), + framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), + DepthConvertLayerShiftDataset)) +{ + // Validate output + validate(Accessor(_target), _reference); +} +TEST_SUITE_END() +TEST_SUITE(U8_to_S32) +DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), + DepthConvertLayerShiftDataset), + shape, policy, shift) +{ + int fixed_point_position = 0; + + // Create tensors + Tensor src = create_tensor(shape, DataType::U8, 1, fixed_point_position); + Tensor dst = create_tensor(shape, DataType::S32, 1, fixed_point_position); + + // Create and Configure function + NEDepthConvertLayer depth_convert; + depth_convert.configure(&src, &dst, policy, shift); + + // Validate valid region + const ValidRegion valid_region = shape_to_valid_region(shape); + validate(dst.info()->valid_region(), valid_region); + + // Validate padding + const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding(); + validate(src.info()->padding(), padding); + validate(dst.info()->padding(), padding); +} + +FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConvertLayerToS32Fixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertLayerU8toS32Dataset), + framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), + DepthConvertLayerShiftDataset)) +{ + // Validate output + validate(Accessor(_target), _reference); +} + +FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConvertLayerToS32Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertLayerU8toS32Dataset), + framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), + DepthConvertLayerShiftDataset)) +{ + // Validate output + validate(Accessor(_target), _reference); +} +TEST_SUITE_END() + +TEST_SUITE(U16_to_U8) 
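Outside the test harness, the renamed function keeps the configure-allocate-run sequence that these cases validate. A minimal usage sketch follows; the shapes and the example function name are illustrative, not taken from the test suite:

#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h"
#include "arm_compute/runtime/Tensor.h"
#include "arm_compute/runtime/TensorAllocator.h"

void example_u8_to_u16()
{
    using namespace arm_compute;

    // Describe a 16x16 U8 source and a U16 destination of the same shape.
    Tensor src;
    Tensor dst;
    src.allocator()->init(TensorInfo(TensorShape(16U, 16U), 1, DataType::U8));
    dst.allocator()->init(TensorInfo(TensorShape(16U, 16U), 1, DataType::U16));

    // Configure while the tensors are still resizable, as the tests do.
    NEDepthConvertLayer depth_convert;
    depth_convert.configure(&src, &dst, ConvertPolicy::SATURATE, 0 /* shift */);

    src.allocator()->allocate();
    dst.allocator()->allocate();

    // ... fill src via its buffer or an accessor ...

    depth_convert.run(); // widens each U8 element to U16
}
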
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), + DepthConvertLayerShiftDataset), + shape, policy, shift) +{ + int fixed_point_position = 0; + + // Create tensors + Tensor src = create_tensor(shape, DataType::U16, 1, fixed_point_position); + Tensor dst = create_tensor(shape, DataType::U8, 1, fixed_point_position); + + // Create and Configure function + NEDepthConvertLayer depth_convert; + depth_convert.configure(&src, &dst, policy, shift); + + // Validate valid region + const ValidRegion valid_region = shape_to_valid_region(shape); + validate(dst.info()->valid_region(), valid_region); + + // Validate padding + const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding(); + validate(src.info()->padding(), padding); + validate(dst.info()->padding(), padding); +} + +FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConvertLayerToU8Fixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertLayerU16toU8Dataset), + framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), + DepthConvertLayerShiftDataset)) +{ + // Validate output + validate(Accessor(_target), _reference); +} +FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConvertLayerToU8Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertLayerU16toU8Dataset), + framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), + DepthConvertLayerShiftDataset)) +{ + // Validate output + validate(Accessor(_target), _reference); +} +TEST_SUITE_END() + +TEST_SUITE(U16_to_U32) +DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), + DepthConvertLayerShiftDataset), + shape, policy, shift) +{ + int fixed_point_position = 0; + + // Create tensors + Tensor src = create_tensor(shape, DataType::U16, 1, fixed_point_position); + Tensor dst = create_tensor(shape, DataType::U32, 1, fixed_point_position); + + // Create and Configure function + NEDepthConvertLayer depth_convert; + depth_convert.configure(&src, &dst, policy, shift); + + // Validate valid region + const ValidRegion valid_region = shape_to_valid_region(shape); + validate(dst.info()->valid_region(), valid_region); + + // Validate padding + const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding(); + validate(src.info()->padding(), padding); + validate(dst.info()->padding(), padding); +} + +FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConvertLayerToU32Fixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertLayerU16toU32Dataset), + framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), + DepthConvertLayerShiftDataset)) +{ + // Validate output + validate(Accessor(_target), _reference); +} +FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConvertLayerToU32Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertLayerU16toU32Dataset), + framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), + DepthConvertLayerShiftDataset)) +{ + // Validate output + validate(Accessor(_target), 
_reference); +} +TEST_SUITE_END() + +TEST_SUITE(S16_to_U8) +DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), + DepthConvertLayerShiftDataset), + shape, policy, shift) +{ + int fixed_point_position = 0; + + // Create tensors + Tensor src = create_tensor(shape, DataType::S16, 1, fixed_point_position); + Tensor dst = create_tensor(shape, DataType::U8, 1, fixed_point_position); + + // Create and Configure function + NEDepthConvertLayer depth_convert; + depth_convert.configure(&src, &dst, policy, shift); + + // Validate valid region + const ValidRegion valid_region = shape_to_valid_region(shape); + validate(dst.info()->valid_region(), valid_region); + + // Validate padding + const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding(); + validate(src.info()->padding(), padding); + validate(dst.info()->padding(), padding); +} + +FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConvertLayerToU8Fixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertLayerS16toU8Dataset), + framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), + DepthConvertLayerShiftDataset)) +{ + // Validate output + validate(Accessor(_target), _reference); +} +FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConvertLayerToU8Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertLayerS16toU8Dataset), + framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), + DepthConvertLayerShiftDataset)) +{ + // Validate output + validate(Accessor(_target), _reference); +} +TEST_SUITE_END() + +TEST_SUITE(S16_to_S32) +DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), + DepthConvertLayerShiftDataset), + shape, policy, shift) +{ + int fixed_point_position = 0; + + // Create tensors + Tensor src = create_tensor(shape, DataType::S16, 1, fixed_point_position); + Tensor dst = create_tensor(shape, DataType::S32, 1, fixed_point_position); + + // Create and Configure function + NEDepthConvertLayer depth_convert; + depth_convert.configure(&src, &dst, policy, shift); + + // Validate valid region + const ValidRegion valid_region = shape_to_valid_region(shape); + validate(dst.info()->valid_region(), valid_region); + + // Validate padding + const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding(); + validate(src.info()->padding(), padding); + validate(dst.info()->padding(), padding); +} + +FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConvertLayerToS32Fixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertLayerS16toS32Dataset), + framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), + DepthConvertLayerShiftDataset)) +{ + // Validate output + validate(Accessor(_target), _reference); +} +FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConvertLayerToS32Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertLayerS16toS32Dataset), + framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), + DepthConvertLayerShiftDataset)) 
+{ + // Validate output + validate(Accessor(_target), _reference); +} +TEST_SUITE_END() + +TEST_SUITE(Quantized_to_FP32) +DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("DataType", { DataType::QS8, DataType::QS16 })), + framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), + DepthConvertLayerFixedPointQuantizedDataset), + shape, dt, policy, fixed_point_position) +{ + int shift = 0; + + // Create tensors + Tensor src = create_tensor(shape, dt, 1, fixed_point_position); + Tensor dst = create_tensor(shape, DataType::F32, 1, fixed_point_position); + + // Create and Configure function + NEDepthConvertLayer depth_convert; + depth_convert.configure(&src, &dst, policy, shift); + + // Validate valid region + const ValidRegion valid_region = shape_to_valid_region(shape); + validate(dst.info()->valid_region(), valid_region); + + // Validate padding + const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding(); + validate(src.info()->padding(), padding); + validate(dst.info()->padding(), padding); +} +FIXTURE_DATA_TEST_CASE(RunSmallQS8, NEDepthConvertLayerToFP32FixedPointFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), + DepthConvertLayerQS8toFP32Dataset), + framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), + DepthConvertLayerFixedPointQuantizedDataset)) +{ + // Validate output + validate(Accessor(_target), _reference); +} +FIXTURE_DATA_TEST_CASE(RunSmallQS16, NEDepthConvertLayerToFP32FixedPointFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), + DepthConvertLayerQS16toFP32Dataset), + framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), + DepthConvertLayerFixedPointQuantizedDataset)) +{ + // Validate output + validate(Accessor(_target), _reference); +} +FIXTURE_DATA_TEST_CASE(RunLargeQS8, NEDepthConvertLayerToFP32FixedPointFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), + DepthConvertLayerQS8toFP32Dataset), + framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), + DepthConvertLayerFixedPointQuantizedDataset)) +{ + // Validate output + validate(Accessor(_target), _reference); +} +FIXTURE_DATA_TEST_CASE(RunLargeQS16, NEDepthConvertLayerToFP32FixedPointFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), + DepthConvertLayerQS16toFP32Dataset), + framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), + DepthConvertLayerFixedPointQuantizedDataset)) +{ + // Validate output + validate(Accessor(_target), _reference); +} +TEST_SUITE_END() + +TEST_SUITE(FP32_to_Quantized) +DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("DataType", { DataType::QS8, DataType::QS16 })), + framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), + DepthConvertLayerFixedPointQuantizedDataset), + shape, dt, policy, fixed_point_position) +{ + int shift = 0; + + // Create tensors + Tensor src = create_tensor(shape, DataType::F32, 1, fixed_point_position); + Tensor dst = create_tensor(shape, dt, 1, fixed_point_position); + + // Create and 
Configure function + NEDepthConvertLayer depth_convert; + depth_convert.configure(&src, &dst, policy, shift); + + // Validate valid region + const ValidRegion valid_region = shape_to_valid_region(shape); + validate(dst.info()->valid_region(), valid_region); + + // Validate padding + const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding(); + validate(src.info()->padding(), padding); + validate(dst.info()->padding(), padding); +} +FIXTURE_DATA_TEST_CASE(RunSmallQS8, NEDepthConvertLayerToQS8FixedPointFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), + DepthConvertLayerFP32toQS8Dataset), + framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), + DepthConvertLayerFixedPointQuantizedDataset)) +{ + // Validate output + validate(Accessor(_target), _reference); +} +FIXTURE_DATA_TEST_CASE(RunSmallQS16, NEDepthConvertLayerToQS16FixedPointFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), + DepthConvertLayerFP32toQS16Dataset), + framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), + DepthConvertLayerFixedPointQuantizedDataset)) +{ + // Validate output + validate(Accessor(_target), _reference); +} +FIXTURE_DATA_TEST_CASE(RunLargeQS8, NEDepthConvertLayerToQS8FixedPointFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), + DepthConvertLayerFP32toQS8Dataset), + framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), + DepthConvertLayerFixedPointQuantizedDataset)) +{ + // Validate output + validate(Accessor(_target), _reference); +} +FIXTURE_DATA_TEST_CASE(RunLargeQS16, NEDepthConvertLayerToQS16FixedPointFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), + DepthConvertLayerFP32toQS16Dataset), + framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), + DepthConvertLayerFixedPointQuantizedDataset)) +{ + // Validate output + validate(Accessor(_target), _reference); +} +TEST_SUITE_END() + +TEST_SUITE_END() +TEST_SUITE_END() +} // namespace validation +} // namespace test +} // namespace arm_compute diff --git a/tests/validation/NEON/DepthwiseConvolution.cpp b/tests/validation/NEON/DepthwiseConvolution.cpp deleted file mode 100644 index 3a4b7aa2e9..0000000000 --- a/tests/validation/NEON/DepthwiseConvolution.cpp +++ /dev/null @@ -1,126 +0,0 @@ -/* - * Copyright (c) 2017 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/functions/NEDepthwiseConvolution.h" -#include "arm_compute/runtime/Tensor.h" -#include "arm_compute/runtime/TensorAllocator.h" -#include "tests/NEON/Accessor.h" -#include "tests/PaddingCalculator.h" -#include "tests/datasets/DepthwiseConvolutionDataset.h" -#include "tests/framework/Asserts.h" -#include "tests/framework/Macros.h" -#include "tests/framework/datasets/Datasets.h" -#include "tests/validation/Validation.h" -#include "tests/validation/fixtures/DepthwiseConvolutionFixture.h" - -namespace arm_compute -{ -namespace test -{ -namespace validation -{ -namespace -{ -constexpr RelativeTolerance tolerance_f32(0.01f); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F32 */ -} // namespace - -TEST_SUITE(NEON) -TEST_SUITE(DepthwiseConvolutionLayer) - -DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(framework::dataset::concat(datasets::SmallDepthwiseConvolutionDataset3x3(), datasets::LargeDepthwiseConvolutionDataset3x3()), - framework::dataset::make("DataType", DataType::F32)), - input_shape, weights_shape, bias_shape, output_shape, info, data_type) -{ - // Create tensors - Tensor src = create_tensor(input_shape, data_type); - Tensor dst = create_tensor(output_shape, data_type); - Tensor weights = create_tensor(weights_shape, data_type); - Tensor bias = create_tensor(bias_shape, data_type); - - ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(weights.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(bias.info()->is_resizable(), framework::LogLevel::ERRORS); - - // Create and configure function - NEDepthwiseConvolution3x3 depthwise_layer; - depthwise_layer.configure(&src, &weights, &bias, &dst, info); - - // Validate valid region - const ValidRegion input_valid_region = shape_to_valid_region(input_shape); - const ValidRegion output_valid_region = shape_to_valid_region(output_shape); - const ValidRegion weights_valid_region = shape_to_valid_region(weights_shape); - const ValidRegion bias_valid_region = shape_to_valid_region(bias_shape); - - validate(src.info()->valid_region(), input_valid_region); - validate(dst.info()->valid_region(), output_valid_region); - validate(weights.info()->valid_region(), weights_valid_region); - validate(bias.info()->valid_region(), bias_valid_region); - - // Validate padding - const int step = 16 >> info.stride().first; - const PaddingSize padding = PaddingCalculator(output_shape.x(), step).required_padding(); - validate(dst.info()->padding(), padding); -} - -TEST_SUITE(Float) -TEST_SUITE(F32) -TEST_SUITE(Generic) -template -using NEDepthwiseConvolutionFixture = DepthwiseConvolutionValidationFixture; -FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthwiseConvolutionFixture, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallDepthwiseConvolutionDataset(), framework::dataset::make("DataType", - DataType::F32))) -{ - validate(Accessor(_target), _reference, tolerance_f32); -} -FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthwiseConvolutionFixture, framework::DatasetMode::NIGHTLY, 
combine(datasets::LargeDepthwiseConvolutionDataset(), framework::dataset::make("DataType", - DataType::F32))) -{ - validate(Accessor(_target), _reference, tolerance_f32); -} -TEST_SUITE_END() - -TEST_SUITE(W3x3) -template -using NEDepthwiseConvolutionFixture3x3 = DepthwiseConvolutionValidationFixture; -FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthwiseConvolutionFixture3x3, framework::DatasetMode::ALL, combine(datasets::SmallDepthwiseConvolutionDataset3x3(), framework::dataset::make("DataType", - DataType::F32))) -{ - validate(Accessor(_target), _reference, tolerance_f32); -} -FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthwiseConvolutionFixture3x3, framework::DatasetMode::NIGHTLY, combine(datasets::LargeDepthwiseConvolutionDataset3x3(), framework::dataset::make("DataType", - DataType::F32))) -{ - validate(Accessor(_target), _reference, tolerance_f32); -} -TEST_SUITE_END() - -TEST_SUITE_END() -TEST_SUITE_END() - -TEST_SUITE_END() -TEST_SUITE_END() -} // namespace validation -} // namespace test -} // namespace arm_compute diff --git a/tests/validation/NEON/DepthwiseConvolutionLayer.cpp b/tests/validation/NEON/DepthwiseConvolutionLayer.cpp new file mode 100644 index 0000000000..17eaaf8ad7 --- /dev/null +++ b/tests/validation/NEON/DepthwiseConvolutionLayer.cpp @@ -0,0 +1,131 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h" +#include "arm_compute/runtime/Tensor.h" +#include "arm_compute/runtime/TensorAllocator.h" +#include "tests/NEON/Accessor.h" +#include "tests/PaddingCalculator.h" +#include "tests/datasets/DepthwiseConvolutionLayerDataset.h" +#include "tests/framework/Asserts.h" +#include "tests/framework/Macros.h" +#include "tests/framework/datasets/Datasets.h" +#include "tests/validation/Validation.h" +#include "tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h" + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +namespace +{ +constexpr RelativeTolerance tolerance_f32(0.01f); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F32 */ +} // namespace + +TEST_SUITE(NEON) +TEST_SUITE(DepthwiseConvolutionLayer) + +DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(framework::dataset::concat(datasets::SmallDepthwiseConvolutionLayerDataset3x3(), + datasets::LargeDepthwiseConvolutionLayerDataset3x3()), + framework::dataset::make("DataType", DataType::F32)), + input_shape, weights_shape, bias_shape, output_shape, info, data_type) +{ + // Create tensors + Tensor src = create_tensor(input_shape, data_type); + Tensor dst = create_tensor(output_shape, data_type); + Tensor weights = create_tensor(weights_shape, data_type); + Tensor bias = create_tensor(bias_shape, data_type); + + ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(weights.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(bias.info()->is_resizable(), framework::LogLevel::ERRORS); + + // Create and configure function + NEDepthwiseConvolutionLayer3x3 depthwise_layer; + depthwise_layer.configure(&src, &weights, &bias, &dst, info); + + // Validate valid region + const ValidRegion input_valid_region = shape_to_valid_region(input_shape); + const ValidRegion output_valid_region = shape_to_valid_region(output_shape); + const ValidRegion weights_valid_region = shape_to_valid_region(weights_shape); + const ValidRegion bias_valid_region = shape_to_valid_region(bias_shape); + + validate(src.info()->valid_region(), input_valid_region); + validate(dst.info()->valid_region(), output_valid_region); + validate(weights.info()->valid_region(), weights_valid_region); + validate(bias.info()->valid_region(), bias_valid_region); + + // Validate padding + const int step = 16 >> info.stride().first; + const PaddingSize padding = PaddingCalculator(output_shape.x(), step).required_padding(); + validate(dst.info()->padding(), padding); +} + +TEST_SUITE(Float) +TEST_SUITE(F32) +TEST_SUITE(Generic) +template +using NEDepthwiseConvolutionLayerFixture = DepthwiseConvolutionLayerValidationFixture; +FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthwiseConvolutionLayerFixture, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallDepthwiseConvolutionLayerDataset(), + framework::dataset::make("DataType", + DataType::F32))) +{ + validate(Accessor(_target), _reference, tolerance_f32); +} +FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthwiseConvolutionLayerFixture, framework::DatasetMode::NIGHTLY, combine(datasets::LargeDepthwiseConvolutionLayerDataset(), + framework::dataset::make("DataType", + DataType::F32))) +{ + validate(Accessor(_target), _reference, tolerance_f32); +} +TEST_SUITE_END() + +TEST_SUITE(W3x3) +template +using 
NEDepthwiseConvolutionLayerFixture3x3 = DepthwiseConvolutionLayerValidationFixture; +FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthwiseConvolutionLayerFixture3x3, framework::DatasetMode::ALL, combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(), + framework::dataset::make("DataType", + DataType::F32))) +{ + validate(Accessor(_target), _reference, tolerance_f32); +} +FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthwiseConvolutionLayerFixture3x3, framework::DatasetMode::NIGHTLY, combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(), + framework::dataset::make("DataType", + DataType::F32))) +{ + validate(Accessor(_target), _reference, tolerance_f32); +} +TEST_SUITE_END() + +TEST_SUITE_END() +TEST_SUITE_END() + +TEST_SUITE_END() +TEST_SUITE_END() +} // namespace validation +} // namespace test +} // namespace arm_compute diff --git a/tests/validation/NEON/L2Normalize.cpp b/tests/validation/NEON/L2Normalize.cpp deleted file mode 100644 index ceffa6d510..0000000000 --- a/tests/validation/NEON/L2Normalize.cpp +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2017 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/functions/NEL2Normalize.h" -#include "arm_compute/runtime/Tensor.h" -#include "arm_compute/runtime/TensorAllocator.h" -#include "tests/NEON/Accessor.h" -#include "tests/PaddingCalculator.h" -#include "tests/datasets/ShapeDatasets.h" -#include "tests/framework/Asserts.h" -#include "tests/framework/Macros.h" -#include "tests/framework/datasets/Datasets.h" -#include "tests/validation/Validation.h" -#include "tests/validation/fixtures/L2NormalizeFixture.h" - -namespace arm_compute -{ -namespace test -{ -namespace validation -{ -namespace -{ -/** Tolerance for float operations */ -RelativeTolerance tolerance_f32(0.00001f); -} // namespace - -TEST_SUITE(NEON) -TEST_SUITE(L2Normalize) - -template -using NEL2NormalizeFixture = L2NormalizeValidationFixture; - -TEST_SUITE(FP32) -FIXTURE_DATA_TEST_CASE(RunSmall, NEL2NormalizeFixture, framework::DatasetMode::PRECOMMIT, - combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("Axis", { 0 })), framework::dataset::make("Epsilon", { 1e-12 }))) -{ - // Validate output - validate(Accessor(_target), _reference, tolerance_f32); -} - -FIXTURE_DATA_TEST_CASE(RunLarge, NEL2NormalizeFixture, framework::DatasetMode::NIGHTLY, - combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("Axis", { 0 })), framework::dataset::make("Epsilon", { 1e-12 }))) -{ - // Validate output - validate(Accessor(_target), _reference, tolerance_f32); -} -TEST_SUITE_END() - -TEST_SUITE_END() -TEST_SUITE_END() -} // namespace validation -} // namespace test -} // namespace arm_compute diff --git a/tests/validation/NEON/L2NormalizeLayer.cpp b/tests/validation/NEON/L2NormalizeLayer.cpp new file mode 100644 index 0000000000..c0f5920964 --- /dev/null +++ b/tests/validation/NEON/L2NormalizeLayer.cpp @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/NEON/functions/NEL2NormalizeLayer.h"
+#include "arm_compute/runtime/Tensor.h"
+#include "arm_compute/runtime/TensorAllocator.h"
+#include "tests/NEON/Accessor.h"
+#include "tests/PaddingCalculator.h"
+#include "tests/datasets/ShapeDatasets.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/fixtures/L2NormalizeLayerFixture.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace
+{
+/** Tolerance for float operations */
+RelativeTolerance<float> tolerance_f32(0.00001f);
+} // namespace
+
+TEST_SUITE(NEON)
+TEST_SUITE(L2NormalizeLayer)
+
+template <typename T>
+using NEL2NormalizeLayerFixture = L2NormalizeLayerValidationFixture<Tensor, Accessor, NEL2NormalizeLayer, T>;
+
+TEST_SUITE(FP32)
+FIXTURE_DATA_TEST_CASE(RunSmall, NEL2NormalizeLayerFixture<float>, framework::DatasetMode::PRECOMMIT,
+                       combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("Axis", { 0 })), framework::dataset::make("Epsilon", { 1e-12 })))
+{
+    // Validate output
+    validate(Accessor(_target), _reference, tolerance_f32);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, NEL2NormalizeLayerFixture<float>, framework::DatasetMode::NIGHTLY,
+                       combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("Axis", { 0 })), framework::dataset::make("Epsilon", { 1e-12 })))
+{
+    // Validate output
+    validate(Accessor(_target), _reference, tolerance_f32);
+}
+TEST_SUITE_END()
+
+TEST_SUITE_END()
+TEST_SUITE_END()
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/fixtures/DepthConcatenateLayerFixture.h b/tests/validation/fixtures/DepthConcatenateLayerFixture.h
index 633dba23e0..103c73e4ea 100644
--- a/tests/validation/fixtures/DepthConcatenateLayerFixture.h
+++ b/tests/validation/fixtures/DepthConcatenateLayerFixture.h
@@ -43,7 +43,7 @@ namespace test
 namespace validation
 {
 template <typename TensorType, typename ITensorType, typename AccessorType, typename FunctionType, typename T>
-class DepthConcatenateValidationFixture : public framework::Fixture
+class DepthConcatenateLayerValidationFixture : public framework::Fixture
 {
 public:
     template <typename...>
diff --git a/tests/validation/fixtures/DepthConvertFixture.h b/tests/validation/fixtures/DepthConvertFixture.h
deleted file mode 100644
index b132a9341d..0000000000
--- a/tests/validation/fixtures/DepthConvertFixture.h
+++ /dev/null
@@ -1,131 +0,0 @@
-/*
- * Copyright (c) 2017 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_TEST_DEPTH_CONVERT_FIXTURE
-#define ARM_COMPUTE_TEST_DEPTH_CONVERT_FIXTURE
-
-#include "arm_compute/core/TensorShape.h"
-#include "arm_compute/core/Types.h"
-#include "tests/AssetsLibrary.h"
-#include "tests/Globals.h"
-#include "tests/IAccessor.h"
-#include "tests/framework/Asserts.h"
-#include "tests/framework/Fixture.h"
-#include "tests/validation/CPP/DepthConvert.h"
-#include "tests/validation/Helpers.h"
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-template <typename TensorType, typename AccessorType, typename FunctionType, typename T1, typename T2>
-class DepthConvertValidationFixedPointFixture : public framework::Fixture
-{
-public:
-    template <typename...>
-    void setup(TensorShape shape, DataType dt_in, DataType dt_out, ConvertPolicy policy, uint32_t shift, uint32_t fractional_bits)
-    {
-        _shift           = shift;
-        _fractional_bits = fractional_bits;
-        _target          = compute_target(shape, dt_in, dt_out, policy, shift, fractional_bits);
-        _reference       = compute_reference(shape, dt_in, dt_out, policy, shift, fractional_bits);
-    }
-
-protected:
-    template <typename U>
-    void fill(U &&tensor, int i)
-    {
-        library->fill_tensor_uniform(tensor, i);
-    }
-
-    TensorType compute_target(const TensorShape &shape, DataType dt_in, DataType dt_out, ConvertPolicy policy, uint32_t shift, uint32_t fixed_point_position)
-    {
-        // Create tensors
-        TensorType src = create_tensor<TensorType>(shape, dt_in, 1, static_cast<int>(fixed_point_position));
-        TensorType dst = create_tensor<TensorType>(shape, dt_out, 1, static_cast<int>(fixed_point_position));
-
-        // Create and configure function
-        FunctionType depth_convert;
-        depth_convert.configure(&src, &dst, policy, shift);
-
-        ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
-        ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
-
-        // Allocate tensors
-        src.allocator()->allocate();
-        dst.allocator()->allocate();
-
-        ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS);
-        ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS);
-
-        // Fill tensors
-        fill(AccessorType(src), 0);
-
-        // Compute function
-        depth_convert.run();
-
-        return dst;
-    }
-
-    SimpleTensor<T2> compute_reference(const TensorShape &shape, DataType dt_in, DataType dt_out, ConvertPolicy policy, uint32_t shift, uint32_t fixed_point_position)
-    {
-        // Create reference
-        SimpleTensor<T1> src{ shape, dt_in, 1, static_cast<int>(fixed_point_position) };
-
-        // Fill reference
-        fill(src, 0);
-
-        return reference::depth_convert<T1, T2>(src, dt_out, policy, shift);
-    }
-
-    TensorType       _target{};
-    SimpleTensor<T2> _reference{};
-    int              _fractional_bits{};
-    int              _shift{};
-};
-template <typename TensorType, typename AccessorType, typename FunctionType, typename T1, typename T2>
-class DepthConvertValidationFixture : public DepthConvertValidationFixedPointFixture<TensorType, AccessorType, FunctionType, T1, T2>
-{
-public:
-    template <typename...>
-    void setup(TensorShape shape, DataType dt_in, DataType dt_out, ConvertPolicy policy, uint32_t shift)
-    {
-        DepthConvertValidationFixedPointFixture<TensorType, AccessorType, FunctionType, T1, T2>::setup(shape, dt_in, dt_out, policy, shift, 0);
-    }
-};
-template <typename TensorType, typename AccessorType, typename FunctionType, typename T1, typename T2>
-class DepthConvertValidationFractionalBitsFixture : public DepthConvertValidationFixedPointFixture<TensorType, AccessorType, FunctionType, T1, T2>
-{
-public:
-    template <typename...>
-    void setup(TensorShape shape, DataType dt_in, DataType dt_out, ConvertPolicy policy, uint32_t fractional_bits)
-    {
-        DepthConvertValidationFixedPointFixture<TensorType, AccessorType, FunctionType, T1, T2>::setup(shape, dt_in, dt_out, policy, 0, fractional_bits);
-    }
-};
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_TEST_DEPTH_CONVERT_FIXTURE */
diff --git a/tests/validation/fixtures/DepthConvertLayerFixture.h b/tests/validation/fixtures/DepthConvertLayerFixture.h
new file mode 100644
index 0000000000..c2fdc75bb4
--- /dev/null
+++ b/tests/validation/fixtures/DepthConvertLayerFixture.h
@@ -0,0 +1,131 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_TEST_DEPTH_CONVERT_FIXTURE
+#define ARM_COMPUTE_TEST_DEPTH_CONVERT_FIXTURE
+
+#include "arm_compute/core/TensorShape.h"
+#include "arm_compute/core/Types.h"
+#include "tests/AssetsLibrary.h"
+#include "tests/Globals.h"
+#include "tests/IAccessor.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Fixture.h"
+#include "tests/validation/CPP/DepthConvertLayer.h"
+#include "tests/validation/Helpers.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T1, typename T2>
+class DepthConvertLayerValidationFixedPointFixture : public framework::Fixture
+{
+public:
+    template <typename...>
+    void setup(TensorShape shape, DataType dt_in, DataType dt_out, ConvertPolicy policy, uint32_t shift, uint32_t fractional_bits)
+    {
+        _shift           = shift;
+        _fractional_bits = fractional_bits;
+        _target          = compute_target(shape, dt_in, dt_out, policy, shift, fractional_bits);
+        _reference       = compute_reference(shape, dt_in, dt_out, policy, shift, fractional_bits);
+    }
+
+protected:
+    template <typename U>
+    void fill(U &&tensor, int i)
+    {
+        library->fill_tensor_uniform(tensor, i);
+    }
+
+    TensorType compute_target(const TensorShape &shape, DataType dt_in, DataType dt_out, ConvertPolicy policy, uint32_t shift, uint32_t fixed_point_position)
+    {
+        // Create tensors
+        TensorType src = create_tensor<TensorType>(shape, dt_in, 1, static_cast<int>(fixed_point_position));
+        TensorType dst = create_tensor<TensorType>(shape, dt_out, 1, static_cast<int>(fixed_point_position));
+
+        // Create and configure function
+        FunctionType depth_convert;
+        depth_convert.configure(&src, &dst, policy, shift);
+
+        ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+        // Allocate tensors
+        src.allocator()->allocate();
+        dst.allocator()->allocate();
+
+        ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+        // Fill tensors
+        fill(AccessorType(src), 0);
+
+        // Compute function
+        depth_convert.run();
+
+        return dst;
+    }
+
+    SimpleTensor<T2> compute_reference(const TensorShape &shape, DataType dt_in, DataType dt_out, ConvertPolicy policy, uint32_t shift, uint32_t fixed_point_position)
+    {
+        // Create reference
+        SimpleTensor<T1> src{ shape, dt_in, 1, static_cast<int>(fixed_point_position) };
+
+        // Fill reference
+        fill(src, 0);
+
+        return reference::depth_convert<T1, T2>(src, dt_out, policy, shift);
+    }
+
+    TensorType       _target{};
+    SimpleTensor<T2> _reference{};
+    int              _fractional_bits{};
+    int              _shift{};
+};
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T1, typename T2>
+class DepthConvertLayerValidationFixture : public DepthConvertLayerValidationFixedPointFixture<TensorType, AccessorType, FunctionType, T1, T2>
+{
+public:
+    template <typename...>
+    void setup(TensorShape shape, DataType dt_in, DataType dt_out, ConvertPolicy policy, uint32_t shift)
+    {
+        DepthConvertLayerValidationFixedPointFixture<TensorType, AccessorType, FunctionType, T1, T2>::setup(shape, dt_in, dt_out, policy, shift, 0);
+    }
+};
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T1, typename T2>
+class DepthConvertLayerValidationFractionalBitsFixture : public DepthConvertLayerValidationFixedPointFixture<TensorType, AccessorType, FunctionType, T1, T2>
+{
+public:
+    template <typename...>
+    void setup(TensorShape shape, DataType dt_in, DataType dt_out, ConvertPolicy policy, uint32_t fractional_bits)
+    {
+        DepthConvertLayerValidationFixedPointFixture<TensorType, AccessorType, FunctionType, T1, T2>::setup(shape, dt_in, dt_out, policy, 0, fractional_bits);
+    }
+};
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_TEST_DEPTH_CONVERT_FIXTURE */
diff --git a/tests/validation/fixtures/DepthwiseConvolutionFixture.h b/tests/validation/fixtures/DepthwiseConvolutionFixture.h
deleted file mode 100644
index b1d31d657a..0000000000
--- a/tests/validation/fixtures/DepthwiseConvolutionFixture.h
+++ /dev/null
@@ -1,179 +0,0 @@
-/*
- * Copyright (c) 2017 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_TEST_DEPTHWISE_CONVOLUTION_FIXTURE
-#define ARM_COMPUTE_TEST_DEPTHWISE_CONVOLUTION_FIXTURE
-
-#include "arm_compute/core/TensorShape.h"
-#include "arm_compute/core/Types.h"
-#include "tests/AssetsLibrary.h"
-#include "tests/Globals.h"
-#include "tests/IAccessor.h"
-#include "tests/framework/Asserts.h"
-#include "tests/framework/Fixture.h"
-#include "tests/validation/CPP/DepthwiseConvolution.h"
-#include "tests/validation/Helpers.h"
-
-#include <random>
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
-class DepthwiseConvolutionValidationGenericFixture : public framework::Fixture
-{
-public:
-    using TBias = typename std::conditional<std::is_same<typename std::decay<T>::type, uint8_t>::value, int32_t, T>::type;
-
-public:
-    template <typename...>
-    void setup(TensorShape in_shape, TensorShape weights_shape, TensorShape biases_shape, TensorShape out_shape, PadStrideInfo pad_stride_info, DataType data_type, QuantizationInfo quantization_info)
-    {
-        _quantization_info = quantization_info;
-        _data_type         = data_type;
-
-        const DataType bias_data_type = is_data_type_quantized_asymmetric(data_type) ? DataType::S32 : data_type;
-
-        _target    = compute_target(in_shape, weights_shape, biases_shape, out_shape, pad_stride_info, data_type, bias_data_type, quantization_info);
-        _reference = compute_reference(in_shape, weights_shape, biases_shape, out_shape, pad_stride_info, data_type, bias_data_type, quantization_info);
-    }
-
-protected:
-    template <typename U>
-    void fill(U &&tensor, int i)
-    {
-        switch(tensor.data_type())
-        {
-            case DataType::QASYMM8:
-            {
-                std::uniform_int_distribution<uint8_t> distribution(0, 10);
-                library->fill(tensor, distribution, i);
-                break;
-            }
-            case DataType::F32:
-            {
-                std::uniform_real_distribution<> distribution(-1.0f, 1.0f);
-                library->fill(tensor, distribution, i);
-                break;
-            }
-            case DataType::S32:
-            {
-                std::uniform_int_distribution<int32_t> distribution(-1000, 1000);
-                library->fill(tensor, distribution, i);
-                break;
-            }
-            default:
-                library->fill_tensor_uniform(tensor, i);
-        }
-    }
-
-    TensorType compute_target(const TensorShape &input_shape, const TensorShape &weights_shape, const TensorShape &biases_shape, const TensorShape &output_shape, PadStrideInfo &pad_stride_info,
-                              const DataType data_type, const DataType bias_data_type, const QuantizationInfo quantization_info)
-    {
-        // Create tensors
-        TensorType src     = create_tensor<TensorType>(input_shape, data_type, 1, 0, quantization_info);
-        TensorType weights = create_tensor<TensorType>(weights_shape, data_type, 1, 0, quantization_info);
-        TensorType biases  = create_tensor<TensorType>(biases_shape, bias_data_type, 1, 0, quantization_info);
-        TensorType dst     = create_tensor<TensorType>(output_shape, data_type, 1, 0, quantization_info);
-
-        // Create Depthwise Convolution configure function
-        FunctionType dwc;
-        dwc.configure(&src, &weights, &biases, &dst, pad_stride_info);
-
-        ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
-        ARM_COMPUTE_EXPECT(weights.info()->is_resizable(), framework::LogLevel::ERRORS);
-        ARM_COMPUTE_EXPECT(biases.info()->is_resizable(), framework::LogLevel::ERRORS);
-        ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
-
-        // Allocate tensors
-        src.allocator()->allocate();
-        weights.allocator()->allocate();
-        biases.allocator()->allocate();
-        dst.allocator()->allocate();
-
-        ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS);
-        ARM_COMPUTE_EXPECT(!weights.info()->is_resizable(), framework::LogLevel::ERRORS);
-        ARM_COMPUTE_EXPECT(!biases.info()->is_resizable(), framework::LogLevel::ERRORS);
-        ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS);
-
-        // Fill tensors
-        fill(AccessorType(src), 0);
-        fill(AccessorType(weights), 1);
-        fill(AccessorType(biases), 2);
-
-        // Compute function
-        dwc.run();
-
-        return dst;
-    }
-
-    SimpleTensor<T> compute_reference(const TensorShape &in_shape, const TensorShape &weights_shape, const TensorShape &biases_shape, const TensorShape &out_shape, const PadStrideInfo &pad_stride_info,
-                                      const DataType data_type, const DataType bias_data_type, QuantizationInfo quantization_info)
-    {
-        SimpleTensor<T>     src{ in_shape, data_type, 1, 0, quantization_info };
-        SimpleTensor<T>     weights{ weights_shape, data_type, 1, 0, quantization_info };
-        SimpleTensor<TBias> biases{ biases_shape, data_type, 1, 0, quantization_info };
-
-        fill(src, 0);
-        fill(weights, 1);
-        fill(biases, 2);
-
-        return reference::depthwise_convolution(src, weights, biases, out_shape, pad_stride_info);
-    }
-
-    TensorType       _target{};
-    SimpleTensor<T>  _reference{};
-    DataType         _data_type{};
-    QuantizationInfo _quantization_info{};
-};
-
-template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
-class DepthwiseConvolutionValidationFixture : public DepthwiseConvolutionValidationGenericFixture<TensorType, AccessorType, FunctionType, T>
-{
-public:
-    template <typename...>
-    void setup(TensorShape in_shape, TensorShape weights_shape, TensorShape biases_shape, TensorShape out_shape, PadStrideInfo pad_stride_info, DataType data_type)
-    {
-        DepthwiseConvolutionValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(in_shape, weights_shape, biases_shape, out_shape, pad_stride_info,
-                                                                                                       data_type, QuantizationInfo());
-    }
-};
-
-template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
-class DepthwiseConvolutionValidationQuantizedFixture : public DepthwiseConvolutionValidationGenericFixture<TensorType, AccessorType, FunctionType, T>
-{
-public:
-    template <typename...>
-    void setup(TensorShape in_shape, TensorShape weights_shape, TensorShape biases_shape, TensorShape out_shape, PadStrideInfo pad_stride_info, DataType data_type, QuantizationInfo quantization_info)
-    {
-        DepthwiseConvolutionValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(in_shape, weights_shape, biases_shape, out_shape, pad_stride_info,
-                                                                                                       data_type, quantization_info);
-    }
-};
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_TEST_DEPTHWISE_CONVOLUTION_FIXTURE */
diff --git a/tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h b/tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h
new file mode 100644
index 0000000000..0af3fdf6c4
--- /dev/null
+++ b/tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h
@@ -0,0 +1,179 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_TEST_DEPTHWISE_CONVOLUTION_FIXTURE
+#define ARM_COMPUTE_TEST_DEPTHWISE_CONVOLUTION_FIXTURE
+
+#include "arm_compute/core/TensorShape.h"
+#include "arm_compute/core/Types.h"
+#include "tests/AssetsLibrary.h"
+#include "tests/Globals.h"
+#include "tests/IAccessor.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Fixture.h"
+#include "tests/validation/CPP/DepthwiseConvolutionLayer.h"
+#include "tests/validation/Helpers.h"
+
+#include <random>
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class DepthwiseConvolutionLayerValidationGenericFixture : public framework::Fixture
+{
+public:
+    using TBias = typename std::conditional<std::is_same<typename std::decay<T>::type, uint8_t>::value, int32_t, T>::type;
+
+public:
+    template <typename...>
+    void setup(TensorShape in_shape, TensorShape weights_shape, TensorShape biases_shape, TensorShape out_shape, PadStrideInfo pad_stride_info, DataType data_type, QuantizationInfo quantization_info)
+    {
+        _quantization_info = quantization_info;
+        _data_type         = data_type;
+
+        const DataType bias_data_type = is_data_type_quantized_asymmetric(data_type) ? DataType::S32 : data_type;
+
+        _target    = compute_target(in_shape, weights_shape, biases_shape, out_shape, pad_stride_info, data_type, bias_data_type, quantization_info);
+        _reference = compute_reference(in_shape, weights_shape, biases_shape, out_shape, pad_stride_info, data_type, bias_data_type, quantization_info);
+    }
+
+protected:
+    template <typename U>
+    void fill(U &&tensor, int i)
+    {
+        switch(tensor.data_type())
+        {
+            case DataType::QASYMM8:
+            {
+                std::uniform_int_distribution<uint8_t> distribution(0, 10);
+                library->fill(tensor, distribution, i);
+                break;
+            }
+            case DataType::F32:
+            {
+                std::uniform_real_distribution<> distribution(-1.0f, 1.0f);
+                library->fill(tensor, distribution, i);
+                break;
+            }
+            case DataType::S32:
+            {
+                std::uniform_int_distribution<int32_t> distribution(-1000, 1000);
+                library->fill(tensor, distribution, i);
+                break;
+            }
+            default:
+                library->fill_tensor_uniform(tensor, i);
+        }
+    }
+
+    TensorType compute_target(const TensorShape &input_shape, const TensorShape &weights_shape, const TensorShape &biases_shape, const TensorShape &output_shape, PadStrideInfo &pad_stride_info,
+                              const DataType data_type, const DataType bias_data_type, const QuantizationInfo quantization_info)
+    {
+        // Create tensors
+        TensorType src     = create_tensor<TensorType>(input_shape, data_type, 1, 0, quantization_info);
+        TensorType weights = create_tensor<TensorType>(weights_shape, data_type, 1, 0, quantization_info);
+        TensorType biases  = create_tensor<TensorType>(biases_shape, bias_data_type, 1, 0, quantization_info);
+        TensorType dst     = create_tensor<TensorType>(output_shape, data_type, 1, 0, quantization_info);
+
+        // Create Depthwise Convolution configure function
+        FunctionType dwc;
+        dwc.configure(&src, &weights, &biases, &dst, pad_stride_info);
+
+        ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(weights.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(biases.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+        // Allocate tensors
+        src.allocator()->allocate();
+        weights.allocator()->allocate();
+        biases.allocator()->allocate();
+        dst.allocator()->allocate();
+
+        ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(!weights.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(!biases.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+        // Fill tensors
+        fill(AccessorType(src), 0);
+        fill(AccessorType(weights), 1);
+        fill(AccessorType(biases), 2);
+
+        // Compute function
+        dwc.run();
+
+        return dst;
+    }
+
+    SimpleTensor<T> compute_reference(const TensorShape &in_shape, const TensorShape &weights_shape, const TensorShape &biases_shape, const TensorShape &out_shape, const PadStrideInfo &pad_stride_info,
+                                      const DataType data_type, const DataType bias_data_type, QuantizationInfo quantization_info)
+    {
+        SimpleTensor<T>     src{ in_shape, data_type, 1, 0, quantization_info };
+        SimpleTensor<T>     weights{ weights_shape, data_type, 1, 0, quantization_info };
+        SimpleTensor<TBias> biases{ biases_shape, data_type, 1, 0, quantization_info };
+
+        fill(src, 0);
+        fill(weights, 1);
+        fill(biases, 2);
+
+        return reference::depthwise_convolution(src, weights, biases, out_shape, pad_stride_info);
+    }
+
+    TensorType       _target{};
+    SimpleTensor<T>  _reference{};
+    DataType         _data_type{};
+    QuantizationInfo _quantization_info{};
+};
+
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class DepthwiseConvolutionLayerValidationFixture : public DepthwiseConvolutionLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T>
+{
+public:
+    template <typename...>
+    void setup(TensorShape in_shape, TensorShape weights_shape, TensorShape biases_shape, TensorShape out_shape, PadStrideInfo pad_stride_info, DataType data_type)
+    {
+        DepthwiseConvolutionLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(in_shape, weights_shape, biases_shape, out_shape, pad_stride_info,
+                                                                                                            data_type, QuantizationInfo());
+    }
+};
+
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class DepthwiseConvolutionLayerValidationQuantizedFixture : public DepthwiseConvolutionLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T>
+{
+public:
+    template <typename...>
+    void setup(TensorShape in_shape, TensorShape weights_shape, TensorShape biases_shape, TensorShape out_shape, PadStrideInfo pad_stride_info, DataType data_type, QuantizationInfo quantization_info)
+    {
+        DepthwiseConvolutionLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(in_shape, weights_shape, biases_shape, out_shape, pad_stride_info,
+                                                                                                            data_type, quantization_info);
+    }
+};
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_TEST_DEPTHWISE_CONVOLUTION_FIXTURE */
diff --git a/tests/validation/fixtures/L2NormalizeFixture.h b/tests/validation/fixtures/L2NormalizeFixture.h
deleted file mode 100644
index e6113937f1..0000000000
--- a/tests/validation/fixtures/L2NormalizeFixture.h
+++ /dev/null
@@ -1,107 +0,0 @@
-/*
- * Copyright (c) 2017 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_TEST_L2NORMALIZE_FIXTURE
-#define ARM_COMPUTE_TEST_L2NORMALIZE_FIXTURE
-
-#include "arm_compute/core/TensorShape.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/Tensor.h"
-#include "tests/AssetsLibrary.h"
-#include "tests/Globals.h"
-#include "tests/IAccessor.h"
-#include "tests/framework/Asserts.h"
-#include "tests/framework/Fixture.h"
-#include "tests/validation/CPP/L2Normalize.h"
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
-class L2NormalizeValidationFixture : public framework::Fixture
-{
-public:
-    template <typename...>
-    void setup(TensorShape shape, DataType data_type, unsigned int axis, float epsilon)
-    {
-        _target    = compute_target(shape, data_type, axis, epsilon);
-        _reference = compute_reference(shape, data_type, axis, epsilon);
-    }
-
-protected:
-    template <typename U>
-    void fill(U &&tensor)
-    {
-        library->fill_tensor_uniform(tensor, 0);
-    }
-
-    TensorType compute_target(const TensorShape &shape, DataType data_type, unsigned int axis, float epsilon)
-    {
-        // Create tensors
-        TensorType src = create_tensor<TensorType>(shape, data_type);
-        TensorType dst = create_tensor<TensorType>(shape, data_type);
-
-        // Create and configure function
-        FunctionType l2_norm_func;
-        l2_norm_func.configure(&src, &dst, axis, epsilon);
-
-        ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
-        ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
-
-        // Allocate tensors
-        src.allocator()->allocate();
-        dst.allocator()->allocate();
-
-        ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS);
-        ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS);
-
-        // Fill tensors
-        fill(AccessorType(src));
-
-        // Compute function
-        l2_norm_func.run();
-
-        return dst;
-    }
-
-    SimpleTensor<T> compute_reference(const TensorShape &shape, DataType data_type, unsigned int axis, float epsilon)
-    {
-        // Create reference
-        SimpleTensor<T> src{ shape, data_type };
-
-        // Fill reference
-        fill(src);
-
-        return reference::l2_normalize<T>(src, axis, epsilon);
-    }
-
-    TensorType      _target{};
-    SimpleTensor<T> _reference{};
-};
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_TEST_L2NORMALIZE_FIXTURE */
diff --git a/tests/validation/fixtures/L2NormalizeLayerFixture.h b/tests/validation/fixtures/L2NormalizeLayerFixture.h
new file mode 100644
index 0000000000..7bb95883f7
--- /dev/null
+++ b/tests/validation/fixtures/L2NormalizeLayerFixture.h
@@ -0,0 +1,107 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_TEST_L2NORMALIZE_FIXTURE
+#define ARM_COMPUTE_TEST_L2NORMALIZE_FIXTURE
+
+#include "arm_compute/core/TensorShape.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/Tensor.h"
+#include "tests/AssetsLibrary.h"
+#include "tests/Globals.h"
+#include "tests/IAccessor.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Fixture.h"
+#include "tests/validation/CPP/L2NormalizeLayer.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class L2NormalizeLayerValidationFixture : public framework::Fixture
+{
+public:
+    template <typename...>
+    void setup(TensorShape shape, DataType data_type, unsigned int axis, float epsilon)
+    {
+        _target    = compute_target(shape, data_type, axis, epsilon);
+        _reference = compute_reference(shape, data_type, axis, epsilon);
+    }
+
+protected:
+    template <typename U>
+    void fill(U &&tensor)
+    {
+        library->fill_tensor_uniform(tensor, 0);
+    }
+
+    TensorType compute_target(const TensorShape &shape, DataType data_type, unsigned int axis, float epsilon)
+    {
+        // Create tensors
+        TensorType src = create_tensor<TensorType>(shape, data_type);
+        TensorType dst = create_tensor<TensorType>(shape, data_type);
+
+        // Create and configure function
+        FunctionType l2_norm_func;
+        l2_norm_func.configure(&src, &dst, axis, epsilon);
+
+        ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+        // Allocate tensors
+        src.allocator()->allocate();
+        dst.allocator()->allocate();
+
+        ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+        // Fill tensors
+        fill(AccessorType(src));
+
+        // Compute function
+        l2_norm_func.run();
+
+        return dst;
+    }
+
+    SimpleTensor<T> compute_reference(const TensorShape &shape, DataType data_type, unsigned int axis, float epsilon)
+    {
+        // Create reference
+        SimpleTensor<T> src{ shape, data_type };
+
+        // Fill reference
+        fill(src);
+
+        return reference::l2_normalize<T>(src, axis, epsilon);
+    }
+
+    TensorType      _target{};
+    SimpleTensor<T> _reference{};
+};
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_TEST_L2NORMALIZE_FIXTURE */
-- 
cgit v1.2.1
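
Editor's note on usage: since only the type names change in this patch, existing callers migrate by re-spelling the class. A minimal sketch follows, assuming (as the fixture's compute_target() above exercises) that the renamed NEL2NormalizeLayer keeps the configure(&src, &dst, axis, epsilon) signature of the old NEL2Normalize; the shapes and values here are illustrative, not from the patch.

    // Minimal usage sketch (not part of the patch). Drives the renamed NEON
    // function the same way the fixture's compute_target() does.
    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/NEON/functions/NEL2NormalizeLayer.h"
    #include "arm_compute/runtime/Tensor.h"
    #include "arm_compute/runtime/TensorAllocator.h"

    using namespace arm_compute;

    int main()
    {
        // Illustrative 2D F32 tensors; any shape supported by the kernel works.
        Tensor src{};
        Tensor dst{};
        src.allocator()->init(TensorInfo(TensorShape(128U, 24U), 1, DataType::F32));
        dst.allocator()->init(TensorInfo(TensorShape(128U, 24U), 1, DataType::F32));

        // Renamed class, unchanged configure() signature (assumption based on the fixture).
        NEL2NormalizeLayer l2_norm;
        l2_norm.configure(&src, &dst, 0 /* axis */, 1e-12 /* epsilon */);

        src.allocator()->allocate();
        dst.allocator()->allocate();

        // ... fill src with data here ...
        l2_norm.run();
        return 0;
    }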
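The one piece of template machinery in these fixtures worth calling out is the TBias alias: for quantized QASYMM8 (uint8_t) element types the bias tensor must accumulate in S32, so the bias element type is selected at compile time. Below is a standalone sketch of the same std::conditional/std::is_same/std::decay idiom, with static_asserts standing in for the fixture's behaviour; it is an illustration, not code from the patch.

    #include <cstdint>
    #include <type_traits>

    // Same idiom as the fixtures' TBias alias: uint8_t (QASYMM8) inputs get
    // int32_t biases; every other element type keeps its own bias type.
    template <typename T>
    using TBias = typename std::conditional<std::is_same<typename std::decay<T>::type, uint8_t>::value, int32_t, T>::type;

    static_assert(std::is_same<TBias<uint8_t>, int32_t>::value, "QASYMM8 biases accumulate in S32");
    static_assert(std::is_same<TBias<float>, float>::value, "F32 biases stay F32");

    int main()
    {
        return 0;
    }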
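Finally, every fixture in this patch validates by comparing the target output against a scalar reference within a relative tolerance (the suites use tolerance_f32 = 0.00001f). The sketch below is not the framework's RelativeTolerance implementation, only the idea behind it, written as a self-contained illustration.

    #include <cassert>
    #include <cmath>
    #include <vector>

    // Standalone sketch of the validation idiom the fixtures implement:
    // run the target, run a reference, compare element-wise within a
    // relative tolerance.
    static bool within_relative_tolerance(float target, float ref, float rel_tol)
    {
        const float margin = rel_tol * std::fmax(std::fabs(target), std::fabs(ref));
        return std::fabs(target - ref) <= margin;
    }

    int main()
    {
        const float rel_tol = 0.00001f; // same value as the suites' tolerance_f32
        std::vector<float> target = { 0.999999f, 2.0f };
        std::vector<float> ref    = { 1.0f, 2.0f };
        for(size_t i = 0; i < target.size(); ++i)
        {
            assert(within_relative_tolerance(target[i], ref[i], rel_tol));
        }
        return 0;
    }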