From ff850937ddfd3095b3cbe32e5c255817e5ccfeda Mon Sep 17 00:00:00 2001 From: Gian Marco Date: Mon, 11 Dec 2017 12:37:17 +0000 Subject: COMPMID-741 - Changelog for major release 17.12 Change-Id: If71190e398217ca6ca44df822554998d047b79db Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/112716 Reviewed-by: Anthony Barbier Tested-by: BSG Visual Compute Jenkins server to access repositories on http://mpd-gerrit.cambridge.arm.com --- arm_compute/core/GLES_COMPUTE/GCKernels.h | 2 +- .../core/GLES_COMPUTE/kernels/GCDropoutKernel.h | 79 --------------- .../GLES_COMPUTE/kernels/GCDropoutLayerKernel.h | 79 +++++++++++++++ .../GLES_COMPUTE/functions/GCDropoutLayer.h | 6 +- docs/00_introduction.dox | 55 +++++++++++ docs/Doxyfile | 3 +- src/core/GLES_COMPUTE/kernels/GCDropoutKernel.cpp | 107 --------------------- .../GLES_COMPUTE/kernels/GCDropoutLayerKernel.cpp | 107 +++++++++++++++++++++ 8 files changed, 247 insertions(+), 191 deletions(-) delete mode 100644 arm_compute/core/GLES_COMPUTE/kernels/GCDropoutKernel.h create mode 100644 arm_compute/core/GLES_COMPUTE/kernels/GCDropoutLayerKernel.h delete mode 100644 src/core/GLES_COMPUTE/kernels/GCDropoutKernel.cpp create mode 100644 src/core/GLES_COMPUTE/kernels/GCDropoutLayerKernel.cpp diff --git a/arm_compute/core/GLES_COMPUTE/GCKernels.h b/arm_compute/core/GLES_COMPUTE/GCKernels.h index 9831e25299..417c98af67 100644 --- a/arm_compute/core/GLES_COMPUTE/GCKernels.h +++ b/arm_compute/core/GLES_COMPUTE/GCKernels.h @@ -31,7 +31,7 @@ #include "arm_compute/core/GLES_COMPUTE/kernels/GCCol2ImKernel.h" #include "arm_compute/core/GLES_COMPUTE/kernels/GCDepthConcatenateLayerKernel.h" #include "arm_compute/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.h" -#include "arm_compute/core/GLES_COMPUTE/kernels/GCDropoutKernel.h" +#include "arm_compute/core/GLES_COMPUTE/kernels/GCDropoutLayerKernel.h" #include "arm_compute/core/GLES_COMPUTE/kernels/GCFillBorderKernel.h" #include 
"arm_compute/core/GLES_COMPUTE/kernels/GCGEMMInterleave4x4Kernel.h" #include "arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixAccumulateBiasesKernel.h" diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCDropoutKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCDropoutKernel.h deleted file mode 100644 index 6159a7af26..0000000000 --- a/arm_compute/core/GLES_COMPUTE/kernels/GCDropoutKernel.h +++ /dev/null @@ -1,79 +0,0 @@ -/* - * Copyright (c) 2017 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef __ARM_COMPUTE_GCDROPOUTKERNEL_H__ -#define __ARM_COMPUTE_GCDROPOUTKERNEL_H__ - -#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h" - -namespace arm_compute -{ -class IGCTensor; - -/** Interface for the dropout kernel. - * - * Dropout is used to improve over-fit on neural networks. 
- * - */ -class GCDropoutKernel : public IGCKernel -{ -public: - /** Default constructor */ - GCDropoutKernel(); - - /** Prevent instances of this class from being copied (As this class contains pointers) */ - GCDropoutKernel(const GCDropoutKernel &) = delete; - - /** Prevent instances of this class from being copied (As this class contains pointers) */ - GCDropoutKernel &operator=(const GCDropoutKernel &) = delete; - - /** Allow instances of this class to be moved */ - GCDropoutKernel(GCDropoutKernel &&) = default; - - /** Allow instances of this class to be moved */ - GCDropoutKernel &operator=(GCDropoutKernel &&) = default; - - /** Set the input and output of the kernel. - * - * @param[in] input The input tensor for this op. Data types supported: F16/F32 - * @param[out] mask The mask tensor. Data types supported: Same as @p input - * @param[out] output The output tensor. Data types supported: Same as @p input - * @param[in] ratio Dropout ratio - * @param[in] forward Forward or backward propagation - * - */ - void configure(const IGCTensor *input, IGCTensor *mask, IGCTensor *output, float ratio, bool forward); - - // Inherited methods overridden: - void run(const Window &window) override; - -private: - const IGCTensor *_input; - IGCTensor *_mask; - IGCTensor *_output; - unsigned int _num_elems_processed_per_iteration; -}; -} - -#endif /*__ARM_COMPUTE_GCDROPOUTKERNEL_H__ */ diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCDropoutLayerKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCDropoutLayerKernel.h new file mode 100644 index 0000000000..9f04411d92 --- /dev/null +++ b/arm_compute/core/GLES_COMPUTE/kernels/GCDropoutLayerKernel.h @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __ARM_COMPUTE_GCDROPOUTLAYERKERNEL_H__ +#define __ARM_COMPUTE_GCDROPOUTLAYERKERNEL_H__ + +#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h" + +namespace arm_compute +{ +class IGCTensor; + +/** Interface for the dropout layer kernel. + * + * Dropout is used to reduce over-fitting in neural networks. 
+ * + */ +class GCDropoutLayerKernel : public IGCKernel +{ +public: + /** Default constructor */ + GCDropoutLayerKernel(); + + /** Prevent instances of this class from being copied (As this class contains pointers) */ + GCDropoutLayerKernel(const GCDropoutLayerKernel &) = delete; + + /** Prevent instances of this class from being copied (As this class contains pointers) */ + GCDropoutLayerKernel &operator=(const GCDropoutLayerKernel &) = delete; + + /** Allow instances of this class to be moved */ + GCDropoutLayerKernel(GCDropoutLayerKernel &&) = default; + + /** Allow instances of this class to be moved */ + GCDropoutLayerKernel &operator=(GCDropoutLayerKernel &&) = default; + + /** Set the input and output of the kernel. + * + * @param[in] input The input tensor for this op. Data types supported: F16/F32 + * @param[out] mask The mask tensor. Data types supported: Same as @p input + * @param[out] output The output tensor. Data types supported: Same as @p input + * @param[in] ratio Dropout ratio + * @param[in] forward Forward or backward propagation + * + */ + void configure(const IGCTensor *input, IGCTensor *mask, IGCTensor *output, float ratio, bool forward); + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + const IGCTensor *_input; + IGCTensor *_mask; + IGCTensor *_output; + unsigned int _num_elems_processed_per_iteration; +}; +} + +#endif /*__ARM_COMPUTE_GCDROPOUTLAYERKERNEL_H__ */ diff --git a/arm_compute/runtime/GLES_COMPUTE/functions/GCDropoutLayer.h b/arm_compute/runtime/GLES_COMPUTE/functions/GCDropoutLayer.h index 6a08d96676..c51d2c1613 100644 --- a/arm_compute/runtime/GLES_COMPUTE/functions/GCDropoutLayer.h +++ b/arm_compute/runtime/GLES_COMPUTE/functions/GCDropoutLayer.h @@ -25,7 +25,7 @@ #ifndef __ARM_COMPUTE_GCDROPOUTLAYER_H__ #define __ARM_COMPUTE_GCDROPOUTLAYER_H__ -#include "arm_compute/core/GLES_COMPUTE/kernels/GCDropoutKernel.h" +#include "arm_compute/core/GLES_COMPUTE/kernels/GCDropoutLayerKernel.h" 
#include "arm_compute/runtime/IFunction.h" namespace arm_compute @@ -33,7 +33,7 @@ namespace arm_compute class IGCTensor; /** Basic function to do dropout op. This function calls the following kernels: * - * -# @ref GCDropoutKernel + * -# @ref GCDropoutLayerKernel */ class GCDropoutLayer : public IFunction { @@ -56,7 +56,7 @@ public: void run() override; private: - GCDropoutKernel _dropout_kernel; + GCDropoutLayerKernel _dropout_kernel; }; } diff --git a/docs/00_introduction.dox b/docs/00_introduction.dox index bf43b3abb3..4d6fafa980 100644 --- a/docs/00_introduction.dox +++ b/docs/00_introduction.dox @@ -155,6 +155,61 @@ If there is more than one release in a month then an extra sequential number is @subsection S2_2_changelog Changelog +v17.12 Public major release + - Most machine learning functions on OpenCL support the new data type QASYMM8 + - Introduced logging interface + - Introduced OpenCL timer + - Reworked GEMMLowp interface + - Added new NEON assembly kernels for GEMMLowp, SGEMM and HGEMM + - Added validation method for most Machine Learning kernels / functions + - Added new graph examples such as googlenet, mobilenet, squeezenet, vgg16 and vgg19 + - Added sgemm example for OpenCL + - Added absolute difference example for GLES compute + - Added new tests and benchmarks in validation and benchmark frameworks + - Added new kernels / functions for GLES compute + + - New OpenGL ES kernels / functions + - @ref arm_compute::GCAbsoluteDifferenceKernel / @ref arm_compute::GCAbsoluteDifference + - @ref arm_compute::GCActivationLayerKernel / @ref arm_compute::GCActivationLayer + - @ref arm_compute::GCBatchNormalizationLayerKernel / @ref arm_compute::GCBatchNormalizationLayer + - @ref arm_compute::GCCol2ImKernel + - @ref arm_compute::GCDepthConcatenateLayerKernel / @ref arm_compute::GCDepthConcatenateLayer + - @ref arm_compute::GCDirectConvolutionLayerKernel / @ref arm_compute::GCDirectConvolutionLayer + - @ref arm_compute::GCDropoutLayerKernel / @ref 
arm_compute::GCDropoutLayer + - @ref arm_compute::GCFillBorderKernel / @ref arm_compute::GCFillBorder + - @ref arm_compute::GCGEMMInterleave4x4Kernel / @ref arm_compute::GCGEMMInterleave4x4 + - @ref arm_compute::GCGEMMMatrixAccumulateBiasesKernel / @ref arm_compute::GCGEMMMatrixAdditionKernel / @ref arm_compute::GCGEMMMatrixMultiplyKernel / @ref arm_compute::GCGEMM + - @ref arm_compute::GCGEMMTranspose1xWKernel / @ref arm_compute::GCGEMMTranspose1xW + - @ref arm_compute::GCIm2ColKernel + - @ref arm_compute::GCNormalizationLayerKernel / @ref arm_compute::GCNormalizationLayer + - @ref arm_compute::GCPixelWiseMultiplicationKernel / @ref arm_compute::GCPixelWiseMultiplication + - @ref arm_compute::GCPoolingLayerKernel / @ref arm_compute::GCPoolingLayer + - @ref arm_compute::GCLogits1DMaxKernel / @ref arm_compute::GCLogits1DShiftExpSumKernel / @ref arm_compute::GCLogits1DNormKernel / @ref arm_compute::GCSoftmaxLayer + - @ref arm_compute::GCTransposeKernel / @ref arm_compute::GCTranspose + + - New NEON kernels / functions + - @ref arm_compute::NEGEMMLowpAArch64A53Kernel / @ref arm_compute::NEGEMMLowpAArch64Kernel / @ref arm_compute::NEGEMMLowpAArch64V8P4Kernel / arm_compute::NEGEMMInterleavedBlockedKernel / @ref arm_compute::NEGEMMLowpAssemblyMatrixMultiplyCore + - @ref arm_compute::NEHGEMMAArch64FP16Kernel + - @ref arm_compute::NEDepthwiseConvolutionLayer3x3Kernel / @ref arm_compute::NEDepthwiseIm2ColKernel / @ref arm_compute::NEGEMMMatrixVectorMultiplyKernel / @ref arm_compute::NEDepthwiseVectorToTensorKernel / @ref arm_compute::NEDepthwiseConvolutionLayer + - @ref arm_compute::NEGEMMLowpOffsetContributionKernel / @ref arm_compute::NEGEMMLowpMatrixAReductionKernel / @ref arm_compute::NEGEMMLowpMatrixBReductionKernel / @ref arm_compute::NEGEMMLowpMatrixMultiplyCore + - @ref arm_compute::NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel / @ref arm_compute::NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint + - @ref 
arm_compute::NEGEMMLowpQuantizeDownInt32ToUint8ScaleKernel / @ref arm_compute::NEGEMMLowpQuantizeDownInt32ToUint8Scale + - @ref arm_compute::NEWinogradLayerKernel / @ref arm_compute::NEWinogradLayer + + - New OpenCL kernels / functions + - @ref arm_compute::CLGEMMLowpOffsetContributionKernel / @ref arm_compute::CLGEMMLowpMatrixAReductionKernel / @ref arm_compute::CLGEMMLowpMatrixBReductionKernel / @ref arm_compute::CLGEMMLowpMatrixMultiplyCore + - @ref arm_compute::CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel / @ref arm_compute::CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint + - @ref arm_compute::CLGEMMLowpQuantizeDownInt32ToUint8ScaleKernel / @ref arm_compute::CLGEMMLowpQuantizeDownInt32ToUint8Scale + + - New graph nodes for NEON and OpenCL + - @ref arm_compute::graph::BranchLayer + - @ref arm_compute::graph::DepthConvertLayer + - @ref arm_compute::graph::DepthwiseConvolutionLayer + - @ref arm_compute::graph::DequantizationLayer + - @ref arm_compute::graph::FlattenLayer + - @ref arm_compute::graph::QuantizationLayer + - @ref arm_compute::graph::ReshapeLayer + v17.10 Public maintenance release - Bug fixes: - Check the maximum local workgroup size supported by OpenCL devices diff --git a/docs/Doxyfile b/docs/Doxyfile index a3e4f5c908..de0320f38b 100644 --- a/docs/Doxyfile +++ b/docs/Doxyfile @@ -2076,7 +2076,8 @@ PREDEFINED = DOXYGEN_SKIP_THIS \ LOCATE_MAX \ HAS_BIAS \ POOL_AVG \ - ARM_COMPUTE_AARCH64_V8_2 + ARM_COMPUTE_AARCH64_V8_2 \ + ARM_COMPUTE_AARCH64_V8A # If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then this diff --git a/src/core/GLES_COMPUTE/kernels/GCDropoutKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCDropoutKernel.cpp deleted file mode 100644 index cdd6a9d989..0000000000 --- a/src/core/GLES_COMPUTE/kernels/GCDropoutKernel.cpp +++ /dev/null @@ -1,107 +0,0 @@ -/* - * Copyright (c) 2017 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include "arm_compute/core/GLES_COMPUTE/kernels/GCDropoutKernel.h" - -#include "arm_compute/core/Error.h" -#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h" -#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h" -#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h" -#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/core/Validate.h" -#include "support/ToolchainSupport.h" - -#include <random> -#include <set> -#include <string> - -using namespace arm_compute; - -GCDropoutKernel::GCDropoutKernel() - : _input(nullptr), _mask(nullptr), _output(nullptr), _num_elems_processed_per_iteration(0) -{ -} - -void GCDropoutKernel::configure(const IGCTensor *input, IGCTensor *mask, IGCTensor *output, float ratio, bool forward) -{ - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32); - ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, mask, output); - - _input = input; - _mask = mask; - _output = output; - - std::set<std::string> build_opts; - std::string dt_name = (input->info()->data_type() == DataType::F32) ? "DATA_TYPE_FP32" : "DATA_TYPE_FP16"; - std::string fporbp = forward ? "FORWARD" : "BACKWARD"; - std::random_device rd; - std::mt19937 mt(rd()); - std::uniform_real_distribution<float> dist(0.f, 1.f); - - build_opts.emplace("#define LOCAL_SIZE_X " + support::cpp11::to_string(1)); - build_opts.emplace("#define LOCAL_SIZE_Y " + support::cpp11::to_string(1)); - build_opts.emplace("#define LOCAL_SIZE_Z " + support::cpp11::to_string(1)); - build_opts.emplace("#define RATIO " + support::cpp11::to_string(ratio)); - build_opts.emplace("#define SCALE " + support::cpp11::to_string(1. / (1. 
- ratio))); - build_opts.emplace("#define SEED " + support::cpp11::to_string(dist(mt))); - build_opts.emplace("#define " + dt_name); - build_opts.emplace("#define " + fporbp); - - _num_elems_processed_per_iteration = 4 / input->info()->element_size(); - - // Create kernel - _kernel = static_cast<GCKernel>(GCKernelLibrary::get().create_kernel("dropout", build_opts)); - - // Configure kernel window - Window win = calculate_max_window(*input->info(), Steps(_num_elems_processed_per_iteration)); - - output->info()->set_valid_region(ValidRegion(Coordinates(), output->info()->tensor_shape())); - - IGCKernel::configure(win); -} - -void GCDropoutKernel::run(const Window &window) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_MISMATCHING_WINDOWS(IGCKernel::window(), window); - - _kernel.use(); - - Window slice = window.first_slice_window_3D(); - - do - { - unsigned int idx = 0; - - add_3D_tensor_argument(idx, _input, BufferParam(1, 2), slice); - add_3D_tensor_argument(idx, _mask, BufferParam(2, 2), slice); - add_3D_tensor_argument(idx, _output, BufferParam(3, 2), slice); - - _kernel.update_shader_params(); - enqueue(*this, slice); - } - while(window.slide_window_slice_3D(slice)); -} diff --git a/src/core/GLES_COMPUTE/kernels/GCDropoutLayerKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCDropoutLayerKernel.cpp new file mode 100644 index 0000000000..e87c902281 --- /dev/null +++ b/src/core/GLES_COMPUTE/kernels/GCDropoutLayerKernel.cpp @@ -0,0 +1,107 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "arm_compute/core/GLES_COMPUTE/kernels/GCDropoutLayerKernel.h" + +#include "arm_compute/core/Error.h" +#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h" +#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h" +#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h" +#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/core/Validate.h" +#include "support/ToolchainSupport.h" + +#include <random> +#include <set> +#include <string> + +using namespace arm_compute; + +GCDropoutLayerKernel::GCDropoutLayerKernel() + : _input(nullptr), _mask(nullptr), _output(nullptr), _num_elems_processed_per_iteration(0) +{ +} + +void GCDropoutLayerKernel::configure(const IGCTensor *input, IGCTensor *mask, IGCTensor *output, float ratio, bool forward) +{ + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32); + ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, mask, output); + + _input = input; + _mask = mask; + _output = output; + + std::set<std::string> build_opts; + std::string dt_name = (input->info()->data_type() == DataType::F32) ? "DATA_TYPE_FP32" : "DATA_TYPE_FP16"; + std::string fporbp = forward ? "FORWARD" : "BACKWARD"; + std::random_device rd; + std::mt19937 mt(rd()); + std::uniform_real_distribution<float> dist(0.f, 1.f); + + build_opts.emplace("#define LOCAL_SIZE_X " + support::cpp11::to_string(1)); + build_opts.emplace("#define LOCAL_SIZE_Y " + support::cpp11::to_string(1)); + build_opts.emplace("#define LOCAL_SIZE_Z " + support::cpp11::to_string(1)); + build_opts.emplace("#define RATIO " + support::cpp11::to_string(ratio)); + build_opts.emplace("#define SCALE " + support::cpp11::to_string(1. / (1. 
- ratio))); + build_opts.emplace("#define SEED " + support::cpp11::to_string(dist(mt))); + build_opts.emplace("#define " + dt_name); + build_opts.emplace("#define " + fporbp); + + _num_elems_processed_per_iteration = 4 / input->info()->element_size(); + + // Create kernel + _kernel = static_cast<GCKernel>(GCKernelLibrary::get().create_kernel("dropout", build_opts)); + + // Configure kernel window + Window win = calculate_max_window(*input->info(), Steps(_num_elems_processed_per_iteration)); + + output->info()->set_valid_region(ValidRegion(Coordinates(), output->info()->tensor_shape())); + + IGCKernel::configure(win); +} + +void GCDropoutLayerKernel::run(const Window &window) +{ + ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); + ARM_COMPUTE_ERROR_ON_MISMATCHING_WINDOWS(IGCKernel::window(), window); + + _kernel.use(); + + Window slice = window.first_slice_window_3D(); + + do + { + unsigned int idx = 0; + + add_3D_tensor_argument(idx, _input, BufferParam(1, 2), slice); + add_3D_tensor_argument(idx, _mask, BufferParam(2, 2), slice); + add_3D_tensor_argument(idx, _output, BufferParam(3, 2), slice); + + _kernel.update_shader_params(); + enqueue(*this, slice); + } + while(window.slide_window_slice_3D(slice)); +} -- cgit v1.2.1