diff options
author | Anthony Barbier <anthony.barbier@arm.com> | 2017-09-04 18:44:23 +0100 |
---|---|---|
committer | Anthony Barbier <anthony.barbier@arm.com> | 2018-09-17 13:03:09 +0100 |
commit | 6ff3b19ee6120edf015fad8caab2991faa3070af (patch) | |
tree | a7a6dcd16dfd56d79fa1b56a313caeebcc939b68 /examples | |
download | ComputeLibrary-6ff3b19ee6120edf015fad8caab2991faa3070af.tar.gz |
COMPMID-344 Updated doxygen
Change-Id: I32f7b84daa560e460b77216add529c8fa8b327ae
Diffstat (limited to 'examples')
-rw-r--r-- | examples/SConscript | 70 | ||||
-rw-r--r-- | examples/cl_convolution.cpp | 118 | ||||
-rw-r--r-- | examples/cl_events.cpp | 114 | ||||
-rw-r--r-- | examples/neon_cnn.cpp | 230 | ||||
-rw-r--r-- | examples/neon_convolution.cpp | 117 | ||||
-rw-r--r-- | examples/neon_copy_objects.cpp | 152 | ||||
-rw-r--r-- | examples/neon_scale.cpp | 90 | ||||
-rw-r--r-- | examples/neoncl_scale_median_gaussian.cpp | 126 |
8 files changed, 1017 insertions, 0 deletions
diff --git a/examples/SConscript b/examples/SConscript new file mode 100644 index 0000000000..748f771ec7 --- /dev/null +++ b/examples/SConscript @@ -0,0 +1,70 @@ +# Copyright (c) 2017 ARM Limited. +# +# SPDX-License-Identifier: MIT +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to +# deal in the Software without restriction, including without limitation the +# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +# sell copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+import SCons +import os.path + +Import('env') +Import('arm_compute_a') +Import('arm_compute_so') + +if env['opencl']: + Import('opencl') + +examples_env = env.Clone() + +examples_env.Append(CPPPATH = ["#"]) +examples_env.Append(LIBPATH = ["#build/%s" % env['build_dir']]) +examples_env.Append(LIBPATH = ["#build/%s/opencl-1.2-stubs" % env['build_dir']]) + +# Build examples +utils = examples_env.Object("../utils/Utils.cpp") + +if env['os'] in ['android', 'bare_metal']: + arm_compute_lib = arm_compute_a + arm_compute_dependency = arm_compute_a +else: + arm_compute_lib = "arm_compute" + arm_compute_dependency = arm_compute_so + +if env['opencl'] and env['neon']: + for file in Glob("./neoncl_*.cpp"): + example = os.path.basename(os.path.splitext(str(file))[0]) + prog = examples_env.Program(example, ["{}.cpp".format(example), utils], LIBS = [arm_compute_lib, "OpenCL"]) + Depends(prog, [arm_compute_dependency, opencl]) + alias = examples_env.Alias(example, prog) + Default(alias) + +if env['opencl']: + for file in Glob("./cl_*.cpp"): + example = os.path.basename(os.path.splitext(str(file))[0]) + prog = examples_env.Program(example, ["{}.cpp".format(example), utils], LIBS = [arm_compute_lib, "OpenCL"]) + Depends(prog, [arm_compute_dependency, opencl]) + alias = examples_env.Alias(example, prog) + Default(alias) + +if env['neon']: + for file in Glob("./neon_*.cpp"): + example = os.path.basename(os.path.splitext(str(file))[0]) + prog = examples_env.Program(example, ["{}.cpp".format(example), utils], LIBS = [arm_compute_lib]) + Depends(prog, arm_compute_dependency) + alias = examples_env.Alias(example, prog) + Default(alias) diff --git a/examples/cl_convolution.cpp b/examples/cl_convolution.cpp new file mode 100644 index 0000000000..06f6f144e1 --- /dev/null +++ b/examples/cl_convolution.cpp @@ -0,0 +1,118 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#define ARM_COMPUTE_CL /* So that OpenCL exceptions get caught too */ +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/CLFunctions.h" +#include "arm_compute/runtime/CL/CLScheduler.h" +#include "utils/Utils.h" + +using namespace arm_compute; +using namespace utils; + +/** Gaussian 3x3 matrix + */ +const int16_t gaussian3x3[] = +{ + 1, 2, 1, + 2, 4, 2, + 1, 2, 1 +}; + +/** Gaussian 5x5 matrix + */ +const int16_t gaussian5x5[] = +{ + 1, 4, 6, 4, 1, + 4, 16, 24, 16, 4, + 6, 24, 36, 24, 6, + 4, 16, 24, 16, 4, + 1, 4, 6, 4, 1 +}; + +void main_cl_convolution(int argc, const char **argv) +{ + PPMLoader ppm; + CLImage src, tmp, dst; + + CLScheduler::get().default_init(); + + if(argc < 2) + { + // Print help + std::cout << "Usage: ./build/cl_convolution [input_image.ppm]\n\n"; + std::cout << "No input_image provided, creating a dummy 640x480 image\n"; + // Create an empty grayscale 640x480 image + src.allocator()->init(TensorInfo(640, 480, Format::U8)); + } + else + { + ppm.open(argv[1]); + ppm.init_image(src, Format::U8); + } + + // Configure the temporary and destination images + tmp.allocator()->init(*src.info()); + dst.allocator()->init(*src.info()); + + CLConvolution3x3 conv3x3; + CLConvolution5x5 conv5x5; + + // Apply a Gaussian 3x3 filter to the source image followed by a Gaussian 5x5: + conv3x3.configure(&src, &tmp, gaussian3x3, 0 /* Let arm_compute calculate the scale */, BorderMode::UNDEFINED); + conv5x5.configure(&tmp, &dst, gaussian5x5, 0 /* Let arm_compute calculate the scale */, BorderMode::UNDEFINED); + + // Allocate all the images + src.allocator()->allocate(); + tmp.allocator()->allocate(); + dst.allocator()->allocate(); + // Fill the input image with the content of the PPM image if a filename was provided: + if(ppm.is_open()) + { + ppm.fill_image(src); + } + + // Execute the functions: + conv3x3.run(); + conv5x5.run(); + + // Make sure all the OpenCL jobs are done executing: + CLScheduler::get().sync(); + + // Save the result to 
file: + if(ppm.is_open()) + { + const std::string output_filename = std::string(argv[1]) + "_out.ppm"; + save_to_ppm(dst, output_filename); // save_to_ppm maps and unmaps the image to store as PPM + } +} + +/** Main program for convolution test + * + * @param[in] argc Number of arguments + * @param[in] argv Arguments ( [optional] Path to PPM image to process ) + */ +int main(int argc, const char **argv) +{ + return utils::run_example(argc, argv, main_cl_convolution); +} diff --git a/examples/cl_events.cpp b/examples/cl_events.cpp new file mode 100644 index 0000000000..768f620622 --- /dev/null +++ b/examples/cl_events.cpp @@ -0,0 +1,114 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#define ARM_COMPUTE_CL /* So that OpenCL exceptions get caught too */ +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/CLFunctions.h" +#include "arm_compute/runtime/CL/CLScheduler.h" +#include "utils/Utils.h" + +using namespace arm_compute; +using namespace utils; + +void main_cl_events(int argc, const char **argv) +{ + /** [OpenCL events] **/ + PPMLoader ppm; + CLImage src, tmp_scale_median, tmp_median_gauss, dst; + constexpr int scale_factor = 2; + + CLScheduler::get().default_init(); + + if(argc < 2) + { + // Print help + std::cout << "Usage: ./build/cl_events [input_image.ppm]\n\n"; + std::cout << "No input_image provided, creating a dummy 640x480 image\n"; + // Create an empty grayscale 640x480 image + src.allocator()->init(TensorInfo(640, 480, Format::U8)); + } + else + { + ppm.open(argv[1]); + ppm.init_image(src, Format::U8); + } + + // Declare and configure the functions to create the following pipeline: scale -> median -> gauss + CLScale scale; + CLMedian3x3 median; + CLGaussian5x5 gauss; + + TensorInfo dst_info(src.info()->dimension(0) / scale_factor, src.info()->dimension(1) / scale_factor, Format::U8); + + // Configure the temporary and destination images + dst.allocator()->init(dst_info); + tmp_scale_median.allocator()->init(dst_info); + tmp_median_gauss.allocator()->init(dst_info); + + //Configure the functions: + scale.configure(&src, &tmp_scale_median, InterpolationPolicy::NEAREST_NEIGHBOR, BorderMode::REPLICATE); + median.configure(&tmp_scale_median, &tmp_median_gauss, BorderMode::REPLICATE); + gauss.configure(&tmp_median_gauss, &dst, BorderMode::REPLICATE); + + // Allocate all the images + src.allocator()->allocate(); + dst.allocator()->allocate(); + tmp_scale_median.allocator()->allocate(); + tmp_median_gauss.allocator()->allocate(); + // Fill the input image with the content of the PPM image if a filename was provided: + if(ppm.is_open()) + { + ppm.fill_image(src); + } + + // Enqueue and flush the scale OpenCL kernel: 
+ scale.run(); + // Create a synchronisation event between scale and median: + cl::Event scale_event = CLScheduler::get().enqueue_sync_event(); + // Enqueue and flush the median OpenCL kernel: + median.run(); + // Enqueue and flush the Gaussian OpenCL kernel: + gauss.run(); + + //Make sure all the OpenCL jobs are done executing: + scale_event.wait(); // Block until Scale is done executing (Median3x3 and Gaussian5x5 might still be running) + CLScheduler::get().sync(); // Block until Gaussian5x5 is done executing + + // Save the result to file: + if(ppm.is_open()) + { + const std::string output_filename = std::string(argv[1]) + "_out.ppm"; + save_to_ppm(dst, output_filename); // save_to_ppm maps and unmaps the image to store as PPM + } + /** [OpenCL events] **/ +} + +/** Main program for the OpenCL events test + * + * @param[in] argc Number of arguments + * @param[in] argv Arguments ( [optional] Path to PPM image to process ) + */ +int main(int argc, const char **argv) +{ + return utils::run_example(argc, argv, main_cl_events); +} diff --git a/examples/neon_cnn.cpp b/examples/neon_cnn.cpp new file mode 100644 index 0000000000..952ae4d485 --- /dev/null +++ b/examples/neon_cnn.cpp @@ -0,0 +1,230 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/NEON/NEFunctions.h" + +#include "arm_compute/core/Types.h" +#include "utils/Utils.h" + +using namespace arm_compute; +using namespace utils; + +void main_cnn(int argc, const char **argv) +{ + ARM_COMPUTE_UNUSED(argc); + ARM_COMPUTE_UNUSED(argv); + + // The src tensor should contain the input image + Tensor src; + + // The weights and biases tensors should be initialized with the values inferred with the training + Tensor weights0; + Tensor weights1; + Tensor weights2; + Tensor biases0; + Tensor biases1; + Tensor biases2; + + Tensor out_conv0; + Tensor out_conv1; + Tensor out_act0; + Tensor out_act1; + Tensor out_act2; + Tensor out_pool0; + Tensor out_pool1; + Tensor out_fc0; + Tensor out_softmax; + + NEConvolutionLayer conv0; + NEConvolutionLayer conv1; + NEPoolingLayer pool0; + NEPoolingLayer pool1; + NEFullyConnectedLayer fc0; + NEActivationLayer act0; + NEActivationLayer act1; + NEActivationLayer act2; + NESoftmaxLayer softmax; + + /* [Initialize tensors] */ + + // Initialize src tensor + constexpr unsigned int width_src_image = 32; + constexpr unsigned int height_src_image = 32; + constexpr unsigned int ifm_src_img = 1; + + const TensorShape src_shape(width_src_image, height_src_image, ifm_src_img); + src.allocator()->init(TensorInfo(src_shape, 1, DataType::F32)); + + // Initialize tensors of conv0 + constexpr unsigned int kernel_x_conv0 = 5; + constexpr unsigned int kernel_y_conv0 = 5; + constexpr unsigned int ofm_conv0 = 8; + + const 
TensorShape weights_shape_conv0(kernel_x_conv0, kernel_y_conv0, src_shape.z(), ofm_conv0); + const TensorShape biases_shape_conv0(weights_shape_conv0[3]); + const TensorShape out_shape_conv0(src_shape.x(), src_shape.y(), weights_shape_conv0[3]); + + weights0.allocator()->init(TensorInfo(weights_shape_conv0, 1, DataType::F32)); + biases0.allocator()->init(TensorInfo(biases_shape_conv0, 1, DataType::F32)); + out_conv0.allocator()->init(TensorInfo(out_shape_conv0, 1, DataType::F32)); + + // Initialize tensor of act0 + out_act0.allocator()->init(TensorInfo(out_shape_conv0, 1, DataType::F32)); + + // Initialize tensor of pool0 + TensorShape out_shape_pool0 = out_shape_conv0; + out_shape_pool0.set(0, out_shape_pool0.x() / 2); + out_shape_pool0.set(1, out_shape_pool0.y() / 2); + out_pool0.allocator()->init(TensorInfo(out_shape_pool0, 1, DataType::F32)); + + // Initialize tensors of conv1 + constexpr unsigned int kernel_x_conv1 = 3; + constexpr unsigned int kernel_y_conv1 = 3; + constexpr unsigned int ofm_conv1 = 16; + + const TensorShape weights_shape_conv1(kernel_x_conv1, kernel_y_conv1, out_shape_pool0.z(), ofm_conv1); + + const TensorShape biases_shape_conv1(weights_shape_conv1[3]); + const TensorShape out_shape_conv1(out_shape_pool0.x(), out_shape_pool0.y(), weights_shape_conv1[3]); + + weights1.allocator()->init(TensorInfo(weights_shape_conv1, 1, DataType::F32)); + biases1.allocator()->init(TensorInfo(biases_shape_conv1, 1, DataType::F32)); + out_conv1.allocator()->init(TensorInfo(out_shape_conv1, 1, DataType::F32)); + + // Initialize tensor of act1 + out_act1.allocator()->init(TensorInfo(out_shape_conv1, 1, DataType::F32)); + + // Initialize tensor of pool1 + TensorShape out_shape_pool1 = out_shape_conv1; + out_shape_pool1.set(0, out_shape_pool1.x() / 2); + out_shape_pool1.set(1, out_shape_pool1.y() / 2); + out_pool1.allocator()->init(TensorInfo(out_shape_pool1, 1, DataType::F32)); + + // Initialize tensor of fc0 + constexpr unsigned int num_labels = 128; + + const 
TensorShape weights_shape_fc0(out_shape_pool1.x() * out_shape_pool1.y() * out_shape_pool1.z(), num_labels); + const TensorShape biases_shape_fc0(num_labels); + const TensorShape out_shape_fc0(num_labels); + + weights2.allocator()->init(TensorInfo(weights_shape_fc0, 1, DataType::F32)); + biases2.allocator()->init(TensorInfo(biases_shape_fc0, 1, DataType::F32)); + out_fc0.allocator()->init(TensorInfo(out_shape_fc0, 1, DataType::F32)); + + // Initialize tensor of act2 + out_act2.allocator()->init(TensorInfo(out_shape_fc0, 1, DataType::F32)); + + // Initialize tensor of softmax + const TensorShape out_shape_softmax(out_shape_fc0.x()); + out_softmax.allocator()->init(TensorInfo(out_shape_softmax, 1, DataType::F32)); + + /* -----------------------End: [Initialize tensors] */ + + /* [Configure functions] */ + + // in:32x32x1: 5x5 convolution, 8 output features maps (OFM) + conv0.configure(&src, &weights0, &biases0, &out_conv0, PadStrideInfo()); + + // in:32x32x8, out:32x32x8, Activation function: relu + act0.configure(&out_conv0, &out_act0, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)); + + // in:32x32x8, out:16x16x8 (2x2 pooling), Pool type function: Max + pool0.configure(&out_act0, &out_pool0, PoolingLayerInfo(PoolingType::MAX, 2)); + + // in:16x16x8: 3x3 convolution, 16 output features maps (OFM) + conv1.configure(&out_pool0, &weights1, &biases1, &out_conv1, PadStrideInfo()); + + // in:16x16x16, out:16x16x16, Activation function: relu + act1.configure(&out_conv1, &out_act1, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)); + + // in:16x16x16, out:8x8x16 (2x2 pooling), Pool type function: Average + pool1.configure(&out_act1, &out_pool1, PoolingLayerInfo(PoolingType::AVG, 2)); + + // in:8x8x16, out:128 + fc0.configure(&out_pool1, &weights2, &biases2, &out_fc0); + + // in:128, out:128, Activation function: relu + act2.configure(&out_fc0, &out_act2, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)); + + // in:128, 
out:128 + softmax.configure(&out_act2, &out_softmax); + + /* -----------------------End: [Configure functions] */ + + /* [Allocate tensors] */ + + // Now that the padding requirements are known we can allocate the images: + src.allocator()->allocate(); + weights0.allocator()->allocate(); + weights1.allocator()->allocate(); + weights2.allocator()->allocate(); + biases0.allocator()->allocate(); + biases1.allocator()->allocate(); + biases2.allocator()->allocate(); + out_conv0.allocator()->allocate(); + out_conv1.allocator()->allocate(); + out_act0.allocator()->allocate(); + out_act1.allocator()->allocate(); + out_act2.allocator()->allocate(); + out_pool0.allocator()->allocate(); + out_pool1.allocator()->allocate(); + out_fc0.allocator()->allocate(); + out_softmax.allocator()->allocate(); + + /* -----------------------End: [Allocate tensors] */ + + /* [Initialize weights and biases tensors] */ + + // Once the tensors have been allocated, the src, weights and biases tensors can be initialized + // ... + + /* -----------------------[Initialize weights and biases tensors] */ + + /* [Execute the functions] */ + + conv0.run(); + act0.run(); + pool0.run(); + conv1.run(); + act1.run(); + pool1.run(); + fc0.run(); + act2.run(); + softmax.run(); + + /* -----------------------End: [Execute the functions] */ +} + +/** Main program for cnn test + * + * The example implements the following CNN architecture: + * + * Input -> conv0:5x5 -> act0:relu -> pool:2x2 -> conv1:3x3 -> act1:relu -> pool:2x2 -> fc0 -> act2:relu -> softmax + * + * @param[in] argc Number of arguments + * @param[in] argv Arguments + */ +int main(int argc, const char **argv) +{ + return utils::run_example(argc, argv, main_cnn); +}
\ No newline at end of file diff --git a/examples/neon_convolution.cpp b/examples/neon_convolution.cpp new file mode 100644 index 0000000000..222c8f9a37 --- /dev/null +++ b/examples/neon_convolution.cpp @@ -0,0 +1,117 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/runtime/NEON/NEFunctions.h" + +#include "arm_compute/core/Types.h" +#include "utils/Utils.h" + +using namespace arm_compute; +using namespace utils; + +/** Gaussian 3x3 matrix + */ +const int16_t gaussian3x3[] = +{ + 1, 2, 1, + 2, 4, 2, + 1, 2, 1 +}; + +/** Gaussian 5x5 matrix + */ +const int16_t gaussian5x5[] = +{ + 1, 4, 6, 4, 1, + 4, 16, 24, 16, 4, + 6, 24, 36, 24, 6, + 4, 16, 24, 16, 4, + 1, 4, 6, 4, 1 +}; + +void main_neon_convolution(int argc, const char **argv) +{ + /** [Accurate padding] **/ + PPMLoader ppm; + Image src, tmp, dst; + + if(argc < 2) + { + // Print help + std::cout << "Usage: ./build/neon_convolution [input_image.ppm]\n\n"; + std::cout << "No input_image provided, creating a dummy 640x480 image\n"; + // Initialize just the dimensions and format of your buffers: + src.allocator()->init(TensorInfo(640, 480, Format::U8)); + } + else + { + ppm.open(argv[1]); + // Initialize just the dimensions and format of your buffers: + ppm.init_image(src, Format::U8); + } + + // Initialize just the dimensions and format of the temporary and destination images: + tmp.allocator()->init(*src.info()); + dst.allocator()->init(*src.info()); + + NEConvolution3x3 conv3x3; + NEConvolution5x5 conv5x5; + + // Apply a Gaussian 3x3 filter to the source image followed by a Gaussian 5x5: + // The function will automatically update the padding information inside input and output to match its requirements + conv3x3.configure(&src, &tmp, gaussian3x3, 0 /* Let arm_compute calculate the scale */, BorderMode::UNDEFINED); + conv5x5.configure(&tmp, &dst, gaussian5x5, 0 /* Let arm_compute calculate the scale */, BorderMode::UNDEFINED); + + // Now that the padding requirements are known we can allocate the images: + src.allocator()->allocate(); + tmp.allocator()->allocate(); + dst.allocator()->allocate(); + + // Fill the input image with the content of the PPM image if a filename was provided: + if(ppm.is_open()) + { + ppm.fill_image(src); + } + + //Execute 
the functions: + conv3x3.run(); + conv5x5.run(); + + // Save the result to file: + if(ppm.is_open()) + { + const std::string output_filename = std::string(argv[1]) + "_out.ppm"; + save_to_ppm(dst, output_filename); + } + /** [Accurate padding] **/ +} + +/** Main program for convolution test + * + * @param[in] argc Number of arguments + * @param[in] argv Arguments ( [optional] Path to PPM image to process ) + */ +int main(int argc, const char **argv) +{ + return utils::run_example(argc, argv, main_neon_convolution); +} diff --git a/examples/neon_copy_objects.cpp b/examples/neon_copy_objects.cpp new file mode 100644 index 0000000000..191f455557 --- /dev/null +++ b/examples/neon_copy_objects.cpp @@ -0,0 +1,152 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */
+
+#include "arm_compute/runtime/NEON/NEFunctions.h"
+
+#include "arm_compute/core/Types.h"
+#include "utils/Utils.h"
+
+#include <cstring>
+#include <iostream>
+#include <vector>
+
+using namespace arm_compute;
+
+/** Example showing how to move data between plain host arrays and tensors.
+ *
+ * A host array is copied element by element into the input tensor, a softmax is run,
+ * and the output tensor is copied row by row into a second host array.
+ * Both copies are driven by Window / Iterator / execute_window_loop.
+ *
+ * @param[in] argc Number of arguments (unused)
+ * @param[in] argv Arguments (unused)
+ */
+void main_neon_copy_objects(int argc, const char **argv)
+{
+    ARM_COMPUTE_UNUSED(argc);
+    ARM_COMPUTE_UNUSED(argv);
+
+    /** [Copy objects example] */
+    constexpr unsigned int width  = 4;
+    constexpr unsigned int height = 3;
+    constexpr unsigned int batch  = 2;
+
+    // The host buffers are owned by std::vector so that they are released on every
+    // exit path, including when one of the library calls below throws.
+    std::vector<float> src_data(width * height * batch);
+    std::vector<float> dst_data(width * height * batch);
+
+    // Fill src_data with dummy values:
+    for(unsigned int b = 0; b < batch; b++)
+    {
+        for(unsigned int h = 0; h < height; h++)
+        {
+            for(unsigned int w = 0; w < width; w++)
+            {
+                src_data[b * (width * height) + h * width + w] = static_cast<float>(100 * b + 10 * h + w);
+            }
+        }
+    }
+
+    Tensor         input, output;
+    NESoftmaxLayer softmax;
+
+    // Initialize the tensors dimensions and type:
+    const TensorShape shape(width, height, batch);
+    input.allocator()->init(TensorInfo(shape, 1, DataType::F32));
+    output.allocator()->init(TensorInfo(shape, 1, DataType::F32));
+
+    // Configure softmax:
+    softmax.configure(&input, &output);
+
+    // Allocate the input / output tensors:
+    input.allocator()->allocate();
+    output.allocator()->allocate();
+
+    // Fill the input tensor:
+    // Simplest way: create an iterator to iterate through each element of the input tensor:
+    Window input_window;
+    input_window.use_tensor_dimensions(input.info());
+    std::cout << " Dimensions of the input's iterator:\n";
+    std::cout << " X = [start=" << input_window.x().start() << ", end=" << input_window.x().end() << ", step=" << input_window.x().step() << "]\n";
+    std::cout << " Y = [start=" << input_window.y().start() << ", end=" << input_window.y().end() << ", step=" << input_window.y().step() << "]\n";
+    std::cout << " Z = [start=" << input_window.z().start() << ", end=" << input_window.z().end() << ", step=" << input_window.z().step() << "]\n";
+
+    // Create an iterator:
+    Iterator input_it(&input, input_window);
+
+    // Iterate through the elements of src_data and copy them one by one to the input tensor:
+    // This is equivalent to:
+    // for( unsigned int z = 0; z < batch; ++z)
+    // {
+    //   for( unsigned int y = 0; y < height; ++y)
+    //   {
+    //     for( unsigned int x = 0; x < width; ++x)
+    //     {
+    //       *reinterpret_cast<float*>( input.buffer() + input.info()->offset_element_in_bytes(Coordinates(x,y,z))) = src_data[ z * (width*height) + y * width + x];
+    //     }
+    //   }
+    // }
+    // Except it works for an arbitrary number of dimensions
+    execute_window_loop(input_window, [&](const Coordinates & id)
+    {
+        std::cout << "Setting item [" << id.x() << "," << id.y() << "," << id.z() << "]\n";
+        *reinterpret_cast<float *>(input_it.ptr()) = src_data[id.z() * (width * height) + id.y() * width + id.x()];
+    },
+    input_it);
+
+    // Run NEON softmax:
+    softmax.run();
+
+    // More efficient way: create an iterator to iterate through each row (instead of each element) of the output tensor:
+    Window output_window;
+    output_window.use_tensor_dimensions(output.info(), /* first_dimension =*/Window::DimY); // Iterate through the rows (not each element)
+    std::cout << " Dimensions of the output's iterator:\n";
+    std::cout << " X = [start=" << output_window.x().start() << ", end=" << output_window.x().end() << ", step=" << output_window.x().step() << "]\n";
+    std::cout << " Y = [start=" << output_window.y().start() << ", end=" << output_window.y().end() << ", step=" << output_window.y().step() << "]\n";
+    std::cout << " Z = [start=" << output_window.z().start() << ", end=" << output_window.z().end() << ", step=" << output_window.z().step() << "]\n";
+
+    // Create an iterator:
+    Iterator output_it(&output, output_window);
+
+    // Iterate through the rows of the output tensor and copy them to dst_data:
+    // This is equivalent to:
+    // for( unsigned int z = 0; z < batch; ++z)
+    // {
+    //   for( unsigned int y = 0; y < height; ++y)
+    //   {
+    //     memcpy( dst_data + z * (width*height) + y * width, output.buffer() + output.info()->offset_element_in_bytes(Coordinates(0,y,z)), width * sizeof(float));
+    //   }
+    // }
+    // Except it works for an arbitrary number of dimensions
+    execute_window_loop(output_window, [&](const Coordinates & id)
+    {
+        std::cout << "Copying one row starting from [" << id.x() << "," << id.y() << "," << id.z() << "]\n";
+        // Copy one whole row:
+        std::memcpy(dst_data.data() + id.z() * (width * height) + id.y() * width, output_it.ptr(), width * sizeof(float));
+    },
+    output_it);
+    /** [Copy objects example] */
+}
+
+/** Main program for the copy objects test
+ *
+ * @param[in] argc Number of arguments
+ * @param[in] argv Arguments
+ */
+int main(int argc, const char **argv)
+{
+    return utils::run_example(argc, argv, main_neon_copy_objects);
+}
diff --git a/examples/neon_scale.cpp b/examples/neon_scale.cpp
new file mode 100644
index 0000000000..75780c9bdb
--- /dev/null
+++ b/examples/neon_scale.cpp
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/NEON/NEFunctions.h"
+
+#include "arm_compute/core/Types.h"
+#include "utils/Utils.h"
+
+#include <iostream>
+#include <string>
+
+using namespace arm_compute;
+using namespace utils;
+
+/** Example scaling a U8 image down by a factor of two with NEScale
+ *
+ * If a PPM path is given the image is loaded from it and the result is saved to
+ * "<path>_out.ppm"; otherwise a dummy 640x480 image is used and nothing is saved.
+ *
+ * @param[in] argc Number of arguments
+ * @param[in] argv Arguments ( [optional] Path to PPM image to process )
+ */
+void main_neon_scale(int argc, const char **argv)
+{
+    PPMLoader ppm;
+    Image     src, dst;
+
+    if(argc < 2)
+    {
+        // Print help
+        std::cout << "Usage: ./build/neon_scale [input_image.ppm]\n\n";
+        std::cout << "No input_image provided, creating a dummy 640x480 image\n";
+        // Create an empty grayscale 640x480 image
+        src.allocator()->init(TensorInfo(640, 480, Format::U8));
+    }
+    else
+    {
+        ppm.open(argv[1]);
+        ppm.init_image(src, Format::U8);
+    }
+
+    constexpr int scale_factor = 2;
+
+    // The destination is the source size divided by scale_factor in both dimensions
+    TensorInfo dst_tensor_info(src.info()->dimension(0) / scale_factor, src.info()->dimension(1) / scale_factor, Format::U8);
+
+    // Configure the destination image
+    dst.allocator()->init(dst_tensor_info);
+
+    // Create and initialize a Scale function object:
+    NEScale scale;
+    scale.configure(&src, &dst, InterpolationPolicy::NEAREST_NEIGHBOR, BorderMode::UNDEFINED);
+
+    // Allocate all the images
+    src.allocator()->allocate();
+    dst.allocator()->allocate();
+    // Fill the input image with the content of the PPM image if a filename was provided:
+    if(ppm.is_open())
+    {
+        ppm.fill_image(src);
+    }
+
+    // Run the scale operation:
+    scale.run();
+
+    // Save the result to file:
+    if(ppm.is_open())
+    {
+        const std::string output_filename = std::string(argv[1]) + "_out.ppm";
+        save_to_ppm(dst, output_filename);
+    }
+}
+
+/** Main program for the scale test
+ *
+ * @param[in] argc Number of arguments
+ * @param[in] argv Arguments ( [optional] Path to PPM image to process )
+ */
+int main(int argc, const char **argv)
+{
+    return utils::run_example(argc, argv, main_neon_scale);
+}
diff --git a/examples/neoncl_scale_median_gaussian.cpp b/examples/neoncl_scale_median_gaussian.cpp
new file mode 100644
index 0000000000..a32ba6daf6
--- /dev/null
+++ b/examples/neoncl_scale_median_gaussian.cpp
@@ -0,0 +1,126 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#define ARM_COMPUTE_CL /* So that OpenCL exceptions get caught too */
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/CLFunctions.h"
+#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "arm_compute/runtime/NEON/NEFunctions.h"
+#include "utils/Utils.h"
+
+#include <iostream>
+#include <string>
+
+using namespace arm_compute;
+using namespace utils;
+
+/** Example demonstrating how to use both CL and NEON functions in the same pipeline
+ *
+ * The pipeline is: CLScale (half size) -> NEMedian3x3 -> CLGaussian5x5.
+ * The two images touched by the NEON stage are mapped before it runs and
+ * unmapped afterwards, before OpenCL uses them again.
+ *
+ * @param[in] argc Number of arguments
+ * @param[in] argv Arguments ( [optional] Path to PPM image to process )
+ */
+void main_neoncl_scale_median_gaussian(int argc, const char **argv)
+{
+    /** [NEON / OpenCL Interop] */
+    PPMLoader ppm;
+    CLImage   src, scale_median, median_gauss, dst;
+
+    CLScheduler::get().default_init();
+
+    if(argc < 2)
+    {
+        // Print help
+        std::cout << "Usage: ./build/neoncl_scale_median_gaussian [input_image.ppm]\n\n";
+        std::cout << "No input_image provided, creating a dummy 640x480 image\n";
+        // Create an empty grayscale 640x480 image
+        src.allocator()->init(TensorInfo(640, 480, Format::U8));
+    }
+    else
+    {
+        ppm.open(argv[1]);
+        ppm.init_image(src, Format::U8);
+    }
+
+    // Every image after the scale stage is half the source size in both dimensions
+    TensorInfo scale_median_info(src.info()->dimension(0) / 2, src.info()->dimension(1) / 2, Format::U8);
+
+    // Configure the temporary and destination images
+    scale_median.allocator()->init(scale_median_info);
+    median_gauss.allocator()->init(scale_median_info);
+    dst.allocator()->init(scale_median_info);
+
+    // Declare and configure the functions to create the following pipeline: scale -> median -> gauss
+    CLScale       scale;
+    NEMedian3x3   median;
+    CLGaussian5x5 gauss;
+
+    scale.configure(&src, &scale_median, InterpolationPolicy::NEAREST_NEIGHBOR, BorderMode::REPLICATE);
+    median.configure(&scale_median, &median_gauss, BorderMode::REPLICATE);
+    gauss.configure(&median_gauss, &dst, BorderMode::REPLICATE);
+
+    // Allocate all the images
+    src.allocator()->allocate();
+    scale_median.allocator()->allocate();
+    median_gauss.allocator()->allocate();
+    dst.allocator()->allocate();
+
+    // Fill the input image with the content of the PPM image if a filename was provided:
+    if(ppm.is_open())
+    {
+        ppm.fill_image(src);
+    }
+
+    // Enqueue and flush the OpenCL kernel:
+    scale.run();
+
+    // Do a blocking map of the input and output buffers of the NEON function:
+    scale_median.map();
+    median_gauss.map();
+
+    // Run the NEON function:
+    median.run();
+
+    // Unmap both buffers before they are used again by OpenCL:
+    scale_median.unmap();
+    median_gauss.unmap();
+
+    // Run the final OpenCL function:
+    gauss.run();
+
+    // Make sure all the OpenCL jobs are done executing:
+    CLScheduler::get().sync();
+
+    // Save the result to file:
+    if(ppm.is_open())
+    {
+        const std::string output_filename = std::string(argv[1]) + "_out.ppm";
+        save_to_ppm(dst, output_filename); // save_to_ppm maps and unmaps the image to store as PPM
+    }
+    /** [NEON / OpenCL Interop] */
+}
+
+/** Main program for the NEON / OpenCL interop (scale -> median -> gaussian) example
+ *
+ * @param[in] argc Number of arguments
+ * @param[in] argv Arguments ( [optional] Path to PPM image to process )
+ */
+int main(int argc, const char **argv)
+{
+    return utils::run_example(argc, argv, main_neoncl_scale_median_gaussian);
+}
|