diff options
-rw-r--r-- | arm_compute/core/CL/kernels/CLMeanStdDevKernel.h | 3 | ||||
-rw-r--r-- | arm_compute/core/NEON/kernels/NEMeanStdDevKernel.h | 3 | ||||
-rw-r--r-- | arm_compute/runtime/CL/functions/CLMeanStdDev.h | 10 | ||||
-rw-r--r-- | arm_compute/runtime/NEON/functions/NEMeanStdDev.h | 10 | ||||
-rw-r--r-- | src/core/CL/kernels/CLMeanStdDevKernel.cpp | 10 | ||||
-rw-r--r-- | src/core/NEON/kernels/NEMeanStdDevKernel.cpp | 10 | ||||
-rw-r--r-- | src/core/NEON/kernels/NESoftmaxLayerKernel.cpp | 2 | ||||
-rw-r--r-- | src/runtime/CL/functions/CLMeanStdDev.cpp | 6 | ||||
-rw-r--r-- | src/runtime/NEON/functions/NEMeanStdDev.cpp | 7 | ||||
-rw-r--r-- | tests/AssetsLibrary.h | 37 | ||||
-rw-r--r-- | tests/CL/CLAccessor.h | 6 | ||||
-rw-r--r-- | tests/IAccessor.h | 3 | ||||
-rw-r--r-- | tests/NEON/Accessor.h | 6 | ||||
-rw-r--r-- | tests/RawTensor.cpp | 5 | ||||
-rw-r--r-- | tests/RawTensor.h | 3 | ||||
-rw-r--r-- | tests/validation/NEON/MeanStdDev.cpp | 4 | ||||
-rw-r--r-- | tests/validation_new/SimpleTensor.h | 9 |
17 files changed, 116 insertions, 18 deletions
diff --git a/arm_compute/core/CL/kernels/CLMeanStdDevKernel.h b/arm_compute/core/CL/kernels/CLMeanStdDevKernel.h index 9f30f76e1b..2c09590dab 100644 --- a/arm_compute/core/CL/kernels/CLMeanStdDevKernel.h +++ b/arm_compute/core/CL/kernels/CLMeanStdDevKernel.h @@ -63,12 +63,15 @@ public: // Inherited methods overridden: void run(const Window &window, cl::CommandQueue &queue) override; + BorderSize border_size() const override; + private: const ICLImage *_input; float *_mean; float *_stddev; cl::Buffer *_global_sum; cl::Buffer *_global_sum_squared; + BorderSize _border_size; }; } #endif /* __ARM_COMPUTE_CLMEANSTDDEVKERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NEMeanStdDevKernel.h b/arm_compute/core/NEON/kernels/NEMeanStdDevKernel.h index 83407ccb7d..9c72b20d58 100644 --- a/arm_compute/core/NEON/kernels/NEMeanStdDevKernel.h +++ b/arm_compute/core/NEON/kernels/NEMeanStdDevKernel.h @@ -64,6 +64,8 @@ public: // Inherited methods overridden: void run(const Window &window) override; + BorderSize border_size() const override; + private: const IImage *_input; float *_mean; @@ -71,6 +73,7 @@ private: uint64_t *_global_sum; uint64_t *_global_sum_squared; std::mutex _mtx; + BorderSize _border_size; }; } #endif /* __ARM_COMPUTE_NEMEANSTDDEVKERNEL_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLMeanStdDev.h b/arm_compute/runtime/CL/functions/CLMeanStdDev.h index e33bcdd779..7622138236 100644 --- a/arm_compute/runtime/CL/functions/CLMeanStdDev.h +++ b/arm_compute/runtime/CL/functions/CLMeanStdDev.h @@ -25,6 +25,7 @@ #define __ARM_COMPUTE_CLMEANSTDDEV_H__ #include "arm_compute/core/CL/OpenCL.h" +#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h" #include "arm_compute/core/CL/kernels/CLMeanStdDevKernel.h" #include "arm_compute/runtime/IFunction.h" @@ -38,17 +39,18 @@ public: CLMeanStdDev(); /** Initialise the kernel's inputs and outputs. * - * @param[in] input Input image. Data types supported: U8. - * @param[out] mean Output average pixel value. - * @param[out] stddev (Optional)Output standard deviation of pixel values. + * @param[in, out] input Input image. Data types supported: U8. (Written to only for border filling) + * @param[out] mean Output average pixel value. + * @param[out] stddev (Optional)Output standard deviation of pixel values. */ - void configure(const ICLImage *input, float *mean, float *stddev = nullptr); + void configure(ICLImage *input, float *mean, float *stddev = nullptr); // Inherited methods overridden: void run() override; private: CLMeanStdDevKernel _mean_stddev_kernel; /**< Kernel that standard deviation calculation. */ + CLFillBorderKernel _fill_border_kernel; /**< Kernel that fills the border with zeroes. */ cl::Buffer _global_sum; /**< Variable that holds the global sum among calls in order to ease reduction */ cl::Buffer _global_sum_squared; /**< Variable that holds the global sum of squared values among calls in order to ease reduction */ }; diff --git a/arm_compute/runtime/NEON/functions/NEMeanStdDev.h b/arm_compute/runtime/NEON/functions/NEMeanStdDev.h index 3770b2a270..e8bf8dfa3d 100644 --- a/arm_compute/runtime/NEON/functions/NEMeanStdDev.h +++ b/arm_compute/runtime/NEON/functions/NEMeanStdDev.h @@ -24,6 +24,7 @@ #ifndef __ARM_COMPUTE_NEMEANSTDDEV_H__ #define __ARM_COMPUTE_NEMEANSTDDEV_H__ +#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" #include "arm_compute/core/NEON/kernels/NEMeanStdDevKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/IFunction.h" @@ -44,17 +45,18 @@ public: NEMeanStdDev(); /** Initialise the kernel's inputs and outputs. * - * @param[in] input Input image. Data type supported: U8. - * @param[out] mean Output average pixel value. - * @param[out] stddev (Optional) Output standard deviation of pixel values. + * @param[in, out] input Input image. Data types supported: U8. (Written to only for border filling) + * @param[out] mean Output average pixel value. + * @param[out] stddev (Optional) Output standard deviation of pixel values. */ - void configure(const IImage *input, float *mean, float *stddev = nullptr); + void configure(IImage *input, float *mean, float *stddev = nullptr); // Inherited methods overridden: void run() override; private: NEMeanStdDevKernel _mean_stddev_kernel; /**< Kernel that standard deviation calculation. */ + NEFillBorderKernel _fill_border_kernel; /**< Kernel that fills tensor's borders with zeroes. */ uint64_t _global_sum; /**< Variable that holds the global sum among calls in order to ease reduction */ uint64_t _global_sum_squared; /**< Variable that holds the global sum of squared values among calls in order to ease reduction */ }; diff --git a/src/core/CL/kernels/CLMeanStdDevKernel.cpp b/src/core/CL/kernels/CLMeanStdDevKernel.cpp index b0b748f466..1b70d7513f 100644 --- a/src/core/CL/kernels/CLMeanStdDevKernel.cpp +++ b/src/core/CL/kernels/CLMeanStdDevKernel.cpp @@ -40,10 +40,15 @@ using namespace arm_compute; CLMeanStdDevKernel::CLMeanStdDevKernel() - : _input(nullptr), _mean(nullptr), _stddev(nullptr), _global_sum(nullptr), _global_sum_squared(nullptr) + : _input(nullptr), _mean(nullptr), _stddev(nullptr), _global_sum(nullptr), _global_sum_squared(nullptr), _border_size(0) { } +BorderSize CLMeanStdDevKernel::border_size() const +{ + return _border_size; +} + void CLMeanStdDevKernel::configure(const ICLImage *input, float *mean, cl::Buffer *global_sum, float *stddev, cl::Buffer *global_sum_squared) { ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(input); @@ -83,6 +88,9 @@ void CLMeanStdDevKernel::configure(const ICLImage *input, float *mean, cl::Buffe constexpr unsigned int num_elems_processed_per_iteration_x = 8; const unsigned int num_elems_processed_per_iteration_y = input->info()->dimension(1); + _border_size = BorderSize(std::max(static_cast<int>(num_elems_processed_per_iteration_x) - static_cast<int>(input->info()->dimension(0)), + static_cast<int>(input->info()->dimension(0) % num_elems_processed_per_iteration_x))); + Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration_x, num_elems_processed_per_iteration_y)); AccessWindowRectangle input_access(input->info(), 0, 0, num_elems_processed_per_iteration_x, num_elems_processed_per_iteration_y); update_window_and_padding(win, input_access); diff --git a/src/core/NEON/kernels/NEMeanStdDevKernel.cpp b/src/core/NEON/kernels/NEMeanStdDevKernel.cpp index 4616203d66..1eb7e45e36 100644 --- a/src/core/NEON/kernels/NEMeanStdDevKernel.cpp +++ b/src/core/NEON/kernels/NEMeanStdDevKernel.cpp @@ -85,10 +85,15 @@ std::pair<uint64x1_t, uint64x1_t> accumulate(const Window &window, Iterator &ite } // namespace NEMeanStdDevKernel::NEMeanStdDevKernel() - : _input(nullptr), _mean(nullptr), _stddev(nullptr), _global_sum(nullptr), _global_sum_squared(nullptr), _mtx() + : _input(nullptr), _mean(nullptr), _stddev(nullptr), _global_sum(nullptr), _global_sum_squared(nullptr), _mtx(), _border_size(0) { } +BorderSize NEMeanStdDevKernel::border_size() const +{ + return _border_size; +} + void NEMeanStdDevKernel::configure(const IImage *input, float *mean, uint64_t *global_sum, float *stddev, uint64_t *global_sum_squared) { ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(input); @@ -105,6 +110,9 @@ void NEMeanStdDevKernel::configure(const IImage *input, float *mean, uint64_t *g constexpr unsigned int num_elems_processed_per_iteration = 16; + _border_size = BorderSize(std::max(static_cast<int>(num_elems_processed_per_iteration) - static_cast<int>(input->info()->dimension(0)), + static_cast<int>(input->info()->dimension(0) % num_elems_processed_per_iteration))); + // Configure kernel window Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration)); diff --git a/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp b/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp index 79fcba1dfb..176e3d688e 100644 --- a/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp +++ b/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp @@ -224,7 +224,7 @@ void NELogits1DMaxKernel::configure(const ITensor *input, ITensor *output) _input = input; _output = output; - _border_size = BorderSize(0, input_width % num_elems_processed_per_iteration, 0, 0); + _border_size = BorderSize(0, num_elems_processed_per_iteration - (input_width % num_elems_processed_per_iteration), 0, 0); // Configure kernel window constexpr unsigned int num_elems_written_per_row = 1; diff --git a/src/runtime/CL/functions/CLMeanStdDev.cpp b/src/runtime/CL/functions/CLMeanStdDev.cpp index 56ba146790..838f7e73d2 100644 --- a/src/runtime/CL/functions/CLMeanStdDev.cpp +++ b/src/runtime/CL/functions/CLMeanStdDev.cpp @@ -23,19 +23,19 @@ */ #include "arm_compute/runtime/CL/functions/CLMeanStdDev.h" -#include "arm_compute/core/CL/kernels/CLMeanStdDevKernel.h" #include "arm_compute/runtime/CL/CLScheduler.h" using namespace arm_compute; CLMeanStdDev::CLMeanStdDev() : _mean_stddev_kernel(), + _fill_border_kernel(), _global_sum(), _global_sum_squared() { } -void CLMeanStdDev::configure(const ICLImage *input, float *mean, float *stddev) +void CLMeanStdDev::configure(ICLImage *input, float *mean, float *stddev) { _global_sum = cl::Buffer(CLScheduler::get().context(), CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, sizeof(cl_ulong)); @@ -45,9 +45,11 @@ void CLMeanStdDev::configure(const ICLImage *input, float *mean, float *stddev) } _mean_stddev_kernel.configure(input, mean, &_global_sum, stddev, &_global_sum_squared); + _fill_border_kernel.configure(input, _mean_stddev_kernel.border_size(), BorderMode::CONSTANT, PixelValue(static_cast<uint8_t>(0))); } void CLMeanStdDev::run() { + CLScheduler::get().enqueue(_fill_border_kernel); CLScheduler::get().enqueue(_mean_stddev_kernel); } diff --git a/src/runtime/NEON/functions/NEMeanStdDev.cpp b/src/runtime/NEON/functions/NEMeanStdDev.cpp index 47143f5e5b..ab8e72bf1d 100644 --- a/src/runtime/NEON/functions/NEMeanStdDev.cpp +++ b/src/runtime/NEON/functions/NEMeanStdDev.cpp @@ -23,19 +23,19 @@ */ #include "arm_compute/runtime/NEON/functions/NEMeanStdDev.h" -#include "arm_compute/core/NEON/kernels/NEMeanStdDevKernel.h" #include "arm_compute/runtime/NEON/NEScheduler.h" using namespace arm_compute; NEMeanStdDev::NEMeanStdDev() - : _mean_stddev_kernel(), _global_sum(0), _global_sum_squared(0) + : _mean_stddev_kernel(), _fill_border_kernel(), _global_sum(0), _global_sum_squared(0) { } -void NEMeanStdDev::configure(const IImage *input, float *mean, float *stddev) +void NEMeanStdDev::configure(IImage *input, float *mean, float *stddev) { _mean_stddev_kernel.configure(input, mean, &_global_sum, stddev, &_global_sum_squared); + _fill_border_kernel.configure(input, _mean_stddev_kernel.border_size(), BorderMode::CONSTANT, PixelValue(static_cast<uint8_t>(0))); } void NEMeanStdDev::run() @@ -43,5 +43,6 @@ void NEMeanStdDev::run() _global_sum = 0; _global_sum_squared = 0; + _fill_border_kernel.run(_fill_border_kernel.window()); NEScheduler::get().schedule(&_mean_stddev_kernel, Window::DimY); } diff --git a/tests/AssetsLibrary.h b/tests/AssetsLibrary.h index 3dd30e7629..18ffd773c8 100644 --- a/tests/AssetsLibrary.h +++ b/tests/AssetsLibrary.h @@ -150,6 +150,15 @@ public: */ RawTensor get(const std::string &name, Format format, Channel channel); + /** Puts garbage values all around the tensor for testing purposes + * + * @param[in, out] tensor To be filled tensor. + * @param[in] distribution Distribution used to fill the tensor's surroundings. + * @param[in] seed_offset The offset will be added to the global seed before initialising the random generator. + */ + template <typename T, typename D> + void fill_borders_with_garbage(T &&tensor, D &&distribution, std::random_device::result_type seed_offset) const; + /** Fills the specified @p tensor with random values drawn from @p * distribution. * @@ -348,6 +357,32 @@ private: }; template <typename T, typename D> +void AssetsLibrary::fill_borders_with_garbage(T &&tensor, D &&distribution, std::random_device::result_type seed_offset) const +{ + const PaddingSize padding_size = tensor.padding(); + + Window window; + window.set(0, Window::Dimension(-padding_size.left, tensor.shape()[0] + padding_size.right, 1)); + window.set(1, Window::Dimension(-padding_size.top, tensor.shape()[1] + padding_size.bottom, 1)); + + std::mt19937 gen(_seed); + + execute_window_loop(window, [&](const Coordinates & id) + { + TensorShape shape = tensor.shape(); + + // If outside of valid region + if(id.x() < 0 || id.x() >= static_cast<int>(shape.x()) || id.y() < 0 || id.y() >= static_cast<int>(shape.y())) + { + using ResultType = typename std::remove_reference<D>::type::result_type; + const ResultType value = distribution(gen); + void *const out_ptr = tensor(id); + store_value_with_data_type(out_ptr, value, tensor.data_type()); + } + }); +} + +template <typename T, typename D> void AssetsLibrary::fill(T &&tensor, D &&distribution, std::random_device::result_type seed_offset) const { Window window; @@ -366,6 +401,8 @@ void AssetsLibrary::fill(T &&tensor, D &&distribution, std::random_device::resul void *const out_ptr = tensor(id); store_value_with_data_type(out_ptr, value, tensor.data_type()); }); + + fill_borders_with_garbage(tensor, distribution, seed_offset); } template <typename D> diff --git a/tests/CL/CLAccessor.h b/tests/CL/CLAccessor.h index 260da035c5..b1d7a078b1 100644 --- a/tests/CL/CLAccessor.h +++ b/tests/CL/CLAccessor.h @@ -59,6 +59,7 @@ public: DataType data_type() const override; int num_channels() const override; int num_elements() const override; + PaddingSize padding() const override; int fixed_point_position() const override; const void *operator()(const Coordinates &coord) const override; void *operator()(const Coordinates &coord) override; @@ -113,6 +114,11 @@ inline int CLAccessor::num_elements() const return _tensor.info()->tensor_shape().total_size(); } +inline PaddingSize CLAccessor::padding() const +{ + return _tensor.info()->padding(); +} + inline int CLAccessor::fixed_point_position() const { return _tensor.info()->fixed_point_position(); diff --git a/tests/IAccessor.h b/tests/IAccessor.h index 0517981df5..ef06e9e9da 100644 --- a/tests/IAccessor.h +++ b/tests/IAccessor.h @@ -61,6 +61,9 @@ public: /** Number of elements of the tensor. */ virtual int num_elements() const = 0; + /** Available padding around the tensor. */ + virtual PaddingSize padding() const = 0; + /** Number of bits for the fractional part. */ virtual int fixed_point_position() const = 0; diff --git a/tests/NEON/Accessor.h b/tests/NEON/Accessor.h index 5949b350a0..c379018d39 100644 --- a/tests/NEON/Accessor.h +++ b/tests/NEON/Accessor.h @@ -53,6 +53,7 @@ public: DataType data_type() const override; int num_channels() const override; int num_elements() const override; + PaddingSize padding() const override; int fixed_point_position() const override; const void *operator()(const Coordinates &coord) const override; void *operator()(const Coordinates &coord) override; @@ -101,6 +102,11 @@ inline int Accessor::num_elements() const return _tensor.info()->tensor_shape().total_size(); } +inline PaddingSize Accessor::padding() const +{ + return _tensor.info()->padding(); +} + inline int Accessor::fixed_point_position() const { return _tensor.info()->fixed_point_position(); diff --git a/tests/RawTensor.cpp b/tests/RawTensor.cpp index 402b5f3d0b..1d400a58d7 100644 --- a/tests/RawTensor.cpp +++ b/tests/RawTensor.cpp @@ -146,6 +146,11 @@ int RawTensor::num_elements() const return _shape.total_size(); } +PaddingSize RawTensor::padding() const +{ + return PaddingSize(0); +} + const RawTensor::BufferType *RawTensor::data() const { return _buffer.get(); diff --git a/tests/RawTensor.h b/tests/RawTensor.h index 2480917c38..9d65e4f319 100644 --- a/tests/RawTensor.h +++ b/tests/RawTensor.h @@ -114,6 +114,9 @@ public: /** Number of elements of the tensor. */ int num_elements() const; + /** Available padding around the tensor. */ + PaddingSize padding() const; + /** The number of bits for the fractional part of the fixed point numbers. */ int fixed_point_position() const; diff --git a/tests/validation/NEON/MeanStdDev.cpp b/tests/validation/NEON/MeanStdDev.cpp index 5fcd81ec95..d39435f2b3 100644 --- a/tests/validation/NEON/MeanStdDev.cpp +++ b/tests/validation/NEON/MeanStdDev.cpp @@ -134,8 +134,8 @@ BOOST_DATA_TEST_CASE(RunLarge, Large2DShapes(), shape) std::pair<float, float> ref_output = Reference::compute_reference_mean_and_standard_deviation(shape); // Validate output - validate(output.first, ref_output.first); - validate(output.second, ref_output.second, 0.f, 0.001f); + validate(output.first, ref_output.first, 0.f, 0.0001f); + validate(output.second, ref_output.second, 0.f, 0.01f); } BOOST_AUTO_TEST_SUITE_END() diff --git a/tests/validation_new/SimpleTensor.h b/tests/validation_new/SimpleTensor.h index 6392e38e25..61d6f1cd04 100644 --- a/tests/validation_new/SimpleTensor.h +++ b/tests/validation_new/SimpleTensor.h @@ -127,6 +127,9 @@ public: /** Number of elements of the tensor. */ int num_elements() const override; + /** Available padding around the tensor. */ + PaddingSize padding() const override; + /** The number of bits for the fractional part of the fixed point numbers. */ int fixed_point_position() const override; @@ -292,6 +295,12 @@ int SimpleTensor<T>::num_elements() const } template <typename T> +PaddingSize SimpleTensor<T>::padding() const +{ + return PaddingSize(0); +} + +template <typename T> const T *SimpleTensor<T>::data() const { return _buffer.get(); |