diff options
178 files changed, 422 insertions, 348 deletions
diff --git a/arm_compute/core/CPP/CPPTypes.h b/arm_compute/core/CPP/CPPTypes.h index 4e156ea78e..19036525d5 100644 --- a/arm_compute/core/CPP/CPPTypes.h +++ b/arm_compute/core/CPP/CPPTypes.h @@ -47,5 +47,12 @@ enum class CPUTarget A75 = (ARMV8_2 | A7x | 0x5), A75_DOT = (A75 | DOT), }; + +struct ThreadInfo +{ + int thread_id{ 0 }; + int num_threads{ 1 }; + CPUTarget cpu{ CPUTarget::INTRINSICS }; +}; } #endif /* __ARM_COMPUTE_CPP_TYPES_H__ */ diff --git a/arm_compute/core/CPP/ICPPKernel.h b/arm_compute/core/CPP/ICPPKernel.h index 9684408987..2e5600f5ff 100644 --- a/arm_compute/core/CPP/ICPPKernel.h +++ b/arm_compute/core/CPP/ICPPKernel.h @@ -47,20 +47,9 @@ public: * @note The width of the window has to be a multiple of num_elems_processed_per_iteration(). * * @param[in] window Region on which to execute the kernel. (Must be a region of the window returned by window()) + * @param[in] info Info about executing thread and CPU. */ - virtual void run(const Window &window) = 0; - - /** Sets the target CPU architecture. - * - * @param[in] target CPU target. - */ - void set_target(CPUTarget target) - { - _target = target; - } - -protected: - CPUTarget _target{ CPUTarget::INTRINSICS }; + virtual void run(const Window &window, const ThreadInfo &info) = 0; }; } #endif /*__ARM_COMPUTE_ICPPKERNEL_H__ */ diff --git a/arm_compute/core/CPP/kernels/CPPCornerCandidatesKernel.h b/arm_compute/core/CPP/kernels/CPPCornerCandidatesKernel.h index 28edb489ad..5309973f5d 100644 --- a/arm_compute/core/CPP/kernels/CPPCornerCandidatesKernel.h +++ b/arm_compute/core/CPP/kernels/CPPCornerCandidatesKernel.h @@ -62,7 +62,7 @@ public: void configure(const IImage *input, InternalKeypoint *output, int32_t *num_corner_candidates); // Inherited methods overridden: - void run(const Window &window) override; + void run(const Window &window, const ThreadInfo &info) override; private: int32_t *_num_corner_candidates; /**< Number of corner candidates */ diff --git a/arm_compute/core/CPP/kernels/CPPDetectionWindowNonMaximaSuppressionKernel.h b/arm_compute/core/CPP/kernels/CPPDetectionWindowNonMaximaSuppressionKernel.h index bcb3026959..2a9c0473bb 100644 --- a/arm_compute/core/CPP/kernels/CPPDetectionWindowNonMaximaSuppressionKernel.h +++ b/arm_compute/core/CPP/kernels/CPPDetectionWindowNonMaximaSuppressionKernel.h @@ -60,7 +60,7 @@ public: void configure(IDetectionWindowArray *input_output, float min_distance); // Inherited methods overridden: - void run(const Window &window) override; + void run(const Window &window, const ThreadInfo &info) override; bool is_parallelisable() const override; private: diff --git a/arm_compute/core/CPP/kernels/CPPSortEuclideanDistanceKernel.h b/arm_compute/core/CPP/kernels/CPPSortEuclideanDistanceKernel.h index b7a7d9ff9f..c6b47a512a 100644 --- a/arm_compute/core/CPP/kernels/CPPSortEuclideanDistanceKernel.h +++ b/arm_compute/core/CPP/kernels/CPPSortEuclideanDistanceKernel.h @@ -56,7 +56,7 @@ public: void configure(InternalKeypoint *in_out, IKeyPointArray *output, const int32_t *num_corner_candidates, float min_distance); // Inherited methods overridden: - void run(const Window &window) override; + void run(const Window &window, const ThreadInfo &info) override; bool is_parallelisable() const override; private: diff --git a/arm_compute/core/NEON/kernels/NEAbsoluteDifferenceKernel.h b/arm_compute/core/NEON/kernels/NEAbsoluteDifferenceKernel.h index 9ef93ce67a..0590902fe7 100644 --- a/arm_compute/core/NEON/kernels/NEAbsoluteDifferenceKernel.h +++ b/arm_compute/core/NEON/kernels/NEAbsoluteDifferenceKernel.h @@ -60,7 +60,7 @@ public: void configure(const ITensor *input1, const ITensor *input2, ITensor *output); // Inherited methods overridden: - void run(const Window &window) override; + void run(const Window &window, const ThreadInfo &info) override; private: /** Common signature for all the specialised absolute difference functions diff --git a/arm_compute/core/NEON/kernels/NEAccumulateKernel.h b/arm_compute/core/NEON/kernels/NEAccumulateKernel.h index 12be3e4e7a..071e03820c 100644 --- a/arm_compute/core/NEON/kernels/NEAccumulateKernel.h +++ b/arm_compute/core/NEON/kernels/NEAccumulateKernel.h @@ -48,7 +48,7 @@ public: void configure(const ITensor *input, ITensor *accum); // Inherited methods overridden: - void run(const Window &window) override; + void run(const Window &window, const ThreadInfo &info) override; }; /** Interface for the accumulate weighted kernel @@ -74,7 +74,7 @@ public: void configure(const ITensor *input, float alpha, ITensor *accum); // Inherited methods overridden: - void run(const Window &window) override; + void run(const Window &window, const ThreadInfo &info) override; protected: float _alpha; @@ -86,7 +86,7 @@ class NEAccumulateWeightedFP16Kernel : public NEAccumulateWeightedKernel { public: // Inherited methods overridden: - void run(const Window &window) override; + void run(const Window &window, const ThreadInfo &info) override; }; #else /* ARM_COMPUTE_ENABLE_FP16 */ using NEAccumulateWeightedFP16Kernel = NEAccumulateWeightedKernel; @@ -113,7 +113,7 @@ public: void configure(const ITensor *input, uint32_t shift, ITensor *accum); // Inherited methods overridden: - void run(const Window &window) override; + void run(const Window &window, const ThreadInfo &info) override; private: uint32_t _shift; diff --git a/arm_compute/core/NEON/kernels/NEActivationLayerKernel.h b/arm_compute/core/NEON/kernels/NEActivationLayerKernel.h index 2c88debfb4..5a6c4a5dbf 100644 --- a/arm_compute/core/NEON/kernels/NEActivationLayerKernel.h +++ b/arm_compute/core/NEON/kernels/NEActivationLayerKernel.h @@ -61,7 +61,7 @@ public: void configure(ITensor *input, ITensor *output, ActivationLayerInfo activation_info); // Inherited methods overridden: - void run(const Window &window) override; + void run(const Window &window, const ThreadInfo &info) override; private: using ActivationFunction = ActivationLayerInfo::ActivationFunction; diff --git a/arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h b/arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h index e28ad67ca4..d88ad379ba 100644 --- a/arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h +++ b/arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h @@ -70,7 +70,7 @@ public: void configure(const ITensor *input1, const ITensor *input2, ITensor *output, ConvertPolicy policy); // Inherited methods overridden: - void run(const Window &window) override; + void run(const Window &window, const ThreadInfo &info) override; private: /** Common signature for all the specialised add functions diff --git a/arm_compute/core/NEON/kernels/NEArithmeticSubtractionKernel.h b/arm_compute/core/NEON/kernels/NEArithmeticSubtractionKernel.h index a11bf44458..1a4f0e643e 100644 --- a/arm_compute/core/NEON/kernels/NEArithmeticSubtractionKernel.h +++ b/arm_compute/core/NEON/kernels/NEArithmeticSubtractionKernel.h @@ -70,7 +70,7 @@ public: void configure(const ITensor *input1, const ITensor *input2, ITensor *output, ConvertPolicy policy); // Inherited methods overridden: - void run(const Window &window) override; + void run(const Window &window, const ThreadInfo &info) override; private: /** Common signature for all the specialised sub functions diff --git a/arm_compute/core/NEON/kernels/NEBatchNormalizationLayerKernel.h b/arm_compute/core/NEON/kernels/NEBatchNormalizationLayerKernel.h index 8ac70be727..8e86e35040 100644 --- a/arm_compute/core/NEON/kernels/NEBatchNormalizationLayerKernel.h +++ b/arm_compute/core/NEON/kernels/NEBatchNormalizationLayerKernel.h @@ -64,7 +64,7 @@ public: void configure(ITensor *input, ITensor *output, const ITensor *mean, const ITensor *var, const ITensor *beta, const ITensor *gamma, float epsilon); // Inherited methods overridden: - void run(const Window &window) override; + void run(const Window &window, const ThreadInfo &info) override; private: using BatchNormFunction = void(ITensor *input, ITensor *output, const ITensor *mean, const ITensor *var, const ITensor *beta, const ITensor *gamma, float epsilon, const Window &window); diff --git a/arm_compute/core/NEON/kernels/NEBitwiseAndKernel.h b/arm_compute/core/NEON/kernels/NEBitwiseAndKernel.h index b931445419..0882a33ef7 100644 --- a/arm_compute/core/NEON/kernels/NEBitwiseAndKernel.h +++ b/arm_compute/core/NEON/kernels/NEBitwiseAndKernel.h @@ -57,7 +57,7 @@ public: void configure(const ITensor *input1, const ITensor *input2, ITensor *output); // Inherited methods overridden: - void run(const Window &window) override; + void run(const Window &window, const ThreadInfo &info) override; private: const ITensor *_input1; /**< Source tensor 1 */ diff --git a/arm_compute/core/NEON/kernels/NEBitwiseNotKernel.h b/arm_compute/core/NEON/kernels/NEBitwiseNotKernel.h index e34eb0f5ae..7bbf68c435 100644 --- a/arm_compute/core/NEON/kernels/NEBitwiseNotKernel.h +++ b/arm_compute/core/NEON/kernels/NEBitwiseNotKernel.h @@ -56,7 +56,7 @@ public: void configure(const ITensor *input, ITensor *output); // Inherited methods overridden: - void run(const Window &window) override; + void run(const Window &window, const ThreadInfo &info) override; private: const ITensor *_input; /**< Source tensor */ diff --git a/arm_compute/core/NEON/kernels/NEBitwiseOrKernel.h b/arm_compute/core/NEON/kernels/NEBitwiseOrKernel.h index d2bae2660c..7cddb205fd 100644 --- a/arm_compute/core/NEON/kernels/NEBitwiseOrKernel.h +++ b/arm_compute/core/NEON/kernels/NEBitwiseOrKernel.h @@ -57,7 +57,7 @@ public: void configure(const ITensor *input1, const ITensor *input2, ITensor *output); // Inherited methods overridden: - void run(const Window &window) override; + void run(const Window &window, const ThreadInfo &info) override; private: const ITensor *_input1; /**< Source tensor 1 */ diff --git a/arm_compute/core/NEON/kernels/NEBitwiseXorKernel.h b/arm_compute/core/NEON/kernels/NEBitwiseXorKernel.h index 9dea36e7e3..f7e5dc5286 100644 --- a/arm_compute/core/NEON/kernels/NEBitwiseXorKernel.h +++ b/arm_compute/core/NEON/kernels/NEBitwiseXorKernel.h @@ -57,7 +57,7 @@ public: void configure(const ITensor *input1, const ITensor *input2, ITensor *output); // Inherited methods overridden: - void run(const Window &window) override; + void run(const Window &window, const ThreadInfo &info) override; private: const ITensor *_input1; /**< Source tensor 1 */ diff --git a/arm_compute/core/NEON/kernels/NEBox3x3Kernel.h b/arm_compute/core/NEON/kernels/NEBox3x3Kernel.h index 1f0fef03da..14db09a698 100644 --- a/arm_compute/core/NEON/kernels/NEBox3x3Kernel.h +++ b/arm_compute/core/NEON/kernels/NEBox3x3Kernel.h @@ -42,7 +42,7 @@ public: */ void configure(const ITensor *input, ITensor *output, bool border_undefined); // Inherited methods overridden: - void run(const Window &window) override; + void run(const Window &window, const ThreadInfo &info) override; BorderSize border_size() const override; }; @@ -53,7 +53,7 @@ class NEBox3x3FP16Kernel : public NEBox3x3Kernel { public: // Inherited methods overridden: - void run(const Window &window) override; + void run(const Window &window, const ThreadInfo &info) override; }; #else /* ARM_COMPUTE_ENABLE_FP16 */ using NEBox3x3FP16Kernel = NEBox3x3Kernel; diff --git a/arm_compute/core/NEON/kernels/NECannyEdgeKernel.h b/arm_compute/core/NEON/kernels/NECannyEdgeKernel.h index b86085f439..0b5ebfb42b 100644 --- a/arm_compute/core/NEON/kernels/NECannyEdgeKernel.h +++ b/arm_compute/core/NEON/kernels/NECannyEdgeKernel.h @@ -62,7 +62,7 @@ public: virtual void configure(const ITensor *gx, const ITensor *gy, ITensor *magnitude, ITensor *phase, int32_t norm_type); // Inherited methods overridden: - void run(const Window &window) override; + void run(const Window &window, const ThreadInfo &info) override; protected: /** Common signature for all the specialised gradient functions @@ -129,7 +129,7 @@ public: void configure(const ITensor *magnitude, const ITensor *phase, ITensor *output, int32_t upper_thr, int32_t lower_thr, bool border_undefined); // Inherited methods overridden: - void run(const Window &window) override; + void run(const Window &window, const ThreadInfo &info) override; BorderSize border_size() const override; private: @@ -178,7 +178,7 @@ public: void configure(ITensor *input, ITensor *output); // Inherited methods overridden: - void run(const Window &window) override; + void run(const Window &window, const ThreadInfo &info) override; BorderSize border_size() const override; bool is_parallelisable() const override; diff --git a/arm_compute/core/NEON/kernels/NEChannelCombineKernel.h b/arm_compute/core/NEON/kernels/NEChannelCombineKernel.h index 8b669a4d28..94bddac6ef 100644 --- a/arm_compute/core/NEON/kernels/NEChannelCombineKernel.h +++ b/arm_compute/core/NEON/kernels/NEChannelCombineKernel.h @@ -71,7 +71,7 @@ public: void configure(const IImage *plane0, const IImage *plane1, const IImage *plane2, IMultiImage *output); // Inherited methods overridden: - void run(const Window &window) override; + void run(const Window &window, const ThreadInfo &info) override; bool is_parallelisable() const override; private: diff --git a/arm_compute/core/NEON/kernels/NEChannelExtractKernel.h b/arm_compute/core/NEON/kernels/NEChannelExtractKernel.h index 0715e1f8cb..42df4b8da1 100644 --- a/arm_compute/core/NEON/kernels/NEChannelExtractKernel.h +++ b/arm_compute/core/NEON/kernels/NEChannelExtractKernel.h @@ -68,7 +68,7 @@ public: void configure(const IMultiImage *input, Channel channel, IImage *output); // Inherited methods overridden: - void run(const Window &window) override; + void run(const Window &window, const ThreadInfo &info) override; private: /** Extract one channel from a two channel planar tensor. diff --git a/arm_compute/core/NEON/kernels/NECol2ImKernel.h b/arm_compute/core/NEON/kernels/NECol2ImKernel.h index f6bc2152da..43423c008a 100644 --- a/arm_compute/core/NEON/kernels/NECol2ImKernel.h +++ b/arm_compute/core/NEON/kernels/NECol2ImKernel.h @@ -74,7 +74,7 @@ public: void configure(const ITensor *input, ITensor *output, std::pair<unsigned int, unsigned int> convolved_dims); // Inherited methods overridden: - void run(const Window &window) override; + void run(const Window &window, const ThreadInfo &info) override; private: /** Template function to run the col2im diff --git a/arm_compute/core/NEON/kernels/NEColorConvertKernel.h b/arm_compute/core/NEON/kernels/NEColorConvertKernel.h index 2297218117..ded8b31200 100644 --- a/arm_compute/core/NEON/kernels/NEColorConvertKernel.h +++ b/arm_compute/core/NEON/kernels/NEColorConvertKernel.h @@ -76,7 +76,7 @@ public: void configure(const IMultiImage *input, IMultiImage *output); // Inherited methods overridden: - void run(const Window &window) override; + void run(const Window &window, const ThreadInfo &info) override; private: using ColorConvertFunction = void(const void *__restrict input_ptr, void *__restrict output_ptr, const Window &win); diff --git a/arm_compute/core/NEON/kernels/NEConvolutionKernel.h b/arm_compute/core/NEON/kernels/NEConvolutionKernel.h index 588a228a5d..694b58c81f 100644 --- a/arm_compute/core/NEON/kernels/NEConvolutionKernel.h +++ b/arm_compute/core/NEON/kernels/NEConvolutionKernel.h @@ -68,7 +68,7 @@ public: void configure(const ITensor *input, ITensor *output, const int16_t *conv, uint32_t scale, bool border_undefined); // Inherited methods overridden: - void run(const Window &window) override; + void run(const Window &window, const ThreadInfo &info) override; BorderSize border_size() const override; private: @@ -111,7 +111,7 @@ public: void configure(const ITensor *input, ITensor *output, const int16_t *conv_row, bool border_undefined); // Inherited methods overridden: - void run(const Window &window) override; + void run(const Window &window, const ThreadInfo &info) override; BorderSize border_size() const override; private: @@ -152,7 +152,7 @@ public: void configure(const ITensor *input, ITensor *output, const int16_t *conv_col, uint32_t scale, bool border_undefined); // Inherited methods overridden: - void run(const Window &window) override; + void run(const Window &window, const ThreadInfo &info) override; BorderSize border_size() const override; private: @@ -223,7 +223,7 @@ public: void configure(const ITensor *input, ITensor *output, const int16_t *conv, uint32_t width, uint32_t height, uint32_t scale, bool border_undefined); // Inherited methods overridden: - void run(const Window &window) override; + void run(const Window &window, const ThreadInfo &info) override; BorderSize border_size() const override; private: diff --git a/arm_compute/core/NEON/kernels/NECumulativeDistributionKernel.h b/arm_compute/core/NEON/kernels/NECumulativeDistributionKernel.h index 67b8c6052d..8239225259 100644 --- a/arm_compute/core/NEON/kernels/NECumulativeDistributionKernel.h +++ b/arm_compute/core/NEON/kernels/NECumulativeDistributionKernel.h @@ -64,7 +64,7 @@ public: void configure(const IImage *input, const IDistribution1D *distribution, IDistribution1D *cumulative_sum, ILut *output); // Inherited methods overridden: - void run(const Window &window) override; + void run(const Window &window, const ThreadInfo &info) override; bool is_parallelisable() const override; private: diff --git a/arm_compute/core/NEON/kernels/NEDepthConcatenateKernel.h b/arm_compute/core/NEON/kernels/NEDepthConcatenateKernel.h index b22d37bfe6..369ec3de1a 100644 --- a/arm_compute/core/NEON/kernels/NEDepthConcatenateKernel.h +++ b/arm_compute/core/NEON/kernels/NEDepthConcatenateKernel.h @@ -62,7 +62,7 @@ public: void configure(const ITensor *input, unsigned int depth_offset, ITensor *output); // Inherited methods overridden: - void run(const Window &window) override; + void run(const Window &window, const ThreadInfo &info) override; BorderSize border_size() const override; private: diff --git a/arm_compute/core/NEON/kernels/NEDepthConvertKernel.h b/arm_compute/core/NEON/kernels/NEDepthConvertKernel.h index ad8d152bbf..6e338e5d95 100644 --- a/arm_compute/core/NEON/kernels/NEDepthConvertKernel.h +++ b/arm_compute/core/NEON/kernels/NEDepthConvertKernel.h @@ -71,7 +71,7 @@ public: void configure(ITensor *input, ITensor *output, ConvertPolicy policy, uint32_t shift = 0); // Inherited methods overridden: - void run(const Window &window) override; + void run(const Window &window, const ThreadInfo &info) override; private: ITensor *_input; diff --git a/arm_compute/core/NEON/kernels/NEDequantizationLayerKernel.h b/arm_compute/core/NEON/kernels/NEDequantizationLayerKernel.h index 4b26fd54e0..095a833ab4 100644 --- a/arm_compute/core/NEON/kernels/NEDequantizationLayerKernel.h +++ b/arm_compute/core/NEON/kernels/NEDequantizationLayerKernel.h @@ -56,7 +56,7 @@ public: void configure(const ITensor *input, ITensor *output, const float *min, const float *max); // Inherited methods overridden: - void run(const Window &window) override; + void run(const Window &window, const ThreadInfo &info) override; private: const ITensor *_input; diff --git a/arm_compute/core/NEON/kernels/NEDerivativeKernel.h b/arm_compute/core/NEON/kernels/NEDerivativeKernel.h index abb8a894c0..5ffa4afdfa 100644 --- a/arm_compute/core/NEON/kernels/NEDerivativeKernel.h +++ b/arm_compute/core/NEON/kernels/NEDerivativeKernel.h @@ -58,7 +58,7 @@ public: void configure(const ITensor *input, ITensor *output_x, ITensor *output_y, bool border_undefined); // Inherited methods overridden: - void run(const Window &window) override; + void run(const Window &window, const ThreadInfo &info) override; BorderSize border_size() const override; private: diff --git a/arm_compute/core/NEON/kernels/NEDilateKernel.h b/arm_compute/core/NEON/kernels/NEDilateKernel.h index 05f148a1fd..156957ceea 100644 --- a/arm_compute/core/NEON/kernels/NEDilateKernel.h +++ b/arm_compute/core/NEON/kernels/NEDilateKernel.h @@ -42,7 +42,7 @@ public: */ void configure(const ITensor *input, ITensor *output, bool border_undefined); // Inherited methods overridden: - void run(const Window &window) override; + void run(const Window &window, const ThreadInfo &info) override; BorderSize border_size() const override; }; } diff --git a/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerBiasAccumulateKernel.h b/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerBiasAccumulateKernel.h index 87788ba389..b68641fdc1 100644 --- a/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerBiasAccumulateKernel.h +++ b/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerBiasAccumulateKernel.h @@ -59,7 +59,7 @@ public: void configure(ITensor *input, const ITensor *bias, ITensor *output = nullptr); // Inherited methods overridden: - void run(const Window &window) override; + void run(const Window &window, const ThreadInfo &info) override; private: using BiasAccumulateKernel = void(ITensor *input, const ITensor *bias, const Window window, ITensor *output); diff --git a/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerKernel.h b/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerKernel.h index 5672782cba..4de6c12218 100644 --- a/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerKernel.h +++ b/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerKernel.h @@ -64,7 +64,7 @@ public: void configure(const ITensor *input, const ITensor *weights, ITensor *output, const PadStrideInfo &conv_info); // Inherited methods overridden: - void run(const Window &window) override; + void run(const Window &window, const ThreadInfo &info) override; BorderSize border_size() const override; private: diff --git a/arm_compute/core/NEON/kernels/NEErodeKernel.h b/arm_compute/core/NEON/kernels/NEErodeKernel.h index 86dc217cc0..df0e253f4d 100644 --- a/arm_compute/core/NEON/kernels/NEErodeKernel.h +++ b/arm_compute/core/NEON/kernels/NEErodeKernel.h @@ -42,7 +42,7 @@ public: */ void configure(const ITensor *input, ITensor *output, bool border_undefined); // Inherited methods overridden: - void run(const Window &window) override; + void run(const Window &window, const ThreadInfo &info) override; BorderSize border_size() const override; }; } diff --git a/arm_compute/core/NEON/kernels/NEFastCornersKernel.h b/arm_compute/core/NEON/kernels/NEFastCornersKernel.h index b30ef49c44..1241426dc2 100644 --- a/arm_compute/core/NEON/kernels/NEFastCornersKernel.h +++ b/arm_compute/core/NEON/kernels/NEFastCornersKernel.h @@ -59,7 +59,7 @@ public: void configure(const IImage *input, IImage *output, uint8_t threshold, bool non_max_suppression, bool border_undefined); // Inherited methods overridden: - void run(const Window &window) override; + void run(const Window &window, const ThreadInfo &info) override; BorderSize border_size() const override; private: diff --git a/arm_compute/core/NEON/kernels/NEFillArrayKernel.h b/arm_compute/core/NEON/kernels/NEFillArrayKernel.h index 38f8a01288..4f1ac2bc78 100644 --- a/arm_compute/core/NEON/kernels/NEFillArrayKernel.h +++ b/arm_compute/core/NEON/kernels/NEFillArrayKernel.h @@ -61,7 +61,7 @@ public: void configure(const IImage *input, uint8_t threshold, IKeyPointArray *output); // Inherited methods overridden: - void run(const Window &window) override; + void run(const Window &window, const ThreadInfo &info) override; bool is_parallelisable() const override; private: diff --git a/arm_compute/core/NEON/kernels/NEFillBorderKernel.h b/arm_compute/core/NEON/kernels/NEFillBorderKernel.h index 3ec66115e2..50cb417eb0 100644 --- a/arm_compute/core/NEON/kernels/NEFillBorderKernel.h +++ b/arm_compute/core/NEON/kernels/NEFillBorderKernel.h @@ -62,7 +62,7 @@ public: void configure(ITensor *tensor, BorderSize border_size, BorderMode border_mode, const PixelValue &constant_border_value = PixelValue()); // Inherited methods overridden: - void run(const Window &window) override; + void run(const Window &window, const ThreadInfo &info) override; private: template <typename T> diff --git a/arm_compute/core/NEON/kernels/NEFillInnerBorderKernel.h b/arm_compute/core/NEON/kernels/NEFillInnerBorderKernel.h index 61e6e46463..6922c332cc 100644 --- a/arm_compute/core/NEON/kernels/NEFillInnerBorderKernel.h +++ b/arm_compute/core/NEON/kernels/NEFillInnerBorderKernel.h @@ -61,7 +61,7 @@ public: void configure(ITensor *input, BorderSize border_size, const PixelValue &constant_border_value = PixelValue()); // Inherited methods overridden: - void run(const Window &window) override; + void run(const Window &window, const ThreadInfo &info) override; private: template <typename T> diff --git a/arm_compute/core/NEON/kernels/NEFloorKernel.h b/arm_compute/core/NEON/kernels/NEFloorKernel.h index 3b925b093b..76b02a56ef 100644 --- a/arm_compute/core/NEON/kernels/NEFloorKernel.h +++ b/arm_compute/core/NEON/kernels/NEFloorKernel.h @@ -41,7 +41,7 @@ public: */ void configure(const ITensor *input, ITensor *output); // Inherited methods overridden: - void run(const Window &window) override; + void run(const Window &window, const ThreadInfo &info) override; }; } #endif /*__ARM_COMPUTE_NEFLOORKERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h b/arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h index 84b82d0ffc..74ef3045a4 100644 --- a/arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h +++ b/arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h @@ -62,7 +62,7 @@ public: void configure(const ITensor *input, ITensor *output); // Inherited methods overridden: - void run(const Window &window) override; + void run(const Window &window, const ThreadInfo &info) override; private: /** Common signature for all the transpose functions diff --git a/arm_compute/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h b/arm_compute/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h index ba4dcc3373..8822a97d4d 100644 --- a/arm_compute/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h +++ b/arm_compute/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h @@ -72,7 +72,7 @@ public: */ void configure(const ITensor *input0, const ITensor *input1, ITensor *output, int32_t a_offset, int32_t b_offset, int32_t output_offset, int32_t output_mult_int, int32_t shift); // Inherited methods overridden: - void run(const Window &window) override; + void run(const Window &window, const ThreadInfo &info) override; private: const ITensor *_input0; diff --git a/arm_compute/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.h b/arm_compute/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.h index 1eed4e7a84..202f96744b 100644 --- a/arm_compute/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.h +++ b/arm_compute/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.h @@ -53,7 +53,7 @@ public: void configure(ITensor *accum, const ITensor *biases); // Inherited methods overridden: - void run(const Window &window) override; + void run(const Window &window, const ThreadInfo &info) override; private: ITensor *_accum; diff --git a/arm_compute/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h b/arm_compute/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h index 5cdcc95ee9..79ccb913a4 100644 --- a/arm_compute/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h +++ b/arm_compute/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h @@ -62,7 +62,7 @@ public: void configure(const ITensor *input, ITensor *output, float beta); // Inherited methods overridden: - void run(const Window &window) override; + void run(const Window &window, const ThreadInfo &info) override; private: /** Common signature for all the matrix addition functions diff --git a/arm_compute/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h b/arm_compute/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h index e82fc6f5d7..afa457d8ba 100644 --- a/arm_compute/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h +++ b/arm_compute/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h @@ -63,7 +63,7 @@ public: void configure(const ITensor *input0, const ITensor *input1, ITensor *output, float alpha); // Inherited methods overridden: - void run(const Window &window) override; + void run(const Window &window, const ThreadInfo &info) override; private: const ITensor *_input0; diff --git a/arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h b/arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h index 22c07e5c9a..58140bcd1d 100644 --- a/arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h +++ b/arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h @@ -76,7 +76,7 @@ public: void configure(const ITensor *input, ITensor *output); // Inherited methods overridden: - void run(const Window &window) override; + void run(const Window &window, const ThreadInfo &info) override; }; } #endif /*__ARM_COMPUTE_NEGEMMTRANSPOSE1xWKERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NEGaussian3x3Kernel.h b/arm_compute/core/NEON/kernels/NEGaussian3x3Kernel.h index 763fab88f6..8ed3183176 100644 --- a/arm_compute/core/NEON/kernels/NEGaussian3x3Kernel.h +++ b/arm_compute/core/NEON/kernels/NEGaussian3x3Kernel.h @@ -43,7 +43,7 @@ public: void configure(const ITensor *input, ITensor *output, bool border_undefined); // Inherited methods overridden: - void run(const Window &window) override; + void run(const Window &window, const ThreadInfo &info) override; BorderSize border_size() const override; }; } diff --git a/arm_compute/core/NEON/kernels/NEGaussian5x5Kernel.h b/arm_compute/core/NEON/kernels/NEGaussian5x5Kernel.h index 86b28907da..094e6d1885 100644 --- a/arm_compute/core/NEON/kernels/NEGaussian5x5Kernel.h +++ b/arm_compute/core/NEON/kernels/NEGaussian5x5Kernel.h @@ -46,7 +46,7 @@ public: void configure(const ITensor *input, ITensor *output, bool border_undefined); // Inherited methods overridden: - void run(const Window &window) override; + void run(const Window &window, const ThreadInfo &info) override; BorderSize border_size() const override; private: @@ -66,7 +66,7 @@ public: void configure(const ITensor *input, ITensor *output, bool border_undefined); // Inherited methods overridden: - void run(const Window &window) override; + void run(const Window &window, const ThreadInfo &info) override; BorderSize border_size() const override; }; } diff --git a/arm_compute/core/NEON/kernels/NEGaussianPyramidKernel.h b/arm_compute/core/NEON/kernels/NEGaussianPyramidKernel.h index 40a6aa7375..ce03aa289a 100644 --- a/arm_compute/core/NEON/kernels/NEGaussianPyramidKernel.h +++ b/arm_compute/core/NEON/kernels/NEGaussianPyramidKernel.h @@ -56,7 +56,7 @@ public: void configure(const ITensor *input, ITensor *output, bool border_undefined); // Inherited methods overridden: - void run(const Window &window) override; + void run(const Window &window, const ThreadInfo &info) override; BorderSize border_size() const override; private: @@ -90,7 +90,7 @@ public: void configure(const ITensor *input, ITensor *output, bool border_undefined); // Inherited methods overridden: - void run(const Window &window) override; + void run(const Window &window, const ThreadInfo &info) override; BorderSize border_size() const override; private: diff --git a/arm_compute/core/NEON/kernels/NEHOGDescriptorKernel.h b/arm_compute/core/NEON/kernels/NEHOGDescriptorKernel.h index dd85778b8a..844bc33df1 100644 --- a/arm_compute/core/NEON/kernels/NEHOGDescriptorKernel.h +++ b/arm_compute/core/NEON/kernels/NEHOGDescriptorKernel.h @@ -59,7 +59,7 @@ public: void configure(const ITensor *input_magnitude, const ITensor *input_phase, ITensor *output, const HOGInfo *hog_info); // Inherited methods overridden: - void run(const Window &window) override; + void run(const Window &window, const ThreadInfo &info) override; private: /** Common signature for all the specialised block normalization functions @@ -113,7 +113,7 @@ public: void configure(const ITensor *input, ITensor *output, const HOGInfo *hog_info); // Inherited methods overridden: - void run(const Window &window) override; + void run(const Window &window, const ThreadInfo &info) override; private: /** Common signature for all the specialised block normalization functions diff --git a/arm_compute/core/NEON/kernels/NEHOGDetectorKernel.h b/arm_compute/core/NEON/kernels/NEHOGDetectorKernel.h index b6b9d3458a..6495ee74ac 100644 --- a/arm_compute/core/NEON/kernels/NEHOGDetectorKernel.h +++ b/arm_compute/core/NEON/kernels/NEHOGDetectorKernel.h @@ -63,7 +63,7 @@ public: void configure(const ITensor *input, const IHOG *hog, IDetectionWindowArray *detection_windows, const Size2D &detection_window_stride, float threshold = 0.0f, uint16_t idx_class = 0); // Inherited methods overridden: - void run(const Window &window) override; + void run(const Window &window, const ThreadInfo &info) override; private: const ITensor *_input; diff --git a/arm_compute/core/NEON/kernels/NEHarrisCornersKernel.h b/arm_compute/core/NEON/kernels/NEHarrisCornersKernel.h index 88d70657f6..4a468c220c 100644 --- a/arm_compute/core/NEON/kernels/NEHarrisCornersKernel.h +++ b/arm_compute/core/NEON/kernels/NEHarrisCornersKernel.h @@ -89,7 +89,7 @@ public: // Inherited methods overridden: void configure(const IImage *input1, const IImage *input2, IImage *output, float norm_factor, float strength_thresh, float sensitivity, bool border_undefined) override; BorderSize border_size() const override; - void run(const Window &window) override; + void run(const Window &window, const ThreadInfo &info) override; private: /** Common signature for all the specialised harris score functions */ @@ -110,7 +110,7 @@ public: // Inherited methods overridden: void configure(const IImage *input1, const IImage *input2, IImage *output, float norm_factor, float strength_thresh, float sensitivity, bool border_undefined) override; BorderSize border_size() const override; - void run(const Window &window) override; + void run(const Window &window, const ThreadInfo &info) override; private: using HarrisScoreFunction = void(const void *__restrict input1_ptr, const void *__restrict input2_ptr, void *__restrict output_ptr, int32_t input_stride, diff --git a/arm_compute/core/NEON/kernels/NEHistogramKernel.h b/arm_compute/core/NEON/kernels/NEHistogramKernel.h index 2c0f34b5a7..75d00e9d31 100644 --- a/arm_compute/core/NEON/kernels/NEHistogramKernel.h +++ b/arm_compute/core/NEON/kernels/NEHistogramKernel.h @@ -77,7 +77,7 @@ public: void configure(const IImage *input, IDistribution1D *output); // Inherited methods overridden: - void run(const Window &window) override; + void run(const Window &window, const ThreadInfo &info) override; private: /** Function to merge multiple partial histograms. @@ -95,15 +95,17 @@ private: void merge_min(uint8_t *global_min, const uint8_t &local_min); /** Function to perform histogram on the given window * - * @param[in] win Region on which to execute the kernel + * @param[in] win Region on which to execute the kernel + * @param[in] info Info about the executing thread */ - void histogram_U8(Window win); + void histogram_U8(Window win, const ThreadInfo &info); /** Function to perform histogram on the given window where histogram is * of fixed size 256 without ranges and offsets. * - * @param[in] win Region on which to execute the kernel + * @param[in] win Region on which to execute the kernel + * @param[in] info Info about the executing thread */ - void histogram_fixed_U8(Window win); + void histogram_fixed_U8(Window win, const ThreadInfo &info); /** Pre-calculate the pixel windowing for every possible pixel * * Calculate (V - offset) * numBins / range where V is every possible pixel value. @@ -115,7 +117,7 @@ private: * * @param[in] window Region on which to execute the kernel. */ - using HistogramFunctionPtr = void (NEHistogramKernel::*)(Window window); + using HistogramFunctionPtr = void (NEHistogramKernel::*)(Window window, const ThreadInfo &info); HistogramFunctionPtr _func; ///< Histogram function to use for the particular image types passed to configure() const IImage *_input; diff --git a/arm_compute/core/NEON/kernels/NEIm2ColKernel.h b/arm_compute/core/NEON/kernels/NEIm2ColKernel.h index 87d7cc0a8b..8ef27ea52d 100644 --- a/arm_compute/core/NEON/kernels/NEIm2ColKernel.h +++ b/arm_compute/core/NEON/kernels/NEIm2ColKernel.h @@ -82,7 +82,7 @@ public: void configure(const ITensor *input, ITensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias); // Inherited methods overridden: - void run(const Window &window) override; + void run(const Window &window, const ThreadInfo &info) override; private: /** Template function to run the im2col optimised for the fully connected layer case diff --git a/arm_compute/core/NEON/kernels/NEIntegralImageKernel.h b/arm_compute/core/NEON/kernels/NEIntegralImageKernel.h index 13647889ab..b3493f41d7 100644 --- a/arm_compute/core/NEON/kernels/NEIntegralImageKernel.h +++ b/arm_compute/core/NEON/kernels/NEIntegralImageKernel.h @@ -42,7 +42,7 @@ public: void configure(const ITensor *input, ITensor *output); // Inherited methods overridden: - void run(const Window &window) override; + void run(const Window &window, const ThreadInfo &info) override; BorderSize border_size() const override; bool is_parallelisable() const override; }; diff --git a/arm_compute/core/NEON/kernels/NEL2NormalizeKernel.h b/arm_compute/core/NEON/kernels/NEL2NormalizeKernel.h index 545aeaa1d3..fa1795510e 100644 --- a/arm_compute/core/NEON/kernels/NEL2NormalizeKernel.h +++ b/arm_compute/core/NEON/kernels/NEL2NormalizeKernel.h @@ -57,7 +57,7 @@ public: void configure(const ITensor *input, const ITensor *sum, ITensor *output, unsigned int axis, float epsilon); // Inherited methods overridden: - void run(const Window &window) override; + void run(const Window &window, const ThreadInfo &info) override; private: const ITensor *_input; diff --git a/arm_compute/core/NEON/kernels/NELKTrackerKernel.h b/arm_compute/core/NEON/kernels/NELKTrackerKernel.h index 9ab7f91092..d4e1cbafb7 100644 --- a/arm_compute/core/NEON/kernels/NELKTrackerKernel.h +++ b/arm_compute/core/NEON/kernels/NELKTrackerKernel.h @@ -91,7 +91,7 @@ public: size_t level, size_t num_levels, float pyramid_scale); // Inherited methods overridden: - void run(const Window &window) override; + void run(const Window &window, const ThreadInfo &info) override; BorderSize border_size() const override; private: diff --git a/arm_compute/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.h b/arm_compute/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.h index a2c6b51101..7044b2e4aa 100644 --- a/arm_compute/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.h +++ b/arm_compute/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.h @@ -53,7 +53,7 @@ public: void configure(const ITensor *input0, const ITensor *input1, ITensor *output); // Inherited methods overridden: - void run(const Window &window) override; + void run(const Window &window, const ThreadInfo &info) override; private: const ITensor *_input0; diff --git a/arm_compute/core/NEON/kernels/NEMagnitudePhaseKernel.h b/arm_compute/core/NEON/kernels/NEMagnitudePhaseKernel.h index 8285e5ed39..94ad0aa27c 100644 --- a/arm_compute/core/NEON/kernels/NEMagnitudePhaseKernel.h +++ b/arm_compute/core/NEON/kernels/NEMagnitudePhaseKernel.h @@ -61,7 +61,7 @@ public: void configure(const ITensor *gx, const ITensor *gy, ITensor *magnitude, ITensor *phase); // Inherited methods overridden: - void run(const Window &window) override; + void run(const Window &window, const ThreadInfo &info) override; private: /** Function to perform magnitude on the given window @@ -125,7 +125,7 @@ public: void configure(const ITensor *gx, const ITensor *gy, ITensor *magnitude, ITensor *phase); // Inherited methods overridden: - void run(const Window &window) override; + void run(const Window &window, const ThreadInfo &info) override; private: /** Function to perform magnitude on the given window diff --git a/arm_compute/core/NEON/kernels/NEMeanStdDevKernel.h b/arm_compute/core/NEON/kernels/NEMeanStdDevKernel.h index ce2c5eb4fa..0106d813f7 100644 --- a/arm_compute/core/NEON/kernels/NEMeanStdDevKernel.h +++ b/arm_compute/core/NEON/kernels/NEMeanStdDevKernel.h @@ -62,7 +62,7 @@ public: void configure(const IImage *input, float *mean, uint64_t *global_sum, float *stddev = nullptr, uint64_t *global_sum_squared = nullptr); // Inherited methods overridden: - void run(const Window &window) override; + void run(const Window &window, const ThreadInfo &info) override; BorderSize border_size() const override; diff --git a/arm_compute/core/NEON/kernels/NEMedian3x3Kernel.h b/arm_compute/core/NEON/kernels/NEMedian3x3Kernel.h index dee1aadfb9..66bfc5dcdd 100644 --- a/arm_compute/core/NEON/kernels/NEMedian3x3Kernel.h +++ b/arm_compute/core/NEON/kernels/NEMedian3x3Kernel.h @@ -43,7 +43,7 @@ public: void configure(const ITensor *input, ITensor *output, bool border_undefined); // Inherited methods overridden: - void run(const Window &window) override; + void run(const Window &window, const ThreadInfo &info) override; BorderSize border_size() const override; }; } diff --git a/arm_compute/core/NEON/kernels/NEMinMaxLocationKernel.h b/arm_compute/core/NEON/kernels/NEMinMaxLocationKernel.h index ddaa01773b..1d9aaf5df7 100644 --- a/arm_compute/core/NEON/kernels/NEMinMaxLocationKernel.h +++ b/arm_compute/core/NEON/kernels/NEMinMaxLocationKernel.h @@ -63,7 +63,7 @@ public: void reset(); // Inherited methods overridden: - void run(const Window &window) override; + void run(const Window &window, const ThreadInfo &info) override; private: /** Performs the min/max algorithm on U8 images on a given window. @@ -130,7 +130,7 @@ public: uint32_t *min_count = nullptr, uint32_t *max_count = nullptr); // Inherited methods overridden: - void run(const Window &window) override; + void run(const Window &window, const ThreadInfo &info) override; bool is_parallelisable() const override; private: diff --git a/arm_compute/core/NEON/kernels/NENonLinearFilterKernel.h b/arm_compute/core/NEON/kernels/NENonLinearFilterKernel.h index ede0294a73..a9f659c9b8 100644 --- a/arm_compute/core/NEON/kernels/NENonLinearFilterKernel.h +++ b/arm_compute/core/NEON/kernels/NENonLinearFilterKernel.h @@ -60,7 +60,7 @@ public: void configure(const ITensor *input, ITensor *output, NonLinearFilterFunction function, unsigned int mask_size, MatrixPattern pattern, const uint8_t *mask, bool border_undefined); // Inherited methods overridden: - void run(const Window &window) override; + void run(const Window &window, const ThreadInfo &info) override; BorderSize border_size() const override; private: diff --git a/arm_compute/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.h b/arm_compute/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.h index 68f11d8ffb..e2b691425e 100644 --- a/arm_compute/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.h +++ b/arm_compute/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.h @@ -61,7 +61,7 @@ public: void configure(const ITensor *input, ITensor *output, bool border_undefined); // Inherited methods overridden: - void run(const Window &window) override; + void run(const Window &window, const ThreadInfo &info) override; BorderSize border_size() const override; protected: diff --git a/arm_compute/core/NEON/kernels/NENormalizationLayerKernel.h b/arm_compute/core/NEON/kernels/NENormalizationLayerKernel.h index e24e481f46..00750ebe97 100644 --- a/arm_compute/core/NEON/kernels/NENormalizationLayerKernel.h +++ b/arm_compute/core/NEON/kernels/NENormalizationLayerKernel.h @@ -59,7 +59,7 @@ public: void configure(const ITensor *input, const ITensor *input_squared, ITensor *output, NormalizationLayerInfo norm_info); // Inherited methods overridden: - void run(const Window &window) override; + void run(const Window &window, const ThreadInfo &info) override; BorderSize border_size() const override; private: diff --git a/arm_compute/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h b/arm_compute/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h index bf96c9026c..35dda29c67 100644 --- a/arm_compute/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h +++ b/arm_compute/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h @@ -64,7 +64,7 @@ public: void configure(const ITensor *input1, const ITensor *input2, ITensor *output, float scale, ConvertPolicy overflow_policy, RoundingPolicy rounding_policy); // Inherited methods overridden: - void run(const Window &window) override; + void run(const Window &window, const ThreadInfo &info) override; private: /** Common signature for all the specialised multiplication functions with integer scaling factor diff --git a/arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h b/arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h index 8a938a7f34..956db6a67c 100644 --- a/arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h +++ b/arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h @@ -55,7 +55,7 @@ public: void configure(const ITensor *input, ITensor *output, const PoolingLayerInfo &pool_info); // Inherited methods overridden: - void run(const Window &window) override; + void run(const Window &window, const ThreadInfo &info) override; BorderSize border_size() const override; private: diff --git a/arm_compute/core/NEON/kernels/NEQuantizationLayerKernel.h b/arm_compute/core/NEON/kernels/NEQuantizationLayerKernel.h index 9867e1db03..92cd142653 100644 --- a/arm_compute/core/NEON/kernels/NEQuantizationLayerKernel.h +++ b/arm_compute/core/NEON/kernels/NEQuantizationLayerKernel.h @@ -56,7 +56,7 @@ public: void configure(const ITensor *input, ITensor *output, const float *min, const float *max); // Inherited methods overridden: - void run(const Window &window) override; + void run(const Window &window, const ThreadInfo &info) override; private: const ITensor *_input; diff --git a/arm_compute/core/NEON/kernels/NEROIPoolingLayerKernel.h b/arm_compute/core/NEON/kernels/NEROIPoolingLayerKernel.h index 40f79acc79..5610420491 100644 --- a/arm_compute/core/NEON/kernels/NEROIPoolingLayerKernel.h +++ b/arm_compute/core/NEON/kernels/NEROIPoolingLayerKernel.h @@ -64,7 +64,7 @@ public: void configure(const ITensor *input, const IROIArray *rois, ITensor *output, const ROIPoolingLayerInfo &pool_info); // Inherited methods overridden: - void run(const Window &window) override; + void run(const Window &window, const ThreadInfo &info) override; private: const ITensor *_input; diff --git a/arm_compute/core/NEON/kernels/NEReductionOperationKernel.h b/arm_compute/core/NEON/kernels/NEReductionOperationKernel.h index 147e8351d6..e41f415ead 100644 --- a/arm_compute/core/NEON/kernels/NEReductionOperationKernel.h +++ b/arm_compute/core/NEON/kernels/NEReductionOperationKernel.h @@ -55,7 +55,7 @@ public: */ void configure(const ITensor *input, ITensor *output, unsigned int axis, ReductionOperation op); // Inherited methods overridden: - void run(const Window &window) override; + void run(const Window &window, const ThreadInfo &info) override; BorderSize border_size() const override; private: diff --git a/arm_compute/core/NEON/kernels/NERemapKernel.h b/arm_compute/core/NEON/kernels/NERemapKernel.h index f9eae68ee8..eeeef36b6e 100644 --- a/arm_compute/core/NEON/kernels/NERemapKernel.h +++ b/arm_compute/core/NEON/kernels/NERemapKernel.h @@ -59,7 +59,7 @@ public: void configure(const ITensor *input, const ITensor *map_x, const ITensor *map_y, ITensor *output, InterpolationPolicy policy); // Inherited methods overridden: - void run(const Window &window) override; + void run(const Window &window, const ThreadInfo &info) override; private: /** function to perform nearest interpolation on the given window */ diff --git a/arm_compute/core/NEON/kernels/NEScaleKernel.h b/arm_compute/core/NEON/kernels/NEScaleKernel.h index 03e26520b5..3cac023d8a 100644 --- a/arm_compute/core/NEON/kernels/NEScaleKernel.h +++ b/arm_compute/core/NEON/kernels/NEScaleKernel.h @@ -63,7 +63,7 @@ public: void configure(const ITensor *input, const ITensor *dx, const ITensor *dy, const ITensor *offsets, ITensor *output, InterpolationPolicy policy, bool border_undefined); // Inherited methods overridden: - void run(const Window &window) override; + void run(const Window &window, const ThreadInfo &info) override; BorderSize border_size() const override; private: diff --git a/arm_compute/core/NEON/kernels/NEScharr3x3Kernel.h b/arm_compute/core/NEON/kernels/NEScharr3x3Kernel.h index c618456d49..78f0682b7a 100644 --- a/arm_compute/core/NEON/kernels/NEScharr3x3Kernel.h +++ b/arm_compute/core/NEON/kernels/NEScharr3x3Kernel.h @@ -68,7 +68,7 @@ public: void configure(const ITensor *input, ITensor *output_x, ITensor *output_y, bool border_undefined); // Inherited methods overridden: - void run(const Window &window) override; + void run(const Window &window, const ThreadInfo &info) override; BorderSize border_size() const override; private: diff --git a/arm_compute/core/NEON/kernels/NESobel3x3Kernel.h b/arm_compute/core/NEON/kernels/NESobel3x3Kernel.h index 246dd83573..8e5ba86b80 100644 --- a/arm_compute/core/NEON/kernels/NESobel3x3Kernel.h +++ b/arm_compute/core/NEON/kernels/NESobel3x3Kernel.h @@ -68,7 +68,7 @@ public: void configure(const ITensor *input, ITensor *output_x, ITensor *output_y, bool border_undefined); // Inherited methods overridden: - void run(const Window &window) override; + void run(const Window &window, const ThreadInfo &info) override; BorderSize border_size() const override; private: diff --git a/arm_compute/core/NEON/kernels/NESobel5x5Kernel.h b/arm_compute/core/NEON/kernels/NESobel5x5Kernel.h index 49c1c41e6d..5dfce8d798 100644 --- a/arm_compute/core/NEON/kernels/NESobel5x5Kernel.h +++ b/arm_compute/core/NEON/kernels/NESobel5x5Kernel.h @@ -61,7 +61,7 @@ public: void configure(const ITensor *input, ITensor *output_x, ITensor *output_y, bool border_undefined); // Inherited methods overridden: - void run(const Window &window) override; + void run(const Window &window, const ThreadInfo &info) override; BorderSize border_size() const override; private: @@ -103,7 +103,7 @@ public: void configure(ITensor *input_x, ITensor *input_y, ITensor *output_x, ITensor *output_y, bool border_undefined); // Inherited methods overridden: - void run(const Window &window) override; + void run(const Window &window, const ThreadInfo &info) override; BorderSize border_size() const override; private: diff --git a/arm_compute/core/NEON/kernels/NESobel7x7Kernel.h b/arm_compute/core/NEON/kernels/NESobel7x7Kernel.h index 4bff8596b8..b04aaaeed0 100644 --- a/arm_compute/core/NEON/kernels/NESobel7x7Kernel.h +++ b/arm_compute/core/NEON/kernels/NESobel7x7Kernel.h @@ -61,7 +61,7 @@ public: void configure(const ITensor *input, ITensor *output_x, ITensor *output_y, bool border_undefined); // Inherited methods overridden: - void run(const Window &window) override; + void run(const Window &window, const ThreadInfo &info) override; BorderSize border_size() const override; private: @@ -107,7 +107,7 @@ public: void configure(const ITensor *input_x, const ITensor *input_y, ITensor *output_x, ITensor *output_y, bool border_undefined); // Inherited methods overridden: - void run(const Window &window) override; + void run(const Window &window, const ThreadInfo &info) override; BorderSize border_size() const override; private: diff --git a/arm_compute/core/NEON/kernels/NESoftmaxLayerKernel.h b/arm_compute/core/NEON/kernels/NESoftmaxLayerKernel.h index 2caef55498..4897a39421 100644 --- a/arm_compute/core/NEON/kernels/NESoftmaxLayerKernel.h +++ b/arm_compute/core/NEON/kernels/NESoftmaxLayerKernel.h @@ -45,7 +45,7 @@ public: void configure(const ITensor *input, ITensor *output); // Inherited methods overridden: - void run(const Window &window) override; + void run(const Window &window, const ThreadInfo &info) override; BorderSize border_size() const override; private: @@ -82,7 +82,7 @@ public: void configure(const ITensor *input, const ITensor *max, ITensor *output, ITensor *sum); // Inherited methods overridden: - void run(const Window &window) override; + void run(const Window &window, const ThreadInfo &info) override; private: using Logits1DShiftExpSumFunction = void(const ITensor *in, const ITensor *max, ITensor *out, ITensor *sum, const Window &window); @@ -120,7 +120,7 @@ public: void configure(const ITensor *input, const ITensor *sum, ITensor *output); // Inherited methods overridden: - void run(const Window &window) override; + void run(const Window &window, const ThreadInfo &info) override; private: using Logits1DNormFunction = void(const ITensor *in, const ITensor *sum, ITensor *out, const Window &window); diff --git a/arm_compute/core/NEON/kernels/NETableLookupKernel.h b/arm_compute/core/NEON/kernels/NETableLookupKernel.h index b3963e5a75..2cac5c4957 100644 --- a/arm_compute/core/NEON/kernels/NETableLookupKernel.h +++ b/arm_compute/core/NEON/kernels/NETableLookupKernel.h @@ -54,7 +54,7 @@ public: void configure(const ITensor *input, const ILut *lut, ITensor *output); // Inherited methods overridden: - void run(const Window &window) override; + void run(const Window &window, const ThreadInfo &info) override; private: /** Perform table lookup on a given window. diff --git a/arm_compute/core/NEON/kernels/NEThresholdKernel.h b/arm_compute/core/NEON/kernels/NEThresholdKernel.h index 778176293f..ab6f6f75d2 100644 --- a/arm_compute/core/NEON/kernels/NEThresholdKernel.h +++ b/arm_compute/core/NEON/kernels/NEThresholdKernel.h @@ -60,7 +60,7 @@ public: void configure(const ITensor *input, ITensor *output, uint8_t threshold, uint8_t false_value, uint8_t true_value, ThresholdType type, uint8_t upper); // Inherited methods overridden: - void run(const Window &window) override; + void run(const Window &window, const ThreadInfo &info) override; private: /** run binary thresholding on the given window */ diff --git a/arm_compute/core/NEON/kernels/NETransposeKernel.h b/arm_compute/core/NEON/kernels/NETransposeKernel.h index 2f757f18eb..6399149a9c 100644 --- a/arm_compute/core/NEON/kernels/NETransposeKernel.h +++ b/arm_compute/core/NEON/kernels/NETransposeKernel.h @@ -59,7 +59,7 @@ public: void configure(const ITensor *input, ITensor *output); // Inherited methods overridden: - void run(const Window &window) override; + void run(const Window &window, const ThreadInfo &info) override; private: /** Common signature for all the transpose functions diff --git a/arm_compute/core/NEON/kernels/NEWarpKernel.h b/arm_compute/core/NEON/kernels/NEWarpKernel.h index 39f6496edb..c24adb80ab 100644 --- a/arm_compute/core/NEON/kernels/NEWarpKernel.h +++ b/arm_compute/core/NEON/kernels/NEWarpKernel.h @@ -58,7 +58,7 @@ public: virtual void configure(const ITensor *input, ITensor *output, const float *matrix, BorderMode border_mode, uint8_t constant_border_value); // Inherited methods overridden: - void run(const Window &window) override; + void run(const Window &window, const ThreadInfo &info) override; // Inherited methods overridden: BorderSize border_size() const override; diff --git a/arm_compute/core/NEON/kernels/NEWeightsReshapeKernel.h b/arm_compute/core/NEON/kernels/NEWeightsReshapeKernel.h index 6b76d19314..7c4b1fa174 100644 --- a/arm_compute/core/NEON/kernels/NEWeightsReshapeKernel.h +++ b/arm_compute/core/NEON/kernels/NEWeightsReshapeKernel.h @@ -79,7 +79,7 @@ public: void configure(const ITensor *input, const ITensor *bias, ITensor *output); // Inherited methods overridden: - void run(const Window &window) override; + void run(const Window &window, const ThreadInfo &info) override; private: using WeightsReshapeKernel = void(const ITensor *input, const ITensor *bias, ITensor *output, const Window &window); diff --git a/arm_compute/core/Window.h b/arm_compute/core/Window.h index a2fbc8f06f..baf767e7d5 100644 --- a/arm_compute/core/Window.h +++ b/arm_compute/core/Window.h @@ -48,7 +48,7 @@ public: /** Default constructor: create a window containing a single element. */ constexpr Window() - : _dims(), _thread_id(0), _num_threads(1) + : _dims() { } /** Copy constructor @@ -293,38 +293,6 @@ public: { return slide_window_slice<4>(slice); } - /** Sets the ID of the thread that the window is associated with. - * - * @param id ID of the thread that the window is associated with. - */ - void set_thread_id(unsigned int id) - { - _thread_id = id; - } - /** Sets the number of threads dispatched that the window is associated with. - * - * @param num_threads The number of threads dispatched that the window is associated with. - */ - void set_num_threads(unsigned int num_threads) - { - _num_threads = num_threads; - } - /** Get the ID of the thread that the window is associated with. - * - * @return ID of the thread that the window is associated with. - */ - constexpr unsigned int thread_id() const - { - return _thread_id; - } - /** Get the number of threads dispatched that the window is associated with. - * - * @return The number of threads dispatched that the window is associated with. - */ - constexpr unsigned int num_threads() const - { - return _num_threads; - } /* Collapse the dimensions higher than @p first if possible. * @@ -358,8 +326,6 @@ private: private: std::array<Dimension, Coordinates::num_max_dimensions> _dims; - unsigned int _thread_id; - unsigned int _num_threads; }; } #include "Window.inl" diff --git a/arm_compute/core/Window.inl b/arm_compute/core/Window.inl index c33613fcea..01cd988ea0 100644 --- a/arm_compute/core/Window.inl +++ b/arm_compute/core/Window.inl @@ -24,7 +24,7 @@ namespace arm_compute { inline Window::Window(const Window &src) - : _dims(), _thread_id(src._thread_id), _num_threads(src._num_threads) + : _dims() { for(size_t i = 0; i < Coordinates::num_max_dimensions; ++i) { @@ -32,12 +32,12 @@ inline Window::Window(const Window &src) } } -inline constexpr const Window::Dimension &Window::operator[](const size_t dimension) const +inline constexpr const Window::Dimension &Window::operator[](size_t dimension) const { // Precondition: dimension < Coordinates::num_max_dimensions return _dims.at(dimension); } -inline void Window::set(const size_t dimension, const Window::Dimension &dim) +inline void Window::set(size_t dimension, const Window::Dimension &dim) { ARM_COMPUTE_ERROR_ON(dimension >= Coordinates::num_max_dimensions); _dims[dimension] = dim; @@ -72,7 +72,7 @@ inline Window Window::collapse_if_possible(const Window &full_window, size_t fir return collapsed; } -inline void Window::shift(const size_t dimension, const int shift_value) +inline void Window::shift(size_t dimension, int shift_value) { ARM_COMPUTE_ERROR_ON(dimension >= Coordinates::num_max_dimensions); Window::Dimension &d = _dims[dimension]; @@ -94,7 +94,7 @@ inline void Window::adjust(size_t dimension, int adjust_value, bool is_at_start) } } -inline void Window::scale(const size_t dimension, float scale_value) +inline void Window::scale(size_t dimension, float scale_value) { ARM_COMPUTE_ERROR_ON(dimension >= Coordinates::num_max_dimensions); Window::Dimension &d = _dims[dimension]; @@ -103,7 +103,7 @@ inline void Window::scale(const size_t dimension, float scale_value) d = Window::Dimension(d.start() * scale_value, scaled_end, scaled_step); } -inline void Window::set_dimension_step(const size_t dimension, const int step) +inline void Window::set_dimension_step(size_t dimension, int step) { ARM_COMPUTE_ERROR_ON(dimension >= Coordinates::num_max_dimensions); _dims[dimension].set_step(step); @@ -126,7 +126,7 @@ inline constexpr size_t Window::num_iterations(size_t dimension) const return (_dims.at(dimension).end() - _dims.at(dimension).start()) / _dims.at(dimension).step(); } -inline Window Window::split_window(const size_t dimension, const size_t id, const size_t total) const +inline Window Window::split_window(size_t dimension, size_t id, size_t total) const { ARM_COMPUTE_ERROR_ON(id >= total); ARM_COMPUTE_ERROR_ON(dimension >= Coordinates::num_max_dimensions); @@ -201,7 +201,7 @@ inline Window Window::first_slice_window() const return slice; } -inline void Window::use_tensor_dimensions(const ITensorInfo *info, const size_t first_dimension) +inline void Window::use_tensor_dimensions(const ITensorInfo *info, size_t first_dimension) { for(unsigned int n = first_dimension; n < info->num_dimensions(); ++n) { diff --git a/arm_compute/runtime/CPP/CPPScheduler.h b/arm_compute/runtime/CPP/CPPScheduler.h index 341537a390..8351f0cbdd 100644 --- a/arm_compute/runtime/CPP/CPPScheduler.h +++ b/arm_compute/runtime/CPP/CPPScheduler.h @@ -24,7 +24,6 @@ #ifndef __ARM_COMPUTE_CPPSCHEDULER_H__ #define __ARM_COMPUTE_CPPSCHEDULER_H__ -#include "arm_compute/core/CPP/CPPTypes.h" #include "arm_compute/runtime/IScheduler.h" #include <memory> @@ -48,18 +47,6 @@ public: */ unsigned int num_threads() const override; - /** Sets the target CPU architecture. - * - * @param[in] target Target CPU. - */ - void set_target(CPUTarget target); - - /** Return the current CPU target. - * - * @return Target CPU. - */ - CPUTarget target() const; - /** Access the scheduler singleton * * @return The scheduler @@ -82,7 +69,6 @@ private: unsigned int _num_threads; std::unique_ptr<Thread[], void (*)(Thread *)> _threads; - CPUTarget _target; }; } #endif /* __ARM_COMPUTE_CPPSCHEDULER_H__ */ diff --git a/arm_compute/runtime/IScheduler.h b/arm_compute/runtime/IScheduler.h index 39c027c6b7..6078abd06b 100644 --- a/arm_compute/runtime/IScheduler.h +++ b/arm_compute/runtime/IScheduler.h @@ -24,6 +24,8 @@ #ifndef __ARM_COMPUTE_ISCHEDULER_H__ #define __ARM_COMPUTE_ISCHEDULER_H__ +#include "arm_compute/core/CPP/CPPTypes.h" + namespace arm_compute { class ICPPKernel; @@ -32,6 +34,12 @@ class ICPPKernel; class IScheduler { public: + /** Default constructor. */ + IScheduler() + : _target(CPUTarget::INTRINSICS) + { + } + /** Destructor. */ virtual ~IScheduler() = default; /** Sets the number of threads the scheduler will use to run the kernels. @@ -50,6 +58,31 @@ public: * @param[in] split_dimension Dimension along which to split the kernel's execution window. */ virtual void schedule(ICPPKernel *kernel, unsigned int split_dimension) = 0; + + /** Sets the target CPU architecture. + * + * @param[in] target Target CPU. + */ + void set_target(CPUTarget target); + + /** Return the current CPU target. + * + * @return Target CPU. + */ + CPUTarget target() const; + +protected: + CPUTarget _target; }; + +inline void IScheduler::set_target(CPUTarget target) +{ + _target = target; +} + +inline CPUTarget IScheduler::target() const +{ + return _target; +} } #endif /* __ARM_COMPUTE_ISCHEDULER_H__ */ diff --git a/docs/01_library.dox b/docs/01_library.dox index 738579e7c6..de2e3ea8b7 100644 --- a/docs/01_library.dox +++ b/docs/01_library.dox @@ -83,7 +83,7 @@ This is the very basic implementation used in the NEON runtime library by all th @sa CPPScheduler. -@note Some kernels like for example @ref NEHistogramKernel need some local temporary buffer to perform their calculations. In order to avoid memory corruption between threads, the local buffer must be of size: ```memory_needed_per_thread * num_threads``` and each subwindow must be initialized by calling @ref Window::set_thread_id() with a unique thread_id between 0 and num_threads. +@note Some kernels like for example @ref NEHistogramKernel need some local temporary buffer to perform their calculations. In order to avoid memory corruption between threads, the local buffer must be of size: ```memory_needed_per_thread * num_threads``` and a unique thread_id between 0 and num_threads must be assigned to the @ref ThreadInfo object passed to the ```run``` function. @subsection S4_2_4 Functions diff --git a/src/core/CPP/kernels/CPPCornerCandidatesKernel.cpp b/src/core/CPP/kernels/CPPCornerCandidatesKernel.cpp index c7ac753672..418d349830 100644 --- a/src/core/CPP/kernels/CPPCornerCandidatesKernel.cpp +++ b/src/core/CPP/kernels/CPPCornerCandidatesKernel.cpp @@ -93,8 +93,9 @@ void CPPCornerCandidatesKernel::configure(const IImage *input, InternalKeypoint INEKernel::configure(win); } -void CPPCornerCandidatesKernel::run(const Window &window) +void CPPCornerCandidatesKernel::run(const Window &window, const ThreadInfo &info) { + ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); Iterator input(_input, window); diff --git a/src/core/CPP/kernels/CPPDetectionWindowNonMaximaSuppressionKernel.cpp b/src/core/CPP/kernels/CPPDetectionWindowNonMaximaSuppressionKernel.cpp index 62bfdd60ba..ebe3db983f 100644 --- a/src/core/CPP/kernels/CPPDetectionWindowNonMaximaSuppressionKernel.cpp +++ b/src/core/CPP/kernels/CPPDetectionWindowNonMaximaSuppressionKernel.cpp @@ -59,8 +59,9 @@ void CPPDetectionWindowNonMaximaSuppressionKernel::configure(IDetectionWindowArr IKernel::configure(Window()); // Default 1 iteration window } -void CPPDetectionWindowNonMaximaSuppressionKernel::run(const Window &window) +void CPPDetectionWindowNonMaximaSuppressionKernel::run(const Window &window, const ThreadInfo &info) { + ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_MISMATCHING_WINDOWS(IKernel::window(), window); ARM_COMPUTE_ERROR_ON(_input_output->buffer() == nullptr); diff --git a/src/core/CPP/kernels/CPPSortEuclideanDistanceKernel.cpp b/src/core/CPP/kernels/CPPSortEuclideanDistanceKernel.cpp index 09d3ccffa4..3b1c7aeb3d 100644 --- a/src/core/CPP/kernels/CPPSortEuclideanDistanceKernel.cpp +++ b/src/core/CPP/kernels/CPPSortEuclideanDistanceKernel.cpp @@ -68,8 +68,9 @@ bool CPPSortEuclideanDistanceKernel::is_parallelisable() const return false; } -void CPPSortEuclideanDistanceKernel::run(const Window &window) +void CPPSortEuclideanDistanceKernel::run(const Window &window, const ThreadInfo &info) { + ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_MISMATCHING_WINDOWS(ICPPKernel::window(), window); diff --git a/src/core/NEON/kernels/NEAbsoluteDifferenceKernel.cpp b/src/core/NEON/kernels/NEAbsoluteDifferenceKernel.cpp index edb0a0f304..e0c2891592 100644 --- a/src/core/NEON/kernels/NEAbsoluteDifferenceKernel.cpp +++ b/src/core/NEON/kernels/NEAbsoluteDifferenceKernel.cpp @@ -201,8 +201,9 @@ void NEAbsoluteDifferenceKernel::configure(const ITensor *input1, const ITensor INEKernel::configure(win); } -void NEAbsoluteDifferenceKernel::run(const Window &window) +void NEAbsoluteDifferenceKernel::run(const Window &window, const ThreadInfo &info) { + ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); ARM_COMPUTE_ERROR_ON(_func == nullptr); diff --git a/src/core/NEON/kernels/NEAccumulateKernel.cpp b/src/core/NEON/kernels/NEAccumulateKernel.cpp index 6e54dd64a3..deafabe1d4 100644 --- a/src/core/NEON/kernels/NEAccumulateKernel.cpp +++ b/src/core/NEON/kernels/NEAccumulateKernel.cpp @@ -114,8 +114,9 @@ void acc_we_v16_u8(const void *__restrict input, void *__restrict accum, float16 } } // namespace fp16 -void NEAccumulateWeightedFP16Kernel::run(const Window &window) +void NEAccumulateWeightedFP16Kernel::run(const Window &window, const ThreadInfo &info) { + ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INESimpleKernel::window(), window); @@ -262,8 +263,9 @@ void NEAccumulateKernel::configure(const ITensor *input, ITensor *accum) INESimpleKernel::configure(input, accum, num_elems_processed_per_iteration); } -void NEAccumulateKernel::run(const Window &window) +void NEAccumulateKernel::run(const Window &window, const ThreadInfo &info) { + ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INESimpleKernel::window(), window); Iterator input(_input, window); @@ -300,8 +302,9 @@ void NEAccumulateWeightedKernel::configure(const ITensor *input, float alpha, IT INESimpleKernel::configure(input, accum, num_elems_processed_per_iteration); } -void NEAccumulateWeightedKernel::run(const Window &window) +void NEAccumulateWeightedKernel::run(const Window &window, const ThreadInfo &info) { + ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INESimpleKernel::window(), window); @@ -342,8 +345,9 @@ void NEAccumulateSquaredKernel::configure(const ITensor *input, uint32_t shift, INESimpleKernel::configure(input, accum, num_elems_processed_per_iteration); } -void NEAccumulateSquaredKernel::run(const Window &window) +void NEAccumulateSquaredKernel::run(const Window &window, const ThreadInfo &info) { + ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INESimpleKernel::window(), window); Iterator input(_input, window); diff --git a/src/core/NEON/kernels/NEActivationLayerKernel.cpp b/src/core/NEON/kernels/NEActivationLayerKernel.cpp index 4ff26c0c67..67fc45bc70 100644 --- a/src/core/NEON/kernels/NEActivationLayerKernel.cpp +++ b/src/core/NEON/kernels/NEActivationLayerKernel.cpp @@ -653,8 +653,9 @@ typename std::enable_if<std::is_same<T, qint16_t>::value, void>::type NEActivati input, output); } -void NEActivationLayerKernel::run(const Window &window) +void NEActivationLayerKernel::run(const Window &window, const ThreadInfo &info) { + ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window); ARM_COMPUTE_ERROR_ON(_func == nullptr); diff --git a/src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp b/src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp index 7f7e45a940..f263fd0df2 100644 --- a/src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp +++ b/src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp @@ -459,8 +459,9 @@ void NEArithmeticAdditionKernel::configure(const ITensor *input1, const ITensor INEKernel::configure(win); } -void NEArithmeticAdditionKernel::run(const Window &window) +void NEArithmeticAdditionKernel::run(const Window &window, const ThreadInfo &info) { + ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); ARM_COMPUTE_ERROR_ON(_func == nullptr); diff --git a/src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp b/src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp index be8574317b..85f72c1421 100644 --- a/src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp +++ b/src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp @@ -452,8 +452,9 @@ void NEArithmeticSubtractionKernel::configure(const ITensor *input1, const ITens INEKernel::configure(win); } -void NEArithmeticSubtractionKernel::run(const Window &window) +void NEArithmeticSubtractionKernel::run(const Window &window, const ThreadInfo &info) { + ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); ARM_COMPUTE_ERROR_ON(_func == nullptr); diff --git a/src/core/NEON/kernels/NEBatchNormalizationLayerKernel.cpp b/src/core/NEON/kernels/NEBatchNormalizationLayerKernel.cpp index 66f174e883..f6f6f9cb61 100644 --- a/src/core/NEON/kernels/NEBatchNormalizationLayerKernel.cpp +++ b/src/core/NEON/kernels/NEBatchNormalizationLayerKernel.cpp @@ -283,8 +283,9 @@ void NEBatchNormalizationLayerKernel::configure(ITensor *input, ITensor *output, INEKernel::configure(win); } -void NEBatchNormalizationLayerKernel::run(const Window &window) +void NEBatchNormalizationLayerKernel::run(const Window &window, const ThreadInfo &info) { + ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); ARM_COMPUTE_ERROR_ON(_func == nullptr); diff --git a/src/core/NEON/kernels/NEBitwiseAndKernel.cpp b/src/core/NEON/kernels/NEBitwiseAndKernel.cpp index e8e448e455..3888300899 100644 --- a/src/core/NEON/kernels/NEBitwiseAndKernel.cpp +++ b/src/core/NEON/kernels/NEBitwiseAndKernel.cpp @@ -93,8 +93,9 @@ void NEBitwiseAndKernel::configure(const ITensor *input1, const ITensor *input2, INEKernel::configure(win); } -void NEBitwiseAndKernel::run(const Window &window) +void NEBitwiseAndKernel::run(const Window &window, const ThreadInfo &info) { + ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); Iterator input1(_input1, window); diff --git a/src/core/NEON/kernels/NEBitwiseNotKernel.cpp b/src/core/NEON/kernels/NEBitwiseNotKernel.cpp index bf75592677..08d7fe2610 100644 --- a/src/core/NEON/kernels/NEBitwiseNotKernel.cpp +++ b/src/core/NEON/kernels/NEBitwiseNotKernel.cpp @@ -81,8 +81,9 @@ void NEBitwiseNotKernel::configure(const ITensor *input, ITensor *output) INEKernel::configure(win); } -void NEBitwiseNotKernel::run(const Window &window) +void NEBitwiseNotKernel::run(const Window &window, const ThreadInfo &info) { + ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); Iterator input(_input, window); diff --git a/src/core/NEON/kernels/NEBitwiseOrKernel.cpp b/src/core/NEON/kernels/NEBitwiseOrKernel.cpp index f184be2f26..1b17cc283c 100644 --- a/src/core/NEON/kernels/NEBitwiseOrKernel.cpp +++ b/src/core/NEON/kernels/NEBitwiseOrKernel.cpp @@ -93,8 +93,9 @@ void NEBitwiseOrKernel::configure(const ITensor *input1, const ITensor *input2, INEKernel::configure(win); } -void NEBitwiseOrKernel::run(const Window &window) +void NEBitwiseOrKernel::run(const Window &window, const ThreadInfo &info) { + ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); Iterator input1(_input1, window); diff --git a/src/core/NEON/kernels/NEBitwiseXorKernel.cpp b/src/core/NEON/kernels/NEBitwiseXorKernel.cpp index c4fb4c0d03..9451e8a08d 100644 --- a/src/core/NEON/kernels/NEBitwiseXorKernel.cpp +++ b/src/core/NEON/kernels/NEBitwiseXorKernel.cpp @@ -89,8 +89,9 @@ void NEBitwiseXorKernel::configure(const ITensor *input1, const ITensor *input2, INEKernel::configure(win); } -void NEBitwiseXorKernel::run(const Window &window) +void NEBitwiseXorKernel::run(const Window &window, const ThreadInfo &info) { + ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); Iterator input1(_input1, window); diff --git a/src/core/NEON/kernels/NEBox3x3Kernel.cpp b/src/core/NEON/kernels/NEBox3x3Kernel.cpp index 551c903dd9..d7178e4690 100644 --- a/src/core/NEON/kernels/NEBox3x3Kernel.cpp +++ b/src/core/NEON/kernels/NEBox3x3Kernel.cpp @@ -34,8 +34,9 @@ using namespace arm_compute; #ifdef ARM_COMPUTE_ENABLE_FP16 -void NEBox3x3FP16Kernel::run(const Window &window) +void NEBox3x3FP16Kernel::run(const Window &window, const ThreadInfo &info) { + ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INESimpleKernel::window(), window); @@ -144,8 +145,9 @@ void NEBox3x3Kernel::configure(const ITensor *input, ITensor *output, bool borde INEKernel::configure(win); } -void NEBox3x3Kernel::run(const Window &window) +void NEBox3x3Kernel::run(const Window &window, const ThreadInfo &info) { + ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INESimpleKernel::window(), window); diff --git a/src/core/NEON/kernels/NECannyEdgeKernel.cpp b/src/core/NEON/kernels/NECannyEdgeKernel.cpp index 26df6f6b8b..bcbe790fd0 100644 --- a/src/core/NEON/kernels/NECannyEdgeKernel.cpp +++ b/src/core/NEON/kernels/NECannyEdgeKernel.cpp @@ -1677,8 +1677,9 @@ void NEGradientKernel::configure(const ITensor *gx, const ITensor *gy, ITensor * INEKernel::configure(win); } -void NEGradientKernel::run(const Window &window) +void NEGradientKernel::run(const Window &window, const ThreadInfo &info) { + ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); ARM_COMPUTE_ERROR_ON(_func == nullptr); @@ -1758,8 +1759,9 @@ void NEEdgeNonMaxSuppressionKernel::configure(const ITensor *magnitude, const IT INEKernel::configure(win); } -void NEEdgeNonMaxSuppressionKernel::run(const Window &window) +void NEEdgeNonMaxSuppressionKernel::run(const Window &window, const ThreadInfo &info) { + ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); ARM_COMPUTE_ERROR_ON(_func == nullptr); @@ -1838,8 +1840,9 @@ void NEEdgeTraceKernel::configure(ITensor *input, ITensor *output) INEKernel::configure(win); } -void NEEdgeTraceKernel::run(const Window &window) +void NEEdgeTraceKernel::run(const Window &window, const ThreadInfo &info) { + ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); Iterator input(_input, window); diff --git a/src/core/NEON/kernels/NEChannelCombineKernel.cpp b/src/core/NEON/kernels/NEChannelCombineKernel.cpp index 3147a698ad..a2b24de0b4 100644 --- a/src/core/NEON/kernels/NEChannelCombineKernel.cpp +++ b/src/core/NEON/kernels/NEChannelCombineKernel.cpp @@ -284,8 +284,9 @@ bool NEChannelCombineKernel::is_parallelisable() const return _is_parallelizable; } -void NEChannelCombineKernel::run(const Window &window) +void NEChannelCombineKernel::run(const Window &window, const ThreadInfo &info) { + ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); ARM_COMPUTE_ERROR_ON(_func == nullptr); diff --git a/src/core/NEON/kernels/NEChannelExtractKernel.cpp b/src/core/NEON/kernels/NEChannelExtractKernel.cpp index dc31685240..bac24718ba 100644 --- a/src/core/NEON/kernels/NEChannelExtractKernel.cpp +++ b/src/core/NEON/kernels/NEChannelExtractKernel.cpp @@ -264,8 +264,9 @@ void NEChannelExtractKernel::configure(const IMultiImage *input, Channel channel INEKernel::configure(win); } -void NEChannelExtractKernel::run(const Window &window) +void NEChannelExtractKernel::run(const Window &window, const ThreadInfo &info) { + ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INESimpleKernel::window(), window); ARM_COMPUTE_ERROR_ON(_func == nullptr); diff --git a/src/core/NEON/kernels/NECol2ImKernel.cpp b/src/core/NEON/kernels/NECol2ImKernel.cpp index 95a9364082..460d37e85d 100644 --- a/src/core/NEON/kernels/NECol2ImKernel.cpp +++ b/src/core/NEON/kernels/NECol2ImKernel.cpp @@ -116,8 +116,9 @@ void NECol2ImKernel::configure(const ITensor *input, ITensor *output, std::pair< INEKernel::configure(win); } -void NECol2ImKernel::run(const Window &window) +void NECol2ImKernel::run(const Window &window, const ThreadInfo &info) { + ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); diff --git a/src/core/NEON/kernels/NEColorConvertKernel.cpp b/src/core/NEON/kernels/NEColorConvertKernel.cpp index cb5152e2b3..347aeaede6 100644 --- a/src/core/NEON/kernels/NEColorConvertKernel.cpp +++ b/src/core/NEON/kernels/NEColorConvertKernel.cpp @@ -572,8 +572,9 @@ void NEColorConvertKernel::configure(const IMultiImage *input, IMultiImage *outp INEKernel::configure(win); } -void NEColorConvertKernel::run(const Window &window) +void NEColorConvertKernel::run(const Window &window, const ThreadInfo &info) { + ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); ARM_COMPUTE_ERROR_ON(_func == nullptr); diff --git a/src/core/NEON/kernels/NEConvolutionKernel.cpp b/src/core/NEON/kernels/NEConvolutionKernel.cpp index 30e91ef253..263fbe058a 100644 --- a/src/core/NEON/kernels/NEConvolutionKernel.cpp +++ b/src/core/NEON/kernels/NEConvolutionKernel.cpp @@ -621,8 +621,9 @@ void NEConvolutionKernel<9>::convolution(const Window &win) } template <unsigned int matrix_size> -void NEConvolutionKernel<matrix_size>::run(const Window &window) +void NEConvolutionKernel<matrix_size>::run(const Window &window, const ThreadInfo &info) { + ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); @@ -694,8 +695,9 @@ void NESeparableConvolutionHorKernel<matrix_size>::configure(const ITensor *inpu } template <unsigned int matrix_size> -void NESeparableConvolutionHorKernel<matrix_size>::run(const Window &window) +void NESeparableConvolutionHorKernel<matrix_size>::run(const Window &window, const ThreadInfo &info) { + ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); switch(_output->info()->data_type()) @@ -1131,8 +1133,9 @@ void NESeparableConvolutionVertKernel<matrix_size>::configure(const ITensor *inp } template <unsigned int matrix_size> -void NESeparableConvolutionVertKernel<matrix_size>::run(const Window &window) +void NESeparableConvolutionVertKernel<matrix_size>::run(const Window &window, const ThreadInfo &info) { + ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); @@ -1464,8 +1467,9 @@ void NEConvolutionRectangleKernel::configure(const ITensor *input, ITensor *outp INEKernel::configure(win); } -void NEConvolutionRectangleKernel::run(const Window &window) +void NEConvolutionRectangleKernel::run(const Window &window, const ThreadInfo &info) { + ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); diff --git a/src/core/NEON/kernels/NECumulativeDistributionKernel.cpp b/src/core/NEON/kernels/NECumulativeDistributionKernel.cpp index 32789cbe33..b65f3ba9e8 100644 --- a/src/core/NEON/kernels/NECumulativeDistributionKernel.cpp +++ b/src/core/NEON/kernels/NECumulativeDistributionKernel.cpp @@ -67,8 +67,9 @@ void NECumulativeDistributionKernel::configure(const IImage *input, const IDistr INEKernel::configure(calculate_max_window(*input->info())); } -void NECumulativeDistributionKernel::run(const Window &window) +void NECumulativeDistributionKernel::run(const Window &window, const ThreadInfo &info) { + ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); ARM_COMPUTE_ERROR_ON(_distribution->buffer() == nullptr); diff --git a/src/core/NEON/kernels/NEDepthConcatenateKernel.cpp b/src/core/NEON/kernels/NEDepthConcatenateKernel.cpp index d58e4e0aa5..7a62b0cb03 100644 --- a/src/core/NEON/kernels/NEDepthConcatenateKernel.cpp +++ b/src/core/NEON/kernels/NEDepthConcatenateKernel.cpp @@ -159,8 +159,9 @@ void NEDepthConcatenateKernel::configure(const ITensor *input, unsigned int dept INEKernel::configure(win); } -void NEDepthConcatenateKernel::run(const Window &window) +void NEDepthConcatenateKernel::run(const Window &window, const ThreadInfo &info) { + ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); ARM_COMPUTE_ERROR_ON(_func == nullptr); diff --git a/src/core/NEON/kernels/NEDepthConvertKernel.cpp b/src/core/NEON/kernels/NEDepthConvertKernel.cpp index f7203701e7..d97a20be65 100644 --- a/src/core/NEON/kernels/NEDepthConvertKernel.cpp +++ b/src/core/NEON/kernels/NEDepthConvertKernel.cpp @@ -120,8 +120,9 @@ void NEDepthConvertKernel::configure(ITensor *input, ITensor *output, ConvertPol ICPPKernel::configure(win); } -void NEDepthConvertKernel::run(const Window &window) +void NEDepthConvertKernel::run(const Window &window, const ThreadInfo &info) { + ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window); ARM_COMPUTE_ERROR_ON(nullptr == _input); diff --git a/src/core/NEON/kernels/NEDequantizationLayerKernel.cpp b/src/core/NEON/kernels/NEDequantizationLayerKernel.cpp index 70685c92c5..3bf2b35a09 100644 --- a/src/core/NEON/kernels/NEDequantizationLayerKernel.cpp +++ b/src/core/NEON/kernels/NEDequantizationLayerKernel.cpp @@ -68,8 +68,9 @@ void NEDequantizationLayerKernel::configure(const ITensor *input, ITensor *outpu INEKernel::configure(win); } -void NEDequantizationLayerKernel::run(const Window &window) +void NEDequantizationLayerKernel::run(const Window &window, const ThreadInfo &info) { + ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); diff --git a/src/core/NEON/kernels/NEDerivativeKernel.cpp b/src/core/NEON/kernels/NEDerivativeKernel.cpp index bf7e0972d5..a5680ebbf9 100644 --- a/src/core/NEON/kernels/NEDerivativeKernel.cpp +++ b/src/core/NEON/kernels/NEDerivativeKernel.cpp @@ -214,8 +214,9 @@ void NEDerivativeKernel::derivative_xy(const Window &window) in, out_x, out_y); } -void NEDerivativeKernel::run(const Window &window) +void NEDerivativeKernel::run(const Window &window, const ThreadInfo &info) { + ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); ARM_COMPUTE_ERROR_ON(_func == nullptr); diff --git a/src/core/NEON/kernels/NEDilateKernel.cpp b/src/core/NEON/kernels/NEDilateKernel.cpp index 867cf77c49..3ee00a47d3 100644 --- a/src/core/NEON/kernels/NEDilateKernel.cpp +++ b/src/core/NEON/kernels/NEDilateKernel.cpp @@ -67,8 +67,9 @@ void NEDilateKernel::configure(const ITensor *input, ITensor *output, bool borde INEKernel::configure(win); } -void NEDilateKernel::run(const Window &window) +void NEDilateKernel::run(const Window &window, const ThreadInfo &info) { + ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INESimpleKernel::window(), window); diff --git a/src/core/NEON/kernels/NEDirectConvolutionLayerBiasAccumulateKernel.cpp b/src/core/NEON/kernels/NEDirectConvolutionLayerBiasAccumulateKernel.cpp index 12ef064803..6631359341 100644 --- a/src/core/NEON/kernels/NEDirectConvolutionLayerBiasAccumulateKernel.cpp +++ b/src/core/NEON/kernels/NEDirectConvolutionLayerBiasAccumulateKernel.cpp @@ -266,8 +266,9 @@ void NEDirectConvolutionLayerBiasAccumulateKernel::configure(ITensor *input, con } } -void NEDirectConvolutionLayerBiasAccumulateKernel::run(const Window &window) +void NEDirectConvolutionLayerBiasAccumulateKernel::run(const Window &window, const ThreadInfo &info) { + ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); ARM_COMPUTE_ERROR_ON(_func == nullptr); diff --git a/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp b/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp index 3dd07fcdbe..d4171c5a67 100644 --- a/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp +++ b/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp @@ -1378,8 +1378,9 @@ void NEDirectConvolutionLayerKernel::configure(const ITensor *input, const ITens INEKernel::configure(win); } -void NEDirectConvolutionLayerKernel::run(const Window &window) +void NEDirectConvolutionLayerKernel::run(const Window &window, const ThreadInfo &info) { + ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); ARM_COMPUTE_ERROR_ON(_input->buffer() == nullptr); diff --git a/src/core/NEON/kernels/NEErodeKernel.cpp b/src/core/NEON/kernels/NEErodeKernel.cpp index 398503627c..88c20f8174 100644 --- a/src/core/NEON/kernels/NEErodeKernel.cpp +++ b/src/core/NEON/kernels/NEErodeKernel.cpp @@ -67,8 +67,9 @@ void NEErodeKernel::configure(const ITensor *input, ITensor *output, bool border INEKernel::configure(win); } -void NEErodeKernel::run(const Window &window) +void NEErodeKernel::run(const Window &window, const ThreadInfo &info) { + ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INESimpleKernel::window(), window); diff --git a/src/core/NEON/kernels/NEFastCornersKernel.cpp b/src/core/NEON/kernels/NEFastCornersKernel.cpp index 9e8b5526a1..919efd2ae2 100644 --- a/src/core/NEON/kernels/NEFastCornersKernel.cpp +++ b/src/core/NEON/kernels/NEFastCornersKernel.cpp @@ -388,8 +388,9 @@ void NEFastCornersKernel::configure(const IImage *input, IImage *output, uint8_t INEKernel::configure(win); } -void NEFastCornersKernel::run(const Window &window) +void NEFastCornersKernel::run(const Window &window, const ThreadInfo &info) { + ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); diff --git a/src/core/NEON/kernels/NEFillArrayKernel.cpp b/src/core/NEON/kernels/NEFillArrayKernel.cpp index 7e7e1c2501..5a2e1a0aa4 100644 --- a/src/core/NEON/kernels/NEFillArrayKernel.cpp +++ b/src/core/NEON/kernels/NEFillArrayKernel.cpp @@ -62,8 +62,9 @@ bool NEFillArrayKernel::is_parallelisable() const return false; } -void NEFillArrayKernel::run(const Window &window) +void NEFillArrayKernel::run(const Window &window, const ThreadInfo &info) { + ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); diff --git a/src/core/NEON/kernels/NEFillBorderKernel.cpp b/src/core/NEON/kernels/NEFillBorderKernel.cpp index 7d191c18b0..65d5388c4b 100644 --- a/src/core/NEON/kernels/NEFillBorderKernel.cpp +++ b/src/core/NEON/kernels/NEFillBorderKernel.cpp @@ -124,8 +124,10 @@ void NEFillBorderKernel::configure(ITensor *tensor, BorderSize border_size, Bord INEKernel::configure(win); } -void NEFillBorderKernel::run(const Window &window) +void NEFillBorderKernel::run(const Window &window, const ThreadInfo &info) { + ARM_COMPUTE_UNUSED(info); + // If there is no border: early exit if(_border_size.empty()) { diff --git a/src/core/NEON/kernels/NEFillInnerBorderKernel.cpp b/src/core/NEON/kernels/NEFillInnerBorderKernel.cpp index 699a5d9299..5323733fd3 100644 --- a/src/core/NEON/kernels/NEFillInnerBorderKernel.cpp +++ b/src/core/NEON/kernels/NEFillInnerBorderKernel.cpp @@ -61,8 +61,9 @@ void NEFillInnerBorderKernel::configure(ITensor *input, BorderSize border_size, INEKernel::configure(win); } -void NEFillInnerBorderKernel::run(const Window &window) +void NEFillInnerBorderKernel::run(const Window &window, const ThreadInfo &info) { + ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); diff --git a/src/core/NEON/kernels/NEFloorKernel.cpp b/src/core/NEON/kernels/NEFloorKernel.cpp index 1dc376f738..dd85ac1fd6 100644 --- a/src/core/NEON/kernels/NEFloorKernel.cpp +++ b/src/core/NEON/kernels/NEFloorKernel.cpp @@ -64,8 +64,9 @@ void NEFloorKernel::configure(const ITensor *input, ITensor *output) INEKernel::configure(win); } -void NEFloorKernel::run(const Window &window) +void NEFloorKernel::run(const Window &window, const ThreadInfo &info) { + ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); diff --git a/src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.cpp b/src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.cpp index 40ece9faab..c76c39aa4b 100644 --- a/src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.cpp +++ b/src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.cpp @@ -182,8 +182,9 @@ void NEGEMMInterleave4x4Kernel::configure(const ITensor *input, ITensor *output) INEKernel::configure(win); } -void NEGEMMInterleave4x4Kernel::run(const Window &window) +void NEGEMMInterleave4x4Kernel::run(const Window &window, const ThreadInfo &info) { + ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); ARM_COMPUTE_ERROR_ON(_func == nullptr); diff --git a/src/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.cpp b/src/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.cpp index 3558c686b1..cbba4461a2 100644 --- a/src/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.cpp +++ b/src/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.cpp @@ -81,8 +81,9 @@ void NEGEMMLowpMatrixMultiplyKernel::configure(const ITensor *input0, const ITen INEKernel::configure(win); } -void NEGEMMLowpMatrixMultiplyKernel::run(const Window &window) +void NEGEMMLowpMatrixMultiplyKernel::run(const Window &window, const ThreadInfo &info) { + ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); diff --git a/src/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.cpp b/src/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.cpp index 6ed3791ce5..fb07cb0333 100644 --- a/src/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.cpp +++ b/src/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.cpp @@ -72,8 +72,9 @@ void NEGEMMMatrixAccumulateBiasesKernel::configure(ITensor *accum, const ITensor INEKernel::configure(win); } -void NEGEMMMatrixAccumulateBiasesKernel::run(const Window &window) +void NEGEMMMatrixAccumulateBiasesKernel::run(const Window &window, const ThreadInfo &info) { + ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); diff --git a/src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.cpp b/src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.cpp index f2cd18d827..9dbce1de2f 100644 --- a/src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.cpp +++ b/src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.cpp @@ -183,8 +183,9 @@ void NEGEMMMatrixAdditionKernel::configure(const ITensor *input, ITensor *output _beta = beta; } -void NEGEMMMatrixAdditionKernel::run(const Window &window) +void NEGEMMMatrixAdditionKernel::run(const Window &window, const ThreadInfo &info) { + ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INESimpleKernel::window(), window); diff --git a/src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.cpp b/src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.cpp index 8a2a481bde..b872bab928 100644 --- a/src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.cpp +++ b/src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.cpp @@ -51,7 +51,7 @@ class Coordinates; namespace { template <bool multiply_alpha> -void vector_matrix_multiply_f16(const ITensor *input0, const ITensor *input1, ITensor *output, const Window &window, float alpha) +void vector_matrix_multiply_f16(const ITensor *input0, const ITensor *input1, ITensor *output, const Window &window, const ThreadInfo &info, float alpha) { #ifdef ARM_COMPUTE_ENABLE_FP16 const auto width_matrix_b = static_cast<int>(output->info()->dimension(0)); @@ -59,8 +59,8 @@ void vector_matrix_multiply_f16(const ITensor *input0, const ITensor *input1, IT const auto num_elems_vec_a = static_cast<int>(input0->info()->dimension(0)); // The implementation computes 32 elements per iteration - const int window_start_x = 32 * window.thread_id(); - const int window_step_x = 32 * window.num_threads(); + const int window_start_x = 32 * info.thread_id; + const int window_step_x = 32 * info.num_threads; const int window_end_x = ceil_to_multiple(width_matrix_b - window_start_x, window_step_x) + window_start_x; ARM_COMPUTE_ERROR_ON_MSG((window_end_x - window_start_x) % window_step_x, " (window_end_x - window_start_x) must be multiple of window_step_x"); @@ -192,15 +192,15 @@ void vector_matrix_multiply_f16(const ITensor *input0, const ITensor *input1, IT } template <bool multiply_alpha> -void vector_matrix_multiply_f32(const ITensor *input0, const ITensor *input1, ITensor *output, const Window &window, float alpha) +void vector_matrix_multiply_f32(const ITensor *input0, const ITensor *input1, ITensor *output, const Window &window, const ThreadInfo &info, float alpha) { const auto width_matrix_b = static_cast<int>(output->info()->dimension(0)); const auto in_b_stride = static_cast<int>(input1->info()->strides_in_bytes()[1] / data_size_from_type(input1->info()->data_type())); const auto num_elems_vec_a = static_cast<int>(input0->info()->dimension(0)); // The implementation computes 16 elements per iteration - const int window_start_x = 16 * window.thread_id(); - const int window_step_x = 16 * window.num_threads(); + const int window_start_x = 16 * info.thread_id; + const int window_step_x = 16 * info.num_threads; // Make sure (window_end_x - window_start_x) is a multiple of window_step_x const int window_end_x = ceil_to_multiple(width_matrix_b - window_start_x, window_step_x) + window_start_x; @@ -348,7 +348,7 @@ void vector_matrix_multiply_f32(const ITensor *input0, const ITensor *input1, IT } template <bool multiply_alpha> -void vector_matrix_multiply_qs8(const ITensor *input0, const ITensor *input1, ITensor *output, const Window &window, float alpha) +void vector_matrix_multiply_qs8(const ITensor *input0, const ITensor *input1, ITensor *output, const Window &window, const ThreadInfo &info, float alpha) { const auto width_matrix_b = static_cast<int>(output->info()->dimension(0)); const auto in_b_stride = static_cast<int>(input1->info()->strides_in_bytes()[1] / data_size_from_type(input1->info()->data_type())); @@ -356,8 +356,8 @@ void vector_matrix_multiply_qs8(const ITensor *input0, const ITensor *input1, IT const int fixed_point_position = input0->info()->fixed_point_position(); // The implementation computes 32 elements per iteration - const int window_start_x = 32 * window.thread_id(); - const int window_step_x = 32 * window.num_threads(); + const int window_start_x = 32 * info.thread_id; + const int window_step_x = 32 * info.num_threads; // Make sure (window_end_x - window_start_x) is a multiple of window_step_x const int window_end_x = ceil_to_multiple(width_matrix_b - window_start_x, window_step_x) + window_start_x; @@ -476,7 +476,7 @@ void vector_matrix_multiply_qs8(const ITensor *input0, const ITensor *input1, IT } template <bool multiply_alpha> -void vector_matrix_multiply_qs16(const ITensor *input0, const ITensor *input1, ITensor *output, const Window &window, float alpha) +void vector_matrix_multiply_qs16(const ITensor *input0, const ITensor *input1, ITensor *output, const Window &window, const ThreadInfo &info, float alpha) { const auto width_matrix_b = static_cast<int>(output->info()->dimension(0)); const auto in_b_stride = static_cast<int>(input1->info()->strides_in_bytes()[1] / data_size_from_type(input1->info()->data_type())); @@ -484,8 +484,8 @@ void vector_matrix_multiply_qs16(const ITensor *input0, const ITensor *input1, I const int fixed_point_position = input0->info()->fixed_point_position(); // The implementation computes 16 elements per iteration - const int window_start_x = 16 * window.thread_id(); - const int window_step_x = 16 * window.num_threads(); + const int window_start_x = 16 * info.thread_id; + const int window_step_x = 16 * info.num_threads; // Make sure (window_end_x - window_start_x) is a multiple of window_step_x const int window_end_x = ceil_to_multiple(width_matrix_b - window_start_x, window_step_x) + window_start_x; ARM_COMPUTE_ERROR_ON_MSG((window_end_x - window_start_x) % window_step_x, " (window_end_x - window_start_x) must be multiple of window_step_x"); @@ -1522,7 +1522,7 @@ void NEGEMMMatrixMultiplyKernel::configure(const ITensor *input0, const ITensor } } -void NEGEMMMatrixMultiplyKernel::run(const Window &window) +void NEGEMMMatrixMultiplyKernel::run(const Window &window, const ThreadInfo &info) { ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); @@ -1536,27 +1536,27 @@ void NEGEMMMatrixMultiplyKernel::run(const Window &window) { case DataType::F32: { - multiply_alpha ? vector_matrix_multiply_f32<true>(_input0, _input1, _output, window, _alpha) : - vector_matrix_multiply_f32<false>(_input0, _input1, _output, window, _alpha); + multiply_alpha ? vector_matrix_multiply_f32<true>(_input0, _input1, _output, window, info, _alpha) : + vector_matrix_multiply_f32<false>(_input0, _input1, _output, window, info, _alpha); break; } case DataType::QS8: { - multiply_alpha ? vector_matrix_multiply_qs8<true>(_input0, _input1, _output, window, _alpha) : - vector_matrix_multiply_qs8<false>(_input0, _input1, _output, window, _alpha); + multiply_alpha ? vector_matrix_multiply_qs8<true>(_input0, _input1, _output, window, info, _alpha) : + vector_matrix_multiply_qs8<false>(_input0, _input1, _output, window, info, _alpha); break; } case DataType::QS16: { - multiply_alpha ? vector_matrix_multiply_qs16<true>(_input0, _input1, _output, window, _alpha) : - vector_matrix_multiply_qs16<false>(_input0, _input1, _output, window, _alpha); + multiply_alpha ? vector_matrix_multiply_qs16<true>(_input0, _input1, _output, window, info, _alpha) : + vector_matrix_multiply_qs16<false>(_input0, _input1, _output, window, info, _alpha); break; } #ifdef ARM_COMPUTE_ENABLE_FP16 case DataType::F16: { - multiply_alpha ? vector_matrix_multiply_f16<true>(_input0, _input1, _output, window, _alpha) : - vector_matrix_multiply_f16<false>(_input0, _input1, _output, window, _alpha); + multiply_alpha ? vector_matrix_multiply_f16<true>(_input0, _input1, _output, window, info, _alpha) : + vector_matrix_multiply_f16<false>(_input0, _input1, _output, window, info, _alpha); break; } #endif /* ARM_COMPUTE_ENABLE_FP16 */ diff --git a/src/core/NEON/kernels/NEGEMMTranspose1xWKernel.cpp b/src/core/NEON/kernels/NEGEMMTranspose1xWKernel.cpp index 95063a7875..7f4ee1ec49 100644 --- a/src/core/NEON/kernels/NEGEMMTranspose1xWKernel.cpp +++ b/src/core/NEON/kernels/NEGEMMTranspose1xWKernel.cpp @@ -81,8 +81,9 @@ void NEGEMMTranspose1xWKernel::configure(const ITensor *input, ITensor *output) INEKernel::configure(win); } -void NEGEMMTranspose1xWKernel::run(const Window &window) +void NEGEMMTranspose1xWKernel::run(const Window &window, const ThreadInfo &info) { + ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INESimpleKernel::window(), window); diff --git a/src/core/NEON/kernels/NEGaussian3x3Kernel.cpp b/src/core/NEON/kernels/NEGaussian3x3Kernel.cpp index 419f4825ef..048c22933c 100644 --- a/src/core/NEON/kernels/NEGaussian3x3Kernel.cpp +++ b/src/core/NEON/kernels/NEGaussian3x3Kernel.cpp @@ -64,8 +64,9 @@ void NEGaussian3x3Kernel::configure(const ITensor *input, ITensor *output, bool INEKernel::configure(win); } -void NEGaussian3x3Kernel::run(const Window &window) +void NEGaussian3x3Kernel::run(const Window &window, const ThreadInfo &info) { + ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INESimpleKernel::window(), window); diff --git a/src/core/NEON/kernels/NEGaussian5x5Kernel.cpp b/src/core/NEON/kernels/NEGaussian5x5Kernel.cpp index 4e1880d968..b62e2816c0 100644 --- a/src/core/NEON/kernels/NEGaussian5x5Kernel.cpp +++ b/src/core/NEON/kernels/NEGaussian5x5Kernel.cpp @@ -73,8 +73,9 @@ void NEGaussian5x5HorKernel::configure(const ITensor *input, ITensor *output, bo INEKernel::configure(win); } -void NEGaussian5x5HorKernel::run(const Window &window) +void NEGaussian5x5HorKernel::run(const Window &window, const ThreadInfo &info) { + ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); @@ -140,8 +141,9 @@ void NEGaussian5x5VertKernel::configure(const ITensor *input, ITensor *output, b INEKernel::configure(win); } -void NEGaussian5x5VertKernel::run(const Window &window) +void NEGaussian5x5VertKernel::run(const Window &window, const ThreadInfo &info) { + ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INESimpleKernel::window(), window); diff --git a/src/core/NEON/kernels/NEGaussianPyramidKernel.cpp b/src/core/NEON/kernels/NEGaussianPyramidKernel.cpp index 52d1fbf028..d6cb1b6444 100644 --- a/src/core/NEON/kernels/NEGaussianPyramidKernel.cpp +++ b/src/core/NEON/kernels/NEGaussianPyramidKernel.cpp @@ -110,8 +110,9 @@ void NEGaussianPyramidHorKernel::configure(const ITensor *input, ITensor *output INEKernel::configure(win); } -void NEGaussianPyramidHorKernel::run(const Window &window) +void NEGaussianPyramidHorKernel::run(const Window &window, const ThreadInfo &info) { + ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); ARM_COMPUTE_ERROR_ON(window.x().step() % 2); @@ -215,8 +216,9 @@ void NEGaussianPyramidVertKernel::configure(const ITensor *input, ITensor *outpu INEKernel::configure(win); } -void NEGaussianPyramidVertKernel::run(const Window &window) +void NEGaussianPyramidVertKernel::run(const Window &window, const ThreadInfo &info) { + ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); ARM_COMPUTE_ERROR_ON(window.x().step() != 16); diff --git a/src/core/NEON/kernels/NEHOGDescriptorKernel.cpp b/src/core/NEON/kernels/NEHOGDescriptorKernel.cpp index 404ad8a388..3fd81bed1c 100644 --- a/src/core/NEON/kernels/NEHOGDescriptorKernel.cpp +++ b/src/core/NEON/kernels/NEHOGDescriptorKernel.cpp @@ -675,8 +675,9 @@ void NEHOGOrientationBinningKernel::configure(const ITensor *input_magnitude, co INEKernel::configure(win); } -void NEHOGOrientationBinningKernel::run(const Window &window) +void NEHOGOrientationBinningKernel::run(const Window &window, const ThreadInfo &info) { + ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window); ARM_COMPUTE_ERROR_ON(_func == nullptr); @@ -768,8 +769,9 @@ void NEHOGBlockNormalizationKernel::configure(const ITensor *input, ITensor *out INEKernel::configure(win); } -void NEHOGBlockNormalizationKernel::run(const Window &window) +void NEHOGBlockNormalizationKernel::run(const Window &window, const ThreadInfo &info) { + ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON(_func == nullptr); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window); diff --git a/src/core/NEON/kernels/NEHOGDetectorKernel.cpp b/src/core/NEON/kernels/NEHOGDetectorKernel.cpp index 9dd50ca294..343b0517b0 100644 --- a/src/core/NEON/kernels/NEHOGDetectorKernel.cpp +++ b/src/core/NEON/kernels/NEHOGDetectorKernel.cpp @@ -92,8 +92,9 @@ void NEHOGDetectorKernel::configure(const ITensor *input, const IHOG *hog, IDete INEKernel::configure(win); } -void NEHOGDetectorKernel::run(const Window &window) +void NEHOGDetectorKernel::run(const Window &window, const ThreadInfo &info) { + ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window); ARM_COMPUTE_ERROR_ON(_hog_descriptor == nullptr); diff --git a/src/core/NEON/kernels/NEHarrisCornersKernel.cpp b/src/core/NEON/kernels/NEHarrisCornersKernel.cpp index d44f4ce3b8..233b2baabe 100644 --- a/src/core/NEON/kernels/NEHarrisCornersKernel.cpp +++ b/src/core/NEON/kernels/NEHarrisCornersKernel.cpp @@ -287,8 +287,9 @@ NEHarrisScoreFP16Kernel<block_size>::NEHarrisScoreFP16Kernel() } template <int32_t block_size> -void NEHarrisScoreFP16Kernel<block_size>::run(const Window &window) +void NEHarrisScoreFP16Kernel<block_size>::run(const Window &window, const ThreadInfo &info) { + ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); ARM_COMPUTE_ERROR_ON(_func == nullptr); @@ -1029,8 +1030,9 @@ NEHarrisScoreKernel<block_size>::NEHarrisScoreKernel() } template <int32_t block_size> -void NEHarrisScoreKernel<block_size>::run(const Window &window) +void NEHarrisScoreKernel<block_size>::run(const Window &window, const ThreadInfo &info) { + ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); ARM_COMPUTE_ERROR_ON(_func == nullptr); diff --git a/src/core/NEON/kernels/NEHistogramKernel.cpp b/src/core/NEON/kernels/NEHistogramKernel.cpp index 2018651083..6e402ae604 100644 --- a/src/core/NEON/kernels/NEHistogramKernel.cpp +++ b/src/core/NEON/kernels/NEHistogramKernel.cpp @@ -66,7 +66,7 @@ NEHistogramKernel::NEHistogramKernel() { } -void NEHistogramKernel::histogram_U8(Window win) +void NEHistogramKernel::histogram_U8(Window win, const ThreadInfo &info) { ARM_COMPUTE_ERROR_ON(_output->buffer() == nullptr); @@ -74,7 +74,7 @@ void NEHistogramKernel::histogram_U8(Window win) const int32_t offset = _output->offset(); const uint32_t offrange = offset + _output->range(); const uint32_t *const w_lut = _window_lut; - uint32_t *const local_hist = _local_hist + win.thread_id() * bins; + uint32_t *const local_hist = _local_hist + info.thread_id * bins; // Clear local_histogram std::fill_n(local_hist, bins, 0); @@ -129,8 +129,9 @@ void NEHistogramKernel::histogram_U8(Window win) merge_histogram(_output->buffer(), local_hist, bins); } -void NEHistogramKernel::histogram_fixed_U8(Window win) +void NEHistogramKernel::histogram_fixed_U8(Window win, const ThreadInfo &info) { + ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON(_output->buffer() == nullptr); std::array<uint32_t, _max_range_size> local_hist{ { 0 } }; @@ -242,11 +243,11 @@ void NEHistogramKernel::configure(const IImage *input, IDistribution1D *output) INEKernel::configure(win); } -void NEHistogramKernel::run(const Window &window) +void NEHistogramKernel::run(const Window &window, const ThreadInfo &info) { ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); ARM_COMPUTE_ERROR_ON(_func == nullptr); - (this->*_func)(window); + (this->*_func)(window, info); } diff --git a/src/core/NEON/kernels/NEIm2ColKernel.cpp b/src/core/NEON/kernels/NEIm2ColKernel.cpp index 6e15f82b6d..3e50277cdf 100644 --- a/src/core/NEON/kernels/NEIm2ColKernel.cpp +++ b/src/core/NEON/kernels/NEIm2ColKernel.cpp @@ -362,8 +362,9 @@ void NEIm2ColKernel::configure(const ITensor *input, ITensor *output, const Size IKernel::configure(window); } -void NEIm2ColKernel::run(const Window &window) +void NEIm2ColKernel::run(const Window &window, const ThreadInfo &info) { + ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); diff --git a/src/core/NEON/kernels/NEIntegralImageKernel.cpp b/src/core/NEON/kernels/NEIntegralImageKernel.cpp index 3b09a1bdbb..16a3cf7f07 100644 --- a/src/core/NEON/kernels/NEIntegralImageKernel.cpp +++ b/src/core/NEON/kernels/NEIntegralImageKernel.cpp @@ -71,8 +71,9 @@ bool NEIntegralImageKernel::is_parallelisable() const return false; } -void NEIntegralImageKernel::run(const Window &window) +void NEIntegralImageKernel::run(const Window &window, const ThreadInfo &info) { + ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INESimpleKernel::window(), window); diff --git a/src/core/NEON/kernels/NEL2NormalizeKernel.cpp b/src/core/NEON/kernels/NEL2NormalizeKernel.cpp index f3f58b6eb3..12c532afd5 100644 --- a/src/core/NEON/kernels/NEL2NormalizeKernel.cpp +++ b/src/core/NEON/kernels/NEL2NormalizeKernel.cpp @@ -109,8 +109,9 @@ void NEL2NormalizeKernel::configure(const ITensor *input, const ITensor *sum, IT INEKernel::configure(win); } -void NEL2NormalizeKernel::run(const Window &window) +void NEL2NormalizeKernel::run(const Window &window, const ThreadInfo &info) { + ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); diff --git a/src/core/NEON/kernels/NELKTrackerKernel.cpp b/src/core/NEON/kernels/NELKTrackerKernel.cpp index 3d2bfb204e..6fac7975df 100644 --- a/src/core/NEON/kernels/NELKTrackerKernel.cpp +++ b/src/core/NEON/kernels/NELKTrackerKernel.cpp @@ -385,8 +385,9 @@ void NELKTrackerKernel::configure(const ITensor *input_old, const ITensor *input INEKernel::configure(window); } -void NELKTrackerKernel::run(const Window &window) +void NELKTrackerKernel::run(const Window &window, const ThreadInfo &info) { + ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); diff --git a/src/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.cpp b/src/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.cpp index 2b7b391c43..a02ebf61ee 100644 --- a/src/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.cpp +++ b/src/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.cpp @@ -49,7 +49,7 @@ class Coordinates; namespace { -void vector_matrix_multiply_f16(const ITensor *input0, const ITensor *input1, ITensor *output, const Window &window) +void vector_matrix_multiply_f16(const ITensor *input0, const ITensor *input1, ITensor *output, const Window &window, const ThreadInfo &info) { #ifdef ARM_COMPUTE_ENABLE_FP16 const auto width_matrix_b = static_cast<int>(output->info()->dimension(0)); @@ -57,8 +57,8 @@ void vector_matrix_multiply_f16(const ITensor *input0, const ITensor *input1, IT const auto num_elems_vec_a = static_cast<int>(input0->info()->dimension(0)); // The implementation computes 16 elements per iteration - const int window_start_x = 16 * window.thread_id(); - const int window_step_x = 16 * window.num_threads(); + const int window_start_x = 16 * info.thread_id; + const int window_step_x = 16 * info.num_threads; // Make sure (window_end_x - window_start_x) is a multiple of window_step_x const int window_end_x = ceil_to_multiple(width_matrix_b - window_start_x, window_step_x) + window_start_x; @@ -169,15 +169,15 @@ void vector_matrix_multiply_f16(const ITensor *input0, const ITensor *input1, IT #endif /* ARM_COMPUTE_ENABLE_FP16 */ } -void vector_matrix_multiply_f32(const ITensor *input0, const ITensor *input1, ITensor *output, const Window &window) +void vector_matrix_multiply_f32(const ITensor *input0, const ITensor *input1, ITensor *output, const Window &window, const ThreadInfo &info) { const auto width_matrix_b = static_cast<int>(output->info()->dimension(0)); const auto in_b_stride = static_cast<int>(input1->info()->strides_in_bytes()[1] / data_size_from_type(input1->info()->data_type())); const auto num_elems_vec_a = static_cast<int>(input0->info()->dimension(0)); // The implementation computes 16 elements per iteration - const int window_start_x = 16 * window.thread_id(); - const int window_step_x = 16 * window.num_threads(); + const int window_start_x = 16 * info.thread_id; + const int window_step_x = 16 * info.num_threads; // Make sure (window_end_x - window_start_x) is a multiple of window_step_x const int window_end_x = ceil_to_multiple(width_matrix_b - window_start_x, window_step_x) + window_start_x; @@ -337,7 +337,7 @@ void NELocallyConnectedMatrixMultiplyKernel::configure(const ITensor *input0, co INEKernel::configure(win); } -void NELocallyConnectedMatrixMultiplyKernel::run(const Window &window) +void NELocallyConnectedMatrixMultiplyKernel::run(const Window &window, const ThreadInfo &info) { ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); @@ -346,12 +346,12 @@ void NELocallyConnectedMatrixMultiplyKernel::run(const Window &window) { case DataType::F16: { - vector_matrix_multiply_f16(_input0, _input1, _output, window); + vector_matrix_multiply_f16(_input0, _input1, _output, window, info); break; } case DataType::F32: { - vector_matrix_multiply_f32(_input0, _input1, _output, window); + vector_matrix_multiply_f32(_input0, _input1, _output, window, info); break; } default: diff --git a/src/core/NEON/kernels/NEMagnitudePhaseKernel.cpp b/src/core/NEON/kernels/NEMagnitudePhaseKernel.cpp index 599dad6c70..433985f6fa 100644 --- a/src/core/NEON/kernels/NEMagnitudePhaseKernel.cpp +++ b/src/core/NEON/kernels/NEMagnitudePhaseKernel.cpp @@ -415,8 +415,9 @@ void NEMagnitudePhaseFP16Kernel<mag_type, phase_type>::magnitude_phase(const Win } template <MagnitudeType mag_type, PhaseType phase_type> -void NEMagnitudePhaseFP16Kernel<mag_type, phase_type>::run(const Window &window) +void NEMagnitudePhaseFP16Kernel<mag_type, phase_type>::run(const Window &window, const ThreadInfo &info) { + ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); ARM_COMPUTE_ERROR_ON(_func == nullptr); @@ -854,8 +855,9 @@ void NEMagnitudePhaseKernel<mag_type, phase_type>::magnitude_phase(const Window } template <MagnitudeType mag_type, PhaseType phase_type> -void NEMagnitudePhaseKernel<mag_type, phase_type>::run(const Window &window) +void NEMagnitudePhaseKernel<mag_type, phase_type>::run(const Window &window, const ThreadInfo &info) { + ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); ARM_COMPUTE_ERROR_ON(_func == nullptr); diff --git a/src/core/NEON/kernels/NEMeanStdDevKernel.cpp b/src/core/NEON/kernels/NEMeanStdDevKernel.cpp index cb32a3b45b..7895b009d6 100644 --- a/src/core/NEON/kernels/NEMeanStdDevKernel.cpp +++ b/src/core/NEON/kernels/NEMeanStdDevKernel.cpp @@ -120,8 +120,9 @@ void NEMeanStdDevKernel::configure(const IImage *input, float *mean, uint64_t *g INEKernel::configure(win); } -void NEMeanStdDevKernel::run(const Window &window) +void NEMeanStdDevKernel::run(const Window &window, const ThreadInfo &info) { + ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); Iterator input(_input, window); diff --git a/src/core/NEON/kernels/NEMedian3x3Kernel.cpp b/src/core/NEON/kernels/NEMedian3x3Kernel.cpp index 601a0e109f..54ef33ec36 100644 --- a/src/core/NEON/kernels/NEMedian3x3Kernel.cpp +++ b/src/core/NEON/kernels/NEMedian3x3Kernel.cpp @@ -75,8 +75,9 @@ void NEMedian3x3Kernel::configure(const ITensor *input, ITensor *output, bool bo INEKernel::configure(win); } -void NEMedian3x3Kernel::run(const Window &window) +void NEMedian3x3Kernel::run(const Window &window, const ThreadInfo &info) { + ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INESimpleKernel::window(), window); diff --git a/src/core/NEON/kernels/NEMinMaxLocationKernel.cpp b/src/core/NEON/kernels/NEMinMaxLocationKernel.cpp index 24c7af7af7..c7dc03c8c8 100644 --- a/src/core/NEON/kernels/NEMinMaxLocationKernel.cpp +++ b/src/core/NEON/kernels/NEMinMaxLocationKernel.cpp @@ -80,8 +80,9 @@ void NEMinMaxKernel::configure(const IImage *input, void *min, void *max) INEKernel::configure(win); } -void NEMinMaxKernel::run(const Window &window) +void NEMinMaxKernel::run(const Window &window, const ThreadInfo &info) { + ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); ARM_COMPUTE_ERROR_ON(_func == nullptr); @@ -400,8 +401,9 @@ void NEMinMaxLocationKernel::configure(const IImage *input, void *min, void *max INEKernel::configure(win); } -void NEMinMaxLocationKernel::run(const Window &window) +void NEMinMaxLocationKernel::run(const Window &window, const ThreadInfo &info) { + ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); ARM_COMPUTE_ERROR_ON(_func == nullptr); diff --git a/src/core/NEON/kernels/NENonLinearFilterKernel.cpp b/src/core/NEON/kernels/NENonLinearFilterKernel.cpp index 03d1409be1..ba68de638b 100644 --- a/src/core/NEON/kernels/NENonLinearFilterKernel.cpp +++ b/src/core/NEON/kernels/NENonLinearFilterKernel.cpp @@ -930,8 +930,9 @@ void NENonLinearFilterKernel::non_linear_filter_generic(const Window &win) input, output); } -void NENonLinearFilterKernel::run(const Window &window) +void NENonLinearFilterKernel::run(const Window &window, const ThreadInfo &info) { + ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); diff --git a/src/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.cpp b/src/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.cpp index ece7b9a10f..b7dfb59252 100644 --- a/src/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.cpp +++ b/src/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.cpp @@ -495,8 +495,9 @@ void NENonMaximaSuppression3x3Kernel::configure(const ITensor *input, ITensor *o INEKernel::configure(win); } -void NENonMaximaSuppression3x3Kernel::run(const Window &window) +void NENonMaximaSuppression3x3Kernel::run(const Window &window, const ThreadInfo &info) { + ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); ARM_COMPUTE_ERROR_ON(_func == nullptr); diff --git a/src/core/NEON/kernels/NENormalizationLayerKernel.cpp b/src/core/NEON/kernels/NENormalizationLayerKernel.cpp index 0e15244f0e..b444c9d4df 100644 --- a/src/core/NEON/kernels/NENormalizationLayerKernel.cpp +++ b/src/core/NEON/kernels/NENormalizationLayerKernel.cpp @@ -373,8 +373,9 @@ void NENormalizationLayerKernel::normalize_fixed_point(const Window &window) } } -void NENormalizationLayerKernel::run(const Window &window) +void NENormalizationLayerKernel::run(const Window &window, const ThreadInfo &info) { + ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); ARM_COMPUTE_ERROR_ON(_func == nullptr); diff --git a/src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.cpp b/src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.cpp index 33663eb57d..40dcc710de 100644 --- a/src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.cpp +++ b/src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.cpp @@ -639,8 +639,9 @@ void NEPixelWiseMultiplicationKernel::configure(const ITensor *input1, const ITe INEKernel::configure(win); } -void NEPixelWiseMultiplicationKernel::run(const Window &window) +void NEPixelWiseMultiplicationKernel::run(const Window &window, const ThreadInfo &info) { + ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); diff --git a/src/core/NEON/kernels/NEPoolingLayerKernel.cpp b/src/core/NEON/kernels/NEPoolingLayerKernel.cpp index 63c7c8ea47..fdcbd5a898 100644 --- a/src/core/NEON/kernels/NEPoolingLayerKernel.cpp +++ b/src/core/NEON/kernels/NEPoolingLayerKernel.cpp @@ -805,8 +805,9 @@ void NEPoolingLayerKernel::pooling7_f32(const Window &window_input, const Window input, output); } -void NEPoolingLayerKernel::run(const Window &window) +void NEPoolingLayerKernel::run(const Window &window, const ThreadInfo &info) { + ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); ARM_COMPUTE_ERROR_ON(_func == nullptr); diff --git a/src/core/NEON/kernels/NEQuantizationLayerKernel.cpp b/src/core/NEON/kernels/NEQuantizationLayerKernel.cpp index fbf7e1ba87..a596d835cb 100644 --- a/src/core/NEON/kernels/NEQuantizationLayerKernel.cpp +++ b/src/core/NEON/kernels/NEQuantizationLayerKernel.cpp @@ -66,8 +66,9 @@ void NEQuantizationLayerKernel::configure(const ITensor *input, ITensor *output, INEKernel::configure(win); } -void NEQuantizationLayerKernel::run(const Window &window) +void NEQuantizationLayerKernel::run(const Window &window, const ThreadInfo &info) { + ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); diff --git a/src/core/NEON/kernels/NEROIPoolingLayerKernel.cpp b/src/core/NEON/kernels/NEROIPoolingLayerKernel.cpp index fd6210021c..83ab611b67 100644 --- a/src/core/NEON/kernels/NEROIPoolingLayerKernel.cpp +++ b/src/core/NEON/kernels/NEROIPoolingLayerKernel.cpp @@ -79,8 +79,9 @@ void NEROIPoolingLayerKernel::configure(const ITensor *input, const IROIArray *r INEKernel::configure(window); } -void NEROIPoolingLayerKernel::run(const Window &window) +void NEROIPoolingLayerKernel::run(const Window &window, const ThreadInfo &info) { + ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); diff --git a/src/core/NEON/kernels/NEReductionOperationKernel.cpp b/src/core/NEON/kernels/NEReductionOperationKernel.cpp index 5c8954609b..1a50ed8bfc 100644 --- a/src/core/NEON/kernels/NEReductionOperationKernel.cpp +++ b/src/core/NEON/kernels/NEReductionOperationKernel.cpp @@ -141,8 +141,9 @@ void NEReductionOperationKernel::configure(const ITensor *input, ITensor *output INEKernel::configure(win); } -void NEReductionOperationKernel::run(const Window &window) +void NEReductionOperationKernel::run(const Window &window, const ThreadInfo &info) { + ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); diff --git a/src/core/NEON/kernels/NERemapKernel.cpp b/src/core/NEON/kernels/NERemapKernel.cpp index c3c44a5f32..2dbabca2e3 100644 --- a/src/core/NEON/kernels/NERemapKernel.cpp +++ b/src/core/NEON/kernels/NERemapKernel.cpp @@ -216,8 +216,9 @@ void NERemapKernel::remap_bilinear(const Window &window) in, out, mapx, mapy); } -void NERemapKernel::run(const Window &window) +void NERemapKernel::run(const Window &window, const ThreadInfo &info) { + ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); ARM_COMPUTE_ERROR_ON(_func == nullptr); diff --git a/src/core/NEON/kernels/NEScaleKernel.cpp b/src/core/NEON/kernels/NEScaleKernel.cpp index ae164eb979..7ec4212227 100644 --- a/src/core/NEON/kernels/NEScaleKernel.cpp +++ b/src/core/NEON/kernels/NEScaleKernel.cpp @@ -363,8 +363,9 @@ void NEScaleKernel::scale_area(const Window &window) in, out); } -void NEScaleKernel::run(const Window &window) +void NEScaleKernel::run(const Window &window, const ThreadInfo &info) { + ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); ARM_COMPUTE_ERROR_ON(_func == nullptr); diff --git a/src/core/NEON/kernels/NEScharr3x3Kernel.cpp b/src/core/NEON/kernels/NEScharr3x3Kernel.cpp index 183df1efcb..f23c31bc0a 100644 --- a/src/core/NEON/kernels/NEScharr3x3Kernel.cpp +++ b/src/core/NEON/kernels/NEScharr3x3Kernel.cpp @@ -135,8 +135,9 @@ BorderSize NEScharr3x3Kernel::border_size() const return BorderSize(1); } -void NEScharr3x3Kernel::run(const Window &window) +void NEScharr3x3Kernel::run(const Window &window, const ThreadInfo &info) { + ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); diff --git a/src/core/NEON/kernels/NESobel3x3Kernel.cpp b/src/core/NEON/kernels/NESobel3x3Kernel.cpp index ab08a1cfeb..5a80630a76 100644 --- a/src/core/NEON/kernels/NESobel3x3Kernel.cpp +++ b/src/core/NEON/kernels/NESobel3x3Kernel.cpp @@ -88,8 +88,9 @@ void NESobel3x3Kernel::configure(const ITensor *input, ITensor *output_x, ITenso INEKernel::configure(win); } -void NESobel3x3Kernel::run(const Window &window) +void NESobel3x3Kernel::run(const Window &window, const ThreadInfo &info) { + ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); diff --git a/src/core/NEON/kernels/NESobel5x5Kernel.cpp b/src/core/NEON/kernels/NESobel5x5Kernel.cpp index 488eee1176..30e7817aa4 100644 --- a/src/core/NEON/kernels/NESobel5x5Kernel.cpp +++ b/src/core/NEON/kernels/NESobel5x5Kernel.cpp @@ -90,8 +90,9 @@ void NESobel5x5HorKernel::configure(const ITensor *input, ITensor *output_x, ITe INEKernel::configure(win); } -void NESobel5x5HorKernel::run(const Window &window) +void NESobel5x5HorKernel::run(const Window &window, const ThreadInfo &info) { + ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); @@ -261,8 +262,9 @@ void NESobel5x5VertKernel::configure(ITensor *input_x, ITensor *input_y, ITensor INEKernel::configure(win); } -void NESobel5x5VertKernel::run(const Window &window) +void NESobel5x5VertKernel::run(const Window &window, const ThreadInfo &info) { + ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); diff --git a/src/core/NEON/kernels/NESobel7x7Kernel.cpp b/src/core/NEON/kernels/NESobel7x7Kernel.cpp index 9761942c69..4cc80f8e2e 100644 --- a/src/core/NEON/kernels/NESobel7x7Kernel.cpp +++ b/src/core/NEON/kernels/NESobel7x7Kernel.cpp @@ -193,8 +193,9 @@ void NESobel7x7HorKernel::configure(const ITensor *input, ITensor *output_x, ITe INEKernel::configure(win); } -void NESobel7x7HorKernel::run(const Window &window) +void NESobel7x7HorKernel::run(const Window &window, const ThreadInfo &info) { + ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); @@ -351,8 +352,9 @@ void NESobel7x7VertKernel::configure(const ITensor *input_x, const ITensor *inpu INEKernel::configure(win); } -void NESobel7x7VertKernel::run(const Window &window) +void NESobel7x7VertKernel::run(const Window &window, const ThreadInfo &info) { + ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); diff --git a/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp b/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp index 4fed16b5fa..648dac46c0 100644 --- a/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp +++ b/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp @@ -239,8 +239,9 @@ void NELogits1DMaxKernel::configure(const ITensor *input, ITensor *output) INEKernel::configure(win); } -void NELogits1DMaxKernel::run(const Window &window) +void NELogits1DMaxKernel::run(const Window &window, const ThreadInfo &info) { + ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); ARM_COMPUTE_ERROR_ON(_func == nullptr); @@ -560,8 +561,9 @@ void NELogits1DShiftExpSumKernel::configure(const ITensor *input, const ITensor INEKernel::configure(win); } -void NELogits1DShiftExpSumKernel::run(const Window &window) +void NELogits1DShiftExpSumKernel::run(const Window &window, const ThreadInfo &info) { + ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); ARM_COMPUTE_ERROR_ON(_func == nullptr); @@ -758,8 +760,9 @@ void NELogits1DNormKernel::configure(const ITensor *input, const ITensor *sum, I INEKernel::configure(win); } -void NELogits1DNormKernel::run(const Window &window) +void NELogits1DNormKernel::run(const Window &window, const ThreadInfo &info) { + ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); ARM_COMPUTE_ERROR_ON(_func == nullptr); diff --git a/src/core/NEON/kernels/NETableLookupKernel.cpp b/src/core/NEON/kernels/NETableLookupKernel.cpp index f0b58d82f6..958f4a9cfb 100644 --- a/src/core/NEON/kernels/NETableLookupKernel.cpp +++ b/src/core/NEON/kernels/NETableLookupKernel.cpp @@ -133,8 +133,9 @@ void NETableLookupKernel::configure(const ITensor *input, const ILut *lut, ITens INESimpleKernel::configure(input, output, num_num_elems_processed_per_iteration); } -void NETableLookupKernel::run(const Window &window) +void NETableLookupKernel::run(const Window &window, const ThreadInfo &info) { + ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INESimpleKernel::window(), window); ARM_COMPUTE_ERROR_ON(_func == nullptr); diff --git a/src/core/NEON/kernels/NEThresholdKernel.cpp b/src/core/NEON/kernels/NEThresholdKernel.cpp index 72031195d9..5ef06931cc 100644 --- a/src/core/NEON/kernels/NEThresholdKernel.cpp +++ b/src/core/NEON/kernels/NEThresholdKernel.cpp @@ -119,8 +119,9 @@ inline void NEThresholdKernel::run_range(const Window &window) input, output); } -void NEThresholdKernel::run(const Window &window) +void NEThresholdKernel::run(const Window &window, const ThreadInfo &info) { + ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); ARM_COMPUTE_ERROR_ON(_func == nullptr); diff --git a/src/core/NEON/kernels/NETransposeKernel.cpp b/src/core/NEON/kernels/NETransposeKernel.cpp index 732a0ef4f6..1cfaafe93a 100644 --- a/src/core/NEON/kernels/NETransposeKernel.cpp +++ b/src/core/NEON/kernels/NETransposeKernel.cpp @@ -233,8 +233,9 @@ void NETransposeKernel::configure(const ITensor *input, ITensor *output) INEKernel::configure(win); } -void NETransposeKernel::run(const Window &window) +void NETransposeKernel::run(const Window &window, const ThreadInfo &info) { + ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); ARM_COMPUTE_ERROR_ON(_func == nullptr); diff --git a/src/core/NEON/kernels/NEWarpKernel.cpp b/src/core/NEON/kernels/NEWarpKernel.cpp index a1ec1a0b51..b13e99e800 100644 --- a/src/core/NEON/kernels/NEWarpKernel.cpp +++ b/src/core/NEON/kernels/NEWarpKernel.cpp @@ -54,8 +54,9 @@ BorderSize INEWarpKernel::border_size() const return BorderSize(1); } -void INEWarpKernel::run(const Window &window) +void INEWarpKernel::run(const Window &window, const ThreadInfo &info) { + ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); ARM_COMPUTE_ERROR_ON(_func == nullptr); diff --git a/src/core/NEON/kernels/NEWeightsReshapeKernel.cpp b/src/core/NEON/kernels/NEWeightsReshapeKernel.cpp index d685ec7962..d52e88c37a 100644 --- a/src/core/NEON/kernels/NEWeightsReshapeKernel.cpp +++ b/src/core/NEON/kernels/NEWeightsReshapeKernel.cpp @@ -165,8 +165,9 @@ void NEWeightsReshapeKernel::configure(const ITensor *input, const ITensor *bias INEKernel::configure(window); } -void NEWeightsReshapeKernel::run(const Window &window) +void NEWeightsReshapeKernel::run(const Window &window, const ThreadInfo &info) { + ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); diff --git a/src/runtime/CL/functions/CLHOGMultiDetection.cpp b/src/runtime/CL/functions/CLHOGMultiDetection.cpp index 6def2dedc9..9eed355710 100644 --- a/src/runtime/CL/functions/CLHOGMultiDetection.cpp +++ b/src/runtime/CL/functions/CLHOGMultiDetection.cpp @@ -29,6 +29,7 @@ #include "arm_compute/runtime/CL/CLArray.h" #include "arm_compute/runtime/CL/CLScheduler.h" #include "arm_compute/runtime/CL/CLTensor.h" +#include "arm_compute/runtime/Scheduler.h" #include "support/ToolchainSupport.h" using namespace arm_compute; @@ -246,7 +247,7 @@ void CLHOGMultiDetection::run() { // Map detection windows array before computing non maxima suppression _detection_windows->map(CLScheduler::get().queue(), true); - _non_maxima_kernel->run(_non_maxima_kernel->window()); + Scheduler::get().schedule(_non_maxima_kernel.get(), Window::DimY); _detection_windows->unmap(CLScheduler::get().queue()); } } diff --git a/src/runtime/CL/functions/CLHarrisCorners.cpp b/src/runtime/CL/functions/CLHarrisCorners.cpp index 8f9fcdc58f..2140240753 100644 --- a/src/runtime/CL/functions/CLHarrisCorners.cpp +++ b/src/runtime/CL/functions/CLHarrisCorners.cpp @@ -165,6 +165,6 @@ void CLHarrisCorners::run() _nonmax.unmap(); _corners->map(CLScheduler::get().queue(), true); - _sort_euclidean.run(_sort_euclidean.window()); + Scheduler::get().schedule(&_sort_euclidean, Window::DimY); _corners->unmap(CLScheduler::get().queue()); } diff --git a/src/runtime/CPP/CPPScheduler.cpp b/src/runtime/CPP/CPPScheduler.cpp index 2a321a1101..9cc3f033c2 100644 --- a/src/runtime/CPP/CPPScheduler.cpp +++ b/src/runtime/CPP/CPPScheduler.cpp @@ -52,7 +52,7 @@ public: * This function will return as soon as the kernel has been sent to the worker thread. * wait() needs to be called to ensure the execution is complete. */ - void start(ICPPKernel *kernel, const Window &window); + void start(ICPPKernel *kernel, const Window &window, const ThreadInfo &info); /** Wait for the current kernel execution to complete */ void wait(); @@ -64,13 +64,14 @@ private: std::thread _thread; ICPPKernel *_kernel{ nullptr }; Window _window; + ThreadInfo _info; sem_t _wait_for_work; sem_t _job_complete; std::exception_ptr _current_exception; }; Thread::Thread() - : _thread(), _window(), _wait_for_work(), _job_complete(), _current_exception(nullptr) + : _thread(), _window(), _info(), _wait_for_work(), _job_complete(), _current_exception(nullptr) { int ret = sem_init(&_wait_for_work, 0, 0); ARM_COMPUTE_ERROR_ON(ret < 0); @@ -87,7 +88,7 @@ Thread::~Thread() { ARM_COMPUTE_ERROR_ON(!_thread.joinable()); - start(nullptr, Window()); + start(nullptr, Window(), ThreadInfo()); _thread.join(); int ret = sem_destroy(&_wait_for_work); @@ -99,10 +100,11 @@ Thread::~Thread() ARM_COMPUTE_UNUSED(ret); } -void Thread::start(ICPPKernel *kernel, const Window &window) +void Thread::start(ICPPKernel *kernel, const Window &window, const ThreadInfo &info) { _kernel = kernel; _window = window; + _info = info; int ret = sem_post(&_wait_for_work); ARM_COMPUTE_UNUSED(ret); ARM_COMPUTE_ERROR_ON(ret < 0); @@ -133,7 +135,7 @@ void Thread::worker_thread() try { _window.validate(); - _kernel->run(_window); + _kernel->run(_window, _info); } catch(...) { @@ -163,8 +165,7 @@ CPPScheduler &CPPScheduler::get() CPPScheduler::CPPScheduler() : _num_threads(std::thread::hardware_concurrency()), - _threads(std::unique_ptr<Thread[], void(*)(Thread *)>(new Thread[std::thread::hardware_concurrency() - 1], delete_threads)), - _target(CPUTarget::INTRINSICS) + _threads(std::unique_ptr<Thread[], void(*)(Thread *)>(new Thread[std::thread::hardware_concurrency() - 1], delete_threads)) { } @@ -179,50 +180,42 @@ unsigned int CPPScheduler::num_threads() const return _num_threads; } -void CPPScheduler::set_target(CPUTarget target) -{ - _target = target; -} - -CPUTarget CPPScheduler::target() const -{ - return _target; -} - void CPPScheduler::schedule(ICPPKernel *kernel, unsigned int split_dimension) { ARM_COMPUTE_ERROR_ON_MSG(!kernel, "The child class didn't set the kernel"); /** [Scheduler example] */ + ThreadInfo info; + info.cpu = _target; + const Window &max_window = kernel->window(); const unsigned int num_iterations = max_window.num_iterations(split_dimension); - const unsigned int num_threads = std::min(num_iterations, _num_threads); + info.num_threads = std::min(num_iterations, _num_threads); - if(!kernel->is_parallelisable() || 1 == num_threads) + if(!kernel->is_parallelisable() || info.num_threads == 1) { - kernel->run(max_window); + kernel->run(max_window, info); } else { - for(unsigned int t = 0; t < num_threads; ++t) + for(int t = 0; t < info.num_threads; ++t) { - Window win = max_window.split_window(split_dimension, t, num_threads); - win.set_thread_id(t); - win.set_num_threads(num_threads); + Window win = max_window.split_window(split_dimension, t, info.num_threads); + info.thread_id = t; - if(t != num_threads - 1) + if(t != info.num_threads - 1) { - _threads[t].start(kernel, win); + _threads[t].start(kernel, win, info); } else { - kernel->run(win); + kernel->run(win, info); } } try { - for(unsigned int t = 1; t < num_threads; ++t) + for(int t = 1; t < info.num_threads; ++t) { _threads[t - 1].wait(); } diff --git a/src/runtime/CPP/SingleThreadScheduler.cpp b/src/runtime/CPP/SingleThreadScheduler.cpp index f086813e91..4e46a59fd0 100644 --- a/src/runtime/CPP/SingleThreadScheduler.cpp +++ b/src/runtime/CPP/SingleThreadScheduler.cpp @@ -38,12 +38,15 @@ SingleThreadScheduler &SingleThreadScheduler::get() void SingleThreadScheduler::set_num_threads(unsigned int num_threads) { ARM_COMPUTE_UNUSED(num_threads); + ARM_COMPUTE_ERROR_ON(num_threads != 1); } void SingleThreadScheduler::schedule(ICPPKernel *kernel, unsigned int split_dimension) { ARM_COMPUTE_UNUSED(split_dimension); - kernel->run(kernel->window()); + ThreadInfo info; + info.cpu = _target; + kernel->run(kernel->window(), info); } unsigned int SingleThreadScheduler::num_threads() const diff --git a/src/runtime/NEON/INESimpleFunction.cpp b/src/runtime/NEON/INESimpleFunction.cpp index a748a1e4ad..23d9872294 100644 --- a/src/runtime/NEON/INESimpleFunction.cpp +++ b/src/runtime/NEON/INESimpleFunction.cpp @@ -35,6 +35,6 @@ INESimpleFunction::INESimpleFunction() // NOLINT void INESimpleFunction::run() { - _border_handler.run(_border_handler.window()); + NEScheduler::get().schedule(&_border_handler, Window::DimZ); NEScheduler::get().schedule(_kernel.get(), Window::DimY); } diff --git a/src/runtime/NEON/functions/NECannyEdge.cpp b/src/runtime/NEON/functions/NECannyEdge.cpp index ca8877e2fc..318cea2342 100644 --- a/src/runtime/NEON/functions/NECannyEdge.cpp +++ b/src/runtime/NEON/functions/NECannyEdge.cpp @@ -161,7 +161,7 @@ void NECannyEdge::run() _sobel->run(); // Fill border before non-maxima suppression. Nop for border mode undefined. - _border_mag_gradient.run(_border_mag_gradient.window()); + NEScheduler::get().schedule(&_border_mag_gradient, Window::DimZ); // Run gradient NEScheduler::get().schedule(_gradient.get(), Window::DimY); @@ -173,8 +173,8 @@ void NECannyEdge::run() memset(_output->buffer(), 0, _output->info()->total_size()); // Fill border before edge trace - _border_edge_trace.run(_border_edge_trace.window()); + NEScheduler::get().schedule(&_border_edge_trace, Window::DimZ); // Run edge tracing - _edge_trace.run(_edge_trace.window()); + NEScheduler::get().schedule(&_edge_trace, Window::DimY); } diff --git a/src/runtime/NEON/functions/NEConvolution.cpp b/src/runtime/NEON/functions/NEConvolution.cpp index 4ad6450c67..249274ba32 100644 --- a/src/runtime/NEON/functions/NEConvolution.cpp +++ b/src/runtime/NEON/functions/NEConvolution.cpp @@ -94,7 +94,7 @@ void NEConvolutionSquare<matrix_size>::configure(ITensor *input, ITensor *output template <unsigned int matrix_size> void NEConvolutionSquare<matrix_size>::run() { - _border_handler.run(_border_handler.window()); + NEScheduler::get().schedule(&_border_handler, Window::DimZ); if(_is_separable) { diff --git a/src/runtime/NEON/functions/NEDerivative.cpp b/src/runtime/NEON/functions/NEDerivative.cpp index c50db14746..81180307f6 100644 --- a/src/runtime/NEON/functions/NEDerivative.cpp +++ b/src/runtime/NEON/functions/NEDerivative.cpp @@ -47,6 +47,6 @@ void NEDerivative::configure(ITensor *input, ITensor *output_x, ITensor *output_ void NEDerivative::run() { - _border_handler.run(_border_handler.window()); + NEScheduler::get().schedule(&_border_handler, Window::DimZ); NEScheduler::get().schedule(&_kernel, Window::DimY); } diff --git a/src/runtime/NEON/functions/NEDirectConvolutionLayer.cpp b/src/runtime/NEON/functions/NEDirectConvolutionLayer.cpp index 2e3a6835dc..810efe539f 100644 --- a/src/runtime/NEON/functions/NEDirectConvolutionLayer.cpp +++ b/src/runtime/NEON/functions/NEDirectConvolutionLayer.cpp @@ -85,7 +85,7 @@ void NEDirectConvolutionLayer::configure(ITensor *input, const ITensor *weights, void NEDirectConvolutionLayer::run() { - _input_border_handler.run(_input_border_handler.window()); + NEScheduler::get().schedule(&_input_border_handler, Window::DimZ); NEScheduler::get().schedule(&_conv_kernel, Window::DimZ); NEScheduler::get().schedule(&_accumulate_bias_kernel, Window::DimY); diff --git a/src/runtime/NEON/functions/NEEqualizeHistogram.cpp b/src/runtime/NEON/functions/NEEqualizeHistogram.cpp index f6ec677e44..70b93cae9e 100644 --- a/src/runtime/NEON/functions/NEEqualizeHistogram.cpp +++ b/src/runtime/NEON/functions/NEEqualizeHistogram.cpp @@ -55,7 +55,7 @@ void NEEqualizeHistogram::run() NEScheduler::get().schedule(&_histogram_kernel, Window::DimY); // Calculate cumulative distribution of histogram and create LUT. - _cd_histogram_kernel.run(_cd_histogram_kernel.window()); + NEScheduler::get().schedule(&_cd_histogram_kernel, Window::DimY); // Map input to output using created LUT. NEScheduler::get().schedule(&_map_histogram_kernel, Window::DimY); diff --git a/src/runtime/NEON/functions/NEFastCorners.cpp b/src/runtime/NEON/functions/NEFastCorners.cpp index 33a58f1904..265041fc42 100644 --- a/src/runtime/NEON/functions/NEFastCorners.cpp +++ b/src/runtime/NEON/functions/NEFastCorners.cpp @@ -88,7 +88,7 @@ void NEFastCorners::configure(IImage *input, float threshold, bool nonmax_suppre void NEFastCorners::run() { - _border_handler.run(_border_handler.window()); + NEScheduler::get().schedule(&_border_handler, Window::DimZ); NEScheduler::get().schedule(&_fast_corners_kernel, Window::DimY); diff --git a/src/runtime/NEON/functions/NEGaussian5x5.cpp b/src/runtime/NEON/functions/NEGaussian5x5.cpp index 69639d0d43..a1ce985633 100644 --- a/src/runtime/NEON/functions/NEGaussian5x5.cpp +++ b/src/runtime/NEON/functions/NEGaussian5x5.cpp @@ -54,7 +54,7 @@ void NEGaussian5x5::configure(ITensor *input, ITensor *output, BorderMode border void NEGaussian5x5::run() { - _border_handler.run(_border_handler.window()); + NEScheduler::get().schedule(&_border_handler, Window::DimZ); NEScheduler::get().schedule(&_kernel_hor, Window::DimY); NEScheduler::get().schedule(&_kernel_vert, Window::DimY); } diff --git a/src/runtime/NEON/functions/NEGaussianPyramid.cpp b/src/runtime/NEON/functions/NEGaussianPyramid.cpp index e857aabd3b..90bd5842eb 100644 --- a/src/runtime/NEON/functions/NEGaussianPyramid.cpp +++ b/src/runtime/NEON/functions/NEGaussianPyramid.cpp @@ -109,7 +109,7 @@ void NEGaussianPyramidHalf::run() for(unsigned int i = 0; i < num_levels - 1; ++i) { - _border_handler[i].run(_border_handler[i].window()); + NEScheduler::get().schedule(_border_handler.get() + i, Window::DimZ); NEScheduler::get().schedule(_horizontal_reduction.get() + i, Window::DimY); NEScheduler::get().schedule(_vertical_reduction.get() + i, Window::DimY); } diff --git a/src/runtime/NEON/functions/NEHOGMultiDetection.cpp b/src/runtime/NEON/functions/NEHOGMultiDetection.cpp index 8b3d01423c..1a038a2f62 100644 --- a/src/runtime/NEON/functions/NEHOGMultiDetection.cpp +++ b/src/runtime/NEON/functions/NEHOGMultiDetection.cpp @@ -239,6 +239,6 @@ void NEHOGMultiDetection::run() // Run non-maxima suppression kernel if enabled if(_non_maxima_suppression) { - _non_maxima_kernel->run(_non_maxima_kernel->window()); + NEScheduler::get().schedule(_non_maxima_kernel.get(), Window::DimY); } } diff --git a/src/runtime/NEON/functions/NEHarrisCorners.cpp b/src/runtime/NEON/functions/NEHarrisCorners.cpp index 24b2bcb5b3..7ec681dce9 100644 --- a/src/runtime/NEON/functions/NEHarrisCorners.cpp +++ b/src/runtime/NEON/functions/NEHarrisCorners.cpp @@ -207,8 +207,8 @@ void NEHarrisCorners::run() _sobel->run(); // Fill border before harris score kernel - _border_gx.run(_border_gx.window()); - _border_gy.run(_border_gy.window()); + NEScheduler::get().schedule(&_border_gx, Window::DimZ); + NEScheduler::get().schedule(&_border_gy, Window::DimZ); // Run harris score kernel NEScheduler::get().schedule(_harris_score.get(), Window::DimY); @@ -220,5 +220,5 @@ void NEHarrisCorners::run() NEScheduler::get().schedule(&_candidates, Window::DimY); // Run sort & euclidean distance - _sort_euclidean.run(_sort_euclidean.window()); + NEScheduler::get().schedule(&_sort_euclidean, Window::DimY); } diff --git a/src/runtime/NEON/functions/NEMeanStdDev.cpp b/src/runtime/NEON/functions/NEMeanStdDev.cpp index ab8e72bf1d..2304bc80d7 100644 --- a/src/runtime/NEON/functions/NEMeanStdDev.cpp +++ b/src/runtime/NEON/functions/NEMeanStdDev.cpp @@ -43,6 +43,6 @@ void NEMeanStdDev::run() _global_sum = 0; _global_sum_squared = 0; - _fill_border_kernel.run(_fill_border_kernel.window()); + NEScheduler::get().schedule(&_fill_border_kernel, Window::DimZ); NEScheduler::get().schedule(&_mean_stddev_kernel, Window::DimY); } diff --git a/src/runtime/NEON/functions/NESobel5x5.cpp b/src/runtime/NEON/functions/NESobel5x5.cpp index 8967a22ba1..305d21122e 100644 --- a/src/runtime/NEON/functions/NESobel5x5.cpp +++ b/src/runtime/NEON/functions/NESobel5x5.cpp @@ -75,7 +75,7 @@ void NESobel5x5::configure(ITensor *input, ITensor *output_x, ITensor *output_y, void NESobel5x5::run() { - _border_handler.run(_border_handler.window()); + NEScheduler::get().schedule(&_border_handler, Window::DimZ); NEScheduler::get().schedule(&_sobel_hor, Window::DimY); NEScheduler::get().schedule(&_sobel_vert, Window::DimY); } diff --git a/src/runtime/NEON/functions/NESobel7x7.cpp b/src/runtime/NEON/functions/NESobel7x7.cpp index f628da9709..57fe028567 100644 --- a/src/runtime/NEON/functions/NESobel7x7.cpp +++ b/src/runtime/NEON/functions/NESobel7x7.cpp @@ -75,7 +75,7 @@ void NESobel7x7::configure(ITensor *input, ITensor *output_x, ITensor *output_y, void NESobel7x7::run() { - _border_handler.run(_border_handler.window()); + NEScheduler::get().schedule(&_border_handler, Window::DimZ); NEScheduler::get().schedule(&_sobel_hor, Window::DimY); NEScheduler::get().schedule(&_sobel_vert, Window::DimY); } diff --git a/tests/validation_old/NEON/FillBorder.cpp b/tests/validation_old/NEON/FillBorder.cpp index ad703d97fb..277bbf2c0a 100644 --- a/tests/validation_old/NEON/FillBorder.cpp +++ b/tests/validation_old/NEON/FillBorder.cpp @@ -74,7 +74,7 @@ BOOST_DATA_TEST_CASE(FillBorder, BorderModes() * boost::unit_test::data::make({ fill_border.configure(&src, border_size, border_mode, border_value); // Run kernel - fill_border.run(fill_border.window()); + fill_border.run(fill_border.window(), ThreadInfo()); // Validate border border_size.limit(padding); |