diff options
author | Gunes Bayir <gunes.bayir@arm.com> | 2023-08-17 11:04:02 +0100 |
---|---|---|
committer | Gunes Bayir <gunes.bayir@arm.com> | 2023-08-30 15:45:59 +0000 |
commit | d5f9a1cf9f0340f3e6bf9ff00156fc2adb1fdca9 (patch) | |
tree | af23cff1cb3a504ee51676cd9bfc74b75934fef2 /compute_kernel_writer/include/ckw | |
parent | 91cb7336400acc857e20086a23692f99fe11be9c (diff) | |
download | ComputeLibrary-d5f9a1cf9f0340f3e6bf9ff00156fc2adb1fdca9.tar.gz |
Implement indirect load for buffer and CLImage
Add KernelWriter API functions for loading from an indirect buffer
Resolves: COMPMID-6390
Signed-off-by: Gunes Bayir <gunes.bayir@arm.com>
Change-Id: I45dbf88b25ec5caf2b458657ef20aacac9924745
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/10192
Reviewed-by: Viet-Hoa Do <viet-hoa.do@arm.com>
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'compute_kernel_writer/include/ckw')
-rw-r--r-- | compute_kernel_writer/include/ckw/KernelWriter.h | 15 | ||||
-rw-r--r-- | compute_kernel_writer/include/ckw/types/TensorSamplerTypes.h | 10 |
2 files changed, 20 insertions, 5 deletions
diff --git a/compute_kernel_writer/include/ckw/KernelWriter.h b/compute_kernel_writer/include/ckw/KernelWriter.h
index 0c8f3de0a1..93ae8aecd6 100644
--- a/compute_kernel_writer/include/ckw/KernelWriter.h
+++ b/compute_kernel_writer/include/ckw/KernelWriter.h
@@ -267,7 +267,7 @@ public:
      * @param[in] x x-coordinate
      * @param[in] y y-coordinate
      * @param[in] z z-coordinate
-     * @param[in] batch batch offset
+     * @param[in] batch batch
      */
     virtual void op_load(
         const TileOperand &tile_op, const TensorOperand &tensor_op, TensorSampler &sampler,
@@ -302,6 +302,19 @@ public:
         const TileOperand &x, const TileOperand &y, const TileOperand &z, const TileOperand &batch,
         const TileOperand &dilation_x, const TileOperand &dilation_y) = 0;
 
+    /** Load the data from the tensor memory to the tile using the indirect buffer approach and respecting the sampling information.
+     *
+     * @param[in] tile_op The tile to be loaded.
+     * @param[in] tensor_op The tensor to be read.
+     * @param[in] sampler The tensor sampling information.
+     * @param[in] x x-coordinate
+     * @param[in] y y-coordinate
+     * @param[in] z z-coordinate
+     * @param[in] batch batch
+     */
+    virtual void op_load_indirect(const TileOperand &tile_op, const TensorOperand &tensor_op, TensorSampler &sampler,
+        const TileOperand &x, const TileOperand &y, const TileOperand &z, const TileOperand &batch_op) = 0;
+
 protected:
     // =============================================================================================
     // ID space management
diff --git a/compute_kernel_writer/include/ckw/types/TensorSamplerTypes.h b/compute_kernel_writer/include/ckw/types/TensorSamplerTypes.h
index 3a9f4f5722..43dce1d4e4 100644
--- a/compute_kernel_writer/include/ckw/types/TensorSamplerTypes.h
+++ b/compute_kernel_writer/include/ckw/types/TensorSamplerTypes.h
@@ -22,8 +22,8 @@
  * SOFTWARE.
  */
 
-#ifndef CKW_INCLUDE_CKW_TENSORSAMPLERTYPES_H
-#define CKW_INCLUDE_CKW_TENSORSAMPLERTYPES_H
+#ifndef CKW_INCLUDE_CKW_TYPES_TENSORSAMPLERTYPES_H
+#define CKW_INCLUDE_CKW_TYPES_TENSORSAMPLERTYPES_H
 
 #include <cstdint>
 
@@ -47,6 +47,7 @@ enum class TensorSamplerFormat : int32_t
  * Leftover elements can be handled using overlapping. This involves processing some of the elements in the array twice.
  * ClampToBorderMaxOnly : Clamp to max value allowed in the corresponding dimension, and construct an if/else guard to prevent out of bound access,
  * e.g. if( y < size-of-dimension-y ){ <do the operation> }
+ * SkipLessThanZero : Skip loading/storing if the index is less than 0
  *
  * Individual dimensions choose which adddress mode to implement in their respective enum classes.
  */
@@ -65,7 +66,8 @@ enum class TensorSamplerAddressModeY : int32_t
     Unknown = 0,
     None = 1,
     OverlappingMin = 2,
-    ClampToBorderMaxOnly = 3
+    ClampToBorderMaxOnly = 3,
+    SkipLessThanZero = 4
 };
 
 /**
@@ -79,4 +81,4 @@ enum class TensorSamplerAddressModeZ : int32_t
 
 } // namespace ckw
 
-#endif //CKW_INCLUDE_CKW_TENSORSAMPLERTYPES_H
+#endif // CKW_INCLUDE_CKW_TYPES_TENSORSAMPLERTYPES_H
|