aboutsummaryrefslogtreecommitdiff
path: root/compute_kernel_writer/include
diff options
context:
space:
mode:
author Gunes Bayir <gunes.bayir@arm.com> 2023-08-17 11:04:02 +0100
committer Gunes Bayir <gunes.bayir@arm.com> 2023-08-30 15:45:59 +0000
commit d5f9a1cf9f0340f3e6bf9ff00156fc2adb1fdca9 (patch)
tree af23cff1cb3a504ee51676cd9bfc74b75934fef2 /compute_kernel_writer/include
parent 91cb7336400acc857e20086a23692f99fe11be9c (diff)
download ComputeLibrary-d5f9a1cf9f0340f3e6bf9ff00156fc2adb1fdca9.tar.gz
Implement indirect load for buffer and CLImage
Add KernelWriter API functions for loading from an indirect buffer.

Resolves: COMPMID-6390
Signed-off-by: Gunes Bayir <gunes.bayir@arm.com>
Change-Id: I45dbf88b25ec5caf2b458657ef20aacac9924745
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/10192
Reviewed-by: Viet-Hoa Do <viet-hoa.do@arm.com>
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'compute_kernel_writer/include')
-rw-r--r-- compute_kernel_writer/include/ckw/KernelWriter.h | 15
-rw-r--r-- compute_kernel_writer/include/ckw/types/TensorSamplerTypes.h | 10
2 files changed, 20 insertions, 5 deletions
diff --git a/compute_kernel_writer/include/ckw/KernelWriter.h b/compute_kernel_writer/include/ckw/KernelWriter.h
index 0c8f3de0a1..93ae8aecd6 100644
--- a/compute_kernel_writer/include/ckw/KernelWriter.h
+++ b/compute_kernel_writer/include/ckw/KernelWriter.h
@@ -267,7 +267,7 @@ public:
* @param[in] x x-coordinate
* @param[in] y y-coordinate
* @param[in] z z-coordinate
- * @param[in] batch batch offset
+ * @param[in] batch batch
*/
virtual void op_load(
const TileOperand &tile_op, const TensorOperand &tensor_op, TensorSampler &sampler,
@@ -302,6 +302,19 @@ public:
const TileOperand &x, const TileOperand &y, const TileOperand &z, const TileOperand &batch,
const TileOperand &dilation_x, const TileOperand &dilation_y) = 0;
+ /** Load the data from the tensor memory to the tile using the indirect buffer approach and respecting the sampling information.
+ *
+ * @param[in] tile_op The tile to be loaded.
+ * @param[in] tensor_op The tensor to be read.
+ * @param[in] sampler The tensor sampling information.
+ * @param[in] x x-coordinate
+ * @param[in] y y-coordinate
+ * @param[in] z z-coordinate
+ * @param[in] batch_op batch
+ */
+ virtual void op_load_indirect(const TileOperand &tile_op, const TensorOperand &tensor_op, TensorSampler &sampler,
+ const TileOperand &x, const TileOperand &y, const TileOperand &z, const TileOperand &batch_op) = 0;
+
protected:
// =============================================================================================
// ID space management
diff --git a/compute_kernel_writer/include/ckw/types/TensorSamplerTypes.h b/compute_kernel_writer/include/ckw/types/TensorSamplerTypes.h
index 3a9f4f5722..43dce1d4e4 100644
--- a/compute_kernel_writer/include/ckw/types/TensorSamplerTypes.h
+++ b/compute_kernel_writer/include/ckw/types/TensorSamplerTypes.h
@@ -22,8 +22,8 @@
* SOFTWARE.
*/
-#ifndef CKW_INCLUDE_CKW_TENSORSAMPLERTYPES_H
-#define CKW_INCLUDE_CKW_TENSORSAMPLERTYPES_H
+#ifndef CKW_INCLUDE_CKW_TYPES_TENSORSAMPLERTYPES_H
+#define CKW_INCLUDE_CKW_TYPES_TENSORSAMPLERTYPES_H
#include <cstdint>
@@ -47,6 +47,7 @@ enum class TensorSamplerFormat : int32_t
* Leftover elements can be handled using overlapping. This involves processing some of the elements in the array twice.
* ClampToBorderMaxOnly : Clamp to max value allowed in the corresponding dimension, and construct an if/else guard to prevent out of bound access,
* e.g. if( y < size-of-dimension-y ){ <do the operation> }
+ * SkipLessThanZero : Skip loading/storing if the index is less than 0
*
* Individual dimensions choose which address mode to implement in their respective enum classes.
*/
@@ -65,7 +66,8 @@ enum class TensorSamplerAddressModeY : int32_t
Unknown = 0,
None = 1,
OverlappingMin = 2,
- ClampToBorderMaxOnly = 3
+ ClampToBorderMaxOnly = 3,
+ SkipLessThanZero = 4
};
/**
@@ -79,4 +81,4 @@ enum class TensorSamplerAddressModeZ : int32_t
} // namespace ckw
-#endif //CKW_INCLUDE_CKW_TENSORSAMPLERTYPES_H
+#endif // CKW_INCLUDE_CKW_TYPES_TENSORSAMPLERTYPES_H