aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSiCong Li <sicong.li@arm.com>2023-06-28 09:49:45 +0100
committerSiCong Li <sicong.li@arm.com>2023-07-25 15:48:50 +0000
commit23882a9014eb3972bca958206866c8e0d0b829cc (patch)
tree9139b91699099160e26a64abd8cf182bd7447278
parent0a59e69fd922b02d9e3b5b043ee7f891061df7be (diff)
downloadComputeLibrary-23882a9014eb3972bca958206866c8e0d0b829cc.tar.gz
Add GpuKernelArgumentBinding for runtime argument setting
* Add flexible runtime argument setting that accept argument bindings exported from ckw. * Introduce internal build flag ACL_INTERNAL_TEST_CKW_IN_DF. If set to true, ckw will be tested in dynamic fusion validation tests. Otherwise it will not be tested and the dynamic fusion will keep using ClTemplateWriter instead. * Fix CKW sampler for elementwise binary to deal with tile sizes > 1 in both dimensions Resolves: COMPMID-6282 Partially resolves: COMPMID-6260 Signed-off-by: SiCong Li <sicong.li@arm.com> Change-Id: I0ab225a4484eb2119643d900a4e72806558626ee Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/9917 Tested-by: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Jakub Sujak <jakub.sujak@arm.com> Reviewed-by: Anitha Raj <Anitha.Raj@arm.com> Reviewed-by: Viet-Hoa Do <viet-hoa.do@arm.com> Comments-Addressed: Arm Jenkins <bsgcomp@arm.com> Benchmark: Arm Jenkins <bsgcomp@arm.com>
-rw-r--r--SConscript9
-rw-r--r--compute_kernel_writer/prototype/include/ckw/Kernel.h11
-rw-r--r--compute_kernel_writer/prototype/src/Kernel.cpp10
-rw-r--r--filelist.json144
-rwxr-xr-xscripts/clang_tidy_rules.py3
-rw-r--r--src/dynamic_fusion/runtime/gpu/cl/ClKernelRuntime.cpp81
-rw-r--r--src/dynamic_fusion/runtime/gpu/cl/ClKernelRuntime.h17
-rw-r--r--src/dynamic_fusion/runtime/gpu/cl/ckw_driver/GpuCkwKernelArgumentsHelpers.cpp38
-rw-r--r--src/dynamic_fusion/runtime/gpu/cl/ckw_driver/GpuCkwKernelArgumentsHelpers.h10
-rw-r--r--src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h128
-rw-r--r--src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.cpp6
-rw-r--r--src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.h10
-rw-r--r--src/dynamic_fusion/sketch/gpu/GpuKernelComponentStream.cpp8
-rw-r--r--src/dynamic_fusion/sketch/gpu/GpuKernelComponentStream.h6
-rw-r--r--src/dynamic_fusion/sketch/gpu/GpuKernelSourceCode.h16
-rw-r--r--src/dynamic_fusion/sketch/gpu/GpuLogicalKernel.cpp16
-rw-r--r--src/dynamic_fusion/sketch/gpu/GpuWorkloadContext.cpp43
-rw-r--r--src/dynamic_fusion/sketch/gpu/GpuWorkloadContextImpl.h30
-rw-r--r--src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h44
-rw-r--r--src/dynamic_fusion/sketch/gpu/GpuWorkloadSourceCode.h105
-rw-r--r--src/dynamic_fusion/sketch/gpu/IGpuKernelWriter.h20
-rw-r--r--src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwDriver.cpp45
-rw-r--r--src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwDriver.h7
-rw-r--r--src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.cpp8
-rw-r--r--src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h7
-rw-r--r--src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwActivation.cpp31
-rw-r--r--src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwCast.cpp21
-rw-r--r--src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.cpp33
-rw-r--r--src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.cpp5
-rw-r--r--src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/TypeConverter.h98
-rw-r--r--src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h8
-rw-r--r--src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.cpp26
-rw-r--r--src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.h16
-rw-r--r--src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.cpp26
-rw-r--r--src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.h14
-rw-r--r--src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.cpp27
-rw-r--r--src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.h21
-rw-r--r--src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.cpp26
-rw-r--r--src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.h16
-rw-r--r--tests/validation/dynamic_fusion/gpu/cl/DepthwiseConv2d.cpp3
-rw-r--r--tests/validation/dynamic_fusion/gpu/cl/DirectConv2d.cpp3
-rw-r--r--tests/validation/dynamic_fusion/gpu/cl/Pool2d.cpp5
-rw-r--r--tests/validation/dynamic_fusion/gpu/cl/Reshape.cpp3
-rw-r--r--tests/validation/dynamic_fusion/gpu/cl/Resize.cpp3
-rw-r--r--tests/validation/dynamic_fusion/gpu/cl/Softmax.cpp3
45 files changed, 906 insertions, 304 deletions
diff --git a/SConscript b/SConscript
index da7683ed6c..868bc9f8f5 100644
--- a/SConscript
+++ b/SConscript
@@ -564,7 +564,14 @@ if env['fixed_format_kernels']:
# Experimental files
# Dynamic fusion
if env['experimental_dynamic_fusion']:
- lib_files += filelist['experimental']['dynamic_fusion']
+ lib_files += filelist['experimental']['dynamic_fusion']['common']
+ lib_files += filelist['experimental']['dynamic_fusion']['template_writer']
+
+if "ACL_INTERNAL_TEST_CKW_IN_DF" in env["extra_cxx_flags"]:
+ if not env["experimental_dynamic_fusion"]:
+ print("To use ACL_INTERNAL_TEST_CKW_IN_DF experimental_dynamic_fusion must be set to 1")
+ Exit(1)
+ lib_files += filelist['experimental']['dynamic_fusion']['ckw_driver']
# Logging files
if env["logging"]:
diff --git a/compute_kernel_writer/prototype/include/ckw/Kernel.h b/compute_kernel_writer/prototype/include/ckw/Kernel.h
index 3deb2ace0d..ba31a29ba7 100644
--- a/compute_kernel_writer/prototype/include/ckw/Kernel.h
+++ b/compute_kernel_writer/prototype/include/ckw/Kernel.h
@@ -50,6 +50,11 @@ class Kernel
public:
/** Constructor
*
+ * @param[in] language The programming language to write the kernel.
+ */
+ Kernel(GpuTargetLanguage language);
+ /** Constructor
+ *
* @param[in] name The name of the kernel function.
* @param[in] language The programming language to write the kernel.
*/
@@ -61,6 +66,12 @@ public:
/** Get the name of the kernel function. */
const std::string &name() const;
+ /** Set the name of the kernel function.
+ *
+ * @param[in] name The name of the kernel function.
+ */
+ void name(const std::string &name);
+
/** Get the list of kernel arguments. */
::std::vector<KernelArgument> arguments() const;
diff --git a/compute_kernel_writer/prototype/src/Kernel.cpp b/compute_kernel_writer/prototype/src/Kernel.cpp
index 884b69afc6..095ac879f1 100644
--- a/compute_kernel_writer/prototype/src/Kernel.cpp
+++ b/compute_kernel_writer/prototype/src/Kernel.cpp
@@ -30,11 +30,17 @@
namespace ckw
{
+Kernel::Kernel(GpuTargetLanguage language)
+ : Kernel{"unnamed", language}
+{
+}
+
Kernel::Kernel(const char *name, GpuTargetLanguage language)
: _name(name), _kernel(std::make_unique<prototype::GpuKernelWriterDataHolder>(language)), _operands{}, _tensor_id_operands{}
{
}
+
Kernel::~Kernel()
{
}
@@ -44,6 +50,10 @@ const std::string &Kernel::name() const
return _name;
}
+void Kernel::name(const std::string& name)
+{
+ _name = name;
+}
std::vector<KernelArgument> Kernel::arguments() const
{
std::vector<KernelArgument> arguments;
diff --git a/filelist.json b/filelist.json
index 18fcaa81a0..b7845a760a 100644
--- a/filelist.json
+++ b/filelist.json
@@ -2286,74 +2286,80 @@
}
},
"experimental": {
- "dynamic_fusion": [
- "src/dynamic_fusion/runtime/gpu/cl/ClKernelRuntime.cpp",
- "src/dynamic_fusion/runtime/gpu/cl/ClWorkloadRuntime.cpp",
- "src/dynamic_fusion/sketch/attributes/CastAttributes.cpp",
- "src/dynamic_fusion/sketch/attributes/ClampAttributes.cpp",
- "src/dynamic_fusion/sketch/attributes/Conv2dAttributes.cpp",
- "src/dynamic_fusion/sketch/attributes/DepthwiseConv2dAttributes.cpp",
- "src/dynamic_fusion/sketch/attributes/Pool2dAttributes.cpp",
- "src/dynamic_fusion/sketch/attributes/ResizeAttributes.cpp",
- "src/dynamic_fusion/sketch/attributes/SoftmaxAttributes.cpp",
- "src/dynamic_fusion/sketch/attributes/ReshapeAttributes.cpp",
- "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.cpp",
- "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.cpp",
- "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.cpp",
- "src/dynamic_fusion/sketch/gpu/GpuKernelComponentStream.cpp",
- "src/dynamic_fusion/sketch/gpu/GpuLogicalKernel.cpp",
- "src/dynamic_fusion/sketch/gpu/GpuOperatorGroup.cpp",
- "src/dynamic_fusion/sketch/gpu/GpuWorkloadContext.cpp",
- "src/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.cpp",
- "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.cpp",
- "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.cpp",
- "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDepthwiseConv2d.cpp",
- "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.cpp",
- "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentPool2d.cpp",
- "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.cpp",
- "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DMaxShiftExpSum.cpp",
- "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DNorm.cpp",
- "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentReshape.cpp",
- "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentResize.cpp",
- "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.cpp",
- "src/dynamic_fusion/sketch/gpu/operators/GpuAdd.cpp",
- "src/dynamic_fusion/sketch/gpu/operators/GpuCast.cpp",
- "src/dynamic_fusion/sketch/gpu/operators/GpuClamp.cpp",
- "src/dynamic_fusion/sketch/gpu/operators/GpuConv2d.cpp",
- "src/dynamic_fusion/sketch/gpu/operators/GpuDepthwiseConv2d.cpp",
- "src/dynamic_fusion/sketch/gpu/operators/GpuMul.cpp",
- "src/dynamic_fusion/sketch/gpu/operators/GpuReshape.cpp",
- "src/dynamic_fusion/sketch/gpu/operators/GpuPool2d.cpp",
- "src/dynamic_fusion/sketch/gpu/operators/GpuOutput.cpp",
- "src/dynamic_fusion/sketch/gpu/operators/GpuResize.cpp",
- "src/dynamic_fusion/sketch/gpu/operators/GpuSigmoid.cpp",
- "src/dynamic_fusion/sketch/gpu/operators/GpuSoftmax.cpp",
- "src/dynamic_fusion/sketch/gpu/operators/GpuSub.cpp",
- "src/dynamic_fusion/sketch/gpu/operators/GpuTanh.cpp",
- "src/dynamic_fusion/sketch/gpu/operators/internal/GpuElementwiseBinaryCommon.cpp",
- "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateActivation.cpp",
- "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateCast.cpp",
- "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDepthwiseConv2d.cpp",
- "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.cpp",
- "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplatePool2d.cpp",
- "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateElementwiseBinary.cpp",
- "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DMaxShiftExpSum.cpp",
- "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DNorm.cpp",
- "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateReshape.cpp",
- "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateResize.cpp",
- "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.cpp",
- "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateWriter.cpp",
- "src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.cpp",
- "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwComponentArgument.cpp",
- "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwDriver.cpp",
- "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwActivation.cpp",
- "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwCast.cpp",
- "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.cpp",
- "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.cpp",
- "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.cpp",
- "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.cpp",
- "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.cpp",
- "src/dynamic_fusion/runtime/gpu/cl/ckw_driver/GpuCkwKernelArgumentsHelpers.cpp"
- ]
+ "dynamic_fusion": {
+ "common": [
+ "src/dynamic_fusion/runtime/gpu/cl/ClKernelRuntime.cpp",
+ "src/dynamic_fusion/runtime/gpu/cl/ClWorkloadRuntime.cpp",
+ "src/dynamic_fusion/sketch/attributes/CastAttributes.cpp",
+ "src/dynamic_fusion/sketch/attributes/ClampAttributes.cpp",
+ "src/dynamic_fusion/sketch/attributes/Conv2dAttributes.cpp",
+ "src/dynamic_fusion/sketch/attributes/DepthwiseConv2dAttributes.cpp",
+ "src/dynamic_fusion/sketch/attributes/Pool2dAttributes.cpp",
+ "src/dynamic_fusion/sketch/attributes/ResizeAttributes.cpp",
+ "src/dynamic_fusion/sketch/attributes/SoftmaxAttributes.cpp",
+ "src/dynamic_fusion/sketch/attributes/ReshapeAttributes.cpp",
+ "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.cpp",
+ "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.cpp",
+ "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.cpp",
+ "src/dynamic_fusion/sketch/gpu/GpuKernelComponentStream.cpp",
+ "src/dynamic_fusion/sketch/gpu/GpuLogicalKernel.cpp",
+ "src/dynamic_fusion/sketch/gpu/GpuOperatorGroup.cpp",
+ "src/dynamic_fusion/sketch/gpu/GpuWorkloadContext.cpp",
+ "src/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.cpp",
+ "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.cpp",
+ "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.cpp",
+ "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDepthwiseConv2d.cpp",
+ "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.cpp",
+ "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentPool2d.cpp",
+ "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.cpp",
+ "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DMaxShiftExpSum.cpp",
+ "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DNorm.cpp",
+ "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentReshape.cpp",
+ "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentResize.cpp",
+ "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.cpp",
+ "src/dynamic_fusion/sketch/gpu/operators/GpuAdd.cpp",
+ "src/dynamic_fusion/sketch/gpu/operators/GpuCast.cpp",
+ "src/dynamic_fusion/sketch/gpu/operators/GpuClamp.cpp",
+ "src/dynamic_fusion/sketch/gpu/operators/GpuConv2d.cpp",
+ "src/dynamic_fusion/sketch/gpu/operators/GpuDepthwiseConv2d.cpp",
+ "src/dynamic_fusion/sketch/gpu/operators/GpuMul.cpp",
+ "src/dynamic_fusion/sketch/gpu/operators/GpuReshape.cpp",
+ "src/dynamic_fusion/sketch/gpu/operators/GpuPool2d.cpp",
+ "src/dynamic_fusion/sketch/gpu/operators/GpuOutput.cpp",
+ "src/dynamic_fusion/sketch/gpu/operators/GpuResize.cpp",
+ "src/dynamic_fusion/sketch/gpu/operators/GpuSigmoid.cpp",
+ "src/dynamic_fusion/sketch/gpu/operators/GpuSoftmax.cpp",
+ "src/dynamic_fusion/sketch/gpu/operators/GpuSub.cpp",
+ "src/dynamic_fusion/sketch/gpu/operators/GpuTanh.cpp",
+ "src/dynamic_fusion/sketch/gpu/operators/internal/GpuElementwiseBinaryCommon.cpp"
+ ],
+ "template_writer": [
+ "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateActivation.cpp",
+ "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateCast.cpp",
+ "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDepthwiseConv2d.cpp",
+ "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.cpp",
+ "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplatePool2d.cpp",
+ "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateElementwiseBinary.cpp",
+ "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DMaxShiftExpSum.cpp",
+ "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DNorm.cpp",
+ "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateReshape.cpp",
+ "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateResize.cpp",
+ "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.cpp",
+ "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateWriter.cpp",
+ "src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.cpp"
+ ],
+ "ckw_driver": [
+ "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwComponentArgument.cpp",
+ "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwDriver.cpp",
+ "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwActivation.cpp",
+ "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwCast.cpp",
+ "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.cpp",
+ "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.cpp",
+ "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.cpp",
+ "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.cpp",
+ "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.cpp",
+ "src/dynamic_fusion/runtime/gpu/cl/ckw_driver/GpuCkwKernelArgumentsHelpers.cpp"
+ ]
+ }
}
}
diff --git a/scripts/clang_tidy_rules.py b/scripts/clang_tidy_rules.py
index 11ca6c45f5..1e1ab7f545 100755
--- a/scripts/clang_tidy_rules.py
+++ b/scripts/clang_tidy_rules.py
@@ -43,6 +43,9 @@ def get_list_flags( filename, arch):
flags.append("-DARM_COMPUTE_OPENCL_ENABLED")
if arch == "aarch64":
flags.append("-DARM_COMPUTE_AARCH64_V8_2")
+ if "ckw_driver" in filename:
+ flags.append("-DACL_INTERNAL_TEST_CKW_IN_DF")
+
return flags
def filter_files( list_files ):
diff --git a/src/dynamic_fusion/runtime/gpu/cl/ClKernelRuntime.cpp b/src/dynamic_fusion/runtime/gpu/cl/ClKernelRuntime.cpp
index 6a57482bb2..92ca8557f1 100644
--- a/src/dynamic_fusion/runtime/gpu/cl/ClKernelRuntime.cpp
+++ b/src/dynamic_fusion/runtime/gpu/cl/ClKernelRuntime.cpp
@@ -24,6 +24,9 @@
#include "ClKernelRuntime.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "src/core/CL/CLUtils.h"
+#ifdef ACL_INTERNAL_TEST_CKW_IN_DF
+#include "src/dynamic_fusion/runtime/gpu/cl/ckw_driver/GpuCkwKernelArgumentsHelpers.h"
+#endif // ACL_INTERNAL_TEST_CKW_IN_DF
#include "src/dynamic_fusion/sketch/gpu/GpuKernelSourceCode.h"
#include "src/gpu/cl/ClKernelLibrary.h"
@@ -57,6 +60,8 @@ void ClKernelRuntime::configure(const ClCompileContext &compile_ctx, const GpuKe
_arguments = code.arguments();
}
+#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
+
inline void ClKernelRuntime::add_tensor_argument(unsigned int &idx, const GpuKernelArgumentInfo &arg, const ICLTensor *tensor, const Window &arg_slice, std::vector<cl::Image2D> &cl_images)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(tensor);
@@ -163,21 +168,65 @@ inline void ClKernelRuntime::add_tensor_argument(unsigned int &idx, const GpuKer
}
}
+#else // ACL_INTERNAL_TEST_CKW_IN_DF
+inline void ClKernelRuntime::add_kernel_argument(unsigned int &idx, const GpuKernelArgumentBinding &arg, const ICLTensor *tensor, std::vector<cl::Image2D> &cl_images)
+{
+ switch(arg.type())
+ {
+ case GpuKernelArgumentBinding::Type::TensorStorage:
+ {
+ switch(arg.tensor_storage_type())
+ {
+ case TensorStorageType::ClBufferUint8Ptr:
+ {
+ cl_add_buffer_argument(_kernel, idx, tensor->cl_buffer());
+ break;
+ }
+ case TensorStorageType::ClImage2dReadOnly:
+ {
+ cl::Image2D tensor_image2d = create_image2d_from_tensor(tensor, CLImage2DType::ReadOnly);
+ cl_images.push_back(tensor_image2d);
+ cl_add_texture_argument(_kernel, idx, tensor_image2d);
+ break;
+ }
+ case TensorStorageType::ClImage2dWriteOnly:
+ {
+ cl::Image2D tensor_image2d = create_image2d_from_tensor(tensor, CLImage2DType::WriteOnly);
+ cl_images.push_back(tensor_image2d);
+ cl_add_texture_argument(_kernel, idx, tensor_image2d);
+ break;
+ }
+ default:
+ {
+ ARM_COMPUTE_ERROR("Do not accept other TensorStorageType");
+ break;
+ }
+ }
+ break;
+ }
+ case GpuKernelArgumentBinding::Type::TensorComponent:
+ {
+ cl_add_tensor_component_argument(_kernel, idx, tensor, arg.tensor_component_type());
+ break;
+ }
+ default:
+ {
+ ARM_COMPUTE_ERROR("Do not accept other types of kernel arguments");
+ break;
+ }
+ }
+}
+
+#endif // ACL_INTERNAL_TEST_CKW_IN_DF
void ClKernelRuntime::run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue)
{
ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
Window slice = window.first_slice_window_3D();
- // Don't slice matrix along the z dimension if matrix has just 2 dimensions and matrix A more than 2
- // This scenario can happen when the matrix multiplication is used to perform a convolution operation
- Window slice_fixed_z = slice;
- slice_fixed_z.set(Window::DimX, Window::Dimension(0, 1, 1));
- slice_fixed_z.set(Window::DimY, Window::Dimension(0, 1, 1));
/// NOTE: Parameters extracted from old kernels. So far they seem to be constant
/// but we may need to make them into another configuration passed from GpuWorkloadSourceCode if needed in the future
- constexpr bool slide_along_dimz = true;
constexpr bool skip_sliding_window = false;
constexpr bool use_dummy_work_items = false;
@@ -185,23 +234,27 @@ void ClKernelRuntime::run_op(ITensorPack &tensors, const Window &window, cl::Com
do
{
// Set kernel arguments
- Window arg_slice = slice;
// CLImages created from tensor arguments. Need to be retained until enqueue
std::vector<cl::Image2D> cl_images;
+#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
for(auto id_arg : _arguments)
{
const auto arg = id_arg.second;
auto tensor = utils::cast::polymorphic_downcast<ICLTensor *>(tensors.get_tensor(id_arg.first));
ARM_COMPUTE_ERROR_ON_NULLPTR(tensor);
ARM_COMPUTE_ERROR_ON_NULLPTR(tensor->info());
- if(!slide_along_dimz)
- {
- // The stride_z for matrix must be zero if we do not slice
- ARM_COMPUTE_ERROR_ON(tensor->info()->strides_in_bytes()[3] != 0);
- arg_slice = slice_fixed_z;
- }
- add_tensor_argument(idx, *arg.kernel_argument_info(), tensor, arg_slice, cl_images);
+ add_tensor_argument(idx, *arg.kernel_argument_info(), tensor, slice, cl_images);
+ }
+
+#else // ACL_INTERNAL_TEST_CKW_IN_DF
+ for(const auto &arg : _arguments)
+ {
+ auto tensor = utils::cast::polymorphic_downcast<ICLTensor *>(tensors.get_tensor(arg.id()));
+ ARM_COMPUTE_ERROR_ON_NULLPTR(tensor);
+ ARM_COMPUTE_ERROR_ON_NULLPTR(tensor->info());
+ add_kernel_argument(idx, arg, tensor, cl_images);
}
+#endif // ACL_INTERNAL_TEST_CKW_IN_DF
// Dispatch kernel
enqueue(queue, *this, slice, lws_hint(), use_dummy_work_items);
diff --git a/src/dynamic_fusion/runtime/gpu/cl/ClKernelRuntime.h b/src/dynamic_fusion/runtime/gpu/cl/ClKernelRuntime.h
index 4787acabcd..92e73503ce 100644
--- a/src/dynamic_fusion/runtime/gpu/cl/ClKernelRuntime.h
+++ b/src/dynamic_fusion/runtime/gpu/cl/ClKernelRuntime.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2022 Arm Limited.
+ * Copyright (c) 2022-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -29,6 +29,8 @@
#include "src/gpu/cl/ClCompileContext.h"
#include "src/gpu/cl/IClKernel.h"
+#include <vector>
+
namespace arm_compute
{
namespace experimental
@@ -57,6 +59,7 @@ public:
virtual void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override;
private:
+#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
/** Set a kernel tensor argument
*
* @param[in,out] idx Index at which to start adding the tensor's arguments. Will be incremented by the number of kernel arguments set.
@@ -66,9 +69,19 @@ private:
* @param[out] cl_images Extra cl images created from the tensor (will need to be retained until the kernel is enqueued)
*/
inline void add_tensor_argument(unsigned int &idx, const GpuKernelArgumentInfo &arg, const ICLTensor *tensor, const Window &arg_slice, std::vector<cl::Image2D> &cl_images);
+#else // ACL_INTERNAL_TEST_CKW_IN_DF
+ /** Set a kernel argument as part of a tensor
+ *
+ * @param[in,out] idx Index at which to start adding the tensor's arguments. Will be incremented by the number of kernel arguments set.
+ * @param[in] arg Kernel argument binding, as part of @p tensor
+ * @param[in] tensor Tensor of which the kernel argument @p arg is a part of
+ * @param[out] cl_images Extra cl images created from the tensor (will need to be retained until the kernel is enqueued)
+ */
+ inline void add_kernel_argument(unsigned int &idx, const GpuKernelArgumentBinding &arg, const ICLTensor *tensor, std::vector<cl::Image2D> &cl_images);
+#endif // ACL_INTERNAL_TEST_CKW_IN_DF
private:
- GpuKernelArgumentList _arguments{}; /** All kernel arguments required by the runtime */
+ GpuKernelArgumentList _arguments{};
};
} // namespace dynamic_fusion
diff --git a/src/dynamic_fusion/runtime/gpu/cl/ckw_driver/GpuCkwKernelArgumentsHelpers.cpp b/src/dynamic_fusion/runtime/gpu/cl/ckw_driver/GpuCkwKernelArgumentsHelpers.cpp
index b273c2a20c..84fb279237 100644
--- a/src/dynamic_fusion/runtime/gpu/cl/ckw_driver/GpuCkwKernelArgumentsHelpers.cpp
+++ b/src/dynamic_fusion/runtime/gpu/cl/ckw_driver/GpuCkwKernelArgumentsHelpers.cpp
@@ -26,7 +26,11 @@
namespace arm_compute
{
-void cl_add_tensor_component_argument(cl::Kernel &kernel, unsigned int &idx, ICLTensor *tensor, ckw::TensorComponentType component)
+namespace experimental
+{
+namespace dynamic_fusion
+{
+void cl_add_tensor_component_argument(cl::Kernel &kernel, unsigned int &idx, const ICLTensor *tensor, TensorComponentType component)
{
ARM_COMPUTE_ERROR_ON(tensor == nullptr);
@@ -35,49 +39,49 @@ void cl_add_tensor_component_argument(cl::Kernel &kernel, unsigned int &idx, ICL
switch(component)
{
- case ckw::TensorComponentType::OffsetFirstElement:
+ case TensorComponentType::OffsetFirstElement:
kernel.setArg<cl_uint>(idx++, info->offset_first_element_in_bytes());
break;
- case ckw::TensorComponentType::Stride0:
+ case TensorComponentType::Stride0:
kernel.setArg<cl_uint>(idx++, strides[0]);
break;
- case ckw::TensorComponentType::Stride1:
+ case TensorComponentType::Stride1:
kernel.setArg<cl_uint>(idx++, strides[1]);
break;
- case ckw::TensorComponentType::Stride2:
+ case TensorComponentType::Stride2:
kernel.setArg<cl_uint>(idx++, strides[2]);
break;
- case ckw::TensorComponentType::Stride3:
+ case TensorComponentType::Stride3:
kernel.setArg<cl_uint>(idx++, strides[3]);
break;
- case ckw::TensorComponentType::Stride4:
+ case TensorComponentType::Stride4:
kernel.setArg<cl_uint>(idx++, strides[4]);
break;
- case ckw::TensorComponentType::Dim0:
+ case TensorComponentType::Dim0:
kernel.setArg<cl_uint>(idx++, info->dimension(0));
break;
- case ckw::TensorComponentType::Dim1:
+ case TensorComponentType::Dim1:
kernel.setArg<cl_uint>(idx++, info->dimension(1));
break;
- case ckw::TensorComponentType::Dim2:
+ case TensorComponentType::Dim2:
kernel.setArg<cl_uint>(idx++, info->dimension(2));
break;
- case ckw::TensorComponentType::Dim3:
+ case TensorComponentType::Dim3:
kernel.setArg<cl_uint>(idx++, info->dimension(3));
break;
- case ckw::TensorComponentType::Dim4:
+ case TensorComponentType::Dim4:
kernel.setArg<cl_uint>(idx++, info->dimension(4));
break;
- case ckw::TensorComponentType::Dim1xDim2:
+ case TensorComponentType::Dim1xDim2:
kernel.setArg<cl_uint>(idx++, info->dimension(1) * info->dimension(2));
break;
- case ckw::TensorComponentType::Dim2xDim3:
+ case TensorComponentType::Dim2xDim3:
kernel.setArg<cl_uint>(idx++, info->dimension(2) * info->dimension(3));
break;
- case ckw::TensorComponentType::Dim1xDim2xDim3:
+ case TensorComponentType::Dim1xDim2xDim3:
kernel.setArg<cl_uint>(idx++, info->dimension(1) * info->dimension(2) * info->dimension(3));
break;
- case ckw::TensorComponentType::Unknown:
+ case TensorComponentType::Unknown:
default:
ARM_COMPUTE_ERROR("Unknown tensor component");
}
@@ -93,4 +97,6 @@ void cl_add_texture_argument(cl::Kernel &kernel, unsigned int &idx, const cl::Im
kernel.setArg(idx++, image);
}
+} // namespace dynamic_fusion
+} // namespace experimental
} // namespace arm_compute
diff --git a/src/dynamic_fusion/runtime/gpu/cl/ckw_driver/GpuCkwKernelArgumentsHelpers.h b/src/dynamic_fusion/runtime/gpu/cl/ckw_driver/GpuCkwKernelArgumentsHelpers.h
index 3c785732a5..4cbb157a48 100644
--- a/src/dynamic_fusion/runtime/gpu/cl/ckw_driver/GpuCkwKernelArgumentsHelpers.h
+++ b/src/dynamic_fusion/runtime/gpu/cl/ckw_driver/GpuCkwKernelArgumentsHelpers.h
@@ -27,10 +27,14 @@
#include "arm_compute/core/CL/ICLTensor.h"
-#include "ckw/TensorInfo.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h"
namespace arm_compute
{
+namespace experimental
+{
+namespace dynamic_fusion
+{
/** Select a Compute Kernel Writer tensor component from a tensor and add to the kernel's arguments at the specified index idx.
*
* @param[in,out] kernel OpenCL kernel to configure with the provided argument.
@@ -38,7 +42,7 @@ namespace arm_compute
* @param[in] tensor Tensor from which to access the tensor component.
* @param[in] component Tensor component to select such as tensor dimensions, strides, etc.
*/
-void cl_add_tensor_component_argument(cl::Kernel &kernel, unsigned int &idx, ICLTensor *tensor, ckw::TensorComponentType component);
+void cl_add_tensor_component_argument(cl::Kernel &kernel, unsigned int &idx, const ICLTensor *tensor, TensorComponentType component);
/** Add an OpenCL buffer object to the kernel's arguments at the specified index @p idx.
*
@@ -56,6 +60,8 @@ void cl_add_buffer_argument(cl::Kernel &kernel, unsigned int &idx, const cl::Buf
*/
void cl_add_texture_argument(cl::Kernel &kernel, unsigned int &idx, const cl::Image &image);
+} // namespace dynamic_fusion
+} // namespace experimental
} // namespace arm_compute
#endif /* ACL_SRC_DYNAMIC_FUSION_RUNTIME_GPU_CL_CKW_DRIVER_GPUCKWKERNELARGUMENTSHELPERS */
diff --git a/src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h b/src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h
index 302d4c8562..226e1a2df3 100644
--- a/src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h
+++ b/src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h
@@ -33,6 +33,7 @@ namespace experimental
namespace dynamic_fusion
{
/** Contain information required to set up a kernel argument at run time
+ * @deprecated To be removed along with ClTemplateWriter
*/
struct GpuKernelArgumentInfo
{
@@ -66,10 +67,9 @@ struct GpuKernelArgumentInfo
}
Type type{ Type::Tensor_4D_t_Buffer };
};
-
bool operator==(const GpuKernelArgumentInfo &info0, const GpuKernelArgumentInfo &info1);
-
/** Kernel argument information linked with its corresponding @ref ITensorInfo
+ * @deprecated To be removed along with ClTemplateWriter
*/
class GpuKernelArgument
{
@@ -124,6 +124,130 @@ private:
TensorInfo _tensor_info{};
GpuKernelArgumentInfo _kernel_arg_info{};
};
+#ifdef ACL_INTERNAL_TEST_CKW_IN_DF
+/** Describe how the tensor runtime memory can be accessed
+ *
+ * Please see documentation under @ref GpuKernelArgumentBinding
+ */
+enum class TensorStorageType
+{
+ Unknown,
+ ClBufferUint8Ptr,
+ ClImage2dReadOnly,
+ ClImage2dWriteOnly,
+};
+
+/** Describe additional runtime information about the tensor
+ *
+ * Please see documentation under @ref GpuKernelArgumentBinding
+ */
+enum class TensorComponentType
+{
+ Unknown,
+ OffsetFirstElement,
+ Stride0,
+ Stride1,
+ Stride2,
+ Stride3,
+ Stride4,
+ Dim0,
+ Dim1,
+ Dim2,
+ Dim3,
+ Dim4,
+ Dim1xDim2,
+ Dim2xDim3,
+ Dim1xDim2xDim3,
+};
+
+/** Describe how to extract information from a runtime Gpu tensor, and set it as an argument to a gpu kernel at runtime
+ *
+ * A kernel argument is just an argument to the gpu kernel as shown in the argument list below. This contrasts with a "workload argument" which is a tensor (@ref GpuWorkloadArgument)
+ * void kernel(arg0, arg1, ... argN)
+ *
+ * In a kernel generated using dynamic fusion (@ref GpuKernelSourceCode), every kernel argument describes part of a tensor.
+ * A tensor is described as: **storages** followed by **components**
+ *
+ * A storage (@ref TensorStorageType) describes how the tensor runtime memory can be accessed (e.g. via a global uint8 pointer to a CL buffer)
+ * A component (@ref TensorComponentType) describes additional runtime information about the tensor (e.g. the dimensions of the tensor)
+ *
+ * The arguments are arranged in the order of use in the generated kernel code:
+ *
+ * arg0 , arg1 , arg2 , ..., , argN
+ * storage, component0, component1, ..., componentX, storage, component0, component1, ..., componentY
+ * | tensor0 | tensor1 |
+ *
+ * An example argument list:
+ *
+ * void kernel(
+ * image2d_t t0_image, // TensorStorageType::ClImage2dReadOnly
+ * uint8_t* t0_ptr, // TensorStorageType::ClBufferUint8Ptr
+ * uint t0_dim0, // TensorComponentType::Dim0
+ * uint t0_stride1, // TensorComponentType::Stride1
+ * image2d_t t1_ptr, // TensorStorageType::ClImage2dReadOnly
+ * uint t1_dim1xdim2, // TensorComponentType::Dim1xDim2
+ * uint t1_stride1, // TensorComponentType::Stride1
+ * uint t1_stride2, // TensorComponentType:Stride2
+ * )
+ *
+ */
+class GpuKernelArgumentBinding
+{
+public:
+ enum class Type : int32_t
+ {
+ TensorStorage, /** @ref TensorStorageType */
+ TensorComponent /** @ref TensorComponentType */
+ };
+ GpuKernelArgumentBinding(ITensorInfo::Id id, TensorStorageType storage)
+ : _type{ Type::TensorStorage }, _id{ id }, _value{}
+ {
+ _value.tensor_storage_type = storage;
+ }
+ GpuKernelArgumentBinding(ITensorInfo::Id id, TensorComponentType component)
+ : _type{ Type::TensorComponent }, _id{ id }, _value{}
+ {
+ _value.tensor_component_type = component;
+ }
+ /** Storage type of the tensor
+ */
+ TensorStorageType tensor_storage_type() const
+ {
+ ARM_COMPUTE_ERROR_ON(_type != Type::TensorStorage);
+ return _value.tensor_storage_type;
+ }
+ /** Component of the tensor
+ */
+ TensorComponentType tensor_component_type() const
+ {
+ ARM_COMPUTE_ERROR_ON(_type != Type::TensorComponent);
+ return _value.tensor_component_type;
+ }
+ /** Id of the tensor this kernel argument belongs to
+ */
+ ITensorInfo::Id id() const
+ {
+ return _id;
+ }
+ /** Type of the kernel argument
+ */
+ Type type() const
+ {
+ return _type;
+ }
+
+private:
+ Type _type;
+ ITensorInfo::Id _id;
+ union Value
+ {
+ TensorStorageType tensor_storage_type;
+ TensorComponentType tensor_component_type;
+ };
+ Value _value;
+};
+#endif // ACL_INTERNAL_TEST_CKW_IN_DF
+
} // namespace dynamic_fusion
} // namespace experimental
} // namespace arm_compute
diff --git a/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.cpp b/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.cpp
index b70a192775..5a65ede38b 100644
--- a/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.cpp
+++ b/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.cpp
@@ -44,14 +44,14 @@ std::vector<DependencyGraph::TensorId> GpuKernelComponentGraph::get_tensor_ids(c
return tensor_ids;
}
-GpuKernelComponentGraph::GpuKernelComponentGraph(GpuComponentServices *services)
- : _services{ services }, _components{}, _tensors{}, _dependency_graph{}
+GpuKernelComponentGraph::GpuKernelComponentGraph(GpuWorkloadContext *context, GpuComponentServices *services)
+ : _context{ context }, _services{ services }, _components{}, _tensors{}, _dependency_graph{}
{
}
GpuKernelComponentStream GpuKernelComponentGraph::fuse(const MemoryDescriptorMap &mem_map) const
{
- GpuKernelComponentStream stream{ _services, mem_map };
+ GpuKernelComponentStream stream{ _context, _services, mem_map };
const auto op_seq = _dependency_graph.build_operators_sequence();
stream.new_component_group();
diff --git a/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.h b/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.h
index 8314ea0a50..85c9b45840 100644
--- a/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.h
+++ b/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.h
@@ -21,8 +21,8 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELCOMPONENTGRAPH
-#define SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELCOMPONENTGRAPH
+#ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELCOMPONENTGRAPH
+#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELCOMPONENTGRAPH
#include "src/dynamic_fusion/sketch/ArgumentPack.h"
#include "src/dynamic_fusion/sketch/gpu/GpuComponentServices.h"
@@ -49,9 +49,10 @@ class GpuKernelComponentGraph
public:
/** Constructor
*
+ * @param[in] context @ref GpuWorkloadContext to be used by the graph
* @param[in] services @ref GpuComponentServices to be used by the graph
*/
- GpuKernelComponentGraph(GpuComponentServices *services);
+ GpuKernelComponentGraph(GpuWorkloadContext *context, GpuComponentServices *services);
/** Prevent instances of this class from being copy constructed */
GpuKernelComponentGraph(const GpuKernelComponentGraph &graph) = delete;
/** Prevent instances of this class from being copied */
@@ -98,6 +99,7 @@ public:
private:
static std::vector<DependencyGraph::TensorId> get_tensor_ids(const std::vector<const ITensorInfo *> tensors);
+ GpuWorkloadContext *_context;
GpuComponentServices *_services;
std::map<ComponentId, std::unique_ptr<IGpuKernelComponent>> _components;
std::map<ITensorInfo::Id, const ITensorInfo *> _tensors;
@@ -106,4 +108,4 @@ private:
} // namespace dynamic_fusion
} // namespace experimental
} // namespace arm_compute
-#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELCOMPONENTGRAPH */
+#endif /* ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELCOMPONENTGRAPH */
diff --git a/src/dynamic_fusion/sketch/gpu/GpuKernelComponentStream.cpp b/src/dynamic_fusion/sketch/gpu/GpuKernelComponentStream.cpp
index 8f4eadc477..a2b6623370 100644
--- a/src/dynamic_fusion/sketch/gpu/GpuKernelComponentStream.cpp
+++ b/src/dynamic_fusion/sketch/gpu/GpuKernelComponentStream.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2022 Arm Limited.
+ * Copyright (c) 2022-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -33,8 +33,8 @@ namespace experimental
{
namespace dynamic_fusion
{
-GpuKernelComponentStream::GpuKernelComponentStream(GpuComponentServices *services, const MemoryDescriptorMap &mem_map)
- : _services{ services }, _component_groups{}, _mem_map{ mem_map }
+GpuKernelComponentStream::GpuKernelComponentStream(GpuWorkloadContext *context, GpuComponentServices *services, const MemoryDescriptorMap &mem_map)
+ : _context{ context }, _services{ services }, _component_groups{}, _mem_map{ mem_map }
{
}
@@ -51,7 +51,7 @@ GpuWorkloadSourceCode GpuKernelComponentStream::write_workload_code()
const GpuKernelSourceCode kernel_code = logical_kernel.write_kernel_code();
// The whole unit workload stage is determined by the root component
const auto unit_workload_stage = group.get_root_component()->properties().stage();
- source_code.add_unit_workload(kernel_code, unit_workload_stage, _mem_map);
+ source_code.add_unit_workload(kernel_code, unit_workload_stage, _mem_map, _context);
}
return source_code;
}
diff --git a/src/dynamic_fusion/sketch/gpu/GpuKernelComponentStream.h b/src/dynamic_fusion/sketch/gpu/GpuKernelComponentStream.h
index cbaa7c297b..ba2503a938 100644
--- a/src/dynamic_fusion/sketch/gpu/GpuKernelComponentStream.h
+++ b/src/dynamic_fusion/sketch/gpu/GpuKernelComponentStream.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2022 Arm Limited.
+ * Copyright (c) 2022-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -49,10 +49,11 @@ class GpuKernelComponentStream
public:
/** Constructor
*
+ * @param[in] context @ref GpuWorkloadContext to be used throughout the stream
* @param[in] services @ref GpuComponentServices to be used throughout the stream
* @param[in] mem_map @ref MemoryDescriptor map used to assemble the @ref GpuWorkloadSourceCode
*/
- GpuKernelComponentStream(GpuComponentServices *services, const MemoryDescriptorMap &mem_map);
+ GpuKernelComponentStream(GpuWorkloadContext *context, GpuComponentServices *services, const MemoryDescriptorMap &mem_map);
/** Allow instances of this class to be copy constructed */
GpuKernelComponentStream(const GpuKernelComponentStream &stream) = default;
/** Allow instances of this class to be copied */
@@ -78,6 +79,7 @@ public:
bool add_component(IGpuKernelComponent *component);
private:
+ GpuWorkloadContext *_context;
GpuComponentServices *_services;
std::vector<GpuKernelComponentGroup> _component_groups{};
MemoryDescriptorMap _mem_map{};
diff --git a/src/dynamic_fusion/sketch/gpu/GpuKernelSourceCode.h b/src/dynamic_fusion/sketch/gpu/GpuKernelSourceCode.h
index 7479328d7b..64e1cdc3bc 100644
--- a/src/dynamic_fusion/sketch/gpu/GpuKernelSourceCode.h
+++ b/src/dynamic_fusion/sketch/gpu/GpuKernelSourceCode.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2022 Arm Limited.
+ * Copyright (c) 2022-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,14 +21,18 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELSOURCECODE
-#define SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELSOURCECODE
+#ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELSOURCECODE
+#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELSOURCECODE
#include "arm_compute/core/CL/CLCompileContext.h"
#include "arm_compute/core/Window.h"
#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h"
+#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
#include <map>
+#else // ACL_INTERNAL_TEST_CKW_IN_DF
+#include <deque>
+#endif // ACL_INTERNAL_TEST_CKW_IN_DF
#include <string>
namespace arm_compute
@@ -38,7 +42,11 @@ namespace experimental
namespace dynamic_fusion
{
/** The argument list of a @ref GpuKernelSourceCode */
+#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
using GpuKernelArgumentList = std::map<ITensorInfo::Id, GpuKernelArgument>;
+#else // ACL_INTERNAL_TEST_CKW_IN_DF
+using GpuKernelArgumentList = std::deque<GpuKernelArgumentBinding>;
+#endif // ACL_INTERNAL_TEST_CKW_IN_DF
/** Container of kernel code to be compiled and run in a @ref GpuUnitWorkload
*/
@@ -123,4 +131,4 @@ private:
} // namespace dynamic_fusion
} // namespace experimental
} // namespace arm_compute
-#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELSOURCECODE */
+#endif /* ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELSOURCECODE */
diff --git a/src/dynamic_fusion/sketch/gpu/GpuLogicalKernel.cpp b/src/dynamic_fusion/sketch/gpu/GpuLogicalKernel.cpp
index 00f625de28..c99984fc0e 100644
--- a/src/dynamic_fusion/sketch/gpu/GpuLogicalKernel.cpp
+++ b/src/dynamic_fusion/sketch/gpu/GpuLogicalKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2022 Arm Limited.
+ * Copyright (c) 2022-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -29,7 +29,11 @@
#include "src/dynamic_fusion/sketch/gpu/GpuComponentServices.h"
#include "src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h"
#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.h"
+#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
#include "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateWriter.h"
+#else // ACL_INTERNAL_TEST_CKW_IN_DF
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwDriver.h"
+#endif // ACL_INTERNAL_TEST_CKW_IN_DF
namespace arm_compute
{
@@ -46,11 +50,19 @@ GpuLogicalKernel::GpuLogicalKernel(GpuComponentServices *services, const GpuKern
GpuKernelSourceCode GpuLogicalKernel::write_kernel_code()
{
GpuKernelSourceCode code;
- ClTemplateWriter writer{ _comp_group };
+#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
+ ClTemplateWriter writer { _comp_group };
+#else // ACL_INTERNAL_TEST_CKW_IN_DF
+ GpuCkwDriver writer { _comp_group };
+#endif // ACL_INTERNAL_TEST_CKW_IN_DF
code.name(writer.get_name());
code.code(writer.get_code());
+#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
code.arguments(writer.get_tensors());
+#else // ACL_INTERNAL_TEST_CKW_IN_DF
+ code.arguments(writer.get_kernel_arguments());
+#endif // ACL_INTERNAL_TEST_CKW_IN_DF
code.build_options(writer.get_build_options());
code.config_id(writer.get_config_id());
code.window(writer.get_window());
diff --git a/src/dynamic_fusion/sketch/gpu/GpuWorkloadContext.cpp b/src/dynamic_fusion/sketch/gpu/GpuWorkloadContext.cpp
index 50f34d9c14..c2bd012703 100644
--- a/src/dynamic_fusion/sketch/gpu/GpuWorkloadContext.cpp
+++ b/src/dynamic_fusion/sketch/gpu/GpuWorkloadContext.cpp
@@ -32,9 +32,8 @@ namespace experimental
{
namespace dynamic_fusion
{
-
GpuWorkloadContext::GpuWorkloadContext(CLCompileContext *cl_compile_ctx)
- : _impl { std::make_unique<Impl>(GpuLanguage::OpenCL, cl_compile_ctx) }
+ : _impl{ std::make_unique<Impl>(GpuLanguage::OpenCL, cl_compile_ctx) }
{
}
@@ -75,8 +74,7 @@ const GpuWorkloadContext::Impl &GpuWorkloadContext::implementation() const
}
GpuWorkloadContext::Impl::Impl(GpuLanguage gpu_language, CLCompileContext *cl_compile_ctx)
- : _gpu_language(gpu_language), _cl_compile_ctx(cl_compile_ctx),
- _next_tensor_id(1), _mem_map()
+ : _gpu_language(gpu_language), _cl_compile_ctx(cl_compile_ctx), _next_tensor_id(1), _mem_map(), _managed_tensor_info()
{
}
@@ -103,26 +101,39 @@ void GpuWorkloadContext::Impl::register_user_tensor(ITensorInfo &tensor_info)
tensor_info.set_id(tensor_id);
_mem_map[tensor_id] = MemoryDescriptor{ MemoryType::User };
+ // Save a *copy* of the user tensor info in workload context for future reference
+ // Note that this means if the user modifies the @p tensor_info, the change will not be reflected in the context
+ _managed_tensor_info.emplace(tensor_info.id(), std::make_unique<TensorInfo>(tensor_info));
}
-void GpuWorkloadContext::Impl::register_aux_tensor(ITensorInfo &tensor_info, const AuxMemoryInfo &mem_info)
+ITensorInfo *GpuWorkloadContext::Impl::create_virtual_tensor()
{
- ARM_COMPUTE_ERROR_ON(tensor_info.has_valid_id());
-
- const auto tensor_id = next_tensor_id();
-
- tensor_info.set_id(tensor_id);
- _mem_map[tensor_id] = MemoryDescriptor{ MemoryType::Auxiliary, mem_info };
+ auto tensor_info = std::make_unique<TensorInfo>();
+ const auto tensor_id = -next_tensor_id();
+ tensor_info->set_id(tensor_id);
+ _mem_map[tensor_id] = MemoryDescriptor{ MemoryType::Virtual };
+ auto inserted = _managed_tensor_info.emplace(tensor_info->id(), std::move(tensor_info));
+ return inserted.first->second.get();
}
-void GpuWorkloadContext::Impl::register_virtual_tensor(ITensorInfo &tensor_info)
+ITensorInfo *GpuWorkloadContext::Impl::create_auxiliary_tensor(const ITensorInfo &itensor_info)
{
- ARM_COMPUTE_ERROR_ON(tensor_info.has_valid_id());
+ auto tensor_info = std::make_unique<TensorInfo>(itensor_info);
+ const auto tensor_id = next_tensor_id();
+ tensor_info->set_id(tensor_id);
+ _mem_map[tensor_id] = MemoryDescriptor{ MemoryType::Auxiliary, AuxMemoryInfo{ tensor_info->total_size() } };
+ auto inserted = _managed_tensor_info.emplace(tensor_info->id(), std::move(tensor_info));
+ return inserted.first->second.get();
+}
- const auto tensor_id = -next_tensor_id();
+ITensorInfo *GpuWorkloadContext::Impl::get_tensor_info(ITensorInfo::Id id)
+{
+ return _managed_tensor_info.at(id).get();
+}
- tensor_info.set_id(tensor_id);
- _mem_map[tensor_id] = MemoryDescriptor{ MemoryType::Virtual };
+const ITensorInfo *GpuWorkloadContext::Impl::get_tensor_info(ITensorInfo::Id id) const
+{
+ return _managed_tensor_info.at(id).get();
}
ITensorInfo::Id GpuWorkloadContext::Impl::next_tensor_id()
diff --git a/src/dynamic_fusion/sketch/gpu/GpuWorkloadContextImpl.h b/src/dynamic_fusion/sketch/gpu/GpuWorkloadContextImpl.h
index a857932791..c169476a70 100644
--- a/src/dynamic_fusion/sketch/gpu/GpuWorkloadContextImpl.h
+++ b/src/dynamic_fusion/sketch/gpu/GpuWorkloadContextImpl.h
@@ -36,7 +36,6 @@ namespace experimental
{
namespace dynamic_fusion
{
-
/** Internal implementation of workload context. */
class GpuWorkloadContext::Impl
{
@@ -52,7 +51,7 @@ public:
Impl(Impl &) = default;
/** Assignment */
- Impl& operator=(Impl &) = default;
+ Impl &operator=(Impl &) = default;
/** Get target GPU language. */
GpuLanguage gpu_language() const;
@@ -69,27 +68,34 @@ public:
*/
void register_user_tensor(ITensorInfo &tensor_info);
- /** Set a new ID and register the auxiliary tensor info.
+ /** Create a virtual (see @ref MemoryType) tensor info and save it
*
- * @param[in, out] tensor_info The tensor info to be registered.
- * @param[in] mem_info The auxiliary tensor memory info.
+ * @return ITensorInfo* The created virtual tensor info object pointer
*/
- void register_aux_tensor(ITensorInfo &tensor_info, const AuxMemoryInfo &mem_info);
-
- /** Set a new ID and register the virtual tensor info.
+ ITensorInfo *create_virtual_tensor();
+ /** Create an auxiliary (see @ref MemoryType) tensor info and save it
*
- * @param[in, out] tensor_info The tensor info to be registered.
+ * @param[in] tensor_info @ref ITensorInfo to copy from
+ *
+ * @return ITensorInfo* The created auxiliary tensor info object pointer
*/
- void register_virtual_tensor(ITensorInfo &tensor_info);
+ ITensorInfo *create_auxiliary_tensor(const ITensorInfo &tensor_info);
+
+ /** Get tensor info created by this context, from id */
+ ITensorInfo *get_tensor_info(ITensorInfo::Id id);
+
+ /** Get tensor info created by this context, from id */
+ const ITensorInfo *get_tensor_info(ITensorInfo::Id id) const;
private:
ITensorInfo::Id next_tensor_id();
- GpuLanguage _gpu_language;
+ GpuLanguage _gpu_language;
CLCompileContext *_cl_compile_ctx;
- ITensorInfo::Id _next_tensor_id;
+ ITensorInfo::Id _next_tensor_id;
MemoryDescriptorMap _mem_map;
+ std::map<ITensorInfo::Id, std::unique_ptr<TensorInfo>> _managed_tensor_info;
};
} // namespace dynamic_fusion
diff --git a/src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h b/src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h
index 44c99e844b..d3033898e9 100644
--- a/src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h
+++ b/src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h
@@ -26,13 +26,10 @@
#include "arm_compute/dynamic_fusion/sketch/MemoryDescriptor.h"
#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h"
-#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadContextImpl.h"
#include "src/dynamic_fusion/sketch/gpu/GpuComponentServices.h"
#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.h"
#include "src/dynamic_fusion/sketch/gpu/GpuOperatorGroup.h"
-
-#include <memory>
-#include <vector>
+#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadContextImpl.h"
namespace arm_compute
{
@@ -52,9 +49,8 @@ public:
Context *context)
: _context{ context },
_comp_services{},
- _component_graph{ &_comp_services },
- _operator_group{},
- _managed_tensor_info_list{ std::vector<std::unique_ptr<TensorInfo>>() }
+ _component_graph{ _context, &_comp_services },
+ _operator_group{}
{
}
/** Prevent instances of this class from being copy constructed */
@@ -90,10 +86,6 @@ public:
{
return _operator_group;
}
- ITensorInfo::Id allocate_new_tensor_id()
- {
- return ++_next_id;
- }
/** Generate @ref GpuWorkloadSourceCode from the workload sketch
* @note The sketch must be valid. Any error encountered during the building of the code will be thrown.
*
@@ -110,37 +102,29 @@ public:
*/
ITensorInfo *create_virtual_tensor()
{
- auto uptr = std::make_unique<TensorInfo>();
- _context->implementation().register_virtual_tensor(*uptr);
- _managed_tensor_info_list.emplace_back(std::move(uptr));
- return _managed_tensor_info_list.back().get();
+ return _context->implementation().create_virtual_tensor();
}
/** Create an auxiliary (see @ref MemoryType) tensor info and save it
*
- * @return ITensorInfo* The created auxiliary tensor info object pointer
- */
-
- /** Create an auxiliary (see @ref MemoryType) tensor info and save it
- *
* @param[in] tensor_info @ref ITensorInfo to copy from
*
* @return ITensorInfo* The created auxiliary tensor info object pointer
*/
ITensorInfo *create_auxiliary_tensor(const ITensorInfo &tensor_info)
{
- auto uptr = std::make_unique<TensorInfo>(tensor_info);
- _context->implementation().register_aux_tensor(*uptr, AuxMemoryInfo{ uptr->total_size() });
- _managed_tensor_info_list.emplace_back(std::move(uptr));
- return _managed_tensor_info_list.back().get();
+ return _context->implementation().create_auxiliary_tensor(tensor_info);
+ }
+
+ ITensorInfo *get_tensor_info(ITensorInfo::Id id)
+ {
+ return _context->implementation().get_tensor_info(id);
}
private:
- Context *_context;
- GpuComponentServices _comp_services;
- GpuKernelComponentGraph _component_graph;
- GpuOperatorGroup _operator_group;
- ITensorInfo::Id _next_id{ ITensorInfo::invalid_tensor_id };
- std::vector<std::unique_ptr<TensorInfo>> _managed_tensor_info_list;
+ Context *_context;
+ GpuComponentServices _comp_services;
+ GpuKernelComponentGraph _component_graph;
+ GpuOperatorGroup _operator_group;
};
} // namespace dynamic_fusion
} // namespace experimental
diff --git a/src/dynamic_fusion/sketch/gpu/GpuWorkloadSourceCode.h b/src/dynamic_fusion/sketch/gpu/GpuWorkloadSourceCode.h
index d1d0bdf77f..578366daaf 100644
--- a/src/dynamic_fusion/sketch/gpu/GpuWorkloadSourceCode.h
+++ b/src/dynamic_fusion/sketch/gpu/GpuWorkloadSourceCode.h
@@ -27,6 +27,7 @@
#include "arm_compute/core/experimental/Types.h"
#include "arm_compute/dynamic_fusion/sketch/MemoryDescriptor.h"
#include "src/dynamic_fusion/sketch/gpu/GpuKernelSourceCode.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadContextImpl.h"
namespace arm_compute
{
@@ -34,10 +35,45 @@ namespace experimental
{
namespace dynamic_fusion
{
+#ifdef ACL_INTERNAL_TEST_CKW_IN_DF
+namespace
+{
+/** Extract kernel arguments of one tensor from a flat list of kernel arguments.
+ *
+ * @param[in] flat_kernel_args
+ * @return GpuKernelArgumentList
+ */
+GpuKernelArgumentList extract_kernel_args_for_one_tensor(GpuKernelArgumentList &flat_kernel_args)
+{
+ if(flat_kernel_args.empty())
+ {
+ return {};
+ }
+ GpuKernelArgumentList tensor_kargs{};
+
+ const GpuKernelArgumentBinding &karg_head = flat_kernel_args.front();
+ tensor_kargs.push_back(karg_head);
+ flat_kernel_args.pop_front();
+ const auto tensor_id = karg_head.id();
+
+ while(!flat_kernel_args.empty())
+ {
+ const GpuKernelArgumentBinding &karg = flat_kernel_args.front();
+ if(karg.id() != tensor_id) // Encounter the next tensor, return the current tensor's kernel arguments
+ {
+ return tensor_kargs;
+ }
+ tensor_kargs.push_back(karg);
+ flat_kernel_args.pop_front();
+ }
+ return tensor_kargs;
+}
+}
+#endif // ACL_INTERNAL_TEST_CKW_IN_DF
/** Uniquely identifies a @ref GpuUnitWorkload within a @ref GpuWorkloadSourceCode */
using UnitWorkloadId = int32_t;
-/** Describes all the info related to a kernel in order to:
+/** Describes all the info related to a **workload argument** (tensor) in order to:
* - be used by runtime to configure gpu kernel argument
* - be used by memory managers to allocate required memory
*/
@@ -46,6 +82,7 @@ class GpuWorkloadArgument
public:
/** Default constructor */
GpuWorkloadArgument() = default;
+#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
/** Constructor
*
* @param[in] tensor_info @ref ITensorInfo of the workload argument
@@ -60,6 +97,22 @@ public:
_kernel_arg_info{ kernel_arg_info }
{
}
+#else // ACL_INTERNAL_TEST_CKW_IN_DF
+ /** Constructor
+ *
+ * @param[in] tensor_info @ref ITensorInfo of the workload argument
+ * @param[in] mem_desc @ref MemoryDescriptor of the workload argument
+ * @param[in] kernel_arg_list @ref GpuKernelArgumentList of the workload argument
+ */
+ GpuWorkloadArgument(const ITensorInfo &tensor_info,
+ const MemoryDescriptor &mem_desc,
+ const GpuKernelArgumentList &kernel_args)
+ : _tensor_info{ tensor_info },
+ _mem_desc{ mem_desc },
+ _kernel_args{ kernel_args }
+ {
+ }
+#endif // ACL_INTERNAL_TEST_CKW_IN_DF
/** Get tensor id within workload */
ITensorInfo::Id id() const
{
@@ -85,6 +138,7 @@ public:
{
return &_mem_desc;
}
+#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
/** Get @ref GpuKernelArgumentInfo of the argument */
GpuKernelArgumentInfo *kernel_argument_info()
{
@@ -95,6 +149,18 @@ public:
{
return &_kernel_arg_info;
}
+#else // ACL_INTERNAL_TEST_CKW_IN_DF
+ /** Get @ref GpuKernelArgumentList of the workload tensor */
+ GpuKernelArgumentList *kernel_argument_list()
+ {
+ return &_kernel_args;
+ }
+ /** Get @ref GpuKernelArgumentList of the workload tensor */
+ const GpuKernelArgumentList *kernel_argument_list() const
+ {
+ return &_kernel_args;
+ }
+#endif // ACL_INTERNAL_TEST_CKW_IN_DF
/** Check if the workload argument has valid id
*
* @return true If has valid id
@@ -106,9 +172,13 @@ public:
}
private:
- TensorInfo _tensor_info{};
- MemoryDescriptor _mem_desc{};
- GpuKernelArgumentInfo _kernel_arg_info{};
+ TensorInfo _tensor_info{};
+ MemoryDescriptor _mem_desc{};
+#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
+ GpuKernelArgumentInfo _kernel_arg_info {};
+#else // ACL_INTERNAL_TEST_CKW_IN_DF
+ GpuKernelArgumentList _kernel_args {};
+#endif // ACL_INTERNAL_TEST_CKW_IN_DF
};
/** Describes when a unit workload is run.
@@ -179,15 +249,18 @@ public:
* @param[in] kernel_code @ref GpuKernelSourceCode to be contained within the unit workload
* @param[in] stage Stage of the unit workload
* @param[in] mem_map @ref MemoryDescriptor map for all tensors within the unit workload
+ * @param[in] context @ref GpuWorkloadContext associated with the unit workload
*
* @return UnitWorkloadId Allocated unit workload id
*/
- UnitWorkloadId add_unit_workload(const GpuKernelSourceCode &kernel_code, const UnitWorkloadStage &stage, const MemoryDescriptorMap &mem_map)
+ UnitWorkloadId add_unit_workload(const GpuKernelSourceCode &kernel_code, const UnitWorkloadStage &stage, const MemoryDescriptorMap &mem_map, const GpuWorkloadContext *context)
{
// Use the size of the kernel codes as Id
const auto uwk_id = static_cast<UnitWorkloadId>(_unit_workloads.size());
const auto unit_work = GpuUnitWorkload(uwk_id, kernel_code, stage);
_unit_workloads.push_back(unit_work);
+#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
+ ARM_COMPUTE_UNUSED(context);
// Assemble kernel argument with memory descriptor to form workload argument
for(const auto &id_arg : kernel_code.arguments())
{
@@ -200,6 +273,28 @@ public:
}
_tensor_uwork_map[arg_id].insert(uwk_id);
}
+#else // ACL_INTERNAL_TEST_CKW_IN_DF
+ GpuKernelArgumentList flat_kernel_args = kernel_code.arguments();
+ GpuKernelArgumentList tensor_kargs{};
+ while(true)
+ {
+ tensor_kargs = extract_kernel_args_for_one_tensor(flat_kernel_args);
+ if(tensor_kargs.empty())
+ {
+ break;
+ }
+ else
+ {
+ const auto tensor_id = tensor_kargs.at(0).id();
+ _workload_arguments[tensor_id] = GpuWorkloadArgument{ *context->implementation().get_tensor_info(tensor_id), mem_map.at(tensor_id), tensor_kargs };
+ if(_tensor_uwork_map.find(tensor_id) == _tensor_uwork_map.end())
+ {
+ _tensor_uwork_map[tensor_id] = std::set<UnitWorkloadId>();
+ }
+ _tensor_uwork_map[tensor_id].insert(uwk_id);
+ }
+ }
+#endif // ACL_INTERNAL_TEST_CKW_IN_DF
return uwk_id;
}
/** Get a unit workload from its id */
diff --git a/src/dynamic_fusion/sketch/gpu/IGpuKernelWriter.h b/src/dynamic_fusion/sketch/gpu/IGpuKernelWriter.h
index ae67790b4b..28e5432224 100644
--- a/src/dynamic_fusion/sketch/gpu/IGpuKernelWriter.h
+++ b/src/dynamic_fusion/sketch/gpu/IGpuKernelWriter.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2022 Arm Limited.
+ * Copyright (c) 2022-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -27,9 +27,11 @@
#include "arm_compute/core/CL/CLCompileContext.h"
#include "arm_compute/core/Window.h"
#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelSourceCode.h"
#include <map>
#include <string>
+#include <vector>
namespace arm_compute
{
@@ -56,8 +58,20 @@ public:
virtual std::string get_config_id() = 0;
/** Generate execution window */
virtual Window get_window() const = 0;
- /** Get the kernel argument lists of the kernel*/
- virtual std::map<ITensorInfo::Id, GpuKernelArgument> get_tensors() = 0;
+ /** Get the kernel argument lists of the kernel
+ * @deprecated To be removed along with ClTemplateWriter
+ */
+ virtual std::map<ITensorInfo::Id, GpuKernelArgument> get_tensors()
+ {
+ return {};
+ }
+#ifdef ACL_INTERNAL_TEST_CKW_IN_DF
+ /** Get the flat list of arguments of the kernel*/
+ virtual GpuKernelArgumentList get_kernel_arguments()
+ {
+ return {};
+ }
+#endif // ACL_INTERNAL_TEST_CKW_IN_DF
};
} // namespace dynamic_fusion
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwDriver.cpp b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwDriver.cpp
index d5c03c60c5..d78956f835 100644
--- a/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwDriver.cpp
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwDriver.cpp
@@ -30,6 +30,7 @@
#include "arm_compute/core/Window.h"
#include "src/common/utils/Log.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/TypeConverter.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h"
@@ -42,29 +43,24 @@ namespace experimental
namespace dynamic_fusion
{
GpuCkwDriver::GpuCkwDriver(const GpuKernelComponentGroup &components)
- : _components{ components }
+ : _components{ components }, _kernel{ GpuTargetLanguage::OpenCL }
{
}
std::string GpuCkwDriver::get_name()
{
ARM_COMPUTE_LOG_PARAMS(std::string("[V1] TODO"));
- return "todo_get_name";
+ return "unnamed";
}
std::string GpuCkwDriver::get_code()
{
- ARM_COMPUTE_LOG_PARAMS(std::string("[V1] TODO"));
- ckw::Kernel kernel(get_name().c_str(), GpuTargetLanguage::OpenCL);
- GpuCkwKernelWriter root_writer(kernel);
+ _kernel.name(get_name());
+ GpuCkwKernelWriter root_writer(_kernel);
GpuCkwScopedKernelWriter writer(&root_writer);
GpuCkwVariableTable vtable{};
// Global Kernel Writer Driver code
-
- // The following is just an incomplete example of using the kernel writer
-
- // Iterate over component specific Ckw Driver; generate component code and concatenate them
for(auto &comp : _components)
{
auto ckw_driver = comp->ckw_component_driver();
@@ -96,18 +92,31 @@ Window GpuCkwDriver::get_window() const
return root_comp->ckw_component_driver()->get_window();
}
-std::map<ITensorInfo::Id, GpuKernelArgument> GpuCkwDriver::get_tensors()
+GpuKernelArgumentList GpuCkwDriver::get_kernel_arguments()
{
- ARM_COMPUTE_LOG_PARAMS(std::string("[V1] TODO"));
- // Assemble GpuKernelArguments
- std::map<ITensorInfo::Id, GpuKernelArgument> tensors;
- for(const auto t : _components.get_argument_tensors())
+ GpuKernelArgumentList args{};
+ for(const auto &arg : _kernel.arguments())
{
- tensors.emplace(
- t->id(),
- GpuKernelArgument{ *t, { GpuKernelArgumentInfo::Type::Tensor_Special_0 } });
+ switch(arg.type())
+ {
+ case KernelArgument::Type::TensorStorage:
+ {
+ args.emplace_back(static_cast<ITensorInfo::Id>(arg.id()), from_ckw(arg.tensor_storage_type()));
+ break;
+ }
+ case KernelArgument::Type::TensorComponent:
+ {
+ args.emplace_back(static_cast<ITensorInfo::Id>(arg.id()), from_ckw(arg.tensor_component_type()));
+ break;
+ }
+ default:
+ {
+ ARM_COMPUTE_ERROR("Unsupported KernelArgument Type");
+ break;
+ }
+ }
}
- return tensors;
+ return args;
}
} // namespace dynamic_fusion
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwDriver.h b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwDriver.h
index 2084b72098..c6e03f6e03 100644
--- a/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwDriver.h
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwDriver.h
@@ -28,6 +28,8 @@
#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
#include "src/dynamic_fusion/sketch/gpu/IGpuKernelWriter.h"
+#include "ckw/Kernel.h"
+
#include <map>
#include <string>
@@ -66,11 +68,12 @@ public:
std::string get_config_id() override;
/** Generate execution window */
Window get_window() const override;
- /** Get the kernel argument lists of the kernel*/
- std::map<ITensorInfo::Id, GpuKernelArgument> get_tensors() override;
+ /** Get the flat list of arguments of the kernel*/
+ GpuKernelArgumentList get_kernel_arguments() override;
private:
GpuKernelComponentGroup _components{};
+ ckw::Kernel _kernel;
};
} // namespace dynamic_fusion
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.cpp b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.cpp
index 154968775c..6f3eca711d 100644
--- a/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.cpp
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.cpp
@@ -23,9 +23,10 @@
*/
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h"
-#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/TypeConverter.h"
#include <sstream>
@@ -35,7 +36,8 @@ namespace experimental
{
namespace dynamic_fusion
{
-GpuCkwComponentArgument *GpuCkwVariableTable::declare_variable(const GpuKernelComponentGroup &comp_group, GpuCkwScopedKernelWriter &writer, const ITensorInfo *tensor, const std::string &alias)
+GpuCkwComponentArgument *GpuCkwVariableTable::declare_variable(const GpuKernelComponentGroup &comp_group, GpuCkwScopedKernelWriter &writer, const ITensorInfo *tensor, TensorStorageType storage,
+ const std::string &alias)
{
ARM_COMPUTE_ERROR_ON_MSG(!tensor->has_valid_id(), "Tensor info with valid id expected");
@@ -59,7 +61,7 @@ GpuCkwComponentArgument *GpuCkwVariableTable::declare_variable(const GpuKernelCo
std::stringstream ss;
ss << alias << "_t" << abs(tensor->id());
const auto uniq_name = ss.str();
- GpuCkwComponentArgument var{ writer->declare_tensor_argument(uniq_name.c_str(), to_ckw(*tensor)) };
+ GpuCkwComponentArgument var{ writer->declare_tensor_argument(uniq_name, to_ckw(*tensor), to_ckw(storage)) };
auto &&inserted = _vars.emplace(tensor->id(), var);
return &(inserted.first->second);
}
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h
index 1c9cb083ea..0649dcba9d 100644
--- a/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h
@@ -24,8 +24,8 @@
#ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_GPUCKWVARIABLETABLE
#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_GPUCKWVARIABLETABLE
-#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwComponentArgument.h"
#include "arm_compute/core/ITensorInfo.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwComponentArgument.h"
#include <map>
@@ -37,6 +37,7 @@ namespace dynamic_fusion
{
class GpuKernelComponentGroup;
class GpuCkwScopedKernelWriter;
+enum class TensorStorageType;
/** A table of all the variables used in the kernel.
*
@@ -52,11 +53,13 @@ public:
* @param[in] comp_group Component group the tensor belongs to
* @param[in] writer Compute Kernel Writer
* @param[in] tensor Tensor info with which the new variable is associated
+ * @param[in] storage Tensor storage type associated with the tensor
* @param[in] alias Alias for the variable. Will be used as part of the variable name
*
* @return GpuCkwComponentArgument*
*/
- GpuCkwComponentArgument *declare_variable(const GpuKernelComponentGroup &comp_group, GpuCkwScopedKernelWriter &writer, const ITensorInfo *tensor, const std::string &alias = "unnamed");
+ GpuCkwComponentArgument *declare_variable(const GpuKernelComponentGroup &comp_group, GpuCkwScopedKernelWriter &writer, const ITensorInfo *tensor, TensorStorageType storage,
+ const std::string &alias = "unnamed");
private:
std::map<ITensorInfo::Id, GpuCkwComponentArgument> _vars{};
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwActivation.cpp b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwActivation.cpp
index 224c176a31..c07fac0e0d 100644
--- a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwActivation.cpp
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwActivation.cpp
@@ -23,14 +23,15 @@
*/
#include "GpuCkwActivation.h"
-#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.h"
-#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/utils/helpers/AdjustVecSize.h"
#include "ckw/TensorTileSampler.h"
#include "src/core/helpers/WindowHelpers.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h"
#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/WriterHelper.h"
#include <string>
@@ -84,8 +85,8 @@ inline TensorTileSampler create_sampler(GpuCkwScopedKernelWriter &writer, int32_
} // namespace
GpuCkwActivation::GpuCkwActivation(ComponentId id,
- const ArgumentPack<ITensorInfo> &tensors,
- const Attributes &attributes)
+ const ArgumentPack<ITensorInfo> &tensors,
+ const Attributes &attributes)
: IGpuCkwComponentDriver{ id, tensors },
_src{},
_dst{},
@@ -102,8 +103,8 @@ void GpuCkwActivation::write_component_code(const ComponentGroup &comp_group, Gp
const unsigned int n0 = root_window.x().step();
const unsigned int m0 = root_window.y().step();
- GpuCkwComponentArgument *src = vtable.declare_variable(comp_group, writer, _src, "src");
- GpuCkwComponentArgument *dst = vtable.declare_variable(comp_group, writer, _dst, "dst");
+ GpuCkwComponentArgument *src = vtable.declare_variable(comp_group, writer, _src, TensorStorageType::ClBufferUint8Ptr, "src");
+ GpuCkwComponentArgument *dst = vtable.declare_variable(comp_group, writer, _dst, TensorStorageType::ClBufferUint8Ptr, "dst");
load_src_dst_tiles_and_prepare_sampler(writer, src, dst, m0, n0, create_sampler);
@@ -111,14 +112,14 @@ void GpuCkwActivation::write_component_code(const ComponentGroup &comp_group, Gp
auto &dst_tile = dst->tile();
// Constants
- const auto &constant_minus_1 = writer->declare_tile("minus_1", -1);
- const auto &constant_pos_1 = writer->declare_tile("one", 1);
- const auto &constant_zero = writer->declare_tile("zero", 0);
- const auto &constant_A = writer->declare_tile("A_VAL", _attributes.a());
- const auto &constant_B = writer->declare_tile("B_VAL", _attributes.b());
+ const auto &constant_minus_1 = writer->declare_tile("minus_1", -1);
+ const auto &constant_pos_1 = writer->declare_tile("one", 1);
+ const auto &constant_zero = writer->declare_tile("zero", 0);
+ const auto &constant_A = writer->declare_tile("A_VAL", _attributes.a());
+ const auto &constant_B = writer->declare_tile("B_VAL", _attributes.b());
// Perform the operation.
- switch (_attributes.activation())
+ switch(_attributes.activation())
{
case ActivationLayerInfo::ActivationFunction::LOGISTIC:
{
@@ -178,9 +179,9 @@ Window GpuCkwActivation::get_window() const
// Collapse Dim 1 (W) and Dim 2 (H) together, leave Dim 0 (C) unchanged
// This is in line with the collapsing convention used by operators like Conv2d
output_shape.collapse(2U, 1U);
- constexpr unsigned int vector_size_byte_opencl = 16;
- const unsigned int num_elems_processed_per_iteration = adjust_vec_size(vector_size_byte_opencl / _dst->element_size(), _dst->dimension(0));
- Window win = calculate_max_window(output_shape, Steps(num_elems_processed_per_iteration));
+ constexpr unsigned int vector_size_byte_opencl = 16;
+ const unsigned int num_elems_processed_per_iteration = adjust_vec_size(vector_size_byte_opencl / _dst->element_size(), _dst->dimension(0));
+ Window win = calculate_max_window(output_shape, Steps(num_elems_processed_per_iteration));
return win;
}
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwCast.cpp b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwCast.cpp
index dd71c55df2..8d7e6a8c37 100644
--- a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwCast.cpp
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwCast.cpp
@@ -23,14 +23,15 @@
*/
#include "GpuCkwCast.h"
-#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.h"
-#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/utils/helpers/AdjustVecSize.h"
#include "ckw/TensorTileSampler.h"
#include "src/core/helpers/WindowHelpers.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h"
#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/TypeConverter.h"
#include <string>
@@ -84,8 +85,8 @@ inline TensorTileSampler create_sampler(GpuCkwScopedKernelWriter &writer, int32_
} // namespace
GpuCkwCast::GpuCkwCast(ComponentId id,
- const ArgumentPack<ITensorInfo> &tensors,
- const Attributes &attributes)
+ const ArgumentPack<ITensorInfo> &tensors,
+ const Attributes &attributes)
: IGpuCkwComponentDriver{ id, tensors },
_src{},
_dst{},
@@ -102,8 +103,8 @@ void GpuCkwCast::write_component_code(const ComponentGroup &comp_group, GpuCkwVa
const unsigned int n0 = root_window.x().step();
const unsigned int m0 = root_window.y().step();
- GpuCkwComponentArgument *src = vtable.declare_variable(comp_group, writer, _src, "src");
- GpuCkwComponentArgument *dst = vtable.declare_variable(comp_group, writer, _dst, "dst");
+ GpuCkwComponentArgument *src = vtable.declare_variable(comp_group, writer, _src, TensorStorageType::ClBufferUint8Ptr, "src");
+ GpuCkwComponentArgument *dst = vtable.declare_variable(comp_group, writer, _dst, TensorStorageType::ClBufferUint8Ptr, "dst");
// Load the source tile and prepare the sampler.
if(!src->has_tile())
@@ -124,7 +125,7 @@ void GpuCkwCast::write_component_code(const ComponentGroup &comp_group, GpuCkwVa
if(!dst->has_tile())
{
// Get Target datatype and convert it to ckw::DataType.
- ckw::DataType target_dt = dynamic_fusion::to_ckw(_attributes.data_type());
+ ckw::DataType target_dt = dynamic_fusion::to_ckw(_attributes.data_type());
// Create dst_tile based on src_tile dimensions and with target DataType.
const TileInfo src_tile_info = src_tile.tile_info();
@@ -166,9 +167,9 @@ Window GpuCkwCast::get_window() const
// Collapse Dim 1 (W) and Dim 2 (H) together, leave Dim 0 (C) unchanged
// This is in line with the collapsing convention used by operators like Conv2d
output_shape.collapse(2U, 1U);
- constexpr unsigned int vector_size_byte_opencl = 16;
- const unsigned int num_elems_processed_per_iteration = adjust_vec_size(vector_size_byte_opencl / _dst->element_size(), _dst->dimension(0));
- Window win = calculate_max_window(output_shape, Steps(num_elems_processed_per_iteration));
+ constexpr unsigned int vector_size_byte_opencl = 16;
+ const unsigned int num_elems_processed_per_iteration = adjust_vec_size(vector_size_byte_opencl / _dst->element_size(), _dst->dimension(0));
+ Window win = calculate_max_window(output_shape, Steps(num_elems_processed_per_iteration));
return win;
}
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.cpp b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.cpp
index 685bf391dc..15e32e26d5 100644
--- a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.cpp
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.cpp
@@ -23,14 +23,16 @@
*/
#include "GpuCkwElementwiseBinary.h"
-#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.h"
-#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Validate.h"
+#include "arm_compute/core/utils/helpers/AdjustVecSize.h"
#include "ckw/TensorTileSampler.h"
#include "ckw/types/TensorSamplerTypes.h"
#include "src/core/helpers/WindowHelpers.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h"
#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/WriterHelper.h"
#include <string>
@@ -54,14 +56,20 @@ inline TensorTileSampler create_simple_sampler(GpuCkwScopedKernelWriter &writer,
auto &gid_1 = writer->declare_tile("gid_1", ckw::DataType::Int32);
auto &gid_2 = writer->declare_tile("gid_2", ckw::DataType::Int32);
- auto &const_0 = writer->declare_tile("0", 0);
-
writer->op_get_global_id(gid_0, 0);
writer->op_get_global_id(gid_1, 1);
writer->op_get_global_id(gid_2, 2);
- sampler.x(gid_0);
- sampler.y(gid_1);
+ auto &x_coord = writer->declare_tile("x_coord", ckw::DataType::Int32);
+ auto &y_coord = writer->declare_tile("y_coord", ckw::DataType::Int32);
+ auto &m0_t = writer->declare_tile("m0", m0);
+ auto &n0_t = writer->declare_tile("n0", n0);
+ writer->op_binary_expression(x_coord, gid_0, ckw::BinaryOp::Mul, n0_t);
+ writer->op_binary_expression(y_coord, gid_1, ckw::BinaryOp::Mul, m0_t);
+
+ sampler.x(x_coord);
+ sampler.y(y_coord);
+ auto &const_0 = writer->declare_tile("0", 0);
sampler.z(const_0); // 3rd dimension collapsed with 2nd dimension
sampler.b(gid_2);
@@ -99,9 +107,9 @@ void GpuCkwElementwiseBinary::write_component_code(const ComponentGroup &comp_gr
const unsigned int n0 = root_window.x().step();
const unsigned int m0 = root_window.y().step();
- GpuCkwComponentArgument *lhs = vtable.declare_variable(comp_group, writer, _lhs, "lhs");
- GpuCkwComponentArgument *rhs = vtable.declare_variable(comp_group, writer, _rhs, "rhs");
- GpuCkwComponentArgument *dst = vtable.declare_variable(comp_group, writer, _dst, "dst");
+ GpuCkwComponentArgument *lhs = vtable.declare_variable(comp_group, writer, _lhs, TensorStorageType::ClBufferUint8Ptr, "lhs");
+ GpuCkwComponentArgument *rhs = vtable.declare_variable(comp_group, writer, _rhs, TensorStorageType::ClBufferUint8Ptr, "rhs");
+ GpuCkwComponentArgument *dst = vtable.declare_variable(comp_group, writer, _dst, TensorStorageType::ClBufferUint8Ptr, "dst");
// Load the LHS and RHS tiles and prepare the tensor sampler.
load_lhs_rhs_tiles_and_prepare_sampler(writer, lhs, rhs, m0, n0, create_simple_sampler);
@@ -131,10 +139,9 @@ Window GpuCkwElementwiseBinary::get_window() const
// Collapse Dim 1 (W) and Dim 2 (H) together, leave Dim 0 (C) unchanged
// This is in line with the collapsing convention used by operators like Conv2d
output_shape.collapse(2U, 1U);
- // constexpr unsigned int vector_size_byte_opencl = 16;
- // const unsigned int num_elems_processed_per_iteration = adjust_vec_size(vector_size_byte_opencl / _dst->element_size(), _dst->dimension(0));
- const unsigned int num_elems_processed_per_iteration = 1U; // Hard-coded for now
- Window win = calculate_max_window(output_shape, Steps(num_elems_processed_per_iteration));
+ constexpr unsigned int vector_size_byte_opencl = 16;
+ const unsigned int num_elems_processed_per_iteration = adjust_vec_size(vector_size_byte_opencl / _dst->element_size(), _dst->dimension(0));
+ Window win = calculate_max_window(output_shape, Steps(num_elems_processed_per_iteration));
return win;
}
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.cpp b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.cpp
index 63555e6064..247d1b834f 100644
--- a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.cpp
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.cpp
@@ -24,6 +24,7 @@
#include "GpuCkwStore.h"
#include "arm_compute/core/Error.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h"
@@ -43,8 +44,8 @@ GpuCkwStore::GpuCkwStore(ComponentId id, const ArgumentPack<ITensorInfo> &tensor
}
void GpuCkwStore::write_component_code(const ComponentGroup &comp_group, GpuCkwVariableTable &vtable, GpuCkwScopedKernelWriter writer) const
{
- auto src = vtable.declare_variable(comp_group, writer, _src, "src");
- auto dst = vtable.declare_variable(comp_group, writer, _dst, "dst");
+ auto src = vtable.declare_variable(comp_group, writer, _src, TensorStorageType::ClBufferUint8Ptr, "src");
+ auto dst = vtable.declare_variable(comp_group, writer, _dst, TensorStorageType::ClBufferUint8Ptr, "dst");
auto &src_tile = src->tile();
const auto &sampler = src->tile_sampler();
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/TypeConverter.h b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/TypeConverter.h
index 9027bddd76..8a38d67d80 100644
--- a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/TypeConverter.h
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/TypeConverter.h
@@ -28,6 +28,7 @@
#include "arm_compute/core/TensorShape.h"
#include "arm_compute/core/Types.h"
#include "ckw/TensorInfo.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h"
namespace arm_compute
{
@@ -98,6 +99,103 @@ inline ckw::TensorInfo to_ckw(const ITensorInfo &tensor_info)
tensor_info.id()
};
}
+
+inline TensorComponentType from_ckw(const ckw::TensorComponentType &component)
+{
+ switch(component)
+ {
+ case ckw::TensorComponentType::OffsetFirstElement:
+ return TensorComponentType::OffsetFirstElement;
+ break;
+ case ckw::TensorComponentType::Stride0:
+ return TensorComponentType::Stride0;
+ break;
+ case ckw::TensorComponentType::Stride1:
+ return TensorComponentType::Stride1;
+ break;
+ case ckw::TensorComponentType::Stride2:
+ return TensorComponentType::Stride2;
+ break;
+ case ckw::TensorComponentType::Stride3:
+ return TensorComponentType::Stride3;
+ break;
+ case ckw::TensorComponentType::Stride4:
+ return TensorComponentType::Stride4;
+ break;
+ case ckw::TensorComponentType::Dim0:
+ return TensorComponentType::Dim0;
+ break;
+ case ckw::TensorComponentType::Dim1:
+ return TensorComponentType::Dim1;
+ break;
+ case ckw::TensorComponentType::Dim2:
+ return TensorComponentType::Dim2;
+ break;
+ case ckw::TensorComponentType::Dim3:
+ return TensorComponentType::Dim3;
+ break;
+ case ckw::TensorComponentType::Dim4:
+ return TensorComponentType::Dim4;
+ break;
+ case ckw::TensorComponentType::Dim1xDim2:
+ return TensorComponentType::Dim1xDim2;
+ break;
+ case ckw::TensorComponentType::Dim2xDim3:
+ return TensorComponentType::Dim2xDim3;
+ break;
+ case ckw::TensorComponentType::Dim1xDim2xDim3:
+ return TensorComponentType::Dim1xDim2xDim3;
+ break;
+ case ckw::TensorComponentType::Unknown:
+ return TensorComponentType::Unknown;
+ default:
+ ARM_COMPUTE_ERROR("Unknown CKW tensor component");
+ return TensorComponentType::Unknown;
+ }
+}
+
+inline ckw::TensorStorageType to_ckw(const TensorStorageType &storage)
+{
+ switch(storage)
+ {
+ case TensorStorageType::ClBufferUint8Ptr:
+ return ckw::TensorStorageType::BufferUint8Ptr;
+ break;
+ case TensorStorageType::ClImage2dReadOnly:
+ return ckw::TensorStorageType::Texture2dReadOnly;
+ break;
+ case TensorStorageType::ClImage2dWriteOnly:
+ return ckw::TensorStorageType::Texture2dWriteOnly;
+ break;
+ case TensorStorageType::Unknown:
+ return ckw::TensorStorageType::Unknown;
+ break;
+ default:
+ ARM_COMPUTE_ERROR("Unknown tensor storage type");
+ return ckw::TensorStorageType::Unknown;
+ }
+}
+inline TensorStorageType from_ckw(const ckw::TensorStorageType &storage)
+{
+ switch(storage)
+ {
+ case ckw::TensorStorageType::BufferUint8Ptr:
+ return TensorStorageType::ClBufferUint8Ptr;
+ break;
+ case ckw::TensorStorageType::Texture2dReadOnly:
+ return TensorStorageType::ClImage2dReadOnly;
+ break;
+ case ckw::TensorStorageType::Texture2dWriteOnly:
+ return TensorStorageType::ClImage2dWriteOnly;
+ break;
+ case ckw::TensorStorageType::Unknown:
+ return TensorStorageType::Unknown;
+ break;
+ default:
+ ARM_COMPUTE_ERROR("Unknown CKW tensor storage type");
+ return TensorStorageType::Unknown;
+ }
+}
} // namespace dynamic_fusion
} // namespace experimental
} // namespace arm_compute
diff --git a/src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h b/src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h
index d600956b4f..af766a7ece 100644
--- a/src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h
+++ b/src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h
@@ -104,9 +104,11 @@ public:
{
return _properties;
}
- /** Get template writer for the component */
- virtual const IGpuTemplateComponentWriter *template_writer() const = 0;
- /** Get compute kernel writer driver for the component */
+ /** Get writer for the component */
+ virtual const IGpuTemplateComponentWriter *template_writer() const
+ {
+ return nullptr;
+ }
virtual const IGpuCkwComponentDriver *ckw_component_driver() const
{
return nullptr;
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.cpp b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.cpp
index d2cde40a10..c41257d18c 100644
--- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.cpp
+++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.cpp
@@ -24,8 +24,11 @@
#include "ClComponentActivation.h"
#include "src/core/CL/CLValidate.h"
-#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwActivation.h"
+#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
#include "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateActivation.h"
+#else //ACL_INTERNAL_TEST_CKW_IN_DF
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwActivation.h"
+#endif //ACL_INTERNAL_TEST_CKW_IN_DF
namespace arm_compute
{
@@ -66,8 +69,17 @@ ClComponentActivation::ClComponentActivation(ComponentId
const ArgumentPack<ITensorInfo> &tensors,
const Attributes &attributes)
: IGpuKernelComponent{ id, properties, tensors },
- _component_writer{ std::make_unique<ClTemplateActivation>(id, tensors, attributes) },
- _ckw_driver{ std::make_unique<GpuCkwActivation>(id, tensors, attributes) }
+#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
+ _component_writer
+{
+ std::make_unique<ClTemplateActivation>(id, tensors, attributes)
+}
+#else //ACL_INTERNAL_TEST_CKW_IN_DF
+ _component_writer
+{
+ std::make_unique<GpuCkwActivation>(id, tensors, attributes)
+}
+#endif //ACL_INTERNAL_TEST_CKW_IN_DF
{
}
@@ -75,15 +87,15 @@ ClComponentActivation::~ClComponentActivation()
{
}
+#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
const IGpuTemplateComponentWriter *ClComponentActivation::template_writer() const
+#else //ACL_INTERNAL_TEST_CKW_IN_DF
+const IGpuCkwComponentDriver *ClComponentActivation::ckw_component_driver() const
+#endif //ACL_INTERNAL_TEST_CKW_IN_DF
{
return _component_writer.get();
}
-const IGpuCkwComponentDriver *ClComponentActivation::ckw_component_driver() const
-{
- return _ckw_driver.get();
-}
} // namespace dynamic_fusion
} // namespace experimental
} // namespace arm_compute
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.h b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.h
index bb6f7c6e30..ebe8719420 100644
--- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.h
+++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.h
@@ -42,8 +42,11 @@ template <typename T>
class ArgumentPack;
/** Forward declaration */
+#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
class ClTemplateActivation;
+#else //ACL_INTERNAL_TEST_CKW_IN_DF
class GpuCkwActivation;
+#endif //ACL_INTERNAL_TEST_CKW_IN_DF
class ClComponentActivation final : public IGpuKernelComponent
{
@@ -106,10 +109,12 @@ public:
/** Allow instances of this class to be moved */
ClComponentActivation &operator=(ClComponentActivation &&component) = default;
- /** Get template writer for the component */
+ /** Get writer for the component */
+#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
const IGpuTemplateComponentWriter *template_writer() const override;
-
- const IGpuCkwComponentDriver *ckw_component_driver() const override;
+#else //ACL_INTERNAL_TEST_CKW_IN_DF
+ const IGpuCkwComponentDriver *ckw_component_driver() const override;
+#endif //ACL_INTERNAL_TEST_CKW_IN_DF
/** Get component type */
GpuComponentType type() const override
@@ -118,8 +123,11 @@ public:
}
private:
+#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
std::unique_ptr<ClTemplateActivation> _component_writer;
- std::unique_ptr<GpuCkwActivation> _ckw_driver;
+#else //ACL_INTERNAL_TEST_CKW_IN_DF
+ std::unique_ptr<GpuCkwActivation> _component_writer;
+#endif //ACL_INTERNAL_TEST_CKW_IN_DF
};
} // namespace dynamic_fusion
} // namespace experimental
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.cpp b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.cpp
index 92933ae7a5..635869f817 100644
--- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.cpp
+++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.cpp
@@ -26,8 +26,11 @@
#include "arm_compute/core/Error.h"
#include "src/core/CL/CLValidate.h"
#include "src/dynamic_fusion/sketch/ArgumentPack.h"
-#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwCast.h"
+#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
#include "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateCast.h"
+#else //ACL_INTERNAL_TEST_CKW_IN_DF
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwCast.h"
+#endif //ACL_INTERNAL_TEST_CKW_IN_DF
namespace arm_compute
{
@@ -67,23 +70,32 @@ ClComponentCast::ClComponentCast(ComponentId id,
const Attributes &attributes,
const Settings &settings)
: IGpuKernelComponent{ id, properties, tensors },
- _component_writer{ std::make_unique<ClTemplateCast>(id, tensors, attributes) },
- _ckw_driver{ std::make_unique<GpuCkwCast>(id, tensors, attributes) }
+#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
+ _component_writer
+{
+ std::make_unique<ClTemplateCast>(id, tensors, attributes)
+}
+#else //ACL_INTERNAL_TEST_CKW_IN_DF
+ _component_writer
+{
+ std::make_unique<GpuCkwCast>(id, tensors, attributes)
+}
+#endif //ACL_INTERNAL_TEST_CKW_IN_DF
{
ARM_COMPUTE_UNUSED(attributes, settings);
}
ClComponentCast::~ClComponentCast()
{
}
+#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
const IGpuTemplateComponentWriter *ClComponentCast::template_writer() const
+#else //ACL_INTERNAL_TEST_CKW_IN_DF
+const IGpuCkwComponentDriver *ClComponentCast::ckw_component_driver() const
+#endif //ACL_INTERNAL_TEST_CKW_IN_DF
{
return _component_writer.get();
}
-const IGpuCkwComponentDriver *ClComponentCast::ckw_component_driver() const
-{
- return _ckw_driver.get();
-}
} // namespace dynamic_fusion
} // namespace experimental
} // namespace arm_compute
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.h b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.h
index 174f9670b3..37b8cbb6c9 100644
--- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.h
+++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.h
@@ -48,8 +48,11 @@ private:
};
/** Forward declaration */
+#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
class ClTemplateCast;
+#else //ACL_INTERNAL_TEST_CKW_IN_DF
class GpuCkwCast;
+#endif //ACL_INTERNAL_TEST_CKW_IN_DF
class ClComponentCast final : public IGpuKernelComponent
{
@@ -116,10 +119,12 @@ public:
ClComponentCast(ClComponentCast &&component) = default;
/** Allow instances of this class to be moved */
ClComponentCast &operator=(ClComponentCast &&component) = default;
- /** Get template writer for the component */
+ /** Get writer for the component */
+#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
const IGpuTemplateComponentWriter *template_writer() const override;
- /** Get GPU kernel writer for the component */
+#else //ACL_INTERNAL_TEST_CKW_IN_DF
const IGpuCkwComponentDriver *ckw_component_driver() const override;
+#endif //ACL_INTERNAL_TEST_CKW_IN_DF
/** Get component type */
GpuComponentType type() const override
{
@@ -127,8 +132,11 @@ public:
}
private:
+#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
std::unique_ptr<ClTemplateCast> _component_writer;
- std::unique_ptr<GpuCkwCast> _ckw_driver;
+#else //ACL_INTERNAL_TEST_CKW_IN_DF
+ std::unique_ptr<GpuCkwCast> _component_writer;
+#endif //ACL_INTERNAL_TEST_CKW_IN_DF
};
} // namespace dynamic_fusion
} // namespace experimental
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.cpp b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.cpp
index 52739e23c0..88d729170c 100644
--- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.cpp
+++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.cpp
@@ -25,8 +25,11 @@
#include "arm_compute/core/Validate.h"
#include "src/core/CL/CLValidate.h"
-#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.h"
+#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
#include "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateElementwiseBinary.h"
+#else //ACL_INTERNAL_TEST_CKW_IN_DF
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.h"
+#endif //ACL_INTERNAL_TEST_CKW_IN_DF
namespace arm_compute
{
@@ -106,28 +109,38 @@ Status ClComponentElementwiseBinary::validate(const ArgumentPack<ITensorInfo> &t
return Status{};
}
+ClComponentElementwiseBinary::~ClComponentElementwiseBinary()
+{
+}
ClComponentElementwiseBinary::ClComponentElementwiseBinary(
ComponentId id,
const Properties &properties,
const ArgumentPack<ITensorInfo> &tensors,
const Attributes &attributes)
: IGpuKernelComponent{ id, properties, tensors },
- _component_writer{ std::make_unique<ClTemplateElementwiseBinary>(id, tensors, attributes) },
- _ckw_driver{ std::make_unique<GpuCkwElementwiseBinary>(id, tensors, attributes) }
+#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
+ _component_writer
{
+ std::make_unique<ClTemplateElementwiseBinary>(id, tensors, attributes)
}
-ClComponentElementwiseBinary::~ClComponentElementwiseBinary()
+#else //ACL_INTERNAL_TEST_CKW_IN_DF
+ _component_writer
{
+ std::make_unique<GpuCkwElementwiseBinary>(id, tensors, attributes)
}
-const IGpuTemplateComponentWriter *ClComponentElementwiseBinary::template_writer() const
+#endif //ACL_INTERNAL_TEST_CKW_IN_DF
{
- return _component_writer.get();
}
+#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
+const IGpuTemplateComponentWriter *ClComponentElementwiseBinary::template_writer() const
+#else //ACL_INTERNAL_TEST_CKW_IN_DF
const IGpuCkwComponentDriver *ClComponentElementwiseBinary::ckw_component_driver() const
+#endif //ACL_INTERNAL_TEST_CKW_IN_DF
{
- return _ckw_driver.get();
+ return _component_writer.get();
}
+
} // namespace dynamic_fusion
} // namespace experimental
} // namespace arm_compute
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.h b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.h
index a56dd8b37d..f7175903d0 100644
--- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.h
+++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.h
@@ -21,8 +21,8 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTELEMENTWISEBINARY
-#define SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTELEMENTWISEBINARY
+#ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTELEMENTWISEBINARY
+#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTELEMENTWISEBINARY
#include "src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h"
#include "src/dynamic_fusion/sketch/gpu/operators/internal/GpuElementwiseBinaryCommon.h"
@@ -40,8 +40,11 @@ template <typename T>
class ArgumentPack;
/** Forward declaration */
+#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
class ClTemplateElementwiseBinary;
+#else //ACL_INTERNAL_TEST_CKW_IN_DF
class GpuCkwElementwiseBinary;
+#endif //ACL_INTERNAL_TEST_CKW_IN_DF
class ClComponentElementwiseBinary final : public IGpuKernelComponent
{
@@ -101,10 +104,13 @@ public:
ClComponentElementwiseBinary(ClComponentElementwiseBinary &&component) = default;
/** Allow instances of this class to be moved */
ClComponentElementwiseBinary &operator=(ClComponentElementwiseBinary &&component) = default;
- /** Get template writer for the component */
+ /** Get writer for the component */
+#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
const IGpuTemplateComponentWriter *template_writer() const override;
+#else //ACL_INTERNAL_TEST_CKW_IN_DF
+ const IGpuCkwComponentDriver *ckw_component_driver() const override;
+#endif //ACL_INTERNAL_TEST_CKW_IN_DF
- const IGpuCkwComponentDriver *ckw_component_driver() const override;
/** Get component type */
GpuComponentType type() const override
{
@@ -112,10 +118,13 @@ public:
}
private:
+#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
std::unique_ptr<ClTemplateElementwiseBinary> _component_writer;
- std::unique_ptr<GpuCkwElementwiseBinary> _ckw_driver;
+#else //ACL_INTERNAL_TEST_CKW_IN_DF
+ std::unique_ptr<GpuCkwElementwiseBinary> _component_writer;
+#endif //ACL_INTERNAL_TEST_CKW_IN_DF
};
} // namespace dynamic_fusion
} // namespace experimental
} // namespace arm_compute
-#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTELEMENTWISEBINARY */
+#endif /* ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTELEMENTWISEBINARY */
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.cpp b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.cpp
index a3283b1866..12b81c3d56 100644
--- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.cpp
+++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.cpp
@@ -24,8 +24,11 @@
#include "ClComponentStore.h"
#include "src/dynamic_fusion/sketch/ArgumentPack.h"
-#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.h"
+#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
#include "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.h"
+#else //ACL_INTERNAL_TEST_CKW_IN_DF
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.h"
+#endif //ACL_INTERNAL_TEST_CKW_IN_DF
#include <memory>
@@ -43,19 +46,30 @@ Status ClComponentStore::validate(
return Status{};
}
ClComponentStore::ClComponentStore(ComponentId id, const Properties &properties, const ArgumentPack<ITensorInfo> &tensors)
- : IGpuKernelComponent{ id, properties, tensors }, _component_writer{ std::make_unique<ClTemplateStore>(id, tensors) }, _ckw_driver{ std::make_unique<GpuCkwStore>(id, tensors) }
+ : IGpuKernelComponent{ id, properties, tensors },
+#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
+ _component_writer
{
+ std::make_unique<ClTemplateStore>(id, tensors)
}
-ClComponentStore::~ClComponentStore()
+#else //ACL_INTERNAL_TEST_CKW_IN_DF
+ _component_writer
{
+ std::make_unique<GpuCkwStore>(id, tensors)
}
-const IGpuTemplateComponentWriter *ClComponentStore::template_writer() const
+#endif //ACL_INTERNAL_TEST_CKW_IN_DF
{
- return _component_writer.get();
}
+ClComponentStore::~ClComponentStore()
+{
+}
+#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
+const IGpuTemplateComponentWriter *ClComponentStore::template_writer() const
+#else //ACL_INTERNAL_TEST_CKW_IN_DF
const IGpuCkwComponentDriver *ClComponentStore::ckw_component_driver() const
+#endif //ACL_INTERNAL_TEST_CKW_IN_DF
{
- return _ckw_driver.get();
+ return _component_writer.get();
}
} // namespace dynamic_fusion
} // namespace experimental
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.h b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.h
index f168ccb97e..853ee39012 100644
--- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.h
+++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.h
@@ -25,7 +25,6 @@
#define SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTSTORE
#include "src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h"
-#include "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.h"
#include <memory>
namespace arm_compute
@@ -39,7 +38,11 @@ namespace dynamic_fusion
/** Forward declaration */
template <typename T>
class ArgumentPack;
+#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
+class ClTemplateStore;
+#else //ACL_INTERNAL_TEST_CKW_IN_DF
class GpuCkwStore;
+#endif //ACL_INTERNAL_TEST_CKW_IN_DF
class ClComponentStore final : public IGpuKernelComponent
{
@@ -85,10 +88,12 @@ public:
ClComponentStore(ClComponentStore &&component) = default;
/** Allow instances of this class to be moved */
ClComponentStore &operator=(ClComponentStore &&component) = default;
- /** Get template writer for the component */
+ /** Get writer for the component */
+#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
const IGpuTemplateComponentWriter *template_writer() const override;
-
+#else //ACL_INTERNAL_TEST_CKW_IN_DF
const IGpuCkwComponentDriver *ckw_component_driver() const override;
+#endif //ACL_INTERNAL_TEST_CKW_IN_DF
/** Get component type */
GpuComponentType type() const override
{
@@ -96,8 +101,11 @@ public:
}
private:
+#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
std::unique_ptr<ClTemplateStore> _component_writer;
- std::unique_ptr<GpuCkwStore> _ckw_driver;
+#else //ACL_INTERNAL_TEST_CKW_IN_DF
+ std::unique_ptr<GpuCkwStore> _component_writer;
+#endif //ACL_INTERNAL_TEST_CKW_IN_DF
};
} // namespace dynamic_fusion
} // namespace experimental
diff --git a/tests/validation/dynamic_fusion/gpu/cl/DepthwiseConv2d.cpp b/tests/validation/dynamic_fusion/gpu/cl/DepthwiseConv2d.cpp
index 7ab7c8a3fc..71b0114225 100644
--- a/tests/validation/dynamic_fusion/gpu/cl/DepthwiseConv2d.cpp
+++ b/tests/validation/dynamic_fusion/gpu/cl/DepthwiseConv2d.cpp
@@ -22,6 +22,7 @@
* SOFTWARE.
*/
+#ifndef ACL_INTERNAL_TEST_CKW_IN_DF // Do not include this test if ACL_INTERNAL_TEST_CKW_IN_DF and the op has not been ported to ckw
#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuDepthwiseConv2d.h"
#include "tests/CL/CLAccessor.h"
@@ -432,3 +433,5 @@ TEST_SUITE_END() // CL
} // namespace validation
} // namespace test
} // namespace arm_compute
+
+#endif // ACL_INTERNAL_TEST_CKW_IN_DF
diff --git a/tests/validation/dynamic_fusion/gpu/cl/DirectConv2d.cpp b/tests/validation/dynamic_fusion/gpu/cl/DirectConv2d.cpp
index f27a1796c9..5ab1fafe2f 100644
--- a/tests/validation/dynamic_fusion/gpu/cl/DirectConv2d.cpp
+++ b/tests/validation/dynamic_fusion/gpu/cl/DirectConv2d.cpp
@@ -22,6 +22,7 @@
* SOFTWARE.
*/
+#ifndef ACL_INTERNAL_TEST_CKW_IN_DF // Do not include this test if ACL_INTERNAL_TEST_CKW_IN_DF and the op has not been ported to ckw
#include "tests/AssetsLibrary.h"
#include "tests/CL/CLAccessor.h"
#include "tests/framework/Fixture.h"
@@ -249,3 +250,5 @@ TEST_SUITE_END() // CL
} // namespace validation
} // namespace test
} // namespace arm_compute
+
+#endif // ACL_INTERNAL_TEST_CKW_IN_DF
diff --git a/tests/validation/dynamic_fusion/gpu/cl/Pool2d.cpp b/tests/validation/dynamic_fusion/gpu/cl/Pool2d.cpp
index f4478db42b..7f5efd662a 100644
--- a/tests/validation/dynamic_fusion/gpu/cl/Pool2d.cpp
+++ b/tests/validation/dynamic_fusion/gpu/cl/Pool2d.cpp
@@ -22,6 +22,7 @@
* SOFTWARE.
*/
+#ifndef ACL_INTERNAL_TEST_CKW_IN_DF // Do not include this test if ACL_INTERNAL_TEST_CKW_IN_DF and the op has not been ported to ckw
#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuPool2d.h"
#include "tests/CL/CLAccessor.h"
@@ -132,7 +133,7 @@ FIXTURE_DATA_TEST_CASE(RunLarge, DynamicFusionGpuPool2dFixture<float>, framework
validate(CLAccessor(_target), _reference, tolerance_f32);
}
FIXTURE_DATA_TEST_CASE(RunSpecial, DFSpecialGpuPool2dFixture<float>, framework::DatasetMode::ALL, combine(datasets::PoolingLayerDatasetSpecialDynamicFusion(),
- framework::dataset::make("DataType", DataType::F32)))
+ framework::dataset::make("DataType", DataType::F32)))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_f32);
@@ -231,3 +232,5 @@ TEST_SUITE_END() // CL
}
}
}
+
+#endif // ACL_INTERNAL_TEST_CKW_IN_DF
diff --git a/tests/validation/dynamic_fusion/gpu/cl/Reshape.cpp b/tests/validation/dynamic_fusion/gpu/cl/Reshape.cpp
index bdaa1be531..4d038b2780 100644
--- a/tests/validation/dynamic_fusion/gpu/cl/Reshape.cpp
+++ b/tests/validation/dynamic_fusion/gpu/cl/Reshape.cpp
@@ -21,6 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+#ifndef ACL_INTERNAL_TEST_CKW_IN_DF // Do not include this test if ACL_INTERNAL_TEST_CKW_IN_DF and the op has not been ported to ckw
#include "tests/CL/CLAccessor.h"
#include "tests/datasets/ReshapeLayerDataset.h"
#include "tests/framework/Macros.h"
@@ -121,3 +122,5 @@ TEST_SUITE_END() // CL
} // namespace validation
} // namespace test
} // namespace arm_compute
+
+#endif // ACL_INTERNAL_TEST_CKW_IN_DF
diff --git a/tests/validation/dynamic_fusion/gpu/cl/Resize.cpp b/tests/validation/dynamic_fusion/gpu/cl/Resize.cpp
index 5f99cd6d78..9ca1c5f0da 100644
--- a/tests/validation/dynamic_fusion/gpu/cl/Resize.cpp
+++ b/tests/validation/dynamic_fusion/gpu/cl/Resize.cpp
@@ -22,6 +22,7 @@
* SOFTWARE.
*/
+#ifndef ACL_INTERNAL_TEST_CKW_IN_DF // Do not include this test if ACL_INTERNAL_TEST_CKW_IN_DF and the op has not been ported to ckw
#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuResize.h"
#include "tests/CL/CLAccessor.h"
@@ -517,3 +518,5 @@ TEST_SUITE_END() // CL
} // namespace validation
} // namespace test
} // namespace arm_compute
+
+#endif // ACL_INTERNAL_TEST_CKW_IN_DF
diff --git a/tests/validation/dynamic_fusion/gpu/cl/Softmax.cpp b/tests/validation/dynamic_fusion/gpu/cl/Softmax.cpp
index e8314d700d..340f5dc2a3 100644
--- a/tests/validation/dynamic_fusion/gpu/cl/Softmax.cpp
+++ b/tests/validation/dynamic_fusion/gpu/cl/Softmax.cpp
@@ -21,6 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+#ifndef ACL_INTERNAL_TEST_CKW_IN_DF // Do not include this test if ACL_INTERNAL_TEST_CKW_IN_DF and the op has not been ported to ckw
#include "arm_compute/core/Types.h"
#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuSoftmax.h"
@@ -196,3 +197,5 @@ TEST_SUITE_END() // CL
} // namespace validation
} // namespace test
} // namespace arm_compute
+
+#endif // ACL_INTERNAL_TEST_CKW_IN_DF