aboutsummaryrefslogtreecommitdiff
path: root/src/dynamic_fusion/sketch
diff options
context:
space:
mode:
authorSiCong Li <sicong.li@arm.com>2023-06-28 09:49:45 +0100
committerSiCong Li <sicong.li@arm.com>2023-07-25 15:48:50 +0000
commit23882a9014eb3972bca958206866c8e0d0b829cc (patch)
tree9139b91699099160e26a64abd8cf182bd7447278 /src/dynamic_fusion/sketch
parent0a59e69fd922b02d9e3b5b043ee7f891061df7be (diff)
downloadComputeLibrary-23882a9014eb3972bca958206866c8e0d0b829cc.tar.gz
Add GpuKernelArgumentBinding for runtime argument setting
* Add flexible runtime argument setting that accept argument bindings exported from ckw. * Introduce internal build flag ACL_INTERNAL_TEST_CKW_IN_DF. If set to true, ckw will be tested in dynamic fusion validation tests. Otherwise it will not be tested and the dynamic fusion will keep using ClTemplateWriter instead. * Fix CKW sampler for elementwise binary to deal with tile sizes > 1 in both dimensions Resolves: COMPMID-6282 Partially resolves: COMPMID-6260 Signed-off-by: SiCong Li <sicong.li@arm.com> Change-Id: I0ab225a4484eb2119643d900a4e72806558626ee Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/9917 Tested-by: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Jakub Sujak <jakub.sujak@arm.com> Reviewed-by: Anitha Raj <Anitha.Raj@arm.com> Reviewed-by: Viet-Hoa Do <viet-hoa.do@arm.com> Comments-Addressed: Arm Jenkins <bsgcomp@arm.com> Benchmark: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/dynamic_fusion/sketch')
-rw-r--r--src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h128
-rw-r--r--src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.cpp6
-rw-r--r--src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.h10
-rw-r--r--src/dynamic_fusion/sketch/gpu/GpuKernelComponentStream.cpp8
-rw-r--r--src/dynamic_fusion/sketch/gpu/GpuKernelComponentStream.h6
-rw-r--r--src/dynamic_fusion/sketch/gpu/GpuKernelSourceCode.h16
-rw-r--r--src/dynamic_fusion/sketch/gpu/GpuLogicalKernel.cpp16
-rw-r--r--src/dynamic_fusion/sketch/gpu/GpuWorkloadContext.cpp43
-rw-r--r--src/dynamic_fusion/sketch/gpu/GpuWorkloadContextImpl.h30
-rw-r--r--src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h44
-rw-r--r--src/dynamic_fusion/sketch/gpu/GpuWorkloadSourceCode.h105
-rw-r--r--src/dynamic_fusion/sketch/gpu/IGpuKernelWriter.h20
-rw-r--r--src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwDriver.cpp45
-rw-r--r--src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwDriver.h7
-rw-r--r--src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.cpp8
-rw-r--r--src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h7
-rw-r--r--src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwActivation.cpp31
-rw-r--r--src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwCast.cpp21
-rw-r--r--src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.cpp33
-rw-r--r--src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.cpp5
-rw-r--r--src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/TypeConverter.h98
-rw-r--r--src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h8
-rw-r--r--src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.cpp26
-rw-r--r--src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.h16
-rw-r--r--src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.cpp26
-rw-r--r--src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.h14
-rw-r--r--src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.cpp27
-rw-r--r--src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.h21
-rw-r--r--src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.cpp26
-rw-r--r--src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.h16
30 files changed, 668 insertions, 199 deletions
diff --git a/src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h b/src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h
index 302d4c8562..226e1a2df3 100644
--- a/src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h
+++ b/src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h
@@ -33,6 +33,7 @@ namespace experimental
namespace dynamic_fusion
{
/** Contain information required to set up a kernel argument at run time
+ * @deprecated To be removed along with ClTemplateWriter
*/
struct GpuKernelArgumentInfo
{
@@ -66,10 +67,9 @@ struct GpuKernelArgumentInfo
}
Type type{ Type::Tensor_4D_t_Buffer };
};
-
bool operator==(const GpuKernelArgumentInfo &info0, const GpuKernelArgumentInfo &info1);
-
/** Kernel argument information linked with its corresponding @ref ITensorInfo
+ * @deprecated To be removed along with ClTemplateWriter
*/
class GpuKernelArgument
{
@@ -124,6 +124,130 @@ private:
TensorInfo _tensor_info{};
GpuKernelArgumentInfo _kernel_arg_info{};
};
+#ifdef ACL_INTERNAL_TEST_CKW_IN_DF
+/** Describe how the tensor runtime memory can be accessed
+ *
+ * Please see documentation under @ref GpuKernelArgumentBinding
+ */
+enum class TensorStorageType
+{
+ Unknown,
+ ClBufferUint8Ptr,
+ ClImage2dReadOnly,
+ ClImage2dWriteOnly,
+};
+
+/** Describe additional runtime information about the tensor
+ *
+ * Please see documentation under @ref GpuKernelArgumentBinding
+ */
+enum class TensorComponentType
+{
+ Unknown,
+ OffsetFirstElement,
+ Stride0,
+ Stride1,
+ Stride2,
+ Stride3,
+ Stride4,
+ Dim0,
+ Dim1,
+ Dim2,
+ Dim3,
+ Dim4,
+ Dim1xDim2,
+ Dim2xDim3,
+ Dim1xDim2xDim3,
+};
+
+/** Describe how to extract information from a runtime Gpu tensor, and set it as an argument to a gpu kernel at runtime
+ *
+ * A kernel argument is just an argument to the gpu kernel as shown in the argument list below. This contrasts with a "workload argument" which is a tensor (@ref GpuWorkloadArgument)
+ * void kernel(arg0, arg1, ... argN)
+ *
+ * In a kernel generated using dynamic fusion (@ref GpuKernelSourceCode), every kernel argument describes part of a tensor.
+ * A tensor is described as: **storages** followed by **components**
+ *
+ * A storage (@ref TensorStorageType) describes how the tensor runtime memory can be accessed (e.g. via a global uint8 pointer to a CL buffer)
+ * A component (@ref TensorComponentType) describes additional runtime information about the tensor (e.g. the dimensions of the tensor)
+ *
+ * The arguments are arranged in the order of use in the generated kernel code:
+ *
+ * arg0 , arg1 , arg2 , ..., , argN
+ * storage, component0, component1, ..., componentX, storage, component0, component1, ..., componentY
+ * | tensor0 | tensor1 |
+ *
+ * An example argument list:
+ *
+ * void kernel(
+ * image2d_t t0_image, // TensorStorageType::ClImage2dReadOnly
+ * uint8_t* t0_ptr, // TensorStorageType::ClBufferUint8Ptr
+ * uint t0_dim0, // TensorComponentType::Dim0
+ * uint t0_stride1, // TensorComponentType::Stride1
+ * image2d_t t1_ptr, // TensorStorageType::ClImage2dReadOnly
+ * uint t1_dim1xdim2, // TensorComponentType::Dim1xDim2
+ * uint t1_stride1, // TensorComponentType::Stride1
+ * uint t1_stride2, // TensorComponentType:Stride2
+ * )
+ *
+ */
+class GpuKernelArgumentBinding
+{
+public:
+ enum class Type : int32_t
+ {
+ TensorStorage, /** @ref TensorStorageType */
+ TensorComponent /** @ref TensorComponentType */
+ };
+ GpuKernelArgumentBinding(ITensorInfo::Id id, TensorStorageType storage)
+ : _type{ Type::TensorStorage }, _id{ id }, _value{}
+ {
+ _value.tensor_storage_type = storage;
+ }
+ GpuKernelArgumentBinding(ITensorInfo::Id id, TensorComponentType component)
+ : _type{ Type::TensorComponent }, _id{ id }, _value{}
+ {
+ _value.tensor_component_type = component;
+ }
+ /** Storage type of the tensor
+ */
+ TensorStorageType tensor_storage_type() const
+ {
+ ARM_COMPUTE_ERROR_ON(_type != Type::TensorStorage);
+ return _value.tensor_storage_type;
+ }
+ /** Component of the tensor
+ */
+ TensorComponentType tensor_component_type() const
+ {
+ ARM_COMPUTE_ERROR_ON(_type != Type::TensorComponent);
+ return _value.tensor_component_type;
+ }
+ /** Id of the tensor this kernel argument belongs to
+ */
+ ITensorInfo::Id id() const
+ {
+ return _id;
+ }
+ /** Type of the kernel argument
+ */
+ Type type() const
+ {
+ return _type;
+ }
+
+private:
+ Type _type;
+ ITensorInfo::Id _id;
+ union Value
+ {
+ TensorStorageType tensor_storage_type;
+ TensorComponentType tensor_component_type;
+ };
+ Value _value;
+};
+#endif // ACL_INTERNAL_TEST_CKW_IN_DF
+
} // namespace dynamic_fusion
} // namespace experimental
} // namespace arm_compute
diff --git a/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.cpp b/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.cpp
index b70a192775..5a65ede38b 100644
--- a/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.cpp
+++ b/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.cpp
@@ -44,14 +44,14 @@ std::vector<DependencyGraph::TensorId> GpuKernelComponentGraph::get_tensor_ids(c
return tensor_ids;
}
-GpuKernelComponentGraph::GpuKernelComponentGraph(GpuComponentServices *services)
- : _services{ services }, _components{}, _tensors{}, _dependency_graph{}
+GpuKernelComponentGraph::GpuKernelComponentGraph(GpuWorkloadContext *context, GpuComponentServices *services)
+ : _context{ context }, _services{ services }, _components{}, _tensors{}, _dependency_graph{}
{
}
GpuKernelComponentStream GpuKernelComponentGraph::fuse(const MemoryDescriptorMap &mem_map) const
{
- GpuKernelComponentStream stream{ _services, mem_map };
+ GpuKernelComponentStream stream{ _context, _services, mem_map };
const auto op_seq = _dependency_graph.build_operators_sequence();
stream.new_component_group();
diff --git a/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.h b/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.h
index 8314ea0a50..85c9b45840 100644
--- a/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.h
+++ b/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.h
@@ -21,8 +21,8 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELCOMPONENTGRAPH
-#define SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELCOMPONENTGRAPH
+#ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELCOMPONENTGRAPH
+#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELCOMPONENTGRAPH
#include "src/dynamic_fusion/sketch/ArgumentPack.h"
#include "src/dynamic_fusion/sketch/gpu/GpuComponentServices.h"
@@ -49,9 +49,10 @@ class GpuKernelComponentGraph
public:
/** Constructor
*
+ * @param[in] context @ref GpuWorkloadContext to be used by the graph
* @param[in] services @ref GpuComponentServices to be used by the graph
*/
- GpuKernelComponentGraph(GpuComponentServices *services);
+ GpuKernelComponentGraph(GpuWorkloadContext *context, GpuComponentServices *services);
/** Prevent instances of this class from being copy constructed */
GpuKernelComponentGraph(const GpuKernelComponentGraph &graph) = delete;
/** Prevent instances of this class from being copied */
@@ -98,6 +99,7 @@ public:
private:
static std::vector<DependencyGraph::TensorId> get_tensor_ids(const std::vector<const ITensorInfo *> tensors);
+ GpuWorkloadContext *_context;
GpuComponentServices *_services;
std::map<ComponentId, std::unique_ptr<IGpuKernelComponent>> _components;
std::map<ITensorInfo::Id, const ITensorInfo *> _tensors;
@@ -106,4 +108,4 @@ private:
} // namespace dynamic_fusion
} // namespace experimental
} // namespace arm_compute
-#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELCOMPONENTGRAPH */
+#endif /* ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELCOMPONENTGRAPH */
diff --git a/src/dynamic_fusion/sketch/gpu/GpuKernelComponentStream.cpp b/src/dynamic_fusion/sketch/gpu/GpuKernelComponentStream.cpp
index 8f4eadc477..a2b6623370 100644
--- a/src/dynamic_fusion/sketch/gpu/GpuKernelComponentStream.cpp
+++ b/src/dynamic_fusion/sketch/gpu/GpuKernelComponentStream.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2022 Arm Limited.
+ * Copyright (c) 2022-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -33,8 +33,8 @@ namespace experimental
{
namespace dynamic_fusion
{
-GpuKernelComponentStream::GpuKernelComponentStream(GpuComponentServices *services, const MemoryDescriptorMap &mem_map)
- : _services{ services }, _component_groups{}, _mem_map{ mem_map }
+GpuKernelComponentStream::GpuKernelComponentStream(GpuWorkloadContext *context, GpuComponentServices *services, const MemoryDescriptorMap &mem_map)
+ : _context{ context }, _services{ services }, _component_groups{}, _mem_map{ mem_map }
{
}
@@ -51,7 +51,7 @@ GpuWorkloadSourceCode GpuKernelComponentStream::write_workload_code()
const GpuKernelSourceCode kernel_code = logical_kernel.write_kernel_code();
// The whole unit workload stage is determined by the root component
const auto unit_workload_stage = group.get_root_component()->properties().stage();
- source_code.add_unit_workload(kernel_code, unit_workload_stage, _mem_map);
+ source_code.add_unit_workload(kernel_code, unit_workload_stage, _mem_map, _context);
}
return source_code;
}
diff --git a/src/dynamic_fusion/sketch/gpu/GpuKernelComponentStream.h b/src/dynamic_fusion/sketch/gpu/GpuKernelComponentStream.h
index cbaa7c297b..ba2503a938 100644
--- a/src/dynamic_fusion/sketch/gpu/GpuKernelComponentStream.h
+++ b/src/dynamic_fusion/sketch/gpu/GpuKernelComponentStream.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2022 Arm Limited.
+ * Copyright (c) 2022-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -49,10 +49,11 @@ class GpuKernelComponentStream
public:
/** Constructor
*
+ * @param[in] context @ref GpuWorkloadContext to be used throughout the stream
* @param[in] services @ref GpuComponentServices to be used throughout the stream
* @param[in] mem_map @ref MemoryDescriptor map used to assemble the @ref GpuWorkloadSourceCode
*/
- GpuKernelComponentStream(GpuComponentServices *services, const MemoryDescriptorMap &mem_map);
+ GpuKernelComponentStream(GpuWorkloadContext *context, GpuComponentServices *services, const MemoryDescriptorMap &mem_map);
/** Allow instances of this class to be copy constructed */
GpuKernelComponentStream(const GpuKernelComponentStream &stream) = default;
/** Allow instances of this class to be copied */
@@ -78,6 +79,7 @@ public:
bool add_component(IGpuKernelComponent *component);
private:
+ GpuWorkloadContext *_context;
GpuComponentServices *_services;
std::vector<GpuKernelComponentGroup> _component_groups{};
MemoryDescriptorMap _mem_map{};
diff --git a/src/dynamic_fusion/sketch/gpu/GpuKernelSourceCode.h b/src/dynamic_fusion/sketch/gpu/GpuKernelSourceCode.h
index 7479328d7b..64e1cdc3bc 100644
--- a/src/dynamic_fusion/sketch/gpu/GpuKernelSourceCode.h
+++ b/src/dynamic_fusion/sketch/gpu/GpuKernelSourceCode.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2022 Arm Limited.
+ * Copyright (c) 2022-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,14 +21,18 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELSOURCECODE
-#define SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELSOURCECODE
+#ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELSOURCECODE
+#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELSOURCECODE
#include "arm_compute/core/CL/CLCompileContext.h"
#include "arm_compute/core/Window.h"
#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h"
+#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
#include <map>
+#else // ACL_INTERNAL_TEST_CKW_IN_DF
+#include <deque>
+#endif // ACL_INTERNAL_TEST_CKW_IN_DF
#include <string>
namespace arm_compute
@@ -38,7 +42,11 @@ namespace experimental
namespace dynamic_fusion
{
/** The argument list of a @ref GpuKernelSourceCode */
+#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
using GpuKernelArgumentList = std::map<ITensorInfo::Id, GpuKernelArgument>;
+#else // ACL_INTERNAL_TEST_CKW_IN_DF
+using GpuKernelArgumentList = std::deque<GpuKernelArgumentBinding>;
+#endif // ACL_INTERNAL_TEST_CKW_IN_DF
/** Container of kernel code to be compiled and run in a @ref GpuUnitWorkload
*/
@@ -123,4 +131,4 @@ private:
} // namespace dynamic_fusion
} // namespace experimental
} // namespace arm_compute
-#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELSOURCECODE */
+#endif /* ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELSOURCECODE */
diff --git a/src/dynamic_fusion/sketch/gpu/GpuLogicalKernel.cpp b/src/dynamic_fusion/sketch/gpu/GpuLogicalKernel.cpp
index 00f625de28..c99984fc0e 100644
--- a/src/dynamic_fusion/sketch/gpu/GpuLogicalKernel.cpp
+++ b/src/dynamic_fusion/sketch/gpu/GpuLogicalKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2022 Arm Limited.
+ * Copyright (c) 2022-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -29,7 +29,11 @@
#include "src/dynamic_fusion/sketch/gpu/GpuComponentServices.h"
#include "src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h"
#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.h"
+#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
#include "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateWriter.h"
+#else // ACL_INTERNAL_TEST_CKW_IN_DF
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwDriver.h"
+#endif // ACL_INTERNAL_TEST_CKW_IN_DF
namespace arm_compute
{
@@ -46,11 +50,19 @@ GpuLogicalKernel::GpuLogicalKernel(GpuComponentServices *services, const GpuKern
GpuKernelSourceCode GpuLogicalKernel::write_kernel_code()
{
GpuKernelSourceCode code;
- ClTemplateWriter writer{ _comp_group };
+#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
+ ClTemplateWriter writer { _comp_group };
+#else // ACL_INTERNAL_TEST_CKW_IN_DF
+ GpuCkwDriver writer { _comp_group };
+#endif // ACL_INTERNAL_TEST_CKW_IN_DF
code.name(writer.get_name());
code.code(writer.get_code());
+#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
code.arguments(writer.get_tensors());
+#else // ACL_INTERNAL_TEST_CKW_IN_DF
+ code.arguments(writer.get_kernel_arguments());
+#endif // ACL_INTERNAL_TEST_CKW_IN_DF
code.build_options(writer.get_build_options());
code.config_id(writer.get_config_id());
code.window(writer.get_window());
diff --git a/src/dynamic_fusion/sketch/gpu/GpuWorkloadContext.cpp b/src/dynamic_fusion/sketch/gpu/GpuWorkloadContext.cpp
index 50f34d9c14..c2bd012703 100644
--- a/src/dynamic_fusion/sketch/gpu/GpuWorkloadContext.cpp
+++ b/src/dynamic_fusion/sketch/gpu/GpuWorkloadContext.cpp
@@ -32,9 +32,8 @@ namespace experimental
{
namespace dynamic_fusion
{
-
GpuWorkloadContext::GpuWorkloadContext(CLCompileContext *cl_compile_ctx)
- : _impl { std::make_unique<Impl>(GpuLanguage::OpenCL, cl_compile_ctx) }
+ : _impl{ std::make_unique<Impl>(GpuLanguage::OpenCL, cl_compile_ctx) }
{
}
@@ -75,8 +74,7 @@ const GpuWorkloadContext::Impl &GpuWorkloadContext::implementation() const
}
GpuWorkloadContext::Impl::Impl(GpuLanguage gpu_language, CLCompileContext *cl_compile_ctx)
- : _gpu_language(gpu_language), _cl_compile_ctx(cl_compile_ctx),
- _next_tensor_id(1), _mem_map()
+ : _gpu_language(gpu_language), _cl_compile_ctx(cl_compile_ctx), _next_tensor_id(1), _mem_map(), _managed_tensor_info()
{
}
@@ -103,26 +101,39 @@ void GpuWorkloadContext::Impl::register_user_tensor(ITensorInfo &tensor_info)
tensor_info.set_id(tensor_id);
_mem_map[tensor_id] = MemoryDescriptor{ MemoryType::User };
+ // Save a *copy* of the user tensor info in workload context for future reference
+ // Note that this means if the user modifies the @p tensor_info, the change will not be reflected in the context
+ _managed_tensor_info.emplace(tensor_info.id(), std::make_unique<TensorInfo>(tensor_info));
}
-void GpuWorkloadContext::Impl::register_aux_tensor(ITensorInfo &tensor_info, const AuxMemoryInfo &mem_info)
+ITensorInfo *GpuWorkloadContext::Impl::create_virtual_tensor()
{
- ARM_COMPUTE_ERROR_ON(tensor_info.has_valid_id());
-
- const auto tensor_id = next_tensor_id();
-
- tensor_info.set_id(tensor_id);
- _mem_map[tensor_id] = MemoryDescriptor{ MemoryType::Auxiliary, mem_info };
+ auto tensor_info = std::make_unique<TensorInfo>();
+ const auto tensor_id = -next_tensor_id();
+ tensor_info->set_id(tensor_id);
+ _mem_map[tensor_id] = MemoryDescriptor{ MemoryType::Virtual };
+ auto inserted = _managed_tensor_info.emplace(tensor_info->id(), std::move(tensor_info));
+ return inserted.first->second.get();
}
-void GpuWorkloadContext::Impl::register_virtual_tensor(ITensorInfo &tensor_info)
+ITensorInfo *GpuWorkloadContext::Impl::create_auxiliary_tensor(const ITensorInfo &itensor_info)
{
- ARM_COMPUTE_ERROR_ON(tensor_info.has_valid_id());
+ auto tensor_info = std::make_unique<TensorInfo>(itensor_info);
+ const auto tensor_id = next_tensor_id();
+ tensor_info->set_id(tensor_id);
+ _mem_map[tensor_id] = MemoryDescriptor{ MemoryType::Auxiliary, AuxMemoryInfo{ tensor_info->total_size() } };
+ auto inserted = _managed_tensor_info.emplace(tensor_info->id(), std::move(tensor_info));
+ return inserted.first->second.get();
+}
- const auto tensor_id = -next_tensor_id();
+ITensorInfo *GpuWorkloadContext::Impl::get_tensor_info(ITensorInfo::Id id)
+{
+ return _managed_tensor_info.at(id).get();
+}
- tensor_info.set_id(tensor_id);
- _mem_map[tensor_id] = MemoryDescriptor{ MemoryType::Virtual };
+const ITensorInfo *GpuWorkloadContext::Impl::get_tensor_info(ITensorInfo::Id id) const
+{
+ return _managed_tensor_info.at(id).get();
}
ITensorInfo::Id GpuWorkloadContext::Impl::next_tensor_id()
diff --git a/src/dynamic_fusion/sketch/gpu/GpuWorkloadContextImpl.h b/src/dynamic_fusion/sketch/gpu/GpuWorkloadContextImpl.h
index a857932791..c169476a70 100644
--- a/src/dynamic_fusion/sketch/gpu/GpuWorkloadContextImpl.h
+++ b/src/dynamic_fusion/sketch/gpu/GpuWorkloadContextImpl.h
@@ -36,7 +36,6 @@ namespace experimental
{
namespace dynamic_fusion
{
-
/** Internal implementation of workload context. */
class GpuWorkloadContext::Impl
{
@@ -52,7 +51,7 @@ public:
Impl(Impl &) = default;
/** Assignment */
- Impl& operator=(Impl &) = default;
+ Impl &operator=(Impl &) = default;
/** Get target GPU language. */
GpuLanguage gpu_language() const;
@@ -69,27 +68,34 @@ public:
*/
void register_user_tensor(ITensorInfo &tensor_info);
- /** Set a new ID and register the auxiliary tensor info.
+ /** Create a virtual (see @ref MemoryType) tensor info and save it
*
- * @param[in, out] tensor_info The tensor info to be registered.
- * @param[in] mem_info The auxiliary tensor memory info.
+ * @return ITensorInfo* The created virtual tensor info object pointer
*/
- void register_aux_tensor(ITensorInfo &tensor_info, const AuxMemoryInfo &mem_info);
-
- /** Set a new ID and register the virtual tensor info.
+ ITensorInfo *create_virtual_tensor();
+ /** Create an auxiliary (see @ref MemoryType) tensor info and save it
*
- * @param[in, out] tensor_info The tensor info to be registered.
+ * @param[in] tensor_info @ref ITensorInfo to copy from
+ *
+ * @return ITensorInfo* The created auxiliary tensor info object pointer
*/
- void register_virtual_tensor(ITensorInfo &tensor_info);
+ ITensorInfo *create_auxiliary_tensor(const ITensorInfo &tensor_info);
+
+ /** Get tensor info created by this context, from id */
+ ITensorInfo *get_tensor_info(ITensorInfo::Id id);
+
+ /** Get tensor info created by this context, from id */
+ const ITensorInfo *get_tensor_info(ITensorInfo::Id id) const;
private:
ITensorInfo::Id next_tensor_id();
- GpuLanguage _gpu_language;
+ GpuLanguage _gpu_language;
CLCompileContext *_cl_compile_ctx;
- ITensorInfo::Id _next_tensor_id;
+ ITensorInfo::Id _next_tensor_id;
MemoryDescriptorMap _mem_map;
+ std::map<ITensorInfo::Id, std::unique_ptr<TensorInfo>> _managed_tensor_info;
};
} // namespace dynamic_fusion
diff --git a/src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h b/src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h
index 44c99e844b..d3033898e9 100644
--- a/src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h
+++ b/src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h
@@ -26,13 +26,10 @@
#include "arm_compute/dynamic_fusion/sketch/MemoryDescriptor.h"
#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h"
-#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadContextImpl.h"
#include "src/dynamic_fusion/sketch/gpu/GpuComponentServices.h"
#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.h"
#include "src/dynamic_fusion/sketch/gpu/GpuOperatorGroup.h"
-
-#include <memory>
-#include <vector>
+#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadContextImpl.h"
namespace arm_compute
{
@@ -52,9 +49,8 @@ public:
Context *context)
: _context{ context },
_comp_services{},
- _component_graph{ &_comp_services },
- _operator_group{},
- _managed_tensor_info_list{ std::vector<std::unique_ptr<TensorInfo>>() }
+ _component_graph{ _context, &_comp_services },
+ _operator_group{}
{
}
/** Prevent instances of this class from being copy constructed */
@@ -90,10 +86,6 @@ public:
{
return _operator_group;
}
- ITensorInfo::Id allocate_new_tensor_id()
- {
- return ++_next_id;
- }
/** Generate @ref GpuWorkloadSourceCode from the workload sketch
* @note The sketch must be valid. Any error encountered during the building of the code will be thrown.
*
@@ -110,37 +102,29 @@ public:
*/
ITensorInfo *create_virtual_tensor()
{
- auto uptr = std::make_unique<TensorInfo>();
- _context->implementation().register_virtual_tensor(*uptr);
- _managed_tensor_info_list.emplace_back(std::move(uptr));
- return _managed_tensor_info_list.back().get();
+ return _context->implementation().create_virtual_tensor();
}
/** Create an auxiliary (see @ref MemoryType) tensor info and save it
*
- * @return ITensorInfo* The created auxiliary tensor info object pointer
- */
-
- /** Create an auxiliary (see @ref MemoryType) tensor info and save it
- *
* @param[in] tensor_info @ref ITensorInfo to copy from
*
* @return ITensorInfo* The created auxiliary tensor info object pointer
*/
ITensorInfo *create_auxiliary_tensor(const ITensorInfo &tensor_info)
{
- auto uptr = std::make_unique<TensorInfo>(tensor_info);
- _context->implementation().register_aux_tensor(*uptr, AuxMemoryInfo{ uptr->total_size() });
- _managed_tensor_info_list.emplace_back(std::move(uptr));
- return _managed_tensor_info_list.back().get();
+ return _context->implementation().create_auxiliary_tensor(tensor_info);
+ }
+
+ ITensorInfo *get_tensor_info(ITensorInfo::Id id)
+ {
+ return _context->implementation().get_tensor_info(id);
}
private:
- Context *_context;
- GpuComponentServices _comp_services;
- GpuKernelComponentGraph _component_graph;
- GpuOperatorGroup _operator_group;
- ITensorInfo::Id _next_id{ ITensorInfo::invalid_tensor_id };
- std::vector<std::unique_ptr<TensorInfo>> _managed_tensor_info_list;
+ Context *_context;
+ GpuComponentServices _comp_services;
+ GpuKernelComponentGraph _component_graph;
+ GpuOperatorGroup _operator_group;
};
} // namespace dynamic_fusion
} // namespace experimental
diff --git a/src/dynamic_fusion/sketch/gpu/GpuWorkloadSourceCode.h b/src/dynamic_fusion/sketch/gpu/GpuWorkloadSourceCode.h
index d1d0bdf77f..578366daaf 100644
--- a/src/dynamic_fusion/sketch/gpu/GpuWorkloadSourceCode.h
+++ b/src/dynamic_fusion/sketch/gpu/GpuWorkloadSourceCode.h
@@ -27,6 +27,7 @@
#include "arm_compute/core/experimental/Types.h"
#include "arm_compute/dynamic_fusion/sketch/MemoryDescriptor.h"
#include "src/dynamic_fusion/sketch/gpu/GpuKernelSourceCode.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadContextImpl.h"
namespace arm_compute
{
@@ -34,10 +35,45 @@ namespace experimental
{
namespace dynamic_fusion
{
+#ifdef ACL_INTERNAL_TEST_CKW_IN_DF
+namespace
+{
+/** Extract kernel arguments of one tensor from a flat list of kernel arguments.
+ *
+ * @param[in] flat_kernel_args
+ * @return GpuKernelArgumentList
+ */
+GpuKernelArgumentList extract_kernel_args_for_one_tensor(GpuKernelArgumentList &flat_kernel_args)
+{
+ if(flat_kernel_args.empty())
+ {
+ return {};
+ }
+ GpuKernelArgumentList tensor_kargs{};
+
+ const GpuKernelArgumentBinding &karg_head = flat_kernel_args.front();
+ tensor_kargs.push_back(karg_head);
+ flat_kernel_args.pop_front();
+ const auto tensor_id = karg_head.id();
+
+ while(!flat_kernel_args.empty())
+ {
+ const GpuKernelArgumentBinding &karg = flat_kernel_args.front();
+ if(karg.id() != tensor_id) // Encounter the next tensor, return the current tensor's kernel arguments
+ {
+ return tensor_kargs;
+ }
+ tensor_kargs.push_back(karg);
+ flat_kernel_args.pop_front();
+ }
+ return tensor_kargs;
+}
+}
+#endif // ACL_INTERNAL_TEST_CKW_IN_DF
/** Uniquely identifies a @ref GpuUnitWorkload within a @ref GpuWorkloadSourceCode */
using UnitWorkloadId = int32_t;
-/** Describes all the info related to a kernel in order to:
+/** Describes all the info related to a **workload argument** (tensor) in order to:
* - be used by runtime to configure gpu kernel argument
* - be used by memory managers to allocate required memory
*/
@@ -46,6 +82,7 @@ class GpuWorkloadArgument
public:
/** Default constructor */
GpuWorkloadArgument() = default;
+#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
/** Constructor
*
* @param[in] tensor_info @ref ITensorInfo of the workload argument
@@ -60,6 +97,22 @@ public:
_kernel_arg_info{ kernel_arg_info }
{
}
+#else // ACL_INTERNAL_TEST_CKW_IN_DF
+ /** Constructor
+ *
+ * @param[in] tensor_info @ref ITensorInfo of the workload argument
+ * @param[in] mem_desc @ref MemoryDescriptor of the workload argument
+ * @param[in] kernel_arg_list @ref GpuKernelArgumentList of the workload argument
+ */
+ GpuWorkloadArgument(const ITensorInfo &tensor_info,
+ const MemoryDescriptor &mem_desc,
+ const GpuKernelArgumentList &kernel_args)
+ : _tensor_info{ tensor_info },
+ _mem_desc{ mem_desc },
+ _kernel_args{ kernel_args }
+ {
+ }
+#endif // ACL_INTERNAL_TEST_CKW_IN_DF
/** Get tensor id within workload */
ITensorInfo::Id id() const
{
@@ -85,6 +138,7 @@ public:
{
return &_mem_desc;
}
+#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
/** Get @ref GpuKernelArgumentInfo of the argument */
GpuKernelArgumentInfo *kernel_argument_info()
{
@@ -95,6 +149,18 @@ public:
{
return &_kernel_arg_info;
}
+#else // ACL_INTERNAL_TEST_CKW_IN_DF
+ /** Get @ref GpuKernelArgumentList of the workload tensor */
+ GpuKernelArgumentList *kernel_argument_list()
+ {
+ return &_kernel_args;
+ }
+ /** Get @ref GpuKernelArgumentList of the workload tensor */
+ const GpuKernelArgumentList *kernel_argument_list() const
+ {
+ return &_kernel_args;
+ }
+#endif // ACL_INTERNAL_TEST_CKW_IN_DF
/** Check if the workload argument has valid id
*
* @return true If has valid id
@@ -106,9 +172,13 @@ public:
}
private:
- TensorInfo _tensor_info{};
- MemoryDescriptor _mem_desc{};
- GpuKernelArgumentInfo _kernel_arg_info{};
+ TensorInfo _tensor_info{};
+ MemoryDescriptor _mem_desc{};
+#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
+ GpuKernelArgumentInfo _kernel_arg_info {};
+#else // ACL_INTERNAL_TEST_CKW_IN_DF
+ GpuKernelArgumentList _kernel_args {};
+#endif // ACL_INTERNAL_TEST_CKW_IN_DF
};
/** Describes when a unit workload is run.
@@ -179,15 +249,18 @@ public:
* @param[in] kernel_code @ref GpuKernelSourceCode to be contained within the unit workload
* @param[in] stage Stage of the unit workload
* @param[in] mem_map @ref MemoryDescriptor map for all tensors within the unit workload
+ * @param[in] context @ref GpuWorkloadContext associated with the unit workload
*
* @return UnitWorkloadId Allocated unit workload id
*/
- UnitWorkloadId add_unit_workload(const GpuKernelSourceCode &kernel_code, const UnitWorkloadStage &stage, const MemoryDescriptorMap &mem_map)
+ UnitWorkloadId add_unit_workload(const GpuKernelSourceCode &kernel_code, const UnitWorkloadStage &stage, const MemoryDescriptorMap &mem_map, const GpuWorkloadContext *context)
{
// Use the size of the kernel codes as Id
const auto uwk_id = static_cast<UnitWorkloadId>(_unit_workloads.size());
const auto unit_work = GpuUnitWorkload(uwk_id, kernel_code, stage);
_unit_workloads.push_back(unit_work);
+#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
+ ARM_COMPUTE_UNUSED(context);
// Assemble kernel argument with memory descriptor to form workload argument
for(const auto &id_arg : kernel_code.arguments())
{
@@ -200,6 +273,28 @@ public:
}
_tensor_uwork_map[arg_id].insert(uwk_id);
}
+#else // ACL_INTERNAL_TEST_CKW_IN_DF
+ GpuKernelArgumentList flat_kernel_args = kernel_code.arguments();
+ GpuKernelArgumentList tensor_kargs{};
+ while(true)
+ {
+ tensor_kargs = extract_kernel_args_for_one_tensor(flat_kernel_args);
+ if(tensor_kargs.empty())
+ {
+ break;
+ }
+ else
+ {
+ const auto tensor_id = tensor_kargs.at(0).id();
+ _workload_arguments[tensor_id] = GpuWorkloadArgument{ *context->implementation().get_tensor_info(tensor_id), mem_map.at(tensor_id), tensor_kargs };
+ if(_tensor_uwork_map.find(tensor_id) == _tensor_uwork_map.end())
+ {
+ _tensor_uwork_map[tensor_id] = std::set<UnitWorkloadId>();
+ }
+ _tensor_uwork_map[tensor_id].insert(uwk_id);
+ }
+ }
+#endif // ACL_INTERNAL_TEST_CKW_IN_DF
return uwk_id;
}
/** Get a unit workload from its id */
diff --git a/src/dynamic_fusion/sketch/gpu/IGpuKernelWriter.h b/src/dynamic_fusion/sketch/gpu/IGpuKernelWriter.h
index ae67790b4b..28e5432224 100644
--- a/src/dynamic_fusion/sketch/gpu/IGpuKernelWriter.h
+++ b/src/dynamic_fusion/sketch/gpu/IGpuKernelWriter.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2022 Arm Limited.
+ * Copyright (c) 2022-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -27,9 +27,11 @@
#include "arm_compute/core/CL/CLCompileContext.h"
#include "arm_compute/core/Window.h"
#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelSourceCode.h"
#include <map>
#include <string>
+#include <vector>
namespace arm_compute
{
@@ -56,8 +58,20 @@ public:
virtual std::string get_config_id() = 0;
/** Generate execution window */
virtual Window get_window() const = 0;
- /** Get the kernel argument lists of the kernel*/
- virtual std::map<ITensorInfo::Id, GpuKernelArgument> get_tensors() = 0;
+ /** Get the kernel argument lists of the kernel
+ * @deprecated To be removed along with ClTemplateWriter
+ */
+ virtual std::map<ITensorInfo::Id, GpuKernelArgument> get_tensors()
+ {
+ return {};
+ }
+#ifdef ACL_INTERNAL_TEST_CKW_IN_DF
+ /** Get the flat list of arguments of the kernel*/
+ virtual GpuKernelArgumentList get_kernel_arguments()
+ {
+ return {};
+ }
+#endif // ACL_INTERNAL_TEST_CKW_IN_DF
};
} // namespace dynamic_fusion
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwDriver.cpp b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwDriver.cpp
index d5c03c60c5..d78956f835 100644
--- a/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwDriver.cpp
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwDriver.cpp
@@ -30,6 +30,7 @@
#include "arm_compute/core/Window.h"
#include "src/common/utils/Log.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/TypeConverter.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h"
@@ -42,29 +43,24 @@ namespace experimental
namespace dynamic_fusion
{
GpuCkwDriver::GpuCkwDriver(const GpuKernelComponentGroup &components)
- : _components{ components }
+ : _components{ components }, _kernel{ GpuTargetLanguage::OpenCL }
{
}
std::string GpuCkwDriver::get_name()
{
ARM_COMPUTE_LOG_PARAMS(std::string("[V1] TODO"));
- return "todo_get_name";
+ return "unnamed";
}
std::string GpuCkwDriver::get_code()
{
- ARM_COMPUTE_LOG_PARAMS(std::string("[V1] TODO"));
- ckw::Kernel kernel(get_name().c_str(), GpuTargetLanguage::OpenCL);
- GpuCkwKernelWriter root_writer(kernel);
+ _kernel.name(get_name());
+ GpuCkwKernelWriter root_writer(_kernel);
GpuCkwScopedKernelWriter writer(&root_writer);
GpuCkwVariableTable vtable{};
// Global Kernel Writer Driver code
-
- // The following is just an incomplete example of using the kernel writer
-
- // Iterate over component specific Ckw Driver; generate component code and concatenate them
for(auto &comp : _components)
{
auto ckw_driver = comp->ckw_component_driver();
@@ -96,18 +92,31 @@ Window GpuCkwDriver::get_window() const
return root_comp->ckw_component_driver()->get_window();
}
-std::map<ITensorInfo::Id, GpuKernelArgument> GpuCkwDriver::get_tensors()
+GpuKernelArgumentList GpuCkwDriver::get_kernel_arguments()
{
- ARM_COMPUTE_LOG_PARAMS(std::string("[V1] TODO"));
- // Assemble GpuKernelArguments
- std::map<ITensorInfo::Id, GpuKernelArgument> tensors;
- for(const auto t : _components.get_argument_tensors())
+ GpuKernelArgumentList args{};
+ for(const auto &arg : _kernel.arguments())
{
- tensors.emplace(
- t->id(),
- GpuKernelArgument{ *t, { GpuKernelArgumentInfo::Type::Tensor_Special_0 } });
+ switch(arg.type())
+ {
+ case KernelArgument::Type::TensorStorage:
+ {
+ args.emplace_back(static_cast<ITensorInfo::Id>(arg.id()), from_ckw(arg.tensor_storage_type()));
+ break;
+ }
+ case KernelArgument::Type::TensorComponent:
+ {
+ args.emplace_back(static_cast<ITensorInfo::Id>(arg.id()), from_ckw(arg.tensor_component_type()));
+ break;
+ }
+ default:
+ {
+ ARM_COMPUTE_ERROR("Unsupported KernelArgument Type");
+ break;
+ }
+ }
}
- return tensors;
+ return args;
}
} // namespace dynamic_fusion
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwDriver.h b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwDriver.h
index 2084b72098..c6e03f6e03 100644
--- a/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwDriver.h
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwDriver.h
@@ -28,6 +28,8 @@
#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
#include "src/dynamic_fusion/sketch/gpu/IGpuKernelWriter.h"
+#include "ckw/Kernel.h"
+
#include <map>
#include <string>
@@ -66,11 +68,12 @@ public:
std::string get_config_id() override;
/** Generate execution window */
Window get_window() const override;
- /** Get the kernel argument lists of the kernel*/
- std::map<ITensorInfo::Id, GpuKernelArgument> get_tensors() override;
+ /** Get the flat list of arguments of the kernel*/
+ GpuKernelArgumentList get_kernel_arguments() override;
private:
GpuKernelComponentGroup _components{};
+ ckw::Kernel _kernel;
};
} // namespace dynamic_fusion
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.cpp b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.cpp
index 154968775c..6f3eca711d 100644
--- a/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.cpp
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.cpp
@@ -23,9 +23,10 @@
*/
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h"
-#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/TypeConverter.h"
#include <sstream>
@@ -35,7 +36,8 @@ namespace experimental
{
namespace dynamic_fusion
{
-GpuCkwComponentArgument *GpuCkwVariableTable::declare_variable(const GpuKernelComponentGroup &comp_group, GpuCkwScopedKernelWriter &writer, const ITensorInfo *tensor, const std::string &alias)
+GpuCkwComponentArgument *GpuCkwVariableTable::declare_variable(const GpuKernelComponentGroup &comp_group, GpuCkwScopedKernelWriter &writer, const ITensorInfo *tensor, TensorStorageType storage,
+ const std::string &alias)
{
ARM_COMPUTE_ERROR_ON_MSG(!tensor->has_valid_id(), "Tensor info with valid id expected");
@@ -59,7 +61,7 @@ GpuCkwComponentArgument *GpuCkwVariableTable::declare_variable(const GpuKernelCo
std::stringstream ss;
ss << alias << "_t" << abs(tensor->id());
const auto uniq_name = ss.str();
- GpuCkwComponentArgument var{ writer->declare_tensor_argument(uniq_name.c_str(), to_ckw(*tensor)) };
+ GpuCkwComponentArgument var{ writer->declare_tensor_argument(uniq_name, to_ckw(*tensor), to_ckw(storage)) };
auto &&inserted = _vars.emplace(tensor->id(), var);
return &(inserted.first->second);
}
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h
index 1c9cb083ea..0649dcba9d 100644
--- a/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h
@@ -24,8 +24,8 @@
#ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_GPUCKWVARIABLETABLE
#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_GPUCKWVARIABLETABLE
-#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwComponentArgument.h"
#include "arm_compute/core/ITensorInfo.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwComponentArgument.h"
#include <map>
@@ -37,6 +37,7 @@ namespace dynamic_fusion
{
class GpuKernelComponentGroup;
class GpuCkwScopedKernelWriter;
+enum class TensorStorageType;
/** A table of all the variables used in the kernel.
*
@@ -52,11 +53,13 @@ public:
* @param[in] comp_group Component group the tensor belongs to
* @param[in] writer Compute Kernel Writer
* @param[in] tensor Tensor info with which the new variable is associated
+ * @param[in] storage Tensor storage type associated with the tensor
* @param[in] alias Alias for the variable. Will be used as part of the variable name
*
* @return GpuCkwComponentArgument*
*/
- GpuCkwComponentArgument *declare_variable(const GpuKernelComponentGroup &comp_group, GpuCkwScopedKernelWriter &writer, const ITensorInfo *tensor, const std::string &alias = "unnamed");
+ GpuCkwComponentArgument *declare_variable(const GpuKernelComponentGroup &comp_group, GpuCkwScopedKernelWriter &writer, const ITensorInfo *tensor, TensorStorageType storage,
+ const std::string &alias = "unnamed");
private:
std::map<ITensorInfo::Id, GpuCkwComponentArgument> _vars{};
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwActivation.cpp b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwActivation.cpp
index 224c176a31..c07fac0e0d 100644
--- a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwActivation.cpp
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwActivation.cpp
@@ -23,14 +23,15 @@
*/
#include "GpuCkwActivation.h"
-#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.h"
-#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/utils/helpers/AdjustVecSize.h"
#include "ckw/TensorTileSampler.h"
#include "src/core/helpers/WindowHelpers.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h"
#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/WriterHelper.h"
#include <string>
@@ -84,8 +85,8 @@ inline TensorTileSampler create_sampler(GpuCkwScopedKernelWriter &writer, int32_
} // namespace
GpuCkwActivation::GpuCkwActivation(ComponentId id,
- const ArgumentPack<ITensorInfo> &tensors,
- const Attributes &attributes)
+ const ArgumentPack<ITensorInfo> &tensors,
+ const Attributes &attributes)
: IGpuCkwComponentDriver{ id, tensors },
_src{},
_dst{},
@@ -102,8 +103,8 @@ void GpuCkwActivation::write_component_code(const ComponentGroup &comp_group, Gp
const unsigned int n0 = root_window.x().step();
const unsigned int m0 = root_window.y().step();
- GpuCkwComponentArgument *src = vtable.declare_variable(comp_group, writer, _src, "src");
- GpuCkwComponentArgument *dst = vtable.declare_variable(comp_group, writer, _dst, "dst");
+ GpuCkwComponentArgument *src = vtable.declare_variable(comp_group, writer, _src, TensorStorageType::ClBufferUint8Ptr, "src");
+ GpuCkwComponentArgument *dst = vtable.declare_variable(comp_group, writer, _dst, TensorStorageType::ClBufferUint8Ptr, "dst");
load_src_dst_tiles_and_prepare_sampler(writer, src, dst, m0, n0, create_sampler);
@@ -111,14 +112,14 @@ void GpuCkwActivation::write_component_code(const ComponentGroup &comp_group, Gp
auto &dst_tile = dst->tile();
// Constants
- const auto &constant_minus_1 = writer->declare_tile("minus_1", -1);
- const auto &constant_pos_1 = writer->declare_tile("one", 1);
- const auto &constant_zero = writer->declare_tile("zero", 0);
- const auto &constant_A = writer->declare_tile("A_VAL", _attributes.a());
- const auto &constant_B = writer->declare_tile("B_VAL", _attributes.b());
+ const auto &constant_minus_1 = writer->declare_tile("minus_1", -1);
+ const auto &constant_pos_1 = writer->declare_tile("one", 1);
+ const auto &constant_zero = writer->declare_tile("zero", 0);
+ const auto &constant_A = writer->declare_tile("A_VAL", _attributes.a());
+ const auto &constant_B = writer->declare_tile("B_VAL", _attributes.b());
// Perform the operation.
- switch (_attributes.activation())
+ switch(_attributes.activation())
{
case ActivationLayerInfo::ActivationFunction::LOGISTIC:
{
@@ -178,9 +179,9 @@ Window GpuCkwActivation::get_window() const
// Collapse Dim 1 (W) and Dim 2 (H) together, leave Dim 0 (C) unchanged
// This is in line with the collapsing convention used by operators like Conv2d
output_shape.collapse(2U, 1U);
- constexpr unsigned int vector_size_byte_opencl = 16;
- const unsigned int num_elems_processed_per_iteration = adjust_vec_size(vector_size_byte_opencl / _dst->element_size(), _dst->dimension(0));
- Window win = calculate_max_window(output_shape, Steps(num_elems_processed_per_iteration));
+ constexpr unsigned int vector_size_byte_opencl = 16;
+ const unsigned int num_elems_processed_per_iteration = adjust_vec_size(vector_size_byte_opencl / _dst->element_size(), _dst->dimension(0));
+ Window win = calculate_max_window(output_shape, Steps(num_elems_processed_per_iteration));
return win;
}
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwCast.cpp b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwCast.cpp
index dd71c55df2..8d7e6a8c37 100644
--- a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwCast.cpp
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwCast.cpp
@@ -23,14 +23,15 @@
*/
#include "GpuCkwCast.h"
-#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.h"
-#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/utils/helpers/AdjustVecSize.h"
#include "ckw/TensorTileSampler.h"
#include "src/core/helpers/WindowHelpers.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h"
#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/TypeConverter.h"
#include <string>
@@ -84,8 +85,8 @@ inline TensorTileSampler create_sampler(GpuCkwScopedKernelWriter &writer, int32_
} // namespace
GpuCkwCast::GpuCkwCast(ComponentId id,
- const ArgumentPack<ITensorInfo> &tensors,
- const Attributes &attributes)
+ const ArgumentPack<ITensorInfo> &tensors,
+ const Attributes &attributes)
: IGpuCkwComponentDriver{ id, tensors },
_src{},
_dst{},
@@ -102,8 +103,8 @@ void GpuCkwCast::write_component_code(const ComponentGroup &comp_group, GpuCkwVa
const unsigned int n0 = root_window.x().step();
const unsigned int m0 = root_window.y().step();
- GpuCkwComponentArgument *src = vtable.declare_variable(comp_group, writer, _src, "src");
- GpuCkwComponentArgument *dst = vtable.declare_variable(comp_group, writer, _dst, "dst");
+ GpuCkwComponentArgument *src = vtable.declare_variable(comp_group, writer, _src, TensorStorageType::ClBufferUint8Ptr, "src");
+ GpuCkwComponentArgument *dst = vtable.declare_variable(comp_group, writer, _dst, TensorStorageType::ClBufferUint8Ptr, "dst");
// Load the source tile and prepare the sampler.
if(!src->has_tile())
@@ -124,7 +125,7 @@ void GpuCkwCast::write_component_code(const ComponentGroup &comp_group, GpuCkwVa
if(!dst->has_tile())
{
// Get Target datatype and convert it to ckw::DataType.
- ckw::DataType target_dt = dynamic_fusion::to_ckw(_attributes.data_type());
+ ckw::DataType target_dt = dynamic_fusion::to_ckw(_attributes.data_type());
// Create dst_tile based on src_tile dimensions and with target DataType.
const TileInfo src_tile_info = src_tile.tile_info();
@@ -166,9 +167,9 @@ Window GpuCkwCast::get_window() const
// Collapse Dim 1 (W) and Dim 2 (H) together, leave Dim 0 (C) unchanged
// This is in line with the collapsing convention used by operators like Conv2d
output_shape.collapse(2U, 1U);
- constexpr unsigned int vector_size_byte_opencl = 16;
- const unsigned int num_elems_processed_per_iteration = adjust_vec_size(vector_size_byte_opencl / _dst->element_size(), _dst->dimension(0));
- Window win = calculate_max_window(output_shape, Steps(num_elems_processed_per_iteration));
+ constexpr unsigned int vector_size_byte_opencl = 16;
+ const unsigned int num_elems_processed_per_iteration = adjust_vec_size(vector_size_byte_opencl / _dst->element_size(), _dst->dimension(0));
+ Window win = calculate_max_window(output_shape, Steps(num_elems_processed_per_iteration));
return win;
}
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.cpp b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.cpp
index 685bf391dc..15e32e26d5 100644
--- a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.cpp
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.cpp
@@ -23,14 +23,16 @@
*/
#include "GpuCkwElementwiseBinary.h"
-#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.h"
-#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Validate.h"
+#include "arm_compute/core/utils/helpers/AdjustVecSize.h"
#include "ckw/TensorTileSampler.h"
#include "ckw/types/TensorSamplerTypes.h"
#include "src/core/helpers/WindowHelpers.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h"
#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/WriterHelper.h"
#include <string>
@@ -54,14 +56,20 @@ inline TensorTileSampler create_simple_sampler(GpuCkwScopedKernelWriter &writer,
auto &gid_1 = writer->declare_tile("gid_1", ckw::DataType::Int32);
auto &gid_2 = writer->declare_tile("gid_2", ckw::DataType::Int32);
- auto &const_0 = writer->declare_tile("0", 0);
-
writer->op_get_global_id(gid_0, 0);
writer->op_get_global_id(gid_1, 1);
writer->op_get_global_id(gid_2, 2);
- sampler.x(gid_0);
- sampler.y(gid_1);
+ auto &x_coord = writer->declare_tile("x_coord", ckw::DataType::Int32);
+ auto &y_coord = writer->declare_tile("y_coord", ckw::DataType::Int32);
+ auto &m0_t = writer->declare_tile("m0", m0);
+ auto &n0_t = writer->declare_tile("n0", n0);
+ writer->op_binary_expression(x_coord, gid_0, ckw::BinaryOp::Mul, n0_t);
+ writer->op_binary_expression(y_coord, gid_1, ckw::BinaryOp::Mul, m0_t);
+
+ sampler.x(x_coord);
+ sampler.y(y_coord);
+ auto &const_0 = writer->declare_tile("0", 0);
sampler.z(const_0); // 3rd dimension collapsed with 2nd dimension
sampler.b(gid_2);
@@ -99,9 +107,9 @@ void GpuCkwElementwiseBinary::write_component_code(const ComponentGroup &comp_gr
const unsigned int n0 = root_window.x().step();
const unsigned int m0 = root_window.y().step();
- GpuCkwComponentArgument *lhs = vtable.declare_variable(comp_group, writer, _lhs, "lhs");
- GpuCkwComponentArgument *rhs = vtable.declare_variable(comp_group, writer, _rhs, "rhs");
- GpuCkwComponentArgument *dst = vtable.declare_variable(comp_group, writer, _dst, "dst");
+ GpuCkwComponentArgument *lhs = vtable.declare_variable(comp_group, writer, _lhs, TensorStorageType::ClBufferUint8Ptr, "lhs");
+ GpuCkwComponentArgument *rhs = vtable.declare_variable(comp_group, writer, _rhs, TensorStorageType::ClBufferUint8Ptr, "rhs");
+ GpuCkwComponentArgument *dst = vtable.declare_variable(comp_group, writer, _dst, TensorStorageType::ClBufferUint8Ptr, "dst");
// Load the LHS and RHS tiles and prepare the tensor sampler.
load_lhs_rhs_tiles_and_prepare_sampler(writer, lhs, rhs, m0, n0, create_simple_sampler);
@@ -131,10 +139,9 @@ Window GpuCkwElementwiseBinary::get_window() const
// Collapse Dim 1 (W) and Dim 2 (H) together, leave Dim 0 (C) unchanged
// This is in line with the collapsing convention used by operators like Conv2d
output_shape.collapse(2U, 1U);
- // constexpr unsigned int vector_size_byte_opencl = 16;
- // const unsigned int num_elems_processed_per_iteration = adjust_vec_size(vector_size_byte_opencl / _dst->element_size(), _dst->dimension(0));
- const unsigned int num_elems_processed_per_iteration = 1U; // Hard-coded for now
- Window win = calculate_max_window(output_shape, Steps(num_elems_processed_per_iteration));
+ constexpr unsigned int vector_size_byte_opencl = 16;
+ const unsigned int num_elems_processed_per_iteration = adjust_vec_size(vector_size_byte_opencl / _dst->element_size(), _dst->dimension(0));
+ Window win = calculate_max_window(output_shape, Steps(num_elems_processed_per_iteration));
return win;
}
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.cpp b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.cpp
index 63555e6064..247d1b834f 100644
--- a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.cpp
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.cpp
@@ -24,6 +24,7 @@
#include "GpuCkwStore.h"
#include "arm_compute/core/Error.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h"
@@ -43,8 +44,8 @@ GpuCkwStore::GpuCkwStore(ComponentId id, const ArgumentPack<ITensorInfo> &tensor
}
void GpuCkwStore::write_component_code(const ComponentGroup &comp_group, GpuCkwVariableTable &vtable, GpuCkwScopedKernelWriter writer) const
{
- auto src = vtable.declare_variable(comp_group, writer, _src, "src");
- auto dst = vtable.declare_variable(comp_group, writer, _dst, "dst");
+ auto src = vtable.declare_variable(comp_group, writer, _src, TensorStorageType::ClBufferUint8Ptr, "src");
+ auto dst = vtable.declare_variable(comp_group, writer, _dst, TensorStorageType::ClBufferUint8Ptr, "dst");
auto &src_tile = src->tile();
const auto &sampler = src->tile_sampler();
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/TypeConverter.h b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/TypeConverter.h
index 9027bddd76..8a38d67d80 100644
--- a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/TypeConverter.h
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/TypeConverter.h
@@ -28,6 +28,7 @@
#include "arm_compute/core/TensorShape.h"
#include "arm_compute/core/Types.h"
#include "ckw/TensorInfo.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h"
namespace arm_compute
{
@@ -98,6 +99,103 @@ inline ckw::TensorInfo to_ckw(const ITensorInfo &tensor_info)
tensor_info.id()
};
}
+
+inline TensorComponentType from_ckw(const ckw::TensorComponentType &component)
+{
+ switch(component)
+ {
+ case ckw::TensorComponentType::OffsetFirstElement:
+ return TensorComponentType::OffsetFirstElement;
+ break;
+ case ckw::TensorComponentType::Stride0:
+ return TensorComponentType::Stride0;
+ break;
+ case ckw::TensorComponentType::Stride1:
+ return TensorComponentType::Stride1;
+ break;
+ case ckw::TensorComponentType::Stride2:
+ return TensorComponentType::Stride2;
+ break;
+ case ckw::TensorComponentType::Stride3:
+ return TensorComponentType::Stride3;
+ break;
+ case ckw::TensorComponentType::Stride4:
+ return TensorComponentType::Stride4;
+ break;
+ case ckw::TensorComponentType::Dim0:
+ return TensorComponentType::Dim0;
+ break;
+ case ckw::TensorComponentType::Dim1:
+ return TensorComponentType::Dim1;
+ break;
+ case ckw::TensorComponentType::Dim2:
+ return TensorComponentType::Dim2;
+ break;
+ case ckw::TensorComponentType::Dim3:
+ return TensorComponentType::Dim3;
+ break;
+ case ckw::TensorComponentType::Dim4:
+ return TensorComponentType::Dim4;
+ break;
+ case ckw::TensorComponentType::Dim1xDim2:
+ return TensorComponentType::Dim1xDim2;
+ break;
+ case ckw::TensorComponentType::Dim2xDim3:
+ return TensorComponentType::Dim2xDim3;
+ break;
+ case ckw::TensorComponentType::Dim1xDim2xDim3:
+ return TensorComponentType::Dim1xDim2xDim3;
+ break;
+ case ckw::TensorComponentType::Unknown:
+ return TensorComponentType::Unknown;
+ default:
+ ARM_COMPUTE_ERROR("Unknown CKW tensor component");
+ return TensorComponentType::Unknown;
+ }
+}
+
+inline ckw::TensorStorageType to_ckw(const TensorStorageType &storage)
+{
+ switch(storage)
+ {
+ case TensorStorageType::ClBufferUint8Ptr:
+ return ckw::TensorStorageType::BufferUint8Ptr;
+ break;
+ case TensorStorageType::ClImage2dReadOnly:
+ return ckw::TensorStorageType::Texture2dReadOnly;
+ break;
+ case TensorStorageType::ClImage2dWriteOnly:
+ return ckw::TensorStorageType::Texture2dWriteOnly;
+ break;
+ case TensorStorageType::Unknown:
+ return ckw::TensorStorageType::Unknown;
+ break;
+ default:
+ ARM_COMPUTE_ERROR("Unknown tensor storage type");
+ return ckw::TensorStorageType::Unknown;
+ }
+}
+inline TensorStorageType from_ckw(const ckw::TensorStorageType &storage)
+{
+ switch(storage)
+ {
+ case ckw::TensorStorageType::BufferUint8Ptr:
+ return TensorStorageType::ClBufferUint8Ptr;
+ break;
+ case ckw::TensorStorageType::Texture2dReadOnly:
+ return TensorStorageType::ClImage2dReadOnly;
+ break;
+ case ckw::TensorStorageType::Texture2dWriteOnly:
+ return TensorStorageType::ClImage2dWriteOnly;
+ break;
+ case ckw::TensorStorageType::Unknown:
+ return TensorStorageType::Unknown;
+ break;
+ default:
+ ARM_COMPUTE_ERROR("Unknown CKW tensor storage type");
+ return TensorStorageType::Unknown;
+ }
+}
} // namespace dynamic_fusion
} // namespace experimental
} // namespace arm_compute
diff --git a/src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h b/src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h
index d600956b4f..af766a7ece 100644
--- a/src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h
+++ b/src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h
@@ -104,9 +104,11 @@ public:
{
return _properties;
}
- /** Get template writer for the component */
- virtual const IGpuTemplateComponentWriter *template_writer() const = 0;
- /** Get compute kernel writer driver for the component */
+ /** Get writer for the component */
+ virtual const IGpuTemplateComponentWriter *template_writer() const
+ {
+ return nullptr;
+ }
virtual const IGpuCkwComponentDriver *ckw_component_driver() const
{
return nullptr;
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.cpp b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.cpp
index d2cde40a10..c41257d18c 100644
--- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.cpp
+++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.cpp
@@ -24,8 +24,11 @@
#include "ClComponentActivation.h"
#include "src/core/CL/CLValidate.h"
-#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwActivation.h"
+#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
#include "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateActivation.h"
+#else //ACL_INTERNAL_TEST_CKW_IN_DF
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwActivation.h"
+#endif //ACL_INTERNAL_TEST_CKW_IN_DF
namespace arm_compute
{
@@ -66,8 +69,17 @@ ClComponentActivation::ClComponentActivation(ComponentId
const ArgumentPack<ITensorInfo> &tensors,
const Attributes &attributes)
: IGpuKernelComponent{ id, properties, tensors },
- _component_writer{ std::make_unique<ClTemplateActivation>(id, tensors, attributes) },
- _ckw_driver{ std::make_unique<GpuCkwActivation>(id, tensors, attributes) }
+#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
+ _component_writer
+{
+ std::make_unique<ClTemplateActivation>(id, tensors, attributes)
+}
+#else //ACL_INTERNAL_TEST_CKW_IN_DF
+ _component_writer
+{
+ std::make_unique<GpuCkwActivation>(id, tensors, attributes)
+}
+#endif //ACL_INTERNAL_TEST_CKW_IN_DF
{
}
@@ -75,15 +87,15 @@ ClComponentActivation::~ClComponentActivation()
{
}
+#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
const IGpuTemplateComponentWriter *ClComponentActivation::template_writer() const
+#else //ACL_INTERNAL_TEST_CKW_IN_DF
+const IGpuCkwComponentDriver *ClComponentActivation::ckw_component_driver() const
+#endif //ACL_INTERNAL_TEST_CKW_IN_DF
{
return _component_writer.get();
}
-const IGpuCkwComponentDriver *ClComponentActivation::ckw_component_driver() const
-{
- return _ckw_driver.get();
-}
} // namespace dynamic_fusion
} // namespace experimental
} // namespace arm_compute
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.h b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.h
index bb6f7c6e30..ebe8719420 100644
--- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.h
+++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.h
@@ -42,8 +42,11 @@ template <typename T>
class ArgumentPack;
/** Forward declaration */
+#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
class ClTemplateActivation;
+#else //ACL_INTERNAL_TEST_CKW_IN_DF
class GpuCkwActivation;
+#endif //ACL_INTERNAL_TEST_CKW_IN_DF
class ClComponentActivation final : public IGpuKernelComponent
{
@@ -106,10 +109,12 @@ public:
/** Allow instances of this class to be moved */
ClComponentActivation &operator=(ClComponentActivation &&component) = default;
- /** Get template writer for the component */
+ /** Get writer for the component */
+#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
const IGpuTemplateComponentWriter *template_writer() const override;
-
- const IGpuCkwComponentDriver *ckw_component_driver() const override;
+#else //ACL_INTERNAL_TEST_CKW_IN_DF
+ const IGpuCkwComponentDriver *ckw_component_driver() const override;
+#endif //ACL_INTERNAL_TEST_CKW_IN_DF
/** Get component type */
GpuComponentType type() const override
@@ -118,8 +123,11 @@ public:
}
private:
+#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
std::unique_ptr<ClTemplateActivation> _component_writer;
- std::unique_ptr<GpuCkwActivation> _ckw_driver;
+#else //ACL_INTERNAL_TEST_CKW_IN_DF
+ std::unique_ptr<GpuCkwActivation> _component_writer;
+#endif //ACL_INTERNAL_TEST_CKW_IN_DF
};
} // namespace dynamic_fusion
} // namespace experimental
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.cpp b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.cpp
index 92933ae7a5..635869f817 100644
--- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.cpp
+++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.cpp
@@ -26,8 +26,11 @@
#include "arm_compute/core/Error.h"
#include "src/core/CL/CLValidate.h"
#include "src/dynamic_fusion/sketch/ArgumentPack.h"
-#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwCast.h"
+#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
#include "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateCast.h"
+#else //ACL_INTERNAL_TEST_CKW_IN_DF
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwCast.h"
+#endif //ACL_INTERNAL_TEST_CKW_IN_DF
namespace arm_compute
{
@@ -67,23 +70,32 @@ ClComponentCast::ClComponentCast(ComponentId id,
const Attributes &attributes,
const Settings &settings)
: IGpuKernelComponent{ id, properties, tensors },
- _component_writer{ std::make_unique<ClTemplateCast>(id, tensors, attributes) },
- _ckw_driver{ std::make_unique<GpuCkwCast>(id, tensors, attributes) }
+#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
+ _component_writer
+{
+ std::make_unique<ClTemplateCast>(id, tensors, attributes)
+}
+#else //ACL_INTERNAL_TEST_CKW_IN_DF
+ _component_writer
+{
+ std::make_unique<GpuCkwCast>(id, tensors, attributes)
+}
+#endif //ACL_INTERNAL_TEST_CKW_IN_DF
{
ARM_COMPUTE_UNUSED(attributes, settings);
}
ClComponentCast::~ClComponentCast()
{
}
+#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
const IGpuTemplateComponentWriter *ClComponentCast::template_writer() const
+#else //ACL_INTERNAL_TEST_CKW_IN_DF
+const IGpuCkwComponentDriver *ClComponentCast::ckw_component_driver() const
+#endif //ACL_INTERNAL_TEST_CKW_IN_DF
{
return _component_writer.get();
}
-const IGpuCkwComponentDriver *ClComponentCast::ckw_component_driver() const
-{
- return _ckw_driver.get();
-}
} // namespace dynamic_fusion
} // namespace experimental
} // namespace arm_compute
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.h b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.h
index 174f9670b3..37b8cbb6c9 100644
--- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.h
+++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.h
@@ -48,8 +48,11 @@ private:
};
/** Forward declaration */
+#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
class ClTemplateCast;
+#else //ACL_INTERNAL_TEST_CKW_IN_DF
class GpuCkwCast;
+#endif //ACL_INTERNAL_TEST_CKW_IN_DF
class ClComponentCast final : public IGpuKernelComponent
{
@@ -116,10 +119,12 @@ public:
ClComponentCast(ClComponentCast &&component) = default;
/** Allow instances of this class to be moved */
ClComponentCast &operator=(ClComponentCast &&component) = default;
- /** Get template writer for the component */
+ /** Get writer for the component */
+#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
const IGpuTemplateComponentWriter *template_writer() const override;
- /** Get GPU kernel writer for the component */
+#else //ACL_INTERNAL_TEST_CKW_IN_DF
const IGpuCkwComponentDriver *ckw_component_driver() const override;
+#endif //ACL_INTERNAL_TEST_CKW_IN_DF
/** Get component type */
GpuComponentType type() const override
{
@@ -127,8 +132,11 @@ public:
}
private:
+#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
std::unique_ptr<ClTemplateCast> _component_writer;
- std::unique_ptr<GpuCkwCast> _ckw_driver;
+#else //ACL_INTERNAL_TEST_CKW_IN_DF
+ std::unique_ptr<GpuCkwCast> _component_writer;
+#endif //ACL_INTERNAL_TEST_CKW_IN_DF
};
} // namespace dynamic_fusion
} // namespace experimental
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.cpp b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.cpp
index 52739e23c0..88d729170c 100644
--- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.cpp
+++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.cpp
@@ -25,8 +25,11 @@
#include "arm_compute/core/Validate.h"
#include "src/core/CL/CLValidate.h"
-#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.h"
+#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
#include "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateElementwiseBinary.h"
+#else //ACL_INTERNAL_TEST_CKW_IN_DF
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.h"
+#endif //ACL_INTERNAL_TEST_CKW_IN_DF
namespace arm_compute
{
@@ -106,28 +109,38 @@ Status ClComponentElementwiseBinary::validate(const ArgumentPack<ITensorInfo> &t
return Status{};
}
+ClComponentElementwiseBinary::~ClComponentElementwiseBinary()
+{
+}
ClComponentElementwiseBinary::ClComponentElementwiseBinary(
ComponentId id,
const Properties &properties,
const ArgumentPack<ITensorInfo> &tensors,
const Attributes &attributes)
: IGpuKernelComponent{ id, properties, tensors },
- _component_writer{ std::make_unique<ClTemplateElementwiseBinary>(id, tensors, attributes) },
- _ckw_driver{ std::make_unique<GpuCkwElementwiseBinary>(id, tensors, attributes) }
+#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
+ _component_writer
{
+ std::make_unique<ClTemplateElementwiseBinary>(id, tensors, attributes)
}
-ClComponentElementwiseBinary::~ClComponentElementwiseBinary()
+#else //ACL_INTERNAL_TEST_CKW_IN_DF
+ _component_writer
{
+ std::make_unique<GpuCkwElementwiseBinary>(id, tensors, attributes)
}
-const IGpuTemplateComponentWriter *ClComponentElementwiseBinary::template_writer() const
+#endif //ACL_INTERNAL_TEST_CKW_IN_DF
{
- return _component_writer.get();
}
+#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
+const IGpuTemplateComponentWriter *ClComponentElementwiseBinary::template_writer() const
+#else //ACL_INTERNAL_TEST_CKW_IN_DF
const IGpuCkwComponentDriver *ClComponentElementwiseBinary::ckw_component_driver() const
+#endif //ACL_INTERNAL_TEST_CKW_IN_DF
{
- return _ckw_driver.get();
+ return _component_writer.get();
}
+
} // namespace dynamic_fusion
} // namespace experimental
} // namespace arm_compute
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.h b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.h
index a56dd8b37d..f7175903d0 100644
--- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.h
+++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.h
@@ -21,8 +21,8 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTELEMENTWISEBINARY
-#define SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTELEMENTWISEBINARY
+#ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTELEMENTWISEBINARY
+#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTELEMENTWISEBINARY
#include "src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h"
#include "src/dynamic_fusion/sketch/gpu/operators/internal/GpuElementwiseBinaryCommon.h"
@@ -40,8 +40,11 @@ template <typename T>
class ArgumentPack;
/** Forward declaration */
+#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
class ClTemplateElementwiseBinary;
+#else //ACL_INTERNAL_TEST_CKW_IN_DF
class GpuCkwElementwiseBinary;
+#endif //ACL_INTERNAL_TEST_CKW_IN_DF
class ClComponentElementwiseBinary final : public IGpuKernelComponent
{
@@ -101,10 +104,13 @@ public:
ClComponentElementwiseBinary(ClComponentElementwiseBinary &&component) = default;
/** Allow instances of this class to be moved */
ClComponentElementwiseBinary &operator=(ClComponentElementwiseBinary &&component) = default;
- /** Get template writer for the component */
+ /** Get writer for the component */
+#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
const IGpuTemplateComponentWriter *template_writer() const override;
+#else //ACL_INTERNAL_TEST_CKW_IN_DF
+ const IGpuCkwComponentDriver *ckw_component_driver() const override;
+#endif //ACL_INTERNAL_TEST_CKW_IN_DF
- const IGpuCkwComponentDriver *ckw_component_driver() const override;
/** Get component type */
GpuComponentType type() const override
{
@@ -112,10 +118,13 @@ public:
}
private:
+#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
std::unique_ptr<ClTemplateElementwiseBinary> _component_writer;
- std::unique_ptr<GpuCkwElementwiseBinary> _ckw_driver;
+#else //ACL_INTERNAL_TEST_CKW_IN_DF
+ std::unique_ptr<GpuCkwElementwiseBinary> _component_writer;
+#endif //ACL_INTERNAL_TEST_CKW_IN_DF
};
} // namespace dynamic_fusion
} // namespace experimental
} // namespace arm_compute
-#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTELEMENTWISEBINARY */
+#endif /* ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTELEMENTWISEBINARY */
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.cpp b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.cpp
index a3283b1866..12b81c3d56 100644
--- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.cpp
+++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.cpp
@@ -24,8 +24,11 @@
#include "ClComponentStore.h"
#include "src/dynamic_fusion/sketch/ArgumentPack.h"
-#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.h"
+#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
#include "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.h"
+#else //ACL_INTERNAL_TEST_CKW_IN_DF
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.h"
+#endif //ACL_INTERNAL_TEST_CKW_IN_DF
#include <memory>
@@ -43,19 +46,30 @@ Status ClComponentStore::validate(
return Status{};
}
ClComponentStore::ClComponentStore(ComponentId id, const Properties &properties, const ArgumentPack<ITensorInfo> &tensors)
- : IGpuKernelComponent{ id, properties, tensors }, _component_writer{ std::make_unique<ClTemplateStore>(id, tensors) }, _ckw_driver{ std::make_unique<GpuCkwStore>(id, tensors) }
+ : IGpuKernelComponent{ id, properties, tensors },
+#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
+ _component_writer
{
+ std::make_unique<ClTemplateStore>(id, tensors)
}
-ClComponentStore::~ClComponentStore()
+#else //ACL_INTERNAL_TEST_CKW_IN_DF
+ _component_writer
{
+ std::make_unique<GpuCkwStore>(id, tensors)
}
-const IGpuTemplateComponentWriter *ClComponentStore::template_writer() const
+#endif //ACL_INTERNAL_TEST_CKW_IN_DF
{
- return _component_writer.get();
}
+ClComponentStore::~ClComponentStore()
+{
+}
+#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
+const IGpuTemplateComponentWriter *ClComponentStore::template_writer() const
+#else //ACL_INTERNAL_TEST_CKW_IN_DF
const IGpuCkwComponentDriver *ClComponentStore::ckw_component_driver() const
+#endif //ACL_INTERNAL_TEST_CKW_IN_DF
{
- return _ckw_driver.get();
+ return _component_writer.get();
}
} // namespace dynamic_fusion
} // namespace experimental
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.h b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.h
index f168ccb97e..853ee39012 100644
--- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.h
+++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.h
@@ -25,7 +25,6 @@
#define SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTSTORE
#include "src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h"
-#include "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.h"
#include <memory>
namespace arm_compute
@@ -39,7 +38,11 @@ namespace dynamic_fusion
/** Forward declaration */
template <typename T>
class ArgumentPack;
+#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
+class ClTemplateStore;
+#else //ACL_INTERNAL_TEST_CKW_IN_DF
class GpuCkwStore;
+#endif //ACL_INTERNAL_TEST_CKW_IN_DF
class ClComponentStore final : public IGpuKernelComponent
{
@@ -85,10 +88,12 @@ public:
ClComponentStore(ClComponentStore &&component) = default;
/** Allow instances of this class to be moved */
ClComponentStore &operator=(ClComponentStore &&component) = default;
- /** Get template writer for the component */
+ /** Get writer for the component */
+#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
const IGpuTemplateComponentWriter *template_writer() const override;
-
+#else //ACL_INTERNAL_TEST_CKW_IN_DF
const IGpuCkwComponentDriver *ckw_component_driver() const override;
+#endif //ACL_INTERNAL_TEST_CKW_IN_DF
/** Get component type */
GpuComponentType type() const override
{
@@ -96,8 +101,11 @@ public:
}
private:
+#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
std::unique_ptr<ClTemplateStore> _component_writer;
- std::unique_ptr<GpuCkwStore> _ckw_driver;
+#else //ACL_INTERNAL_TEST_CKW_IN_DF
+ std::unique_ptr<GpuCkwStore> _component_writer;
+#endif //ACL_INTERNAL_TEST_CKW_IN_DF
};
} // namespace dynamic_fusion
} // namespace experimental