aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGeorgios Pinitas <georgios.pinitas@arm.com>2021-01-22 05:47:37 +0000
committerGeorgios Pinitas <georgios.pinitas@arm.com>2021-02-01 16:51:44 +0000
commitea857273d8b4a94fb7f1e63ce9068a60259fb9d3 (patch)
treee4d7a90f28d4ba4d218e77343c107afa137d7cec
parent655e8c6334580a570008243af1896d269fdd60ad (diff)
downloadComputeLibrary-ea857273d8b4a94fb7f1e63ce9068a60259fb9d3.tar.gz
Compress OpenCL kernel files using zlib for Android
Kernel files are embedded into the binary as the default option when building which leads to binary size bloating. Add `compress_kernels` option and utilize zlib for further compressing the text kernel files and reduce the overall binary size. We use a base64 encoding/decoding to ensure that the strings can be easily embedded. This adds to the binary size but still the overall reduction is significant. Maximum compression level 9 is used. Option is currently restricted to Android builds as android toolchain provides a zlib library. Initial experimentations indicate a binary size reduction of 50% Resolves: COMPMID-4017 Signed-off-by: Georgios Pinitas <georgios.pinitas@arm.com> Change-Id: Iee81b8c00391b26a5f41642699692928a4d6bd6e Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4958 Comments-Addressed: Arm Jenkins <bsgcomp@arm.com> Tested-by: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
-rw-r--r--SConscript11
-rw-r--r--SConstruct8
-rw-r--r--arm_compute/core/CL/CLKernelLibrary.h11
-rw-r--r--docs/00_introduction.dox6
-rw-r--r--src/core/CL/CLKernelLibrary.cpp162
5 files changed, 187 insertions, 11 deletions
diff --git a/SConscript b/SConscript
index 6b727f5a82..7e09240768 100644
--- a/SConscript
+++ b/SConscript
@@ -23,6 +23,9 @@ import collections
import os.path
import re
import subprocess
+import zlib
+import base64
+import string
VERSION = "v0.0-unreleased"
LIBRARY_VERSION_MAJOR = 21
@@ -112,15 +115,17 @@ def resolve_includes(target, source, env):
tmp_file = updated_file
# Append and prepend string literal identifiers and add expanded file to final list
- tmp_file.insert(0, "R\"(\n")
- tmp_file.append("\n)\"")
entry = FileEntry(target_name=file[1].target_name, file_contents=tmp_file)
final_files.append((file[0], entry))
# Write output files
for file in final_files:
with open(file[1].target_name.get_path(), 'w+') as out_file:
- out_file.write( "\n".join( file[1].file_contents ))
+ file_to_write = "\n".join( file[1].file_contents )
+ if env['compress_kernels']:
+ file_to_write = zlib.compress(file_to_write, 9).encode("base64").replace("\n", "")
+ file_to_write = "R\"(" + file_to_write + ")\""
+ out_file.write(file_to_write)
def create_version_file(target, source, env):
# Generate string with build options library version to embed in the library:
diff --git a/SConstruct b/SConstruct
index 94cf80d12a..d5461afe42 100644
--- a/SConstruct
+++ b/SConstruct
@@ -56,6 +56,7 @@ vars.AddVariables(
BoolVariable("neon", "Enable Neon support", False),
BoolVariable("gles_compute", "Enable OpenGL ES Compute Shader support", False),
BoolVariable("embed_kernels", "Embed OpenCL kernels and OpenGL ES compute shaders in library binary", True),
+ BoolVariable("compress_kernels", "Compress embedded OpenCL kernels in library binary. Note embed_kernels should be enabled", False),
BoolVariable("set_soname", "Set the library's soname and shlibversion (requires SCons 2.4 or above)", False),
BoolVariable("tracing", "Enable runtime tracing", False),
BoolVariable("openmp", "Enable OpenMP backend", False),
@@ -135,6 +136,10 @@ if env['os'] == 'bare_metal':
print("ERROR: OpenMP and C++11 threads not supported in bare_metal. Use cppthreads=0 openmp=0")
Exit(1)
+if env['opencl'] and env['embed_kernels'] and env['compress_kernels'] and env['os'] not in ['android']:
+ print("Compressed kernels are supported only for android builds")
+ Exit(1)
+
if not env['exceptions']:
if env['opencl'] or env['gles_compute']:
print("ERROR: OpenCL and GLES are not supported when building without exceptions. Use opencl=0 gles_compute=0")
@@ -349,6 +354,9 @@ if env["os"] not in ["android", "bare_metal"] and (env['opencl'] or env['cppthre
if env['opencl'] or env['gles_compute']:
if env['embed_kernels']:
env.Append(CPPDEFINES = ['EMBEDDED_KERNELS'])
+ if env['compress_kernels']:
+ env.Append(CPPDEFINES = ['ARM_COMPUTE_COMPRESSED_KERNELS'])
+ env.Append(LIBS = ['z'])
if env['debug']:
env['asserts'] = True
diff --git a/arm_compute/core/CL/CLKernelLibrary.h b/arm_compute/core/CL/CLKernelLibrary.h
index caab78dc18..193389388e 100644
--- a/arm_compute/core/CL/CLKernelLibrary.h
+++ b/arm_compute/core/CL/CLKernelLibrary.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2020 Arm Limited.
+ * Copyright (c) 2016-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -164,10 +164,11 @@ public:
CLCompileContext &get_compile_context();
private:
- CLCompileContext _compile_context; /**< Compile Context. */
- std::string _kernel_path; /**< Path to the kernels folder. */
- static const std::map<std::string, std::string> _kernel_program_map; /**< Map that associates kernel names with programs. */
- static const std::map<std::string, std::string> _program_source_map; /**< Contains sources for all programs.
+ CLCompileContext _compile_context; /**< Compile Context. */
+ std::string _kernel_path; /**< Path to the kernels folder. */
+ mutable std::map<std::string, std::string> _decompressed_source_map; /**< Map holding the decompressed files when compression is used */
+ static const std::map<std::string, std::string> _kernel_program_map; /**< Map that associates kernel names with programs. */
+ static const std::map<std::string, std::string> _program_source_map; /**< Contains sources for all programs.
Used for compile-time kernel inclusion. >*/
};
} // namespace arm_compute
diff --git a/docs/00_introduction.dox b/docs/00_introduction.dox
index ccc44d48e0..6a4d03b527 100644
--- a/docs/00_introduction.dox
+++ b/docs/00_introduction.dox
@@ -1388,6 +1388,10 @@ To see the build options available simply run ```scons -h```:
default: True
actual: True
+ compress_kernels: Compress embedded OpenCL kernels in library binary. Note embed_kernels should be enabled as well (yes|no)
+ default: False
+ actual: False
+
set_soname: Set the library's soname and shlibversion (requires SCons 2.4 or above) (yes|no)
default: False
actual: False
@@ -1508,6 +1512,8 @@ To see the build options available simply run ```scons -h```:
There is also an 'embed_only' option which will generate all the .embed files for the OpenCL kernels and / or OpenGLES compute shaders. This might be useful if using a different build system to compile the library.
+In addition, the 'compress_kernels' option will compress the embedded OpenCL kernel files using zlib and inject them into the library. This is useful for reducing the binary size. Note that this option is only available for Android when 'embed_kernels' is enabled.
+
@b Werror: If you are compiling using the same toolchains as the ones used in this guide then there shouldn't be any warning and therefore you should be able to keep Werror=1. If with a different compiler version the library fails to build because of warnings interpreted as errors then, if you are sure the warnings are not important, you might want to try to build with Werror=0 (But please do report the issue either on Github or by an email to developer@arm.com so that the issue can be addressed).
@b opencl / @b neon / @b gles_compute: Choose which SIMD technology you want to target. (NEON for ARM Cortex-A CPUs or OpenCL / GLES_COMPUTE for ARM Mali GPUs)
diff --git a/src/core/CL/CLKernelLibrary.cpp b/src/core/CL/CLKernelLibrary.cpp
index e5cf8c6903..cf1c52e463 100644
--- a/src/core/CL/CLKernelLibrary.cpp
+++ b/src/core/CL/CLKernelLibrary.cpp
@@ -29,10 +29,151 @@
#include "support/StringSupport.h"
#include <algorithm>
+#include <array>
#include <fstream>
#include <utility>
#include <vector>
+#ifdef ARM_COMPUTE_COMPRESSED_KERNELS
+#include <zlib.h>
+
+namespace
+{
/* Base64 decoding table.
 *
 * Indexed by the byte value of an encoded character; yields the 6-bit value of that
 * character in the standard alphabet (A-Z, a-z, 0-9, '+', '/'). Every other entry is 0,
 * which is also the value of 'A', so the table alone cannot be used to detect invalid input.
 */
constexpr std::array<uint8_t, 256> b64_invtab =
{
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 62, 0, 0, 0, 63,
    52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 0, 0, 0, 0, 0, 0,
    0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
    15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 0, 0, 0, 0, 0,
    0, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
    41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};

/** Check whether a character is a symbol of the standard base64 alphabet
 *
 * @note The padding character '=' is not part of the alphabet and is rejected.
 *
 * @param[in] c Character to check
 *
 * @return True if @p c is one of A-Z, a-z, 0-9, '+' or '/'
 */
inline bool is_b64_symbol(char c)
{
    return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9') || (c == '+') || (c == '/');
}

/** Decode a base64 encoded string
 *
 * The input must use the standard alphabet, have a length that is a multiple of 4 and
 * carry at most two '=' padding characters, located at the very end.
 *
 * @param[in] str Base64 encoded string to decode
 *
 * @return The decoded string in case of a valid, non-empty string otherwise an empty string
 */
std::string decode_base64(const std::string &str)
{
    constexpr char pad_char = '=';

    // An empty input, or one whose size is not a multiple of 4, cannot be valid base64
    if(str.empty() || (str.length() % 4) != 0)
    {
        return {};
    }

    // Count the trailing padding. A '=' is only padding when it closes the string, so the
    // second-to-last character is inspected only if the last one is a pad as well.
    const std::size_t str_len = str.size();
    std::size_t       padding = 0;
    if(str[str_len - 1] == pad_char)
    {
        padding = (str[str_len - 2] == pad_char) ? 2 : 1;
    }

    // Everything in front of the padding has to be a symbol of the alphabet. The decoding
    // table maps unknown bytes to 0, so without this check garbage would be decoded silently.
    const std::size_t payload_len = str_len - padding;
    for(std::size_t i = 0; i < payload_len; ++i)
    {
        if(!is_b64_symbol(str[i]))
        {
            return {};
        }
    }

    // Table lookup through unsigned char: a plain (possibly signed) char with the top bit set
    // would otherwise produce a negative index
    const auto sextet = [&str](std::size_t pos) -> unsigned int
    {
        return b64_invtab[static_cast<unsigned char>(str[pos])];
    };
    // Keep the low 8 bits of the assembled value
    const auto to_byte = [](unsigned int value) -> char
    {
        return static_cast<char>(static_cast<unsigned char>(value & 0xFFu));
    };

    // Reserve memory for the decoded string
    // Note each 4 consecutive elements of 6-bit encode 3 bytes
    std::string dec_b64;
    dec_b64.reserve((str_len / 4) * 3);

    // Decode all the complete blocks; a padded trailing block is handled separately below
    std::size_t       c        = 0;
    const std::size_t full_end = (padding == 0) ? str_len : (str_len - 4);
    for(; c < full_end; c += 4)
    {
        const unsigned int s0 = sextet(c);
        const unsigned int s1 = sextet(c + 1);
        const unsigned int s2 = sextet(c + 2);
        const unsigned int s3 = sextet(c + 3);

        dec_b64.push_back(to_byte((s0 << 2) | (s1 >> 4)));
        dec_b64.push_back(to_byte((s1 << 4) | (s2 >> 2)));
        dec_b64.push_back(to_byte((s2 << 6) | s3));
    }

    // Last block when it contains padding symbols
    if(padding == 1)
    {
        // Three symbols carry two bytes
        const unsigned int s0 = sextet(c);
        const unsigned int s1 = sextet(c + 1);
        const unsigned int s2 = sextet(c + 2);

        dec_b64.push_back(to_byte((s0 << 2) | (s1 >> 4)));
        dec_b64.push_back(to_byte((s1 << 4) | (s2 >> 2)));
    }
    else if(padding == 2)
    {
        // Two symbols carry a single byte
        const unsigned int s0 = sextet(c);
        const unsigned int s1 = sextet(c + 1);

        dec_b64.push_back(to_byte((s0 << 2) | (s1 >> 4)));
    }

    return dec_b64;
}
+
+/** Decompress a zlib compressed string
+ *
+ * @param[in] str ZLib compressed string
+ *
+ * @return The decompressed string if successful, otherwise false.
+ */
+std::string decompress_zlib(const std::string &str)
+{
+ // Create and initialize decompression stream
+ z_stream ds{};
+ if(inflateInit(&ds) != Z_OK)
+ {
+ return std::string();
+ }
+ ds.avail_in = str.size();
+ ds.next_in = (Bytef *)str.data();
+
+ // Roll-over the string using a buffer and decompress
+ int status = Z_OK;
+ char roll_buff[16384];
+ std::string inflated_str;
+ do
+ {
+ ds.avail_out = sizeof(roll_buff);
+ ds.next_out = reinterpret_cast<Bytef *>(roll_buff);
+
+ status = inflate(&ds, 0);
+ if(inflated_str.size() < ds.total_out)
+ {
+ inflated_str.append(roll_buff, ds.total_out - inflated_str.size());
+ }
+ }
+ while(status == Z_OK);
+
+ // Finalize decompression stream
+ inflateEnd(&ds);
+ if(status != Z_STREAM_END)
+ {
+ return std::string();
+ }
+
+ return inflated_str;
+}
+} // namespace
+#endif /* ARM_COMPUTE_COMPRESSED_KERNELS */
+
using namespace arm_compute;
const std::map<std::string, std::string> CLKernelLibrary::_kernel_program_map =
{
@@ -970,7 +1111,7 @@ const std::map<std::string, std::string> CLKernelLibrary::_program_source_map =
};
CLKernelLibrary::CLKernelLibrary()
- : _compile_context(), _kernel_path()
+ : _compile_context(), _kernel_path(), _decompressed_source_map()
{
opencl_is_available(); // Make sure the OpenCL symbols are initialised *before* the CLKernelLibrary is built
}
@@ -1068,14 +1209,29 @@ bool CLKernelLibrary::int64_base_atomics_supported() const
std::pair<std::string, bool> CLKernelLibrary::get_program(const std::string &program_name) const
{
#ifdef EMBEDDED_KERNELS
- const auto program_source_it = _program_source_map.find(program_name);
+#ifdef ARM_COMPUTE_COMPRESSED_KERNELS
+ const auto inflatted_program_source_it = _decompressed_source_map.find(program_name);
+ if(inflatted_program_source_it != _decompressed_source_map.end())
+ {
+ return std::make_pair(inflatted_program_source_it->second, false);
+ }
+#endif /* ARM_COMPUTE_COMPRESSED_KERNELS */
+ const auto program_source_it = _program_source_map.find(program_name);
if(program_source_it == _program_source_map.end())
{
ARM_COMPUTE_ERROR_VAR("Embedded program for %s does not exist.", program_name.c_str());
}
+ std::string program_source = program_source_it->second;
+
+#ifdef ARM_COMPUTE_COMPRESSED_KERNELS
+ std::string decompressed_program_source = decompress_zlib(decode_base64(program_source_it->second));
+ ARM_COMPUTE_ERROR_ON_MSG(decompressed_program_source.empty(), "Cannot de-compress requested program");
+ _decompressed_source_map.insert(std::make_pair(program_name, decompressed_program_source));
+ program_source = std::move(decompressed_program_source);
+#endif /* ARM_COMPUTE_COMPRESSED_KERNELS */
- return std::make_pair(program_source_it->second, false);
+ return std::make_pair(program_source, false);
#else /* EMBEDDED_KERNELS */
// Check for binary
std::string source_name = _kernel_path + program_name;