From b9fc33c194036973273604d5fd7af9e814133238 Mon Sep 17 00:00:00 2001
From: Louis Verhaard
Date: Thu, 13 Aug 2020 11:47:36 +0200
Subject: MLBEDSW-2688: LeakyRelu rewrite to LUT or MUL/MAX

Replaces LeakyRelu operations with a LUT activation function when
possible, otherwise with a combination of multiplication/maximization.

Signed-off-by: Louis Verhaard
Change-Id: I3d2eb2dba7145997c3cc711d0ef18ab355fbb416
---
 ethosu/vela/lut.py | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/ethosu/vela/lut.py b/ethosu/vela/lut.py
index 39101fac..0e8dcc95 100644
--- a/ethosu/vela/lut.py
+++ b/ethosu/vela/lut.py
@@ -18,8 +18,11 @@ import uuid
 from functools import lru_cache
 
+import numpy as np
+
 from . import numeric_util
 from .high_level_command_stream import CommandType
+from .tensor import create_const_tensor
 from .tensor import TensorPurpose
 
 
@@ -85,6 +88,19 @@ def get_lut_index(arch, lut_tensor):
     return slot
 
 
+def create_lut_tensor(name, values, dtype):
+    # Creates constant LUT tensor with the given values as lookup table.
+    # The tensor's equivalence_id is based on these values, so if multiple
+    # LUT tensors are created with identical values, they will get the same
+    # address in constant memory, and unnecessary DMA operations can be avoided.
+    sz = len(values)
+    assert sz in (256, 512)
+    ntype = np.uint8 if dtype.size_in_bytes() == 1 else np.uint32
+    tens = create_const_tensor(name, [1, 1, 1, sz], dtype, values, ntype, TensorPurpose.LUT)
+    tens.equivalence_id = create_equivalence_id(tuple(values))
+    return tens
+
+
 def optimize_high_level_cmd_stream(sg, arch):
     # - Allocates SHRAM address/lut index to LUT tensors
     # - Removes unnecessary DMA operations of LUTs that are already present in SHRAM from sg's command stream
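
Note: the LeakyRelu rewrite itself lives in the graph optimisation pass, not in
this file; this patch only adds the LUT tensor helper. The sketch below is a
minimal, standalone illustration (not vela's actual implementation) of how a
256-entry int8 LeakyRelu table could be computed before being handed to
create_lut_tensor. The helper name leaky_relu_lut_values, the quantization
parameters, and the rounding scheme are all assumptions for illustration.

    def leaky_relu_lut_values(alpha, ifm_scale, ifm_zp, ofm_scale, ofm_zp):
        # One table entry per possible int8 input value, in order -128..127:
        # dequantize the input, apply LeakyRelu in real space, requantize.
        values = []
        for x in range(-128, 128):
            real = ifm_scale * (x - ifm_zp)
            real = real if real >= 0 else alpha * real
            q = int(round(real / ofm_scale)) + ofm_zp
            values.append(min(127, max(-128, q)))  # clamp to int8 range
        return values

    values = leaky_relu_lut_values(0.1, 0.05, 0, 0.05, 0)
    assert len(values) == 256
    # In vela, a list like this would then become a LUT tensor, e.g.:
    #   lut_tens = create_lut_tensor(op.name + "_lut", values, DataType.int8)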
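When a LUT cannot be used, the commit message says the operation is rewritten
as a combination of multiplication and maximization instead. That rewrite
rests on the identity LeakyRelu(x) = max(x, alpha * x), which holds for
0 <= alpha <= 1. The snippet below only verifies the identity in floating
point; it says nothing about the NPU's actual quantized scaling.

    import numpy as np

    x = np.array([-4.0, -1.0, 0.0, 2.5])
    alpha = 0.1
    leaky = np.where(x < 0, alpha * x, x)  # reference LeakyRelu
    mul_max = np.maximum(x, alpha * x)     # elementwise MUL followed by MAX
    assert np.allclose(leaky, mul_max)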
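The equivalence_id comment in create_lut_tensor can be made concrete:
create_equivalence_id is memoized on the value tuple, so tables with identical
contents share an id and the allocator can keep a single copy in constant
memory, skipping the duplicate DMA. A sketch, assuming the ethosu.vela package
is importable and using DataType.int8 (size_in_bytes() == 1, so the values are
stored as uint8 bytes):

    from ethosu.vela.data_type import DataType
    from ethosu.vela.lut import create_lut_tensor

    ramp = list(range(256))  # some 256-entry byte table
    a = create_lut_tensor("lut_a", ramp, DataType.int8)
    b = create_lut_tensor("lut_b", ramp, DataType.int8)
    c = create_lut_tensor("lut_c", [0] * 256, DataType.int8)

    assert a.equivalence_id == b.equivalence_id  # same values -> one copy
    assert a.equivalence_id != c.equivalence_id  # different values -> new copy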