diff options
author | Alex Tawse <alex.tawse@arm.com> | 2024-04-05 11:45:51 +0100 |
---|---|---|
committer | Alex Tawse <alex.tawse@arm.com> | 2024-05-20 09:27:19 +0100 |
commit | 51928a3fd979a36e6c7a9f73bf0ed3bc8a8b7dfd (patch) | |
tree | 266b4498781c93feebf44808a847f5f6c0c9941b /scripts | |
parent | e0829d313211e5bd0176a6bfef9e07056eb1bbbf (diff) | |
download | ml-embedded-evaluation-kit-51928a3fd979a36e6c7a9f73bf0ed3bc8a8b7dfd.tar.gz |
MLECO-4980: Adding Arm Ethos-U85 beta support
* Adds beta support for Ethos-U85.
* By default, models will be compiled for U85-512 as well as the existing U55-128 and U65-256 MAC configurations.
* All U85 MAC configurations are supported.
Change-Id: If11f09c581084b27cf02a91eb74b2b094fe70c3e
Signed-off-by: Alex Tawse <alex.tawse@arm.com>
Diffstat (limited to 'scripts')
-rw-r--r-- | scripts/cmake/configuration_options/npu_opts.cmake | 36 | ||||
-rw-r--r-- | scripts/cmake/platforms/mps3/build_configuration.cmake | 6 | ||||
-rw-r--r-- | scripts/cmake/timing_adapter/ta_config_u55_high_end.cmake | 90 | ||||
-rw-r--r-- | scripts/cmake/timing_adapter/ta_config_u65_high_end.cmake | 58 | ||||
-rw-r--r-- | scripts/cmake/timing_adapter/ta_config_u85_high_end.cmake | 68 | ||||
-rw-r--r-- | scripts/py/vela_configs.py | 150 | ||||
-rw-r--r-- | scripts/vela/default_vela.ini | 69 |
7 files changed, 383 insertions, 94 deletions
diff --git a/scripts/cmake/configuration_options/npu_opts.cmake b/scripts/cmake/configuration_options/npu_opts.cmake index c947ac9..867fdf0 100644 --- a/scripts/cmake/configuration_options/npu_opts.cmake +++ b/scripts/cmake/configuration_options/npu_opts.cmake @@ -49,21 +49,27 @@ USER_OPTION(ETHOS_U_NPU_ID "Arm Ethos-U NPU IP (U55 or U65)" "U55" STRING) -if ((ETHOS_U_NPU_ID STREQUAL U55) OR (ETHOS_U_NPU_ID STREQUAL U65)) - if (ETHOS_U_NPU_ID STREQUAL U55) - set(DEFAULT_NPU_MEM_MODE "Shared_Sram") - set(DEFAULT_NPU_CONFIG_ID "H128") - elseif(ETHOS_U_NPU_ID STREQUAL U65) - set(DEFAULT_NPU_MEM_MODE "Dedicated_Sram") - set(DEFAULT_NPU_CONFIG_ID "Y256") - set(DEFAULT_NPU_CACHE_SIZE "393216") +if (ETHOS_U_NPU_ID STREQUAL U55) + set(DEFAULT_NPU_MEM_MODE "Shared_Sram") + set(DEFAULT_NPU_CONFIG_ID "H128") + set(DEFAULT_TA_CONFIG_FILE "ta_config_u55_high_end") +elseif (ETHOS_U_NPU_ID STREQUAL U65) + set(DEFAULT_NPU_MEM_MODE "Dedicated_Sram") + set(DEFAULT_NPU_CONFIG_ID "Y256") + set(DEFAULT_TA_CONFIG_FILE "ta_config_u65_high_end") +elseif (ETHOS_U_NPU_ID STREQUAL U85) + set(DEFAULT_NPU_MEM_MODE "Dedicated_Sram") + set(DEFAULT_NPU_CONFIG_ID "Z512") + set(DEFAULT_TA_CONFIG_FILE "ta_config_u85_high_end") +else() + message(FATAL_ERROR "Non compatible Ethos-U NPU processor ${ETHOS_U_NPU_ID}") +endif() - USER_OPTION(ETHOS_U_NPU_CACHE_SIZE "Arm Ethos-U65 NPU Cache Size" +if(DEFAULT_NPU_MEM_MODE STREQUAL "Dedicated_Sram") + set(DEFAULT_NPU_CACHE_SIZE "393216") + USER_OPTION(ETHOS_U_NPU_CACHE_SIZE "Arm Ethos-U NPU Cache Size" "${DEFAULT_NPU_CACHE_SIZE}" STRING) - endif() -else () - message(FATAL_ERROR "Non compatible Ethos-U NPU processor ${ETHOS_U_NPU_ID}") endif () USER_OPTION(ETHOS_U_NPU_MEMORY_MODE "Specifies the memory mode used in the Vela command." @@ -74,12 +80,6 @@ USER_OPTION(ETHOS_U_NPU_CONFIG_ID "Specifies the configuration ID for the NPU." 
"${DEFAULT_NPU_CONFIG_ID}" STRING) -if (ETHOS_U_NPU_ID STREQUAL U55) - set(DEFAULT_TA_CONFIG_FILE "ta_config_u55_high_end") -else () - set(DEFAULT_TA_CONFIG_FILE "ta_config_u65_high_end") -endif () - USER_OPTION(ETHOS_U_NPU_TIMING_ADAPTER_ENABLED "Specifies if the Ethos-U timing adapter is enabled" ON BOOL) diff --git a/scripts/cmake/platforms/mps3/build_configuration.cmake b/scripts/cmake/platforms/mps3/build_configuration.cmake index c29531f..aa521bf 100644 --- a/scripts/cmake/platforms/mps3/build_configuration.cmake +++ b/scripts/cmake/platforms/mps3/build_configuration.cmake @@ -121,6 +121,11 @@ function(platform_custom_post_build) set(AXF_PATH "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${PARSED_TARGET_NAME}.axf") set(TEST_TARGET_NAME "${use_case}_fvp_test") + set(FVP_CONFIG_ARG "") + if (ETHOS_U_NPU_ID STREQUAL "U85") + set(FVP_CONFIG_ARG -C ethosu.config="${ETHOS_U_NPU_CONFIG_ID}") + endif () + message(STATUS "Adding FVP test for ${use_case}") add_test( @@ -130,6 +135,7 @@ function(platform_custom_post_build) -C mps3_board.uart0.out_file='-' -C mps3_board.uart0.shutdown_on_eot=1 -C mps3_board.visualisation.disable-visualisation=1 + ${FVP_CONFIG_ARG} --stat) endif() endif () diff --git a/scripts/cmake/timing_adapter/ta_config_u55_high_end.cmake b/scripts/cmake/timing_adapter/ta_config_u55_high_end.cmake index 37785e3..8ccf3b6 100644 --- a/scripts/cmake/timing_adapter/ta_config_u55_high_end.cmake +++ b/scripts/cmake/timing_adapter/ta_config_u55_high_end.cmake @@ -23,67 +23,67 @@ # The platform CMake infra should set the base register values for # TA component to work. For Ethos-U55, we need two base addresses. 
-if (NOT DEFINED TA0_BASE OR NOT DEFINED TA1_BASE) - message(FATAL_ERROR "TA0_BASE and TA1_BASE need to be defined.") +if (NOT DEFINED TA_SRAM0_BASE OR NOT DEFINED TA_EXT0_BASE) + message(FATAL_ERROR "TA_SRAM0_BASE and TA_EXT0_BASE need to be defined.") endif () -message(STATUS "using TA0_BASE @ ${TA0_BASE}; TA1_BASE @ ${TA1_BASE}.") +message(STATUS "using TA_SRAM0_BASE @ ${TA_SRAM0_BASE}; TA_EXT0_BASE @ ${TA_EXT0_BASE}.") -# Timing adapter settings for AXI0 -set(TA0_MAXR "8" CACHE STRING "6-bit field. Max no. of pending reads. 0=infinite") -set(TA0_MAXW "8" CACHE STRING "6-bit field. Max no. of pending writes. 0=infinite") -set(TA0_MAXRW "0" CACHE STRING "6-bit field. Max no. of pending reads+writes. 0=infinite") -set(TA0_RLATENCY "32" CACHE STRING "12-bit field. Minimum latency (clock cycles) from AVALID to RVALID.") -set(TA0_WLATENCY "32" CACHE STRING "12-bit field. Minimum latency (clock cycles) from WVALID&WLAST to BVALID.") -set(TA0_PULSE_ON "3999" CACHE STRING "No. of cycles addresses let through (0-65535).") -set(TA0_PULSE_OFF "1" CACHE STRING "No. of cycles addresses blocked (0-65535).") -set(TA0_BWCAP "4000" CACHE STRING "16-bit field. Max no. of 64-bit words transfered per pulse cycle 0=infinite") -set(TA0_PERFCTRL "0" CACHE STRING "6-bit field selecting an event for event counter 0=default") -set(TA0_PERFCNT "0" CACHE STRING "32-bit event counter") -set(TA0_MODE "1" CACHE STRING "Bit 0: 1=enable dynamic clocking to avoid underrun; +# Timing adapter settings for SRAM +set(SRAM_MAXR "8" CACHE STRING "6-bit field. Max no. of pending reads. 0=infinite") +set(SRAM_MAXW "8" CACHE STRING "6-bit field. Max no. of pending writes. 0=infinite") +set(SRAM_MAXRW "0" CACHE STRING "6-bit field. Max no. of pending reads+writes. 0=infinite") +set(SRAM_RLATENCY "32" CACHE STRING "12-bit field. Minimum latency (clock cycles) from AVALID to RVALID.") +set(SRAM_WLATENCY "32" CACHE STRING "12-bit field. 
Minimum latency (clock cycles) from WVALID&WLAST to BVALID.") +set(SRAM_PULSE_ON "3999" CACHE STRING "No. of cycles addresses let through (0-65535).") +set(SRAM_PULSE_OFF "1" CACHE STRING "No. of cycles addresses blocked (0-65535).") +set(SRAM_BWCAP "4000" CACHE STRING "16-bit field. Max no. of 64-bit words transfered per pulse cycle 0=infinite") +set(SRAM_PERFCTRL "0" CACHE STRING "6-bit field selecting an event for event counter 0=default") +set(SRAM_PERFCNT "0" CACHE STRING "32-bit event counter") +set(SRAM_MODE "1" CACHE STRING "Bit 0: 1=enable dynamic clocking to avoid underrun; Bit 1: 1=enable random AR reordering (0=default); Bit 2: 1=enable random R reordering (0=default); Bit 3: 1=enable random B reordering (0=default); Bit 11-4: Frequency scale 0=full speed, 255=(1/256) speed") -set(TA0_HISTBIN "0" CACHE STRING "Controls which histogram bin (0-15) that should be accessed by HISTCNT.") -set(TA0_HISTCNT "0" CACHE STRING "32-bit field. Read/write the selected histogram bin.") +set(SRAM_HISTBIN "0" CACHE STRING "Controls which histogram bin (0-15) that should be accessed by HISTCNT.") +set(SRAM_HISTCNT "0" CACHE STRING "32-bit field. Read/write the selected histogram bin.") -# Timing adapter settings for AXI1 -# If Memory mode is Sram_Only Timing adapter settings for AXI1 need to match the same as AXI0 +# Timing adapter settings for EXT +# If Memory mode is Sram_Only Timing adapter settings for EXT need to match the same as SRAM if (ETHOS_U_NPU_MEMORY_MODE STREQUAL Sram_Only) - set(TA1_MAXR ${TA0_MAXR} CACHE STRING "6-bit field. Max no. of pending reads. 0=infinite") - set(TA1_MAXW ${TA0_MAXW} CACHE STRING "6-bit field. Max no. of pending writes. 0=infinite") - set(TA1_MAXRW ${TA0_MAXRW} CACHE STRING "6-bit field. Max no. of pending reads+writes. 0=infinite") - set(TA1_RLATENCY ${TA0_RLATENCY} CACHE STRING "12-bit field. Minimum latency (clock cycles) from AVALID to RVALID.") - set(TA1_WLATENCY ${TA0_WLATENCY} CACHE STRING "12-bit field. 
Minimum latency (clock cycles) from WVALID&WLAST to BVALID.") - set(TA1_PULSE_ON ${TA0_PULSE_ON} CACHE STRING "No. of cycles addresses let through (0-65535).") - set(TA1_PULSE_OFF ${TA0_PULSE_OFF} CACHE STRING "No. of cycles addresses blocked (0-65535).") - set(TA1_BWCAP ${TA0_BWCAP} CACHE STRING "16-bit field. Max no. of 64-bit words transfered per pulse cycle 0=infinite") - set(TA1_PERFCTRL ${TA0_PERFCTRL} CACHE STRING "6-bit field selecting an event for event counter 0=default") - set(TA1_PERFCNT ${TA0_PERFCNT} CACHE STRING "32-bit event counter") - set(TA1_MODE ${TA0_MODE} CACHE STRING "Bit 0: 1=enable dynamic clocking to avoid underrun; + set(EXT_MAXR ${SRAM_MAXR} CACHE STRING "6-bit field. Max no. of pending reads. 0=infinite") + set(EXT_MAXW ${SRAM_MAXW} CACHE STRING "6-bit field. Max no. of pending writes. 0=infinite") + set(EXT_MAXRW ${SRAM_MAXRW} CACHE STRING "6-bit field. Max no. of pending reads+writes. 0=infinite") + set(EXT_RLATENCY ${SRAM_RLATENCY} CACHE STRING "12-bit field. Minimum latency (clock cycles) from AVALID to RVALID.") + set(EXT_WLATENCY ${SRAM_WLATENCY} CACHE STRING "12-bit field. Minimum latency (clock cycles) from WVALID&WLAST to BVALID.") + set(EXT_PULSE_ON ${SRAM_PULSE_ON} CACHE STRING "No. of cycles addresses let through (0-65535).") + set(EXT_PULSE_OFF ${SRAM_PULSE_OFF} CACHE STRING "No. of cycles addresses blocked (0-65535).") + set(EXT_BWCAP ${SRAM_BWCAP} CACHE STRING "16-bit field. Max no. 
of 64-bit words transfered per pulse cycle 0=infinite") + set(EXT_PERFCTRL ${SRAM_PERFCTRL} CACHE STRING "6-bit field selecting an event for event counter 0=default") + set(EXT_PERFCNT ${SRAM_PERFCNT} CACHE STRING "32-bit event counter") + set(EXT_MODE ${SRAM_MODE} CACHE STRING "Bit 0: 1=enable dynamic clocking to avoid underrun; Bit 1: 1=enable random AR reordering (0=default); Bit 2: 1=enable random R reordering (0=default); Bit 3: 1=enable random B reordering (0=default); Bit 11-4: Frequency scale 0=full speed, 255=(1/256) speed") - set(TA1_HISTBIN ${TA0_HISTBIN} CACHE STRING "Controls which histogram bin (0-15) that should be accessed by HISTCNT.") - set(TA1_HISTCNT ${TA0_HISTCNT} CACHE STRING "32-bit field. Read/write the selected histogram bin.") + set(EXT_HISTBIN ${SRAM_HISTBIN} CACHE STRING "Controls which histogram bin (0-15) that should be accessed by HISTCNT.") + set(EXT_HISTCNT ${SRAM_HISTCNT} CACHE STRING "32-bit field. Read/write the selected histogram bin.") else () - set(TA1_MAXR "2" CACHE STRING "6-bit field. Max no. of pending reads. 0=infinite") - set(TA1_MAXW "0" CACHE STRING "6-bit field. Max no. of pending writes. 0=infinite") - set(TA1_MAXRW "0" CACHE STRING "6-bit field. Max no. of pending reads+writes. 0=infinite") - set(TA1_RLATENCY "64" CACHE STRING "12-bit field. Minimum latency (clock cycles) from AVALID to RVALID.") - set(TA1_WLATENCY "0" CACHE STRING "12-bit field. Minimum latency (clock cycles) from WVALID&WLAST to BVALID.") - set(TA1_PULSE_ON "320" CACHE STRING "No. of cycles addresses let through (0-65535).") - set(TA1_PULSE_OFF "80" CACHE STRING "No. of cycles addresses blocked (0-65535).") - set(TA1_BWCAP "50" CACHE STRING "16-bit field. Max no. 
of 64-bit words transfered per pulse cycle 0=infinite") - set(TA1_PERFCTRL "0" CACHE STRING "6-bit field selecting an event for event counter 0=default") - set(TA1_PERFCNT "0" CACHE STRING "32-bit event counter") - set(TA1_MODE "1" CACHE STRING "Bit 0: 1=enable dynamic clocking to avoid underrun; + set(EXT_MAXR "2" CACHE STRING "6-bit field. Max no. of pending reads. 0=infinite") + set(EXT_MAXW "0" CACHE STRING "6-bit field. Max no. of pending writes. 0=infinite") + set(EXT_MAXRW "0" CACHE STRING "6-bit field. Max no. of pending reads+writes. 0=infinite") + set(EXT_RLATENCY "64" CACHE STRING "12-bit field. Minimum latency (clock cycles) from AVALID to RVALID.") + set(EXT_WLATENCY "0" CACHE STRING "12-bit field. Minimum latency (clock cycles) from WVALID&WLAST to BVALID.") + set(EXT_PULSE_ON "320" CACHE STRING "No. of cycles addresses let through (0-65535).") + set(EXT_PULSE_OFF "80" CACHE STRING "No. of cycles addresses blocked (0-65535).") + set(EXT_BWCAP "50" CACHE STRING "16-bit field. Max no. of 64-bit words transfered per pulse cycle 0=infinite") + set(EXT_PERFCTRL "0" CACHE STRING "6-bit field selecting an event for event counter 0=default") + set(EXT_PERFCNT "0" CACHE STRING "32-bit event counter") + set(EXT_MODE "1" CACHE STRING "Bit 0: 1=enable dynamic clocking to avoid underrun; Bit 1: 1=enable random AR reordering (0=default); Bit 2: 1=enable random R reordering (0=default); Bit 3: 1=enable random B reordering (0=default); Bit 11-4: Frequency scale 0=full speed, 255=(1/256) speed") - set(TA1_HISTBIN "0" CACHE STRING "Controls which histogram bin (0-15) that should be accessed by HISTCNT.") - set(TA1_HISTCNT "0" CACHE STRING "32-bit field. Read/write the selected histogram bin.") + set(EXT_HISTBIN "0" CACHE STRING "Controls which histogram bin (0-15) that should be accessed by HISTCNT.") + set(EXT_HISTCNT "0" CACHE STRING "32-bit field. Read/write the selected histogram bin.") endif ()
\ No newline at end of file diff --git a/scripts/cmake/timing_adapter/ta_config_u65_high_end.cmake b/scripts/cmake/timing_adapter/ta_config_u65_high_end.cmake index e29d144..ce36be8 100644 --- a/scripts/cmake/timing_adapter/ta_config_u65_high_end.cmake +++ b/scripts/cmake/timing_adapter/ta_config_u65_high_end.cmake @@ -23,46 +23,46 @@ # The platform CMake infra should set the base register values for # TA component to work. For Ethos-U65, we need two base addresses. -if (NOT DEFINED TA0_BASE OR NOT DEFINED TA1_BASE) - message(FATAL_ERROR "TA0_BASE and TA1_BASE need to be defined.") +if (NOT DEFINED TA_SRAM0_BASE OR NOT DEFINED TA_EXT0_BASE) + message(FATAL_ERROR "TA_SRAM0_BASE and TA_EXT0_BASE need to be defined.") endif () -message(STATUS "using TA0_BASE @ ${TA0_BASE}; TA1_BASE @ ${TA1_BASE}.") +message(STATUS "using TA_SRAM0_BASE @ ${TA_SRAM0_BASE}; TA_EXT0_BASE @ ${TA_EXT0_BASE}.") # Timing adapter settings for AXI0 -set(TA0_MAXR "16" CACHE STRING "6-bit field. Max no. of pending reads. 0=infinite") -set(TA0_MAXW "16" CACHE STRING "6-bit field. Max no. of pending writes. 0=infinite") -set(TA0_MAXRW "0" CACHE STRING "6-bit field. Max no. of pending reads+writes. 0=infinite") -set(TA0_RLATENCY "32" CACHE STRING "12-bit field. Minimum latency (clock cycles) from AVALID to RVALID.") -set(TA0_WLATENCY "32" CACHE STRING "12-bit field. Minimum latency (clock cycles) from WVALID&WLAST to BVALID.") -set(TA0_PULSE_ON "3999" CACHE STRING "No. of cycles addresses let through (0-65535).") -set(TA0_PULSE_OFF "1" CACHE STRING "No. of cycles addresses blocked (0-65535).") -set(TA0_BWCAP "4000" CACHE STRING "16-bit field. Max no. of 64-bit words transfered per pulse cycle 0=infinite") -set(TA0_PERFCTRL "0" CACHE STRING "6-bit field selecting an event for event counter 0=default") -set(TA0_PERFCNT "0" CACHE STRING "32-bit event counter") -set(TA0_MODE "1" CACHE STRING "Bit 0: 1=enable dynamic clocking to avoid underrun; +set(SRAM_MAXR "16" CACHE STRING "6-bit field. Max no. 
of pending reads. 0=infinite") +set(SRAM_MAXW "16" CACHE STRING "6-bit field. Max no. of pending writes. 0=infinite") +set(SRAM_MAXRW "0" CACHE STRING "6-bit field. Max no. of pending reads+writes. 0=infinite") +set(SRAM_RLATENCY "32" CACHE STRING "12-bit field. Minimum latency (clock cycles) from AVALID to RVALID.") +set(SRAM_WLATENCY "32" CACHE STRING "12-bit field. Minimum latency (clock cycles) from WVALID&WLAST to BVALID.") +set(SRAM_PULSE_ON "3999" CACHE STRING "No. of cycles addresses let through (0-65535).") +set(SRAM_PULSE_OFF "1" CACHE STRING "No. of cycles addresses blocked (0-65535).") +set(SRAM_BWCAP "4000" CACHE STRING "16-bit field. Max no. of 64-bit words transfered per pulse cycle 0=infinite") +set(SRAM_PERFCTRL "0" CACHE STRING "6-bit field selecting an event for event counter 0=default") +set(SRAM_PERFCNT "0" CACHE STRING "32-bit event counter") +set(SRAM_MODE "1" CACHE STRING "Bit 0: 1=enable dynamic clocking to avoid underrun; Bit 1: 1=enable random AR reordering (0=default); Bit 2: 1=enable random R reordering (0=default); Bit 3: 1=enable random B reordering (0=default); Bit 11-4: Frequency scale 0=full speed, 255=(1/256) speed") -set(TA0_HISTBIN "0" CACHE STRING "Controls which histogram bin (0-15) that should be accessed by HISTCNT.") -set(TA0_HISTCNT "0" CACHE STRING "32-bit field. Read/write the selected histogram bin.") +set(SRAM_HISTBIN "0" CACHE STRING "Controls which histogram bin (0-15) that should be accessed by HISTCNT.") +set(SRAM_HISTCNT "0" CACHE STRING "32-bit field. Read/write the selected histogram bin.") # Timing adapter settings for AXI1 -set(TA1_MAXR "24" CACHE STRING "6-bit field. Max no. of pending reads. 0=infinite") -set(TA1_MAXW "12" CACHE STRING "6-bit field. Max no. of pending writes. 0=infinite") -set(TA1_MAXRW "0" CACHE STRING "6-bit field. Max no. of pending reads+writes. 0=infinite") -set(TA1_RLATENCY "500" CACHE STRING "12-bit field. 
Minimum latency (clock cycles) from AVALID to RVALID.") -set(TA1_WLATENCY "250" CACHE STRING "12-bit field. Minimum latency (clock cycles) from WVALID&WLAST to BVALID.") -set(TA1_PULSE_ON "4000" CACHE STRING "No. of cycles addresses let through (0-65535).") -set(TA1_PULSE_OFF "1000" CACHE STRING "No. of cycles addresses blocked (0-65535).") -set(TA1_BWCAP "1172" CACHE STRING "16-bit field. Max no. of 64-bit words transfered per pulse cycle 0=infinite") -set(TA1_PERFCTRL "0" CACHE STRING "6-bit field selecting an event for event counter 0=default") -set(TA1_PERFCNT "0" CACHE STRING "32-bit event counter") -set(TA1_MODE "1" CACHE STRING "Bit 0: 1=enable dynamic clocking to avoid underrun; +set(EXT_MAXR "24" CACHE STRING "6-bit field. Max no. of pending reads. 0=infinite") +set(EXT_MAXW "12" CACHE STRING "6-bit field. Max no. of pending writes. 0=infinite") +set(EXT_MAXRW "0" CACHE STRING "6-bit field. Max no. of pending reads+writes. 0=infinite") +set(EXT_RLATENCY "500" CACHE STRING "12-bit field. Minimum latency (clock cycles) from AVALID to RVALID.") +set(EXT_WLATENCY "250" CACHE STRING "12-bit field. Minimum latency (clock cycles) from WVALID&WLAST to BVALID.") +set(EXT_PULSE_ON "4000" CACHE STRING "No. of cycles addresses let through (0-65535).") +set(EXT_PULSE_OFF "1000" CACHE STRING "No. of cycles addresses blocked (0-65535).") +set(EXT_BWCAP "1172" CACHE STRING "16-bit field. Max no. 
of 64-bit words transfered per pulse cycle 0=infinite") +set(EXT_PERFCTRL "0" CACHE STRING "6-bit field selecting an event for event counter 0=default") +set(EXT_PERFCNT "0" CACHE STRING "32-bit event counter") +set(EXT_MODE "1" CACHE STRING "Bit 0: 1=enable dynamic clocking to avoid underrun; Bit 1: 1=enable random AR reordering (0=default); Bit 2: 1=enable random R reordering (0=default); Bit 3: 1=enable random B reordering (0=default); Bit 11-4: Frequency scale 0=full speed, 255=(1/256) speed") -set(TA1_HISTBIN "0" CACHE STRING "Controls which histogram bin (0-15) that should be accessed by HISTCNT.") -set(TA1_HISTCNT "0" CACHE STRING "32-bit field. Read/write the selected histogram bin.") +set(EXT_HISTBIN "0" CACHE STRING "Controls which histogram bin (0-15) that should be accessed by HISTCNT.") +set(EXT_HISTCNT "0" CACHE STRING "32-bit field. Read/write the selected histogram bin.") diff --git a/scripts/cmake/timing_adapter/ta_config_u85_high_end.cmake b/scripts/cmake/timing_adapter/ta_config_u85_high_end.cmake new file mode 100644 index 0000000..98a9be0 --- /dev/null +++ b/scripts/cmake/timing_adapter/ta_config_u85_high_end.cmake @@ -0,0 +1,68 @@ +#---------------------------------------------------------------------------- +# SPDX-FileCopyrightText: Copyright 2024 Arm Limited and/or its +# affiliates <open-source-office@arm.com> +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+#---------------------------------------------------------------------------- + +#---------------------------------------------------------------------------- +# CMake description file for the Arm Ethos-U85 Timing Adapter settings (single +# NPU core with three, four or six AXIs). +#---------------------------------------------------------------------------- + +# The platform CMake infra should set the base register values for +# TA component to work. For Ethos-U85, we need at least three base addresses. +if (NOT DEFINED TA_SRAM0_BASE OR NOT DEFINED TA_SRAM1_BASE OR NOT DEFINED TA_EXT0_BASE) + message(FATAL_ERROR "TA_SRAM0_BASE, TA_SRAM1_BASE and TA_EXT0_BASE need to be defined.") +endif () + +message(STATUS "using TA_SRAM0_BASE @ ${TA_SRAM0_BASE}; TA_SRAM1_BASE @ ${TA_SRAM1_BASE}; TA_EXT0_BASE @ ${TA_EXT0_BASE}.") + +# Timing adapter settings for SRAM +set(SRAM_MAXR "16" CACHE STRING "6-bit field. Max no. of pending reads. 0=infinite") +set(SRAM_MAXW "16" CACHE STRING "6-bit field. Max no. of pending writes. 0=infinite") +set(SRAM_MAXRW "0" CACHE STRING "6-bit field. Max no. of pending reads+writes. 0=infinite") +set(SRAM_RLATENCY "32" CACHE STRING "12-bit field. Minimum latency (clock cycles) from AVALID to RVALID.") +set(SRAM_WLATENCY "32" CACHE STRING "12-bit field. Minimum latency (clock cycles) from WVALID&WLAST to BVALID.") +set(SRAM_PULSE_ON "3999" CACHE STRING "No. of cycles addresses let through (0-65535).") +set(SRAM_PULSE_OFF "1" CACHE STRING "No. of cycles addresses blocked (0-65535).") +set(SRAM_BWCAP "4000" CACHE STRING "16-bit field. Max no. 
of 64-bit words transfered per pulse cycle 0=infinite") +set(SRAM_PERFCTRL "0" CACHE STRING "6-bit field selecting an event for event counter 0=default") +set(SRAM_PERFCNT "0" CACHE STRING "32-bit event counter") +set(SRAM_MODE "1" CACHE STRING "Bit 0: 1=enable dynamic clocking to avoid underrun; + Bit 1: 1=enable random AR reordering (0=default); + Bit 2: 1=enable random R reordering (0=default); + Bit 3: 1=enable random B reordering (0=default); + Bit 11-4: Frequency scale 0=full speed, 255=(1/256) speed") +set(SRAM_HISTBIN "0" CACHE STRING "Controls which histogram bin (0-15) that should be accessed by HISTCNT.") +set(SRAM_HISTCNT "0" CACHE STRING "32-bit field. Read/write the selected histogram bin.") + +# Timing adapter settings for EXT +set(EXT_MAXR "24" CACHE STRING "6-bit field. Max no. of pending reads. 0=infinite") +set(EXT_MAXW "12" CACHE STRING "6-bit field. Max no. of pending writes. 0=infinite") +set(EXT_MAXRW "0" CACHE STRING "6-bit field. Max no. of pending reads+writes. 0=infinite") +set(EXT_RLATENCY "500" CACHE STRING "12-bit field. Minimum latency (clock cycles) from AVALID to RVALID.") +set(EXT_WLATENCY "250" CACHE STRING "12-bit field. Minimum latency (clock cycles) from WVALID&WLAST to BVALID.") +set(EXT_PULSE_ON "4000" CACHE STRING "No. of cycles addresses let through (0-65535).") +set(EXT_PULSE_OFF "1000" CACHE STRING "No. of cycles addresses blocked (0-65535).") +set(EXT_BWCAP "1172" CACHE STRING "16-bit field. Max no. 
of 64-bit words transfered per pulse cycle 0=infinite") +set(EXT_PERFCTRL "0" CACHE STRING "6-bit field selecting an event for event counter 0=default") +set(EXT_PERFCNT "0" CACHE STRING "32-bit event counter") +set(EXT_MODE "1" CACHE STRING "Bit 0: 1=enable dynamic clocking to avoid underrun; + Bit 1: 1=enable random AR reordering (0=default); + Bit 2: 1=enable random R reordering (0=default); + Bit 3: 1=enable random B reordering (0=default); + Bit 11-4: Frequency scale 0=full speed, 255=(1/256) speed") +set(EXT_HISTBIN "0" CACHE STRING "Controls which histogram bin (0-15) that should be accessed by HISTCNT.") +set(EXT_HISTCNT "0" CACHE STRING "32-bit field. Read/write the selected histogram bin.") diff --git a/scripts/py/vela_configs.py b/scripts/py/vela_configs.py new file mode 100644 index 0000000..b4af9fd --- /dev/null +++ b/scripts/py/vela_configs.py @@ -0,0 +1,150 @@ +#!/usr/bin/env python3 +# SPDX-FileCopyrightText: Copyright 2024 Arm Limited and/or its affiliates <open-source-office@arm.com> +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Classes to represent NPU configurations for Vela +""" +import itertools +import typing +from dataclasses import dataclass + +# The internal SRAM size for Corstone-300 implementation on MPS3 specified by AN552 +# The internal SRAM size for Corstone-310 implementation on MPS3 specified by AN555 +# is 4MB, but we are content with the 2MB specified below. 
+MPS3_MAX_SRAM_SZ = 2 * 1024 * 1024 # 2 MiB (2 banks of 1 MiB each) + + +@dataclass(frozen=True) +class NpuConfig: + """ + Represents a Vela configuration for an NPU + """ + name_prefix: str + macs: int + processor_id: str + prefix_id: str + memory_mode: str + system_config: str + arena_cache_size: int = 0 + + @property + def config_name(self) -> str: + """ + Get the name of the configuration + + For example: "ethos-u55-128" would represent the Ethos-U55 NPU + with a 128 MAC configuration. + + :return: The NPU configuration name. + """ + return f"{self.name_prefix}-{self.macs}" + + @property + def config_id(self) -> str: + """ + Get the configuration id as a string + + For example: "Y256" would represent the Ethos-U65 NPU + with a 256 MAC configuration. + + :return: The NPU configuration id. + """ + return f"{self.prefix_id}{self.macs}" + + def overwrite_arena_cache_size(self, arena_cache_size): + """ + Get a new NPU configuration with the specified + arena cache size. + + By default, we use the `arena_cache_size` value in the + `default_vela.ini` configuration file. + + :param arena_cache_size: The new arena cache size value. + :return: A new NPU configuration with the new + arena cache size value. + """ + value = arena_cache_size + + if value == 0: + value = MPS3_MAX_SRAM_SZ if self.memory_mode == "Shared_Sram" else None + + return NpuConfig( + **{**self.__dict__, **{"arena_cache_size": value}} + ) + + +@dataclass(frozen=True) +class NpuConfigs: + """ + Represents a collection of NPU configurations. + """ + configs: typing.Dict[str, typing.Dict[int, NpuConfig]] + + @staticmethod + def create(*configs: NpuConfig): + """ + Create a new collection with the specified NPU configurations. + + :param configs: NPU configuration objects to add to the collection. + :return: A new collection of NPU configurations. 
+ """ + _configs = {} + + # Internal data structure of nested dictionaries based on + # NPU name and MAC configuration, e.g.: + # _configs["ethos-u55"][128] + + for c in configs: + if c.name_prefix not in _configs: + _configs[c.name_prefix] = {} + _configs[c.name_prefix][c.macs] = c + return NpuConfigs(configs=_configs) + + def get(self, name_prefix: str, macs: typing.Union[int, str]) -> typing.Optional[NpuConfig]: + """ + Get an NPU configuration by name prefix and MAC configuration. + + :param name_prefix: The name prefix, e.g. "ethos-u55". + :param macs: The MAC configuration, e.g. 128. + :return: The matching NPU configuration, or None if no such configuration + exists in the collection. + """ + configs_for_name = self.configs.get(name_prefix) + if not configs_for_name: + return None + return configs_for_name.get(int(macs)) + + def get_by_name(self, name: str) -> typing.Optional[NpuConfig]: + """ + Get an NPU configuration by name. + + :param name: The NPU configuration name, e.g. "ethos-u55-128". + :return: The matching NPU configuration, or None if no such configuration + exists in the collection. + """ + name_prefix, macs = name.rsplit("-", 1) + return self.get(name_prefix, macs) + + @property + def names(self): + """ + Return a list of all NPU configuration names in the collection. + + :return: The list of NPU configuration names. 
+ """ + return list(itertools.chain.from_iterable([ + [f"{c.name_prefix}-{c.macs}" for c in config.values()] + for config in self.configs.values() + ])) diff --git a/scripts/vela/default_vela.ini b/scripts/vela/default_vela.ini index 9d6baa7..5d4d48e 100644 --- a/scripts/vela/default_vela.ini +++ b/scripts/vela/default_vela.ini @@ -1,5 +1,5 @@ ; -; SPDX-FileCopyrightText: Copyright 2021 Arm Limited and/or its affiliates <open-source-office@arm.com> +; SPDX-FileCopyrightText: Copyright 2021, 2024 Arm Limited and/or its affiliates <open-source-office@arm.com> ; SPDX-License-Identifier: Apache-2.0 ; ; Licensed under the Apache License, Version 2.0 (the "License"); @@ -48,6 +48,71 @@ Dram_clock_scale=0.234375 Dram_burst_length=128 Dram_read_latency=500 Dram_write_latency=250 + +; SRAMx2 (16 GB/s) and DRAMx1 (3.75 GB/s) +[System_Config.Ethos_U85_SYS_DRAM_Low] +core_clock=500e6 +axi0_port=Sram +axi1_port=Dram +Sram_clock_scale=1.0 +Sram_ports_used=2 +Sram_burst_length=64 +Sram_read_latency=16 +Sram_write_latency=16 +Dram_clock_scale=0.46875 +Dram_ports_used=1 +Dram_burst_length=128 +Dram_read_latency=500 +Dram_write_latency=250 + +; SRAMx2 (32 GB/s) and DRAM (12 GB/s) +[System_Config.Ethos_U85_SYS_DRAM_Mid_512] +core_clock=1e9 +axi0_port=Sram +axi1_port=Dram +Sram_clock_scale=1.0 +Sram_ports_used=2 +Sram_burst_length=64 +Sram_read_latency=32 +Sram_write_latency=32 +Dram_clock_scale=0.75 +Dram_ports_used=1 +Dram_burst_length=128 +Dram_read_latency=500 +Dram_write_latency=250 + +; SRAMx2 (32 GB/s) and DRAMx2 (24 GB/s) +[System_Config.Ethos_U85_SYS_DRAM_Mid_1024] +core_clock=1e9 +axi0_port=Sram +axi1_port=Dram +Sram_clock_scale=1.0 +Sram_ports_used=2 +Sram_burst_length=64 +Sram_read_latency=32 +Sram_write_latency=32 +Dram_clock_scale=0.75 +Dram_ports_used=2 +Dram_burst_length=128 +Dram_read_latency=500 +Dram_write_latency=250 + +; SRAMx4 (64 GB/s) and DRAMx2 (24 GB/s) +[System_Config.Ethos_U85_SYS_DRAM_High_2048] +core_clock=1e9 +axi0_port=Sram +axi1_port=Dram 
+Sram_clock_scale=1.0 +Sram_ports_used=4 +Sram_burst_length=64 +Sram_read_latency=32 +Sram_write_latency=32 +Dram_clock_scale=0.75 +Dram_ports_used=2 +Dram_burst_length=128 +Dram_read_latency=500 +Dram_write_latency=250 + ; ----------------------------------------------------------------------------- ; Memory Mode @@ -64,4 +129,4 @@ cache_mem_area=Axi0 const_mem_area=Axi1 arena_mem_area=Axi1 cache_mem_area=Axi0 -arena_cache_size=393216
\ No newline at end of file +arena_cache_size=393216 |