From f4962c8d00f00f989b0ecfa0211dc6ec44ec2878 Mon Sep 17 00:00:00 2001 From: Kshitij Sisodia Date: Mon, 4 Oct 2021 12:20:33 +0100 Subject: MLECO-2344: Documentation improvement Documenting how the target platform's SRAM size impacts configuration files, sources and linker scripts. Change-Id: I8647ab67b73bafd0c44e6c586a1b5f2602bf03f5 --- docs/sections/memory_considerations.md | 61 +++++++++++++++++++++- .../subsystem-profiles/corstone-sse-300.cmake | 20 +++---- .../cmake/subsystem-profiles/simple_platform.cmake | 44 +++++++++++++++- 3 files changed, 114 insertions(+), 11 deletions(-) diff --git a/docs/sections/memory_considerations.md b/docs/sections/memory_considerations.md index 599c902..fc81f8f 100644 --- a/docs/sections/memory_considerations.md +++ b/docs/sections/memory_considerations.md @@ -2,6 +2,8 @@ - [Memory considerations](#memory-considerations) - [Introduction](#introduction) + - [Memory available on the target platform](#memory-available-on-the-target-platform) + - [Parameters linked to SRAM size definitions](#parameters-linked-to-sram-size-definitions) - [Understanding memory usage from Vela output](#understanding-memory-usage-from-vela-output) - [Total SRAM used](#total-sram-used) - [Total Off-chip Flash used](#total-off_chip-flash-used) @@ -20,7 +22,7 @@ applicable for other platforms too. The Arm® *Corstone™-300* is composed of b The Arm® *Ethos™-U* NPU interacts with the system through two AXI interfaces. The first one, is envisaged to be the higher-bandwidth, lower-latency, interface. In a typical system, this is wired to an SRAM as it is required to service -frequent Read and Write traffic. +frequent read and write traffic. The second interface is expected to have a higher-latency, lower-bandwidth characteristic, and is typically wired to a flash device servicing read-only traffic. 
In this configuration, the Arm® *Cortex™-M55* CPU and Arm® *Ethos™-U* NPU @@ -31,6 +33,63 @@ The input and output tensors, along with any intermediate computation buffers, a Arm® *Cortex™-M55* CPU and Arm® *Ethos™-U* NPU would be reading, or writing, to this region when running an inference. The Arm® *Ethos™-U* NPU requests these Read and Write transactions over the first AXI bus. +## Memory available on the target platform + +Embedded target platforms supported have a description in the form of CMake files. These files +have definitions that describe the memory regions and the peripheral base addresses. + +See the example for Arm® *Corstone™-300* description file [corstone-sse-300.cmake](../../scripts/cmake/subsystem-profiles/corstone-sse-300.cmake). For the discussion on this page, it is useful to note the following definitions: + +``` +set(ISRAM0_SIZE "0x00200000" CACHE STRING "ISRAM0 size: 2 MiB") +set(ISRAM1_SIZE "0x00200000" CACHE STRING "ISRAM1 size: 2 MiB") +... +# SRAM size reserved for activation buffers +math(EXPR ACTIVATION_BUF_SRAM_SZ "${ISRAM0_SIZE} + ${ISRAM1_SIZE}" OUTPUT_FORMAT HEXADECIMAL) +``` +This will set `ACTIVATION_BUF_SRAM_SZ` to be **4 MiB** for Arm® *Corstone™-300* target platform. +As mentioned in the comments within the file, this size is directly linked to the size mentioned +in the linker scripts, and therefore, it should not be changed without corresponding changes +in the linker script too. For example, a snippet from the scatter file for Corstone™-300 shows: + +``` +;----------------------------------------------------- +; SSE-300's internal SRAM of 4MiB - reserved for +; activation buffers. +; This region should have 3 cycle read latency from +; both Cortex-M55 and Ethos-U NPU +;----------------------------------------------------- +isram.bin 0x31000000 UNINIT ALIGN 16 0x00400000 +{ + ... 
+} +``` +If the usable size of the internal SRAM were to be increased/decreased, the change should be +made in both the linker script as well as the `corstone-sse-300.cmake` definition. + +### Parameters linked to SRAM size definitions + +Other than the obvious link between the linker script and the target profile description in +CMake files, there are other parameters linked to the space reserved for activation +buffers. These are: + +- The file [default_vela.ini](../../scripts/vela/default_vela.ini) contains a parameter called + `arena_cache_size` under `Shared_Sram` memory mode. For example: + ``` + [Memory_Mode.Shared_Sram] + const_mem_area=Axi1 + arena_mem_area=Axi0 + cache_mem_area=Axi0 + arena_cache_size=4194304 + ``` + The size of **4 MiB** is provided here to allow the default vela optimisation process to + use this size as a hint for the available SRAM size for use by the CPU and the NPU. + +- In every `usecase.cmake` file (present within each use case's source directory), there is + a parameter called `${use_case}_ACTIVATION_BUF_SZ` set to a fixed value by default. This + default value should be less than the `ACTIVATION_BUF_SRAM_SZ` if the activation buffer needs + to be reserved in the target platform's SRAM region. 
+ ## Understanding memory usage from Vela output ### Total SRAM used diff --git a/scripts/cmake/subsystem-profiles/corstone-sse-300.cmake b/scripts/cmake/subsystem-profiles/corstone-sse-300.cmake index 38930af..7e27f3c 100644 --- a/scripts/cmake/subsystem-profiles/corstone-sse-300.cmake +++ b/scripts/cmake/subsystem-profiles/corstone-sse-300.cmake @@ -16,15 +16,6 @@ #---------------------------------------------------------------------------- # CMake configuration file for peripheral memory map for MPS3 as per SSE-300 design -################################################################################################### -# Application specific config # -################################################################################################### - -# This parameter is based on the linker/scatter script for SSE-300. Do not change this parameter -# in isolation. -set(ACTIVATION_BUF_SRAM_SZ "0x00400000" CACHE STRING "Maximum SRAM size for activation buffers") -set(DESIGN_NAME "Arm Corstone-300 (SSE-300)" CACHE STRING "Design name") - ################################################################################################### # Mem sizes # ################################################################################################### @@ -66,6 +57,17 @@ set(DDR4_BLK1_BASE_S "0x90000000" CACHE STRING "DDR4 block 1 Secure base ad set(DDR4_BLK2_BASE_S "0xB0000000" CACHE STRING "DDR4 block 2 Secure base address") set(DDR4_BLK3_BASE_S "0xD0000000" CACHE STRING "DDR4 block 3 Secure base address") +################################################################################################### +# Application specific config # +################################################################################################### + +# This parameter is based on the linker/scatter script for SSE-300. Do not change this parameter +# in isolation. 
+set(DESIGN_NAME "Arm Corstone-300 (SSE-300)" CACHE STRING "Design name") + +# SRAM size reserved for activation buffers; must match the SSE-300 linker/scatter script +math(EXPR ACTIVATION_BUF_SRAM_SZ "${ISRAM0_SIZE} + ${ISRAM1_SIZE}" OUTPUT_FORMAT HEXADECIMAL) + ################################################################################################### # Base addresses for dynamic loads (to be used for FVP form only) # ################################################################################################### diff --git a/scripts/cmake/subsystem-profiles/simple_platform.cmake b/scripts/cmake/subsystem-profiles/simple_platform.cmake index 69a69b5..664697b 100644 --- a/scripts/cmake/subsystem-profiles/simple_platform.cmake +++ b/scripts/cmake/subsystem-profiles/simple_platform.cmake @@ -19,6 +19,45 @@ # version of Arm Corstone-300 platform with minimal peripherals to be able to use Ethos-U55. However, # for ease of integration with Arm FastModel Tools, it uses PL011 as the UART component instead of # the CMSDK UART block used by the MPS3 FPGA and FVP implementations. 
+################################################################################################### +# Mem sizes # +################################################################################################### +set(ITCM_SIZE "0x00080000" CACHE STRING "ITCM size: 512 kiB") +set(DTCM_BLK_SIZE "0x00020000" CACHE STRING "DTCM size: 128 kiB, 4 banks") +set(BRAM_SIZE "0x00200000" CACHE STRING "BRAM size: 2 MiB") +set(ISRAM0_SIZE "0x00200000" CACHE STRING "ISRAM0 size: 2 MiB") +set(ISRAM1_SIZE "0x00200000" CACHE STRING "ISRAM1 size: 2 MiB") +set(DDR4_BLK_SIZE "0x10000000" CACHE STRING "DDR4 block size: 256 MiB") + +################################################################################################### +# Base addresses for memory regions # +################################################################################################### +set(ITCM_BASE_NS "0x00000000" CACHE STRING "Instruction TCM Non-Secure base address") +set(BRAM_BASE_NS "0x01000000" CACHE STRING "CODE SRAM Non-Secure base address") +set(DTCM0_BASE_NS "0x20000000" CACHE STRING "Data TCM block 0 Non-Secure base address") +set(DTCM1_BASE_NS "0x20020000" CACHE STRING "Data TCM block 1 Non-Secure base address") +set(DTCM2_BASE_NS "0x20040000" CACHE STRING "Data TCM block 2 Non-Secure base address") +set(DTCM3_BASE_NS "0x20060000" CACHE STRING "Data TCM block 3 Non-Secure base address") +set(ISRAM0_BASE_NS "0x21000000" CACHE STRING "Internal SRAM Area Non-Secure base address") +set(ISRAM1_BASE_NS "0x21200000" CACHE STRING "Internal SRAM Area Non-Secure base address") +set(QSPI_SRAM_BASE_NS "0x28000000" CACHE STRING "QSPI SRAM Non-Secure base address") +set(DDR4_BLK0_BASE_NS "0x60000000" CACHE STRING "DDR4 block 0 Non-Secure base address") +set(DDR4_BLK1_BASE_NS "0x80000000" CACHE STRING "DDR4 block 1 Non-Secure base address") +set(DDR4_BLK2_BASE_NS "0xA0000000" CACHE STRING "DDR4 block 2 Non-Secure base address") +set(DDR4_BLK3_BASE_NS "0xC0000000" CACHE STRING "DDR4 block 3 Non-Secure 
base address") + +set(ITCM_BASE_S "0x10000000" CACHE STRING "Instruction TCM Secure base address") +set(BRAM_BASE_S "0x11000000" CACHE STRING "CODE SRAM Secure base address") +set(DTCM0_BASE_S "0x30000000" CACHE STRING "Data TCM block 0 Secure base address") +set(DTCM1_BASE_S "0x30020000" CACHE STRING "Data TCM block 1 Secure base address") +set(DTCM2_BASE_S "0x30040000" CACHE STRING "Data TCM block 2 Secure base address") +set(DTCM3_BASE_S "0x30060000" CACHE STRING "Data TCM block 3 Secure base address") +set(ISRAM0_BASE_S "0x31000000" CACHE STRING "Internal SRAM Area Secure base address") +set(ISRAM1_BASE_S "0x31200000" CACHE STRING "Internal SRAM Area Secure base address") +set(DDR4_BLK0_BASE_S "0x70000000" CACHE STRING "DDR4 block 0 Secure base address") +set(DDR4_BLK1_BASE_S "0x90000000" CACHE STRING "DDR4 block 1 Secure base address") +set(DDR4_BLK2_BASE_S "0xB0000000" CACHE STRING "DDR4 block 2 Secure base address") +set(DDR4_BLK3_BASE_S "0xD0000000" CACHE STRING "DDR4 block 3 Secure base address") ################################################################################################### # Application specific config # @@ -26,9 +65,12 @@ # This parameter is based on the linker/scatter script for simple platform. Do not change this # parameter in isolation. -set(ACTIVATION_BUF_SRAM_SZ "0x00200000" CACHE STRING "Maximum SRAM size for activation buffers") set(DESIGN_NAME "Simple platform" CACHE STRING "Design name") +# SRAM size reserved for activation buffers; must match the simple platform linker/scatter script +math(EXPR ACTIVATION_BUF_SRAM_SZ "${ISRAM0_SIZE} + ${ISRAM1_SIZE}" OUTPUT_FORMAT HEXADECIMAL) + + ################################################################################################### # Base addresses # ################################################################################################### -- cgit v1.2.1