diff options
Diffstat (limited to 'source/application/hal/platforms/bare-metal/bsp/mem_layout')
4 files changed, 532 insertions, 28 deletions
diff --git a/source/application/hal/platforms/bare-metal/bsp/mem_layout/mps3-sse-300.ld b/source/application/hal/platforms/bare-metal/bsp/mem_layout/mps3-sse-300.ld new file mode 100644 index 0000000..8bb99cd --- /dev/null +++ b/source/application/hal/platforms/bare-metal/bsp/mem_layout/mps3-sse-300.ld @@ -0,0 +1,246 @@ +/* + * Copyright (c) 2021 Arm Limited. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +__STACK_SIZE = 0x00060000; +__HEAP_SIZE = 0x000f0000; + +/* System memory brief */ +MEMORY +{ + ITCM (rx) : ORIGIN = 0x00000000, LENGTH = 0x00080000 + DTCM (rwx) : ORIGIN = 0x20000000, LENGTH = 0x00080000 + BRAM (rwx) : ORIGIN = 0x11000000, LENGTH = 0x00200000 + SRAM (rwx) : ORIGIN = 0x31000000, LENGTH = 0x00400000 + DDR (rwx) : ORIGIN = 0x70000000, LENGTH = 0x02000000 +} + +/* Linker script to place sections and symbol values. Should be used together + * with other linker script that defines memory regions ITCM and RAM. 
+ * It references following symbols, which must be defined in code: + * Reset_Handler : Entry of reset handler + * + * It defines following symbols, which code can use without definition: + * __exidx_start + * __exidx_end + * __copy_table_start__ + * __copy_table_end__ + * __zero_table_start__ + * __zero_table_end__ + * __etext + * __data_start__ + * __preinit_array_start + * __preinit_array_end + * __init_array_start + * __init_array_end + * __fini_array_start + * __fini_array_end + * __data_end__ + * __bss_start__ + * __bss_end__ + * __end__ + * end + * __HeapLimit + * __StackLimit + * __StackTop + * __stack + */ +ENTRY(Reset_Handler) + +SECTIONS +{ + .text.at_itcm : + { + KEEP(*(.vectors)) + *(.text*) + + KEEP(*(.init)) + KEEP(*(.fini)) + + /* .ctors */ + *crtbegin.o(.ctors) + *crtbegin?.o(.ctors) + *(EXCLUDE_FILE(*crtend?.o *crtend.o) .ctors) + *(SORT(.ctors.*)) + *(.ctors) + + /* .dtors */ + *crtbegin.o(.dtors) + *crtbegin?.o(.dtors) + *(EXCLUDE_FILE(*crtend?.o *crtend.o) .dtors) + *(SORT(.dtors.*)) + *(.dtors) + + KEEP(*(.eh_frame*)) + } > ITCM + + .ARM.extab.at_itcm : + { + *(.ARM.extab* .gnu.linkonce.armextab.*) + } > ITCM + + __exidx_start = .; + .ARM.exidx.at_itcm : + { + *(.ARM.exidx* .gnu.linkonce.armexidx.*) + } > ITCM + __exidx_end = .; + + .zero.table.at_itcm : + { + . = ALIGN(4); + __zero_table_start__ = .; + + LONG (__bss_start__) + LONG ((__bss_end__ - __bss_start__)/4) /* Size is in 32-bit words */ + + __zero_table_end__ = .; + } > ITCM + + .copy.table.at_itcm : + { + . 
= ALIGN(4); + __copy_table_start__ = .; + + /* Section to be copied - part 1: any data to be placed in BRAM */ + LONG (__etext) + LONG (__data_start__) + LONG ((__data_end__ - __data_start__)/4) /* Size is in 32-bit words */ + + /* Section to be copied - part 2: RO data to be placed in BRAM */ + LONG (__etext2) + LONG (__ro_data_start__) + LONG ((__ro_data_end__ - __ro_data_start__)/4) /* Size is in 32-bit words */ + + __copy_table_end__ = .; + } > ITCM + + __itcm_total = ALIGN(4); + + ASSERT( __itcm_total < (ORIGIN(ITCM) + LENGTH(ITCM)), "ITCM overflow") + + .sram : + { + . = ALIGN(16); + *(.bss.NoInit.activation_buf) + . = ALIGN(16); + } > SRAM AT > SRAM + + .bss : + { + . = ALIGN(4); + __bss_start__ = .; + *(.bss) + *(.bss.*) + *(COMMON) + . = ALIGN(4); + __bss_end__ = .; + } > DTCM AT > DTCM + + .stack (ORIGIN(DTCM) + LENGTH(DTCM) - __STACK_SIZE) (COPY) : + { + . = ALIGN(8); + __StackLimit = .; + . = . + __STACK_SIZE; + . = ALIGN(8); + __StackTop = .; + } > DTCM + PROVIDE(__stack = __StackTop); + ASSERT( + (__STACK_SIZE + __bss_end__ - __bss_start__) <= LENGTH(DTCM), + "DTCM overflow") + + .ddr.at_ddr : + { + /* __attribute__((aligned(16))) is not handled by the CMSIS startup code. + * Force the alignment here as a workaround */ + . = ALIGN(16); + *(ifm) + . = ALIGN(16); + *(nn_model) + . = ALIGN (16); + *(labels) + . = ALIGN (16); + *(activation_buf) + . = ALIGN (16); + } > DDR AT > DDR + + /** + * Location counter can end up 2byte aligned with narrow Thumb code but + * __etext is assumed by startup code to be the LMA of a section in DTCM + * which must be 4byte aligned + */ + __etext = ALIGN (4); + + .bram.at_ddr : AT (__etext) + { + __data_start__ = .; + *(vtable) + *(.data) + *(.data.*) + . = ALIGN(4); + PROVIDE_HIDDEN (__preinit_array_start = .); + KEEP(*(.preinit_array)) + PROVIDE_HIDDEN (__preinit_array_end = .); + . 
= ALIGN(4); + PROVIDE_HIDDEN (__init_array_start = .); + KEEP(*(SORT(.init_array.*))) + KEEP(*(.init_array)) + PROVIDE_HIDDEN (__init_array_end = .); + . = ALIGN(4); + PROVIDE_HIDDEN (__fini_array_start = .); + KEEP(*(SORT(.fini_array.*))) + KEEP(*(.fini_array)) + PROVIDE_HIDDEN (__fini_array_end = .); + KEEP(*(.jcr*)) + . = ALIGN(4); + + __data_end__ = .; + } > BRAM + + __etext2 = __etext + (__data_end__ - __data_start__); + + .data.at_ddr : AT (__etext2) + { + . = ALIGN(4); + __ro_data_start__ = .; + + *(.rodata*) + . = ALIGN(4); + * (npu_driver_version) + . = ALIGN(4); + * (npu_driver_arch_version) + . = ALIGN(4); + + __ro_data_end__ = .; + } > BRAM + + .heap (COPY) : + { + . = ALIGN(8); + __end__ = .; + PROVIDE(end = .); + . = . + __HEAP_SIZE; + . = ALIGN(8); + __HeapLimit = .; + } > BRAM + + ASSERT ( + (__ro_data_end__ - __ro_data_start__) + + (__data_end__ - __data_start__) + + __HEAP_SIZE <= LENGTH(BRAM), + "BRAM overflow") +} diff --git a/source/application/hal/platforms/bare-metal/bsp/mem_layout/mps3-sse-300.sct b/source/application/hal/platforms/bare-metal/bsp/mem_layout/mps3-sse-300.sct index 327d511..55ed5d7 100644 --- a/source/application/hal/platforms/bare-metal/bsp/mem_layout/mps3-sse-300.sct +++ b/source/application/hal/platforms/bare-metal/bsp/mem_layout/mps3-sse-300.sct @@ -78,12 +78,12 @@ LOAD_REGION_0 0x00000000 0x00080000 LOAD_REGION_1 0x70000000 0x02000000 { ;----------------------------------------------------- - ; 32 MiB of DRAM space for neural network model, + ; 32 MiB of DDR space for neural network model, ; input vectors and labels. If the activation buffer ; size required by the network is bigger than the ; SRAM size available, it is accommodated here. 
;----------------------------------------------------- - dram.bin 0x70000000 ALIGN 16 0x02000000 + ddr.bin 0x70000000 ALIGN 16 0x02000000 { ; nn model's baked in input matrices *.o (ifm) @@ -110,9 +110,9 @@ LOAD_REGION_1 0x70000000 0x02000000 } ;----------------------------------------------------- - ; Remaining part of the 2MiB BRAM used as heap space. - ; 0x00200000 - 0x00040000 = 0x001C0000 (1.75 MiB) + ; 960 KiB of remaining part of the 2MiB BRAM used as + ; heap space. 0x000F0000 of 0x001C0000 available. ;----------------------------------------------------- - ARM_LIB_HEAP 0x11040000 EMPTY ALIGN 8 0x001C0000 + ARM_LIB_HEAP 0x11040000 EMPTY ALIGN 8 0x000F0000 {} } diff --git a/source/application/hal/platforms/bare-metal/bsp/mem_layout/simple_platform.ld b/source/application/hal/platforms/bare-metal/bsp/mem_layout/simple_platform.ld new file mode 100644 index 0000000..8bb99cd --- /dev/null +++ b/source/application/hal/platforms/bare-metal/bsp/mem_layout/simple_platform.ld @@ -0,0 +1,246 @@ +/* + * Copyright (c) 2021 Arm Limited. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +__STACK_SIZE = 0x00060000; +__HEAP_SIZE = 0x000f0000; + +/* System memory brief */ +MEMORY +{ + ITCM (rx) : ORIGIN = 0x00000000, LENGTH = 0x00080000 + DTCM (rwx) : ORIGIN = 0x20000000, LENGTH = 0x00080000 + BRAM (rwx) : ORIGIN = 0x11000000, LENGTH = 0x00200000 + SRAM (rwx) : ORIGIN = 0x31000000, LENGTH = 0x00400000 + DDR (rwx) : ORIGIN = 0x70000000, LENGTH = 0x02000000 +} + +/* Linker script to place sections and symbol values. Should be used together + * with other linker script that defines memory regions ITCM and RAM. + * It references following symbols, which must be defined in code: + * Reset_Handler : Entry of reset handler + * + * It defines following symbols, which code can use without definition: + * __exidx_start + * __exidx_end + * __copy_table_start__ + * __copy_table_end__ + * __zero_table_start__ + * __zero_table_end__ + * __etext + * __data_start__ + * __preinit_array_start + * __preinit_array_end + * __init_array_start + * __init_array_end + * __fini_array_start + * __fini_array_end + * __data_end__ + * __bss_start__ + * __bss_end__ + * __end__ + * end + * __HeapLimit + * __StackLimit + * __StackTop + * __stack + */ +ENTRY(Reset_Handler) + +SECTIONS +{ + .text.at_itcm : + { + KEEP(*(.vectors)) + *(.text*) + + KEEP(*(.init)) + KEEP(*(.fini)) + + /* .ctors */ + *crtbegin.o(.ctors) + *crtbegin?.o(.ctors) + *(EXCLUDE_FILE(*crtend?.o *crtend.o) .ctors) + *(SORT(.ctors.*)) + *(.ctors) + + /* .dtors */ + *crtbegin.o(.dtors) + *crtbegin?.o(.dtors) + *(EXCLUDE_FILE(*crtend?.o *crtend.o) .dtors) + *(SORT(.dtors.*)) + *(.dtors) + + KEEP(*(.eh_frame*)) + } > ITCM + + .ARM.extab.at_itcm : + { + *(.ARM.extab* .gnu.linkonce.armextab.*) + } > ITCM + + __exidx_start = .; + .ARM.exidx.at_itcm : + { + *(.ARM.exidx* .gnu.linkonce.armexidx.*) + } > ITCM + __exidx_end = .; + + .zero.table.at_itcm : + { + . 
= ALIGN(4); + __zero_table_start__ = .; + + LONG (__bss_start__) + LONG ((__bss_end__ - __bss_start__)/4) /* Size is in 32-bit words */ + + __zero_table_end__ = .; + } > ITCM + + .copy.table.at_itcm : + { + . = ALIGN(4); + __copy_table_start__ = .; + + /* Section to be copied - part 1: any data to be placed in BRAM */ + LONG (__etext) + LONG (__data_start__) + LONG ((__data_end__ - __data_start__)/4) /* Size is in 32-bit words */ + + /* Section to be copied - part 2: RO data to be placed in BRAM */ + LONG (__etext2) + LONG (__ro_data_start__) + LONG ((__ro_data_end__ - __ro_data_start__)/4) /* Size is in 32-bit words */ + + __copy_table_end__ = .; + } > ITCM + + __itcm_total = ALIGN(4); + + ASSERT( __itcm_total < (ORIGIN(ITCM) + LENGTH(ITCM)), "ITCM overflow") + + .sram : + { + . = ALIGN(16); + *(.bss.NoInit.activation_buf) + . = ALIGN(16); + } > SRAM AT > SRAM + + .bss : + { + . = ALIGN(4); + __bss_start__ = .; + *(.bss) + *(.bss.*) + *(COMMON) + . = ALIGN(4); + __bss_end__ = .; + } > DTCM AT > DTCM + + .stack (ORIGIN(DTCM) + LENGTH(DTCM) - __STACK_SIZE) (COPY) : + { + . = ALIGN(8); + __StackLimit = .; + . = . + __STACK_SIZE; + . = ALIGN(8); + __StackTop = .; + } > DTCM + PROVIDE(__stack = __StackTop); + ASSERT( + (__STACK_SIZE + __bss_end__ - __bss_start__) <= LENGTH(DTCM), + "DTCM overflow") + + .ddr.at_ddr : + { + /* __attribute__((aligned(16))) is not handled by the CMSIS startup code. + * Force the alignment here as a workaround */ + . = ALIGN(16); + *(ifm) + . = ALIGN(16); + *(nn_model) + . = ALIGN (16); + *(labels) + . = ALIGN (16); + *(activation_buf) + . = ALIGN (16); + } > DDR AT > DDR + + /** + * Location counter can end up 2byte aligned with narrow Thumb code but + * __etext is assumed by startup code to be the LMA of a section in DTCM + * which must be 4byte aligned + */ + __etext = ALIGN (4); + + .bram.at_ddr : AT (__etext) + { + __data_start__ = .; + *(vtable) + *(.data) + *(.data.*) + . 
= ALIGN(4); + PROVIDE_HIDDEN (__preinit_array_start = .); + KEEP(*(.preinit_array)) + PROVIDE_HIDDEN (__preinit_array_end = .); + . = ALIGN(4); + PROVIDE_HIDDEN (__init_array_start = .); + KEEP(*(SORT(.init_array.*))) + KEEP(*(.init_array)) + PROVIDE_HIDDEN (__init_array_end = .); + . = ALIGN(4); + PROVIDE_HIDDEN (__fini_array_start = .); + KEEP(*(SORT(.fini_array.*))) + KEEP(*(.fini_array)) + PROVIDE_HIDDEN (__fini_array_end = .); + KEEP(*(.jcr*)) + . = ALIGN(4); + + __data_end__ = .; + } > BRAM + + __etext2 = __etext + (__data_end__ - __data_start__); + + .data.at_ddr : AT (__etext2) + { + . = ALIGN(4); + __ro_data_start__ = .; + + *(.rodata*) + . = ALIGN(4); + * (npu_driver_version) + . = ALIGN(4); + * (npu_driver_arch_version) + . = ALIGN(4); + + __ro_data_end__ = .; + } > BRAM + + .heap (COPY) : + { + . = ALIGN(8); + __end__ = .; + PROVIDE(end = .); + . = . + __HEAP_SIZE; + . = ALIGN(8); + __HeapLimit = .; + } > BRAM + + ASSERT ( + (__ro_data_end__ - __ro_data_start__) + + (__data_end__ - __data_start__) + + __HEAP_SIZE <= LENGTH(BRAM), + "BRAM overflow") +} diff --git a/source/application/hal/platforms/bare-metal/bsp/mem_layout/simple_platform.sct b/source/application/hal/platforms/bare-metal/bsp/mem_layout/simple_platform.sct index a1ffb49..deb4214 100644 --- a/source/application/hal/platforms/bare-metal/bsp/mem_layout/simple_platform.sct +++ b/source/application/hal/platforms/bare-metal/bsp/mem_layout/simple_platform.sct @@ -36,35 +36,36 @@ LOAD_REGION_0 0x00000000 0x00080000 } ;----------------------------------------------------- - ; BRAM or FPGA data SRAM region worth 2MiB - ;----------------------------------------------------- - bram.bin 0x11000000 UNINIT ALIGN 16 0x00200000 - { - ; activation buffers a.k.a tensor arena - *.o (.bss.NoInit.activation_buf) - } - - ;----------------------------------------------------- - ; 128kiB of 512kiB bank is used for any other RW or ZI + ; 128kiB of 512kiB DTCM is used for any other RW or ZI ; data. 
Note: this region is internal to the Cortex-M - ; CPU + ; CPU. ;----------------------------------------------------- dtcm.bin 0x20000000 0x00020000 { + ; Any R/W and/or zero initialised data .ANY(+RW +ZI) } ;----------------------------------------------------- - ; 128kiB of stack space within the DTCM region + ; 384kiB of stack space within the DTCM region. See + ; `dtcm.bin` for the first section. Note: by virtue of + ; being part of DTCM, this region is only accessible + ; from Cortex-M55. ;----------------------------------------------------- - ARM_LIB_STACK 0x20020000 EMPTY ALIGN 8 0x00020000 + ARM_LIB_STACK 0x20020000 EMPTY ALIGN 8 0x00060000 {} ;----------------------------------------------------- - ; 256kiB of heap space within the DTCM region + ; SSE-300's internal SRAM of 4MiB - reserved for + ; activation buffers. + ; This region should have 3 cycle read latency from + ; both Cortex-M55 and Ethos-U55 ;----------------------------------------------------- - ARM_LIB_HEAP 0x20040000 EMPTY ALIGN 8 0x00040000 - {} + isram.bin 0x31000000 UNINIT ALIGN 16 0x00400000 + { + ; activation buffers a.k.a tensor arena + *.o (.bss.NoInit.activation_buf) + } } ;--------------------------------------------------------- @@ -73,9 +74,12 @@ LOAD_REGION_0 0x00000000 0x00080000 LOAD_REGION_1 0x70000000 0x02000000 { ;----------------------------------------------------- - ; 32 MiB of DRAM space for nn model and input vectors + ; 32 MiB of DDR space for neural network model, + ; input vectors and labels. If the activation buffer + ; size required by the network is bigger than the + ; SRAM size available, it is accommodated here. 
;----------------------------------------------------- - dram.bin 0x70000000 ALIGN 16 0x02000000 + ddr.bin 0x70000000 ALIGN 16 0x02000000 { ; nn model's baked in input matrices *.o (ifm) @@ -83,20 +87,28 @@ LOAD_REGION_1 0x70000000 0x02000000 ; nn model *.o (nn_model) + ; labels + *.o (labels) + ; if the activation buffer (tensor arena) doesn't ; fit in the SRAM region, we accommodate it here *.o (activation_buf) } ;----------------------------------------------------- - ; SSE-300's internal SRAM of 2MiB - reserved for - ; activation buffers. - ; This region should have 3 cycle read latency from - ; both Cortex-M55 and Ethos-U55 + ; First 256kiB of BRAM (FPGA SRAM) used for RO data. + ; Note: Total BRAM size available is 2MiB. ;----------------------------------------------------- - isram.bin 0x31000000 0x00080000 + bram.bin 0x11000000 ALIGN 8 0x00040000 { ; RO data (incl. unwinding tables for debugging) .ANY (+RO-DATA) } + + ;----------------------------------------------------- + ; 960 KiB of remaining part of the 2MiB BRAM used as + ; heap space. 0x000F0000 of 0x001C0000 available. + ;----------------------------------------------------- + ARM_LIB_HEAP 0x11040000 EMPTY ALIGN 8 0x000F0000 + {} } |