flash/stm32l4x: switch to C loader instead of assembly loader

switching to the C loader instead of the assembly version will enhance
readability and reduce the maintenance effort.

besides the switch to the C loader, we added new parameters to the loader
like flash_word_size and flash_sr_bsy_mask in order to support properly
STM32U5x and STM32G0Bx/G0Cx in dual-bank mode.

Change-Id: I24cafc2ba637a065593a0506eae787b21080a0ba
Signed-off-by: Tarek BOCHKATI <tarek.bouchkati@gmail.com>
Reviewed-on: https://review.openocd.org/c/openocd/+/6109
Tested-by: jenkins
Reviewed-by: Oleksij Rempel <linux@rempel-privat.de>
This commit is contained in:
Tarek BOCHKATI 2021-03-06 22:46:35 +01:00 committed by Oleksij Rempel
parent 385eedfc6f
commit 4b1492bb8e
6 changed files with 259 additions and 149 deletions

View File

@ -6,14 +6,19 @@ CC=$(CROSS_COMPILE)gcc
OBJCOPY=$(CROSS_COMPILE)objcopy OBJCOPY=$(CROSS_COMPILE)objcopy
OBJDUMP=$(CROSS_COMPILE)objdump OBJDUMP=$(CROSS_COMPILE)objdump
CFLAGS = -static -nostartfiles -mlittle-endian -Wa,-EL
AFLAGS = -static -nostartfiles -mlittle-endian -Wa,-EL
CFLAGS = -c -mthumb -nostdlib -nostartfiles -Os -g -fPIC
all: stm32f1x.inc stm32f2x.inc stm32h7x.inc stm32l4x.inc stm32lx.inc all: stm32f1x.inc stm32f2x.inc stm32h7x.inc stm32l4x.inc stm32lx.inc
.PHONY: clean .PHONY: clean
%.elf: %.S %.elf: %.S
$(CC) $(CFLAGS) $< -o $@ $(CC) $(AFLAGS) $< -o $@
stm32l4x.elf: stm32l4x.c
$(CC) $(CFLAGS) -mcpu=cortex-m0plus -fstack-usage -Wa,-adhln=$(<:.c=.lst) $< -o $@
%.lst: %.elf %.lst: %.elf
$(OBJDUMP) -S $< > $@ $(OBJDUMP) -S $< > $@

View File

@ -1,105 +0,0 @@
/***************************************************************************
* Copyright (C) 2010 by Spencer Oliver *
* spen@spen-soft.co.uk *
* *
* Copyright (C) 2011 Øyvind Harboe *
* oyvind.harboe@zylin.com *
* *
* Copyright (C) 2015 Uwe Bonnes *
* bon@elektron.ikp.physik.tu-darmstadt.de *
* *
* Copyright (C) 2018 Andreas Bolsch *
* andreas.bolsch@mni.thm.de *
* *
* This program is free software; you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
* the Free Software Foundation; either version 2 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU General Public License for more details. *
* *
* You should have received a copy of the GNU General Public License *
* along with this program; if not, write to the *
* Free Software Foundation, Inc. *
***************************************************************************/
.text
.syntax unified
.cpu cortex-m0
.thumb
/*
* Params :
* r0 = workarea start, status (out)
* r1 = workarea end + 1
* r2 = target address
* r3 = count (64bit words)
* r4 = flash status register
* r5 = flash control register
*
* Clobbered:
* r6/7 - temp (64-bit)
*/
#include "../../../../src/flash/nor/stm32l4x.h"
.thumb_func
.global _start
_start:
mov r8, r3 /* copy dword count */
wait_fifo:
ldr r6, [r0, #0] /* read wp */
cmp r6, #0 /* if wp == 0, */
beq exit /* then abort */
ldr r3, [r0, #4] /* read rp */
subs r6, r6, r3 /* number of bytes available for read in r6 */
bpl fifo_stat /* if not wrapped around, skip */
adds r6, r6, r1 /* add end of buffer */
subs r6, r6, r0 /* sub start of buffer */
fifo_stat:
cmp r6, #8 /* wait until at least one dword available */
bcc wait_fifo
movs r6, #FLASH_PG /* flash program enable */
str r6, [r5] /* write to FLASH_CR, start operation */
ldmia r3!, {r6, r7} /* read one dword from src, increment ptr */
stmia r2!, {r6, r7} /* write one dword to dst, increment ptr */
dsb
ldr r7, =FLASH_BSY /* FLASH_BSY mask */
busy:
ldr r6, [r4] /* get FLASH_SR register */
tst r6, r7 /* BSY == 1 => operation in progress */
bne busy /* if still set, wait more ... */
movs r7, #FLASH_ERROR /* all error bits */
tst r6, r7 /* check for any error bit */
bne error /* fail ... */
cmp r3, r1 /* rp at end of buffer? */
bcc upd_rp /* if no, then skip */
subs r3, r3, r1 /* sub end of buffer */
adds r3, r3, r0 /* add start of buffer */
adds r3, r3, #8 /* skip wp and rp */
upd_rp:
str r3, [r0, #4] /* store rp */
mov r7, r8 /* get dword count */
subs r7, r7, #1 /* decrement dword count */
mov r8, r7 /* save dword count */
beq exit /* exit if done */
b wait_fifo
.pool
error:
movs r3, #0
str r3, [r0, #4] /* set rp = 0 on error */
exit:
mov r0, r6 /* return status in r0 */
movs r6, #0 /* flash program disable */
str r6, [r5] /* write to FLASH_CR */
movs r6, #FLASH_ERROR /* all error bits */
str r6, [r4] /* write to FLASH_CR to clear errors */
bkpt #0x00

View File

@ -0,0 +1,189 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
/**
* Copyright (C) 2021 Tarek BOCHKATI
* tarek.bouchkati@st.com
*/
#include <stdint.h>
#include "../../../../src/flash/nor/stm32l4x.h"
static inline __attribute__((always_inline))
void copy_buffer_u32(uint32_t *dst, uint32_t *src, int len)
{
for (int i = 0; i < len; i++)
dst[i] = src[i];
}
/* this function is assumes that fifo_size is multiple of flash_word_size
* this condition is ensured by target_run_flash_async_algorithm
*/
void write(volatile struct stm32l4_work_area *work_area,
uint8_t *fifo_end,
uint8_t *target_address,
uint32_t count)
{
volatile uint32_t *flash_sr = (uint32_t *) work_area->params.flash_sr_addr;
volatile uint32_t *flash_cr = (uint32_t *) work_area->params.flash_cr_addr;
/* optimization to avoid reading from memory each time */
uint8_t *rp_cache = work_area->fifo.rp;
/* fifo_start is used to wrap when we reach fifo_end */
uint8_t *fifo_start = rp_cache;
/* enable flash programming */
*flash_cr = FLASH_PG;
while (count) {
/* optimization to avoid reading from memory each time */
uint8_t *wp_cache = work_area->fifo.wp;
if (wp_cache == 0)
break; /* aborted by target_run_flash_async_algorithm */
int32_t fifo_size = wp_cache - rp_cache;
if (fifo_size < 0) {
/* consider the linear fifo, we will wrap later */
fifo_size = fifo_end - rp_cache;
}
/* wait for at least a flash word */
while (fifo_size >= work_area->params.flash_word_size) {
copy_buffer_u32((uint32_t *)target_address,
(uint32_t *)rp_cache,
work_area->params.flash_word_size / 4);
/* update target_address and rp_cache */
target_address += work_area->params.flash_word_size;
rp_cache += work_area->params.flash_word_size;
/* wait for the busy flag */
while (*flash_sr & work_area->params.flash_sr_bsy_mask)
;
if (*flash_sr & FLASH_ERROR) {
work_area->fifo.rp = 0; /* set rp to zero 0 on error */
goto write_end;
}
/* wrap if reach the fifo_end, and update rp in memory */
if (rp_cache >= fifo_end)
rp_cache = fifo_start;
/* flush the rp cache value,
* so target_run_flash_async_algorithm can fill the circular fifo */
work_area->fifo.rp = rp_cache;
/* update fifo_size and count */
fifo_size -= work_area->params.flash_word_size;
count--;
}
}
write_end:
/* disable flash programming */
*flash_cr = 0;
/* soft break the loader */
__asm("bkpt 0");
}
/* by enabling this define 'DEBUG':
* the main() function can help with debugging the loader algo
* note: the application should be linked into RAM */
/* #define DEBUG */
#ifdef DEBUG
/* device selector: STM32L5 | STM32U5 | STM32WB | STM32WL | STM32WL_CPU2 | STM32G0Bx | ... */
#define STM32U5
/* when using a secure device, and want to test the secure programming enable this define */
/* #define SECURE */
#if defined(STM32U5)
# define FLASH_WORD_SIZE 16
#else
# define FLASH_WORD_SIZE 8
#endif
#if defined(STM32WB) || defined(STM32WL)
# define FLASH_BASE 0x58004000
#else
# define FLASH_BASE 0x40022000
#endif
#if defined(STM32G0Bx)
# define FLASH_BSY_MASK (FLASH_BSY | FLASH_BSY2)
#else
# define FLASH_BSY_MASK FLASH_BSY
#endif
#if defined(STM32L5) || defined(STM32U5)
# ifdef SECURE
# define FLASH_KEYR_OFFSET 0x0c
# define FLASH_SR_OFFSET 0x24
# define FLASH_CR_OFFSET 0x2c
# else
# define FLASH_KEYR_OFFSET 0x08
# define FLASH_SR_OFFSET 0x20
# define FLASH_CR_OFFSET 0x28
# endif
#elif defined(STM32WL_CPU2)
# define FLASH_KEYR_OFFSET 0x08
# define FLASH_SR_OFFSET 0x60
# define FLASH_CR_OFFSET 0x64
#else
# define FLASH_KEYR_OFFSET 0x08
# define FLASH_SR_OFFSET 0x10
# define FLASH_CR_OFFSET 0x14
#endif
#define FLASH_KEYR (uint32_t *)((FLASH_BASE) + (FLASH_KEYR_OFFSET))
#define FLASH_SR (uint32_t *)((FLASH_BASE) + (FLASH_SR_OFFSET))
#define FLASH_CR (uint32_t *)((FLASH_BASE) + (FLASH_CR_OFFSET))
int main()
{
const uint32_t count = 2;
const uint32_t buf_size = count * FLASH_WORD_SIZE;
const uint32_t work_area_size = sizeof(struct stm32l4_work_area) + buf_size;
uint8_t work_area_buf[work_area_size];
struct stm32l4_work_area *workarea = (struct stm32l4_work_area *)work_area_buf;
/* fill the workarea struct */
workarea->params.flash_sr_addr = (uint32_t)(FLASH_SR);
workarea->params.flash_cr_addr = (uint32_t)(FLASH_CR);
workarea->params.flash_word_size = FLASH_WORD_SIZE;
workarea->params.flash_sr_bsy_mask = FLASH_BSY_MASK;
/* note: the workarea->stack is not used, in this configuration */
/* programming the existing memory raw content in workarea->fifo.buf */
/* feel free to fill the memory with magical values ... */
workarea->fifo.wp = (uint8_t *)(&workarea->fifo.buf + buf_size);
workarea->fifo.rp = (uint8_t *)&workarea->fifo.buf;
/* unlock the flash */
*FLASH_KEYR = KEY1;
*FLASH_KEYR = KEY2;
/* erase sector 0 */
*FLASH_CR = FLASH_PER | FLASH_STRT;
while (*FLASH_SR & FLASH_BSY)
;
/* flash address, should be aligned to FLASH_WORD_SIZE */
uint8_t *target_address = (uint8_t *) 0x8000000;
write(workarea,
(uint8_t *)(workarea + work_area_size),
target_address,
count);
while (1)
;
}
#endif /* DEBUG */

View File

@ -1,7 +1,10 @@
/* Autogenerated with ../../../../src/helper/bin2char.sh */ /* Autogenerated with ../../../../src/helper/bin2char.sh */
0x98,0x46,0x06,0x68,0x00,0x2e,0x23,0xd0,0x43,0x68,0xf6,0x1a,0x01,0xd5,0x76,0x18, 0xf0,0xb5,0x87,0xb0,0x07,0x68,0x01,0x93,0x43,0x68,0x04,0x91,0x02,0x93,0x83,0x6f,
0x36,0x1a,0x08,0x2e,0xf5,0xd3,0x01,0x26,0x2e,0x60,0xc0,0xcb,0xc0,0xc2,0xbf,0xf3, 0x02,0x99,0x03,0x93,0x01,0x23,0x0b,0x60,0x03,0x9b,0x01,0x99,0x00,0x29,0x1f,0xd0,
0x4f,0x8f,0x09,0x4f,0x26,0x68,0x3e,0x42,0xfc,0xd1,0xfa,0x27,0x3e,0x42,0x0d,0xd1, 0x41,0x6f,0x00,0x29,0x1c,0xd0,0xc9,0x1a,0x01,0xd5,0x04,0x99,0xc9,0x1a,0x84,0x68,
0x8b,0x42,0x02,0xd3,0x5b,0x1a,0x1b,0x18,0x08,0x33,0x43,0x60,0x47,0x46,0x01,0x3f, 0x8c,0x42,0xf2,0xd8,0x85,0x68,0xac,0x08,0x05,0x94,0x00,0x24,0x05,0x9d,0xa5,0x42,
0xb8,0x46,0x05,0xd0,0xdd,0xe7,0x00,0x00,0x00,0x00,0x01,0x00,0x00,0x23,0x43,0x60, 0x14,0xdc,0x84,0x68,0x12,0x19,0x84,0x68,0x1b,0x19,0x3c,0x68,0xc5,0x68,0x2e,0x00,
0x30,0x46,0x00,0x26,0x2e,0x60,0xfa,0x26,0x26,0x60,0x00,0xbe, 0x26,0x40,0x25,0x42,0xf9,0xd1,0xfa,0x25,0x3c,0x68,0x2c,0x42,0x0b,0xd0,0x86,0x67,
0x00,0x23,0x02,0x9a,0x13,0x60,0x00,0xbe,0x07,0xb0,0xf0,0xbd,0xa6,0x00,0x9d,0x59,
0x01,0x34,0x95,0x51,0xe2,0xe7,0x04,0x9c,0x9c,0x42,0x00,0xd8,0x03,0x9b,0x83,0x67,
0x84,0x68,0x09,0x1b,0x01,0x9c,0x01,0x3c,0x01,0x94,0xd0,0xe7,

View File

@ -1319,11 +1319,10 @@ static int stm32l4_write_block(struct flash_bank *bank, const uint8_t *buffer,
{ {
struct target *target = bank->target; struct target *target = bank->target;
struct stm32l4_flash_bank *stm32l4_info = bank->driver_priv; struct stm32l4_flash_bank *stm32l4_info = bank->driver_priv;
uint32_t buffer_size;
struct working_area *write_algorithm; struct working_area *write_algorithm;
struct working_area *source; struct working_area *source;
uint32_t address = bank->base + offset; uint32_t address = bank->base + offset;
struct reg_param reg_params[6]; struct reg_param reg_params[5];
struct armv7m_algorithm armv7m_info; struct armv7m_algorithm armv7m_info;
int retval = ERROR_OK; int retval = ERROR_OK;
@ -1345,12 +1344,13 @@ static int stm32l4_write_block(struct flash_bank *bank, const uint8_t *buffer,
return retval; return retval;
} }
/* memory buffer, size *must* be multiple of stm32l4_info->data_width /* data_width should be multiple of double-word */
* plus one dword for rp and one for wp */ assert(stm32l4_info->data_width % 8 == 0);
/* FIXME, currently only STM32U5 devices do have a different data_width, const size_t extra_size = sizeof(struct stm32l4_work_area);
* but STM32U5 device flash programming does not go through this function uint32_t buffer_size = target_get_working_area_avail(target) - extra_size;
* so temporarily continue to consider the default data_width = 8 */ /* buffer_size should be multiple of stm32l4_info->data_width */
buffer_size = target_get_working_area_avail(target) & ~(2 * sizeof(uint32_t) - 1); buffer_size &= ~(stm32l4_info->data_width - 1);
if (buffer_size < 256) { if (buffer_size < 256) {
LOG_WARNING("large enough working area not available, can't do block memory writes"); LOG_WARNING("large enough working area not available, can't do block memory writes");
target_free_working_area(target, write_algorithm); target_free_working_area(target, write_algorithm);
@ -1360,7 +1360,7 @@ static int stm32l4_write_block(struct flash_bank *bank, const uint8_t *buffer,
buffer_size = 16384; buffer_size = 16384;
} }
if (target_alloc_working_area_try(target, buffer_size, &source) != ERROR_OK) { if (target_alloc_working_area_try(target, buffer_size + extra_size, &source) != ERROR_OK) {
LOG_ERROR("allocating working area failed"); LOG_ERROR("allocating working area failed");
return ERROR_TARGET_RESOURCE_NOT_AVAILABLE; return ERROR_TARGET_RESOURCE_NOT_AVAILABLE;
} }
@ -1371,28 +1371,46 @@ static int stm32l4_write_block(struct flash_bank *bank, const uint8_t *buffer,
init_reg_param(&reg_params[0], "r0", 32, PARAM_IN_OUT); /* buffer start, status (out) */ init_reg_param(&reg_params[0], "r0", 32, PARAM_IN_OUT); /* buffer start, status (out) */
init_reg_param(&reg_params[1], "r1", 32, PARAM_OUT); /* buffer end */ init_reg_param(&reg_params[1], "r1", 32, PARAM_OUT); /* buffer end */
init_reg_param(&reg_params[2], "r2", 32, PARAM_OUT); /* target address */ init_reg_param(&reg_params[2], "r2", 32, PARAM_OUT); /* target address */
init_reg_param(&reg_params[3], "r3", 32, PARAM_OUT); /* count (double word-64bit) */ init_reg_param(&reg_params[3], "r3", 32, PARAM_OUT); /* count (of stm32l4_info->data_width) */
init_reg_param(&reg_params[4], "r4", 32, PARAM_OUT); /* flash status register */ init_reg_param(&reg_params[4], "sp", 32, PARAM_OUT); /* write algo stack pointer */
init_reg_param(&reg_params[5], "r5", 32, PARAM_OUT); /* flash control register */
buf_set_u32(reg_params[0].value, 0, 32, source->address); buf_set_u32(reg_params[0].value, 0, 32, source->address);
buf_set_u32(reg_params[1].value, 0, 32, source->address + source->size); buf_set_u32(reg_params[1].value, 0, 32, source->address + source->size);
buf_set_u32(reg_params[2].value, 0, 32, address); buf_set_u32(reg_params[2].value, 0, 32, address);
buf_set_u32(reg_params[3].value, 0, 32, count); buf_set_u32(reg_params[3].value, 0, 32, count);
buf_set_u32(reg_params[4].value, 0, 32, stm32l4_get_flash_reg_by_index(bank, STM32_FLASH_SR_INDEX)); buf_set_u32(reg_params[4].value, 0, 32, source->address +
buf_set_u32(reg_params[5].value, 0, 32, stm32l4_get_flash_reg_by_index(bank, STM32_FLASH_CR_INDEX)); offsetof(struct stm32l4_work_area, stack) + LDR_STACK_SIZE);
struct stm32l4_loader_params loader_extra_params;
target_buffer_set_u32(target, (uint8_t *) &loader_extra_params.flash_sr_addr,
stm32l4_get_flash_reg_by_index(bank, STM32_FLASH_SR_INDEX));
target_buffer_set_u32(target, (uint8_t *) &loader_extra_params.flash_cr_addr,
stm32l4_get_flash_reg_by_index(bank, STM32_FLASH_CR_INDEX));
target_buffer_set_u32(target, (uint8_t *) &loader_extra_params.flash_word_size,
stm32l4_info->data_width);
target_buffer_set_u32(target, (uint8_t *) &loader_extra_params.flash_sr_bsy_mask,
stm32l4_info->sr_bsy_mask);
retval = target_write_buffer(target, source->address, sizeof(loader_extra_params),
(uint8_t *) &loader_extra_params);
if (retval != ERROR_OK)
return retval;
retval = target_run_flash_async_algorithm(target, buffer, count, stm32l4_info->data_width, retval = target_run_flash_async_algorithm(target, buffer, count, stm32l4_info->data_width,
0, NULL, 0, NULL,
ARRAY_SIZE(reg_params), reg_params, ARRAY_SIZE(reg_params), reg_params,
source->address, source->size, source->address + offsetof(struct stm32l4_work_area, fifo),
source->size - offsetof(struct stm32l4_work_area, fifo),
write_algorithm->address, 0, write_algorithm->address, 0,
&armv7m_info); &armv7m_info);
if (retval == ERROR_FLASH_OPERATION_FAILED) { if (retval == ERROR_FLASH_OPERATION_FAILED) {
LOG_ERROR("error executing stm32l4 flash write algorithm"); LOG_ERROR("error executing stm32l4 flash write algorithm");
uint32_t error = buf_get_u32(reg_params[0].value, 0, 32) & FLASH_ERROR; uint32_t error;
stm32l4_read_flash_reg_by_index(bank, STM32_FLASH_SR_INDEX, &error);
error &= FLASH_ERROR;
if (error & FLASH_WRPERR) if (error & FLASH_WRPERR)
LOG_ERROR("flash memory write protected"); LOG_ERROR("flash memory write protected");
@ -1413,7 +1431,6 @@ static int stm32l4_write_block(struct flash_bank *bank, const uint8_t *buffer,
destroy_reg_param(&reg_params[2]); destroy_reg_param(&reg_params[2]);
destroy_reg_param(&reg_params[3]); destroy_reg_param(&reg_params[3]);
destroy_reg_param(&reg_params[4]); destroy_reg_param(&reg_params[4]);
destroy_reg_param(&reg_params[5]);
return retval; return retval;
} }
@ -1538,24 +1555,7 @@ static int stm32l4_write(struct flash_bank *bank, const uint8_t *buffer,
if (retval != ERROR_OK) if (retval != ERROR_OK)
goto err_lock; goto err_lock;
/** if (stm32l4_info->use_flashloader) {
* FIXME update the flash loader to use a custom FLASH_SR_BSY mask
* Workaround for STM32G0Bx/G0Cx devices in dual bank mode,
* as the flash loader does not use the SR_BSY2
*/
bool use_flashloader = stm32l4_info->use_flashloader;
if ((stm32l4_info->part_info->id == 0x467) && stm32l4_info->dual_bank_mode) {
LOG_INFO("Couldn't use the flash loader in dual-bank mode");
use_flashloader = false;
} else if (stm32l4_info->part_info->id == 0x482) {
/**
* FIXME the current flashloader does not support writing in quad-words
* which is required for STM32U5 devices.
*/
use_flashloader = false;
}
if (use_flashloader) {
/* For TrustZone enabled devices, when TZEN is set and RDP level is 0.5, /* For TrustZone enabled devices, when TZEN is set and RDP level is 0.5,
* the debug is possible only in non-secure state. * the debug is possible only in non-secure state.
* Thus means the flashloader will run in non-secure mode, * Thus means the flashloader will run in non-secure mode,
@ -1567,7 +1567,7 @@ static int stm32l4_write(struct flash_bank *bank, const uint8_t *buffer,
count / stm32l4_info->data_width); count / stm32l4_info->data_width);
} }
if (!use_flashloader || retval == ERROR_TARGET_RESOURCE_NOT_AVAILABLE) { if (!stm32l4_info->use_flashloader || retval == ERROR_TARGET_RESOURCE_NOT_AVAILABLE) {
LOG_INFO("falling back to single memory accesses"); LOG_INFO("falling back to single memory accesses");
retval = stm32l4_write_block_without_loader(bank, buffer, offset, retval = stm32l4_write_block_without_loader(bank, buffer, offset,
count / stm32l4_info->data_width); count / stm32l4_info->data_width);

View File

@ -92,4 +92,22 @@
#define STM32L5_REGS_SEC_OFFSET 0x10000000 #define STM32L5_REGS_SEC_OFFSET 0x10000000
/* 100 bytes as loader stack should be large enough for the loader to operate */
#define LDR_STACK_SIZE 100
struct stm32l4_work_area {
struct stm32l4_loader_params {
uint32_t flash_sr_addr;
uint32_t flash_cr_addr;
uint32_t flash_word_size;
uint32_t flash_sr_bsy_mask;
} params;
uint8_t stack[LDR_STACK_SIZE];
struct flash_async_algorithm_circbuf {
uint8_t *wp;
uint8_t *rp;
uint8_t *buf;
} fifo;
};
#endif #endif