// SPDX-License-Identifier: GPL-2.0-or-later

/**
 * Copyright (C) 2021 Tarek BOCHKATI
 *   tarek.bouchkati@st.com
 */

#define OPENOCD_CONTRIB_LOADERS_FLASH_STM32_STM32L4X

#include <stdint.h>
#include "../../../../src/flash/nor/stm32l4x.h"

static inline __attribute__((always_inline))
void copy_buffer_u32(uint32_t *dst, uint32_t *src, int len)
{
	for (int i = 0; i < len; i++)
		dst[i] = src[i];
}

/* this function is assumes that fifo_size is multiple of flash_word_size
 * this condition is ensured by target_run_flash_async_algorithm
 */

void write(volatile struct stm32l4_work_area *work_area,
		   uint8_t *fifo_end,
		   uint8_t *target_address,
		   uint32_t count)
{
	volatile uint32_t *flash_sr = (uint32_t *) work_area->params.flash_sr_addr;
	volatile uint32_t *flash_cr = (uint32_t *) work_area->params.flash_cr_addr;

	/* optimization to avoid reading from memory each time */
	uint8_t *rp_cache  = work_area->fifo.rp;

	/* fifo_start is used to wrap when we reach fifo_end */
	uint8_t *fifo_start = rp_cache;

	/* enable flash programming */
	*flash_cr = FLASH_PG;

	while (count) {
		/* optimization to avoid reading from memory each time */
		uint8_t *wp_cache  = work_area->fifo.wp;
		if (wp_cache == 0)
			break; /* aborted by target_run_flash_async_algorithm */

		int32_t fifo_size = wp_cache - rp_cache;
		if (fifo_size < 0) {
			/* consider the linear fifo, we will wrap later */
			fifo_size = fifo_end - rp_cache;
		}

		/* wait for at least a flash word */
		while (fifo_size >= work_area->params.flash_word_size) {
			copy_buffer_u32((uint32_t *)target_address,
					(uint32_t *)rp_cache,
					work_area->params.flash_word_size / 4);

			/* update target_address and rp_cache */
			target_address += work_area->params.flash_word_size;
			rp_cache += work_area->params.flash_word_size;

			/* wait for the busy flag */
			while (*flash_sr & work_area->params.flash_sr_bsy_mask)
				;

			if (*flash_sr & FLASH_ERROR) {
				work_area->fifo.rp = 0; /* set rp to zero 0 on error */
				goto write_end;
			}

			/* wrap if reach the fifo_end, and update rp in memory */
			if (rp_cache >= fifo_end)
				rp_cache = fifo_start;

			/* flush the rp cache value,
			 * so target_run_flash_async_algorithm can fill the circular fifo */
			work_area->fifo.rp = rp_cache;

			/* update fifo_size and count */
			fifo_size -= work_area->params.flash_word_size;
			count--;
		}
	}

write_end:
	/* disable flash programming */
	*flash_cr = 0;

	/* soft break the loader */
	__asm("bkpt 0");
}

/* by enabling this define 'DEBUG':
 * the main() function can help help debugging the loader algo
 * note: the application should be linked into RAM */

/* #define DEBUG */

#ifdef DEBUG
/* device selector: STM32L5 | STM32U5 | STM32WB | STM32WL | STM32WL_CPU2 | STM32G0Bx | ... */
#define STM32U5

/* when using a secure device, and want to test the secure programming enable this define */
/* #define SECURE */

#if defined(STM32U5)
#  define FLASH_WORD_SIZE   16
#else
#  define FLASH_WORD_SIZE   8
#endif

#if defined(STM32WB) || defined(STM32WL)
#  define FLASH_BASE        0x58004000
#else
#  define FLASH_BASE        0x40022000
#endif

#if defined(STM32G0Bx)
#  define FLASH_BSY_MASK      (FLASH_BSY | FLASH_BSY2)
#else
#  define FLASH_BSY_MASK      FLASH_BSY
#endif

#if defined(STM32L5) || defined(STM32U5)
#  ifdef SECURE
#    define FLASH_KEYR_OFFSET 0x0c
#    define FLASH_SR_OFFSET   0x24
#    define FLASH_CR_OFFSET   0x2c
#  else
#    define FLASH_KEYR_OFFSET 0x08
#    define FLASH_SR_OFFSET   0x20
#    define FLASH_CR_OFFSET   0x28
#  endif
#elif defined(STM32WL_CPU2)
#  define FLASH_KEYR_OFFSET 0x08
#  define FLASH_SR_OFFSET   0x60
#  define FLASH_CR_OFFSET   0x64
#else
#  define FLASH_KEYR_OFFSET 0x08
#  define FLASH_SR_OFFSET   0x10
#  define FLASH_CR_OFFSET   0x14
#endif

#define FLASH_KEYR        (uint32_t *)((FLASH_BASE) + (FLASH_KEYR_OFFSET))
#define FLASH_SR          (uint32_t *)((FLASH_BASE) + (FLASH_SR_OFFSET))
#define FLASH_CR          (uint32_t *)((FLASH_BASE) + (FLASH_CR_OFFSET))

int main()
{
	const uint32_t count = 2;
	const uint32_t buf_size = count * FLASH_WORD_SIZE;
	const uint32_t work_area_size = sizeof(struct stm32l4_work_area) + buf_size;

	uint8_t work_area_buf[work_area_size];
	struct stm32l4_work_area *workarea = (struct stm32l4_work_area *)work_area_buf;

	/* fill the workarea struct */
	workarea->params.flash_sr_addr = (uint32_t)(FLASH_SR);
	workarea->params.flash_cr_addr = (uint32_t)(FLASH_CR);
	workarea->params.flash_word_size = FLASH_WORD_SIZE;
	workarea->params.flash_sr_bsy_mask = FLASH_BSY_MASK;
	/* note: the workarea->stack is not used, in this configuration */

	/* programming the existing memory raw content in workarea->fifo.buf */
	/* feel free to fill the memory with magical values ... */

	workarea->fifo.wp =  (uint8_t *)(&workarea->fifo.buf + buf_size);
	workarea->fifo.rp =  (uint8_t *)&workarea->fifo.buf;

	/* unlock the flash */
	*FLASH_KEYR = KEY1;
	*FLASH_KEYR = KEY2;

	/* erase sector 0 */
	*FLASH_CR = FLASH_PER | FLASH_STRT;
	while (*FLASH_SR & FLASH_BSY)
		;

	/* flash address, should be aligned to FLASH_WORD_SIZE */
	uint8_t *target_address = (uint8_t *) 0x8000000;

	write(workarea,
		  (uint8_t *)(workarea + work_area_size),
		  target_address,
		  count);

	while (1)
		;
}
#endif /* DEBUG */