	.syntax unified
	.cpu cortex-m3
	.global write

	/* Params:
	 * r0 - flash register base (in), status (out)
	 * r1 - count of 16-bit halfwords to program
	 * r2 - workarea start
	 * r3 - workarea end
	 * r4 - target address
	 * Clobbered:
	 * r5 - rp
	 * r6 - wp, tmp
	 */

/*
 * Flash programming loop: drains 16-bit halfwords from a FIFO located in the
 * work area and programs them to flash one at a time.
 *
 * Work-area layout, as used by the code below:
 *   [r2 + 0]  wp - write pointer, only read here (filled in by an external
 *                  producer, presumably the debug host); 0 aborts the loop
 *   [r2 + 4]  rp - read pointer, advanced here and zeroed on a flash error
 *   [r2 + 8]  first data halfword; rp wraps back to it once it reaches r3
 *
 * The FIFO is empty while rp == wp. The halfword count in r1 is tested only
 * after a halfword has been programmed, so it must be non-zero on entry.
 *
 * The routine stops at the bkpt with r0 holding the last flash SR value that
 * was read, or 0 (the wp value) when the loop was aborted through wp == 0.
 */

#define STM32_FLASH_CR_OFFSET 0x10 /* offset of CR register from flash reg base */
#define STM32_FLASH_SR_OFFSET 0x0c /* offset of SR register from flash reg base */

#define STM32_FLASH_CR_PG     0x01 /* CR: PG flag, enables flash programming */
#define STM32_FLASH_SR_BSY    0x01 /* SR: BSY flag, operation still in progress */
#define STM32_FLASH_SR_ERRORS 0x14 /* SR: error bits checked after each halfword
                                    * (presumably PGERR | WRPRTERR - confirm
                                    * against the flash manual of the target) */

#define FIFO_WP_OFFSET   0 /* work area: write pointer */
#define FIFO_RP_OFFSET   4 /* work area: read pointer */
#define FIFO_DATA_OFFSET 8 /* work area: start of the FIFO data */

	.thumb					/* cbz below has no ARM encoding; do not rely on -mthumb */

wait_fifo:
	ldr 	r6, [r2, #FIFO_WP_OFFSET]	/* read wp */
	cmp 	r6, #0			/* abort if wp == 0 */
	beq 	exit
	ldr 	r5, [r2, #FIFO_RP_OFFSET]	/* read rp */
	cmp 	r5, r6			/* wait until rp != wp (wp is re-read each pass) */
	beq 	wait_fifo

	movs	r6, #STM32_FLASH_CR_PG	/* set PG flag to enable flash programming */
	str 	r6, [r0, #STM32_FLASH_CR_OFFSET]
	ldrh	r6, [r5], #2	/* "*target_address++ = *rp++" */
	strh	r6, [r4], #2

busy:
	ldr 	r6, [r0, #STM32_FLASH_SR_OFFSET]	/* wait until BSY flag is reset */
	tst 	r6, #STM32_FLASH_SR_BSY
	bne 	busy
	tst 	r6, #STM32_FLASH_SR_ERRORS	/* check the error bits */
	bne 	error

	cmp 	r5, r3			/* wrap rp at end of buffer */
	it  	cs
	addcs	r5, r2, #FIFO_DATA_OFFSET
	str 	r5, [r2, #FIFO_RP_OFFSET]	/* publish the new rp */
	subs	r1, r1, #1		/* decrement halfword count */
	cbz 	r1, exit		/* done: r6 still holds the last SR value */
	b		wait_fifo

error:
	movs	r0, #0
	str 	r0, [r2, #FIFO_RP_OFFSET]	/* set rp = 0 on error */
	/* fall through: r6 holds the SR value with the error bits set */

exit:
	mov		r0, r6			/* return status in r0 */
	bkpt	#0