arm11: improve performance using minidriver hook
zy1000 performance for GDB load went from 100 kBytes/s to 300 kBytes/s @ 8 MHz by implementing the inner loop of unacknowledged (noack) arm11 memory writes directly on top of the hardware FIFO.

Profiling info (% time, cumulative seconds, self seconds, function):

78.57 0.77 0.77 arm11_run_instr_data_to_core_noack_inner
5.10 0.82 0.05 memcpy
4.08 0.86 0.04 jtag_tap_next_enabled
3.06 0.89 0.03 gdb_input

Signed-off-by: Øyvind Harboe <oyvind.harboe@zylin.com>
This commit is contained in:
parent
1f5883ea56
commit
faef631a4d
|
@ -845,11 +845,73 @@ void embeddedice_write_dcc(struct jtag_tap *tap, int reg_addr, uint8_t *buffer,
|
|||
}
|
||||
|
||||
|
||||
int arm11_run_instr_data_to_core_noack_inner_default(struct arm11_common * arm11, uint32_t opcode, uint32_t * data, size_t count);
|
||||
|
||||
int arm11_run_instr_data_to_core_noack_inner(struct arm11_common * arm11, uint32_t opcode, uint32_t * data, size_t count)
|
||||
int arm11_run_instr_data_to_core_noack_inner(struct jtag_tap * tap, uint32_t opcode, uint32_t * data, size_t count)
|
||||
{
|
||||
return arm11_run_instr_data_to_core_noack_inner_default(arm11, opcode, data, count);
|
||||
#if 0
|
||||
int arm11_run_instr_data_to_core_noack_inner_default(struct jtag_tap * tap, uint32_t opcode, uint32_t * data, size_t count);
|
||||
return arm11_run_instr_data_to_core_noack_inner_default(tap, opcode, data, count);
|
||||
#else
|
||||
static const int bits[] = {32, 2};
|
||||
uint32_t values[] = {0, 0};
|
||||
|
||||
/* FIX!!!!!! the target_write_memory() API started this nasty problem
|
||||
* with unaligned uint32_t * pointers... */
|
||||
const uint8_t *t = (const uint8_t *)data;
|
||||
|
||||
while (count--)
|
||||
{
|
||||
values[0] = *t++;
|
||||
values[0] |= (*t++<<8);
|
||||
values[0] |= (*t++<<16);
|
||||
values[0] |= (*t++<<24);
|
||||
|
||||
if (count > 0)
|
||||
{
|
||||
jtag_add_dr_out(tap,
|
||||
2,
|
||||
bits,
|
||||
values,
|
||||
TAP_DRPAUSE);
|
||||
|
||||
#if 1
|
||||
/* copy & paste from arm11_dbgtap.c */
|
||||
//TAP_DREXIT2, TAP_DRUPDATE, TAP_IDLE, TAP_IDLE, TAP_IDLE, TAP_DRSELECT, TAP_DRCAPTURE, TAP_DRSHIFT
|
||||
|
||||
waitIdle();
|
||||
ZY1000_POKE(ZY1000_JTAG_BASE + 0x28, 1);
|
||||
ZY1000_POKE(ZY1000_JTAG_BASE + 0x28, 1);
|
||||
ZY1000_POKE(ZY1000_JTAG_BASE + 0x28, 0);
|
||||
ZY1000_POKE(ZY1000_JTAG_BASE + 0x28, 0);
|
||||
ZY1000_POKE(ZY1000_JTAG_BASE + 0x28, 0);
|
||||
ZY1000_POKE(ZY1000_JTAG_BASE + 0x28, 1);
|
||||
ZY1000_POKE(ZY1000_JTAG_BASE + 0x28, 0);
|
||||
ZY1000_POKE(ZY1000_JTAG_BASE + 0x28, 0);
|
||||
waitIdle();
|
||||
ZY1000_POKE(ZY1000_JTAG_BASE + 0x20, TAP_DRSHIFT);
|
||||
#else
|
||||
static const tap_state_t arm11_MOVE_DRPAUSE_IDLE_DRPAUSE_with_delay[] =
|
||||
{
|
||||
TAP_DREXIT2, TAP_DRUPDATE, TAP_IDLE, TAP_IDLE, TAP_IDLE, TAP_DRSELECT, TAP_DRCAPTURE, TAP_DRSHIFT
|
||||
};
|
||||
|
||||
jtag_add_pathmove(ARRAY_SIZE(arm11_MOVE_DRPAUSE_IDLE_DRPAUSE_with_delay),
|
||||
arm11_MOVE_DRPAUSE_IDLE_DRPAUSE_with_delay);
|
||||
#endif
|
||||
} else
|
||||
{
|
||||
/* This will happen on the last iteration updating the current tap state
|
||||
* so we don't have to track it during the common code path */
|
||||
jtag_add_dr_out(tap,
|
||||
2,
|
||||
bits,
|
||||
values,
|
||||
TAP_IDLE);
|
||||
}
|
||||
}
|
||||
|
||||
return jtag_execute_queue();
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
|
|
Loading…
Reference in New Issue