diff --git a/openfpga_flow/regression_test_scripts/fpga_bitstream_reg_test.sh b/openfpga_flow/regression_test_scripts/fpga_bitstream_reg_test.sh
index 1abe72694..3f77b067b 100755
--- a/openfpga_flow/regression_test_scripts/fpga_bitstream_reg_test.sh
+++ b/openfpga_flow/regression_test_scripts/fpga_bitstream_reg_test.sh
@@ -10,12 +10,15 @@ echo -e "FPGA-Bitstream regression tests";
echo -e "Testing bitstream generation for an auto-sized device";
run-task fpga_bitstream/generate_bitstream/configuration_chain/device_auto $@
+run-task fpga_bitstream/generate_bitstream/ql_memory_bank_shift_register/device_auto $@
echo -e "Testing bitstream generation for an 48x48 FPGA device";
run-task fpga_bitstream/generate_bitstream/configuration_chain/device_48x48 $@
+run-task fpga_bitstream/generate_bitstream/ql_memory_bank_shift_register/device_48x48 $@
echo -e "Testing bitstream generation for an 96x96 FPGA device";
run-task fpga_bitstream/generate_bitstream/configuration_chain/device_96x96 $@
+run-task fpga_bitstream/generate_bitstream/ql_memory_bank_shift_register/device_96x96 $@
echo -e "Testing loading architecture bitstream from an external file";
run-task fpga_bitstream/load_external_architecture_bitstream $@
diff --git a/openfpga_flow/tasks/fpga_bitstream/generate_bitstream/ql_memory_bank_shift_register/device_48x48/config/task.conf b/openfpga_flow/tasks/fpga_bitstream/generate_bitstream/ql_memory_bank_shift_register/device_48x48/config/task.conf
new file mode 100644
index 000000000..14f55026c
--- /dev/null
+++ b/openfpga_flow/tasks/fpga_bitstream/generate_bitstream/ql_memory_bank_shift_register/device_48x48/config/task.conf
@@ -0,0 +1,38 @@
+# = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
+# Configuration file for running experiments
+# = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
+# timeout_each_job : The FPGA task script splits the FPGA flow into multiple jobs.
+# Each job executes the fpga_flow script on one combination of architecture & benchmark.
+# timeout_each_job is the timeout applied to each job.
+# = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
+
+[GENERAL]
+run_engine=openfpga_shell
+power_tech_file = ${PATH:OPENFPGA_PATH}/openfpga_flow/tech/PTM_45nm/45nm.xml
+power_analysis = false
+spice_output=false
+verilog_output=true
+# Runtime of this bitstream generation should not exceed 3 minutes as a QoR requirement
+timeout_each_job = 3*60
+fpga_flow=yosys_vpr
+
+[OpenFPGA_SHELL]
+openfpga_shell_template=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_shell_scripts/generate_bitstream_fix_device_example_script.openfpga
+openfpga_arch_file=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_arch/k4_N4_40nm_qlbanksr_openfpga.xml
+openfpga_sim_setting_file=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_simulation_settings/fixed_sim_openfpga.xml
+# VPR parameters
+openfpga_vpr_route_chan_width=50
+openfpga_vpr_device_layout=48x48
+
+[ARCHITECTURES]
+arch0=${PATH:OPENFPGA_PATH}/openfpga_flow/vpr_arch/k4_N4_tileable_40nm.xml
+
+[BENCHMARKS]
+bench0=${PATH:OPENFPGA_PATH}/openfpga_flow/benchmarks/micro_benchmark/SAPone/rtl/*
+
+[SYNTHESIS_PARAM]
+# Yosys script parameters
+bench_read_verilog_options_common = -nolatches
+bench0_top = SAPone
+
+[SCRIPT_PARAM_MIN_ROUTE_CHAN_WIDTH]
diff --git a/openfpga_flow/tasks/fpga_bitstream/generate_bitstream/ql_memory_bank_shift_register/device_96x96/config/task.conf b/openfpga_flow/tasks/fpga_bitstream/generate_bitstream/ql_memory_bank_shift_register/device_96x96/config/task.conf
new file mode 100644
index 000000000..00c5de64d
--- /dev/null
+++ b/openfpga_flow/tasks/fpga_bitstream/generate_bitstream/ql_memory_bank_shift_register/device_96x96/config/task.conf
@@ -0,0 +1,36 @@
+# = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
+# Configuration file for running experiments
+# = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
+# timeout_each_job : The FPGA task script splits the FPGA flow into multiple jobs.
+# Each job executes the fpga_flow script on one combination of architecture & benchmark.
+# timeout_each_job is the timeout applied to each job.
+# = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
+
+[GENERAL]
+run_engine=openfpga_shell
+power_tech_file = ${PATH:OPENFPGA_PATH}/openfpga_flow/tech/PTM_45nm/45nm.xml
+power_analysis = false
+spice_output=false
+verilog_output=true
+# Runtime of this bitstream generation should not exceed 6 minutes as a QoR requirement
+timeout_each_job = 6*60
+fpga_flow=yosys_vpr
+
+[OpenFPGA_SHELL]
+openfpga_shell_template=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_shell_scripts/generate_bitstream_fix_device_example_script.openfpga
+openfpga_arch_file=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_arch/k4_N4_40nm_qlbanksr_openfpga.xml
+openfpga_sim_setting_file=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_simulation_settings/fixed_sim_openfpga.xml
+openfpga_vpr_route_chan_width=100
+openfpga_vpr_device_layout=96x96
+
+[ARCHITECTURES]
+arch0=${PATH:OPENFPGA_PATH}/openfpga_flow/vpr_arch/k4_N4_tileable_40nm.xml
+
+[BENCHMARKS]
+bench0=${PATH:OPENFPGA_PATH}/openfpga_flow/benchmarks/micro_benchmark/RISC_posedge_clk/rtl/*.v
+
+[SYNTHESIS_PARAM]
+bench_read_verilog_options_common = -nolatches
+bench0_top = RISC_core_top
+
+[SCRIPT_PARAM_MIN_ROUTE_CHAN_WIDTH]
diff --git a/openfpga_flow/tasks/fpga_bitstream/generate_bitstream/ql_memory_bank_shift_register/device_auto/config/task.conf b/openfpga_flow/tasks/fpga_bitstream/generate_bitstream/ql_memory_bank_shift_register/device_auto/config/task.conf
new file mode 100644
index 000000000..e7c61713c
--- /dev/null
+++ b/openfpga_flow/tasks/fpga_bitstream/generate_bitstream/ql_memory_bank_shift_register/device_auto/config/task.conf
@@ -0,0 +1,33 @@
+# = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
+# Configuration file for running experiments
+# = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
+# timeout_each_job : The FPGA task script splits the FPGA flow into multiple jobs.
+# Each job executes the fpga_flow script on one combination of architecture & benchmark.
+# timeout_each_job is the timeout applied to each job.
+# = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
+
+[GENERAL]
+run_engine=openfpga_shell
+power_tech_file = ${PATH:OPENFPGA_PATH}/openfpga_flow/tech/PTM_45nm/45nm.xml
+power_analysis = true
+spice_output=false
+verilog_output=true
+timeout_each_job = 20*60
+fpga_flow=yosys_vpr
+
+[OpenFPGA_SHELL]
+openfpga_shell_template=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_shell_scripts/generate_bitstream_example_script.openfpga
+openfpga_arch_file=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_arch/k4_N4_40nm_qlbanksr_openfpga.xml
+openfpga_sim_setting_file=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_simulation_settings/auto_sim_openfpga.xml
+
+[ARCHITECTURES]
+arch0=${PATH:OPENFPGA_PATH}/openfpga_flow/vpr_arch/k4_N4_tileable_40nm.xml
+
+[BENCHMARKS]
+bench0=${PATH:OPENFPGA_PATH}/openfpga_flow/benchmarks/micro_benchmark/and2/and2.v
+
+[SYNTHESIS_PARAM]
+bench_read_verilog_options_common = -nolatches
+bench0_top = and2
+
+[SCRIPT_PARAM_MIN_ROUTE_CHAN_WIDTH]
diff --git a/openfpga_flow/vpr_arch/k4_N4_tileable_40nm.xml b/openfpga_flow/vpr_arch/k4_N4_tileable_40nm.xml
index 2caa8b1ba..056dcb034 100644
--- a/openfpga_flow/vpr_arch/k4_N4_tileable_40nm.xml
+++ b/openfpga_flow/vpr_arch/k4_N4_tileable_40nm.xml
@@ -84,6 +84,20 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+