From ba34ebb4e573d76715561f1d7ce532b734155b40 Mon Sep 17 00:00:00 2001
From: Lalit Sharma <lsharma@quicklogic.com>
Date: Wed, 13 Jan 2021 00:48:03 -0800
Subject: [PATCH] Removing commented sections/attributes. Also corrected
 indentation

---
 ...avel_io_skywater130nm_fdhd_cc_openfpga.xml | 153 ++++++---------
 ...n_chain_nonLR_caravel_io_skywater130nm.xml | 185 ++++++++----------
 2 files changed, 148 insertions(+), 190 deletions(-)
diff --git a/ARCH/openfpga_arch_template/k4_N8_reset_softadder_register_scan_chain_caravel_io_skywater130nm_fdhd_cc_openfpga.xml b/ARCH/openfpga_arch_template/k4_N8_reset_softadder_register_scan_chain_caravel_io_skywater130nm_fdhd_cc_openfpga.xml
index e107260..1fff339 100644
--- a/ARCH/openfpga_arch_template/k4_N8_reset_softadder_register_scan_chain_caravel_io_skywater130nm_fdhd_cc_openfpga.xml
+++ b/ARCH/openfpga_arch_template/k4_N8_reset_softadder_register_scan_chain_caravel_io_skywater130nm_fdhd_cc_openfpga.xml
@@ -1,14 +1,14 @@
 <!-- Architecture annotation for OpenFPGA framework
-     This annotation supports the k4_frac_cc_sky130nm.xml
-     - General purpose logic block
-       - K = 6, N = 10, I = 40
-       - Single mode
-     - Routing architecture
-       - L = 4, fc_in = 0.15, fc_out = 0.1
-     - Skywater 130nm PDK
-       - circuit models are binded to the opensource skywater
-         foundry middle-speed (ms) standard cell library
-  -->
+  This annotation supports the k4_frac_cc_sky130nm.xml
+  - General purpose logic block
+    - K = 6, N = 10, I = 40
+    - Single mode
+  - Routing architecture
+    - L = 4, fc_in = 0.15, fc_out = 0.1
+  - Skywater 130nm PDK
+    - circuit models are bound to the open-source SkyWater
+      foundry high-density (hd) standard cell library
+-->
 <openfpga_architecture>
   <technology_library>
     <device_library>
@@ -43,7 +43,7 @@
         10e-12
       </delay_matrix>
     </circuit_model>
-	<circuit_model type="inv_buf" name="sky130_fd_sc_hd__buf_1" prefix="sky130_fd_sc_hd__buf_1" is_default="false" verilog_netlist="${SKYWATER_OPENFPGA_HOME}/PDK/skywater-pdk/libraries/sky130_fd_sc_hd/latest/cells/buf/sky130_fd_sc_hd__buf_1.v">
+    <circuit_model type="inv_buf" name="sky130_fd_sc_hd__buf_1" prefix="sky130_fd_sc_hd__buf_1" is_default="false" verilog_netlist="${SKYWATER_OPENFPGA_HOME}/PDK/skywater-pdk/libraries/sky130_fd_sc_hd/latest/cells/buf/sky130_fd_sc_hd__buf_1.v">
       <design_technology type="cmos" topology="buffer" size="1"/>
       <device_technology device_model_name="logic"/>
       <port type="input" prefix="in" lib_name="A" size="1"/>
@@ -79,7 +79,7 @@
         10e-12
       </delay_matrix>
     </circuit_model>
-	<circuit_model type="inv_buf" name="sky130_fd_sc_hd__buf_8" prefix="sky130_fd_sc_hd__buf_8" is_default="false" verilog_netlist="${SKYWATER_OPENFPGA_HOME}/PDK/skywater-pdk/libraries/sky130_fd_sc_hd/latest/cells/buf/sky130_fd_sc_hd__buf_8.v">
+    <circuit_model type="inv_buf" name="sky130_fd_sc_hd__buf_8" prefix="sky130_fd_sc_hd__buf_8" is_default="false" verilog_netlist="${SKYWATER_OPENFPGA_HOME}/PDK/skywater-pdk/libraries/sky130_fd_sc_hd/latest/cells/buf/sky130_fd_sc_hd__buf_8.v">
       <design_technology type="cmos" topology="buffer" size="1" num_level="2" f_per_stage="4"/>
       <device_technology device_model_name="logic"/>
       <port type="input" prefix="in" lib_name="A" size="1"/>
@@ -127,7 +127,7 @@
         10e-12
       </delay_matrix>
     </circuit_model>
-	<!--  Trick OpenFPGA to avoid auto-generating TGATE modules, which are not used in PnR -->
+    <!--  Trick OpenFPGA to avoid auto-generating TGATE modules, which are not used in PnR -->
     <circuit_model type="pass_gate" name="TGATE" prefix="TGATE" is_default="true" verilog_netlist="${SKYWATER_OPENFPGA_HOME}/HDL/common/fd_hd_mux_custom_cells_tt.v">
       <design_technology type="cmos" topology="transmission_gate" nmos_size="1" pmos_size="2"/>
       <device_technology device_model_name="logic"/>
@@ -160,12 +160,12 @@
       </delay_matrix>
     </circuit_model>
     <!-- Define a circuit model for the standard cell MUX2
-         OpenFPGA requires the following truth table for the MUX2
-         When the select signal sel is enabled, the first input, i.e., in0
-         will be propagated to the output, i.e., out
-         If your standard cell provider does not offer the exact truth table,
-         you can simply swap the inputs as shown in the example below
-      -->
+      OpenFPGA requires the following truth table for the MUX2
+      When the select signal sel is enabled, the first input, i.e., in0
+      will be propagated to the output, i.e., out
+      If your standard cell provider does not offer the exact truth table,
+      you can simply swap the inputs as shown in the example below
+    -->
     <circuit_model type="gate" name="sky130_fd_sc_hd__mux2_1" prefix="sky130_fd_sc_hd__mux2_1" verilog_netlist="${SKYWATER_OPENFPGA_HOME}/PDK/skywater-pdk/libraries/sky130_fd_sc_hd/latest/cells/mux2/sky130_fd_sc_hd__mux2_1.v">
       <design_technology type="cmos" topology="MUX2"/>
       <device_technology device_model_name="logic"/>
@@ -183,7 +183,7 @@
       <port type="input" prefix="in" size="1"/>
       <port type="output" prefix="out" size="1"/>
       <wire_param model_type="pi" R="101" C="22.5e-15" num_level="1"/>
-      <!-- model_type could be T, res_val and cap_val DON'T CARE -->
+        <!-- model_type could be T, res_val and cap_val DON'T CARE -->
     </circuit_model>
     <circuit_model type="wire" name="direct_interc" prefix="direct_interc" is_default="true">
       <design_technology type="cmos"/>
@@ -192,7 +192,7 @@
       <port type="input" prefix="in" size="1"/>
       <port type="output" prefix="out" size="1"/>
       <wire_param model_type="pi" R="0" C="0" num_level="1"/>
-      <!-- model_type could be T, res_val cap_val should be defined -->
+        <!-- model_type could be T, res_val cap_val should be defined -->
     </circuit_model>
     <circuit_model type="mux" name="mux_2level" prefix="mux_2level" dump_structural_verilog="true">
       <design_technology type="cmos" structure="multi_level" num_level="2" add_const_input="true" const_input_val="1" local_encoder="true"/>
@@ -230,8 +230,7 @@
       <port type="output" prefix="out" size="1"/>
       <port type="sram" prefix="sram" size="1"/>
     </circuit_model>
-    <!--circuit_model type="mux" name="mux_1level" is_default="true" prefix="mux_1level" dump_structural_verilog="true"-->
-	<circuit_model type="mux" name="mux_1level" prefix="mux_1level" dump_structural_verilog="true">
+    <circuit_model type="mux" name="mux_1level" prefix="mux_1level" dump_structural_verilog="true">
       <design_technology type="cmos" structure="multi_level" num_level="1" add_const_input="true" const_input_val="1" local_encoder="true"/>
       <input_buffer exist="false"/>
       <output_buffer exist="true" circuit_model_name="sky130_fd_sc_hd__inv_1"/>
@@ -240,7 +239,7 @@
       <port type="output" prefix="out" size="1"/>
       <port type="sram" prefix="sram" size="1"/>
     </circuit_model>
-	<circuit_model type="mux" name="mux_1level_io" prefix="mux_1level_io" dump_structural_verilog="true">
+    <circuit_model type="mux" name="mux_1level_io" prefix="mux_1level_io" dump_structural_verilog="true">
       <design_technology type="cmos" structure="multi_level" num_level="1" local_encoder="false"/>
       <input_buffer exist="false"/>
       <output_buffer exist="true" circuit_model_name="sky130_fd_sc_hd__buf_1"/>
@@ -249,7 +248,7 @@
       <port type="output" prefix="out" size="1"/>
       <port type="sram" prefix="sram" size="1"/>
     </circuit_model>
-	<circuit_model type="mux" name="mux_1level_fabric" prefix="mux_1level_fabric" dump_structural_verilog="true">
+    <circuit_model type="mux" name="mux_1level_fabric" prefix="mux_1level_fabric" dump_structural_verilog="true">
       <design_technology type="cmos" structure="multi_level" num_level="1" local_encoder="false"/>
       <input_buffer exist="false"/>
       <output_buffer exist="true" circuit_model_name="sky130_fd_sc_hd__buf_1"/>
@@ -267,7 +266,7 @@
       <port type="output" prefix="out" size="1"/>
       <port type="sram" prefix="sram" size="1"/>
     </circuit_model>
-	<circuit_model type="mux" name="mux_tree" prefix="mux_tree" is_default="true" dump_structural_verilog="true">
+    <circuit_model type="mux" name="mux_tree" prefix="mux_tree" is_default="true" dump_structural_verilog="true">
       <design_technology type="cmos" structure="tree" add_const_input="true" const_input_val="1"/>
       <input_buffer exist="false"/>
       <output_buffer exist="false"/>
@@ -285,7 +284,6 @@
       <port type="output" prefix="out" size="1"/>
       <port type="sram" prefix="sram" size="1"/>
     </circuit_model>
-    <!--DFF subckt ports should be defined as <D> <Q> <CLK> <RESET> <SET>  -->
     <circuit_model type="ff" name="sky130_fd_sc_hd__sdfrtp_1" prefix="sky130_fd_sc_hd__sdfrtp_1" verilog_netlist="${SKYWATER_OPENFPGA_HOME}/PDK/skywater-pdk/libraries/sky130_fd_sc_hd/latest/cells/sdfrtp/sky130_fd_sc_hd__sdfrtp_1.v">
       <design_technology type="cmos"/>
       <input_buffer exist="true" circuit_model_name="sky130_fd_sc_hd__inv_1"/>
@@ -305,28 +303,25 @@
       <lut_input_buffer exist="true" circuit_model_name="sky130_fd_sc_hd__buf_2"/>
       <lut_intermediate_buffer exist="true" circuit_model_name="sky130_fd_sc_hd__buf_2" location_map="-1-"/>
       <pass_gate_logic circuit_model_name="sky130_fd_sc_hd__mux2_1"/>
-      <!--port type="input" prefix="in" size="4" tri_state_map="---1" circuit_model_name="sky130_fd_sc_hd__or2_1"/-->
-	  <port type="input" prefix="in" size="4"/>
+      <port type="input" prefix="in" size="4"/>
       <port type="output" prefix="lut2_out" size="2" lut_frac_level="2" lut_output_mask="2,3"/>
-      <!--port type="output" prefix="lut3_out" size="2" lut_frac_level="3" lut_output_mask="0,1"/-->
       <port type="output" prefix="lut4_out" size="1" lut_output_mask="0"/>
       <port type="sram" prefix="sram" size="16"/>
-      <!--port type="sram" prefix="mode" size="1" mode_select="true" circuit_model_name="CFGSDFFR" default_val="1"/-->
     </circuit_model>
     <!-- new ccFF  -->
-	<circuit_model type="ccff" name="CFGSDFFR" prefix="CFGSDFFR" spice_netlist="${OPENFPGA_PATH}/openfpga_flow/openfpga_cell_library/spice/dff.sp" verilog_netlist="${OPENFPGA_PATH}/openfpga_flow/openfpga_cell_library/verilog/dff.v">
-	   <design_technology type="cmos"/>
-       <input_buffer exist="true" circuit_model_name="sky130_fd_sc_hd__inv_1"/>
-       <output_buffer exist="true" circuit_model_name="sky130_fd_sc_hd__inv_1"/>
-	   <port type="input" prefix="pReset" lib_name="RST" size="1" is_global="true" default_val="0" is_reset="true" is_prog="true"/>
-       <port type="input" prefix="SE" size="1" is_global="true" default_val="0"/>
-       <port type="input" prefix="CFG_DONE" lib_name="CFGE" size="1" is_global="true" default_val="0" is_config_enable="true"/>
-       <port type="input" prefix="D" size="1"/>
-       <port type="input" prefix="SI" size="1"/>
-       <port type="output" prefix="Q" size="1"/>
-       <port type="output" prefix="CFGQN" size="1"/>
-       <port type="output" prefix="CFGQ" size="1"/>
-       <port type="clock" prefix="prog_clk" lib_name="CK" size="1" is_global="true" default_val="0" is_prog="true"/>
+    <circuit_model type="ccff" name="CFGSDFFR" prefix="CFGSDFFR" spice_netlist="${OPENFPGA_PATH}/openfpga_flow/openfpga_cell_library/spice/dff.sp" verilog_netlist="${OPENFPGA_PATH}/openfpga_flow/openfpga_cell_library/verilog/dff.v">
+      <design_technology type="cmos"/>
+      <input_buffer exist="true" circuit_model_name="sky130_fd_sc_hd__inv_1"/>
+      <output_buffer exist="true" circuit_model_name="sky130_fd_sc_hd__inv_1"/>
+      <port type="input" prefix="pReset" lib_name="RST" size="1" is_global="true" default_val="0" is_reset="true" is_prog="true"/>
+      <port type="input" prefix="SE" size="1" is_global="true" default_val="0"/>
+      <port type="input" prefix="CFG_DONE" lib_name="CFGE" size="1" is_global="true" default_val="0" is_config_enable="true"/>
+      <port type="input" prefix="D" size="1"/>
+      <port type="input" prefix="SI" size="1"/>
+      <port type="output" prefix="Q" size="1"/>
+      <port type="output" prefix="CFGQN" size="1"/>
+      <port type="output" prefix="CFGQ" size="1"/>
+      <port type="clock" prefix="prog_clk" lib_name="CK" size="1" is_global="true" default_val="0" is_prog="true"/>
     </circuit_model>
     <circuit_model type="iopad" name="EMBEDDED_IO_HD" prefix="EMBEDDED_IO_HD" is_default="true" verilog_netlist="${SKYWATER_OPENFPGA_HOME}/HDL/common/ql_io_logic.v">
       <design_technology type="cmos"/>
@@ -336,18 +331,9 @@
       <port type="output" prefix="SOC_OUT" lib_name="SOC_OUT" size="1" is_global="true" is_io="true" is_data_io="true"/>
       <port type="output" prefix="inpad" lib_name="FPGA_IN" size="1"/>
       <port type="input" prefix="outpad" lib_name="FPGA_OUT" size="1"/>
-	  
-	  <!-- 20210106 -->
-	  <!--port type="input" prefix="a2f_i" lib_name="SOC_IN" size="1" is_global="true" is_io="true" is_data_io="true"/-->
-	  <!--port type="output" prefix="f2a_o" lib_name="SOC_OUT" size="1" is_global="true" is_io="true" is_data_io="true"/-->
-	  <!--port type="output" prefix="a2f_o" lib_name="FPGA_IN" size="1"/-->
-	  <!--port type="input" prefix="f2a_i" lib_name="FPGA_OUT" size="1"/-->
-	  
-	  <!--port type="clock" prefix="clk" lib_name="CLK" size="1" is_global="false" default_val="0" /-->
-	  <port type="input" prefix="CFG_DONE" lib_name="CFG_DONE" size="1" is_global="true" default_val="0" is_config_enable="true"/>
-	  <port type="sram" prefix="io_dir" lib_name="FPGA_IO_DIR" size="1" mode_select="true" circuit_model_name="CFGSDFFR" default_val="1"/>
-	  <!-- 20210105 -->
 
+      <port type="input" prefix="CFG_DONE" lib_name="CFG_DONE" size="1" is_global="true" default_val="0" is_config_enable="true"/>
+      <port type="sram" prefix="io_dir" lib_name="FPGA_IO_DIR" size="1" mode_select="true" circuit_model_name="CFGSDFFR" default_val="1"/>
     </circuit_model>
     <circuit_model type="hard_logic" name="sky130_fd_sc_hd__mux2_1_wrapper" prefix="sky130_fd_sc_hd__mux2_1_wrapper" verilog_netlist="${SKYWATER_OPENFPGA_HOME}/HDL/common/sky130_fd_sc_hd_wrapper.v">
       <design_technology type="cmos"/>
@@ -382,27 +368,25 @@
     <direct name="scan_chain" circuit_model_name="direct_interc" type="column" x_dir="positive" y_dir="positive"/>
   </direct_connection>
   <tile_annotations>
-    <!--global_port name="clk" tile_port="clb.clk" is_clock="true" default_val="0"/-->
-	<global_port name="clk" is_clock="true" default_val="0">
-		<tile name="clb" port="clk" x="-1" y="-1"/>
-		<tile name="io_top" port="clk" x="-1" y="-1"/>
-		<tile name="io_right" port="clk" x="-1" y="-1"/>
-		<tile name="io_bottom" port="clk" x="-1" y="-1"/>
-		<tile name="io_left" port="clk" x="-1" y="-1"/>
-	</global_port>
-    <!--global_port name="Reset" tile_port="clb.reset" is_reset="true" default_val="1"/-->
-	<global_port name="reset" is_reset="true" default_val="0">
-		<tile name="clb" port="reset" x="-1" y="-1"/>
-		<tile name="io_top" port="reset" x="-1" y="-1"/>
-		<tile name="io_right" port="reset" x="-1" y="-1"/>
-		<tile name="io_bottom" port="reset" x="-1" y="-1"/>
-		<tile name="io_left" port="reset" x="-1" y="-1"/>
-	</global_port>
+    <global_port name="clk" is_clock="true" default_val="0">
+      <tile name="clb" port="clk" x="-1" y="-1"/>
+      <tile name="io_top" port="clk" x="-1" y="-1"/>
+      <tile name="io_right" port="clk" x="-1" y="-1"/>
+      <tile name="io_bottom" port="clk" x="-1" y="-1"/>
+      <tile name="io_left" port="clk" x="-1" y="-1"/>
+    </global_port>
+    <global_port name="reset" is_reset="true" default_val="0">
+      <tile name="clb" port="reset" x="-1" y="-1"/>
+      <tile name="io_top" port="reset" x="-1" y="-1"/>
+      <tile name="io_right" port="reset" x="-1" y="-1"/>
+      <tile name="io_bottom" port="reset" x="-1" y="-1"/>
+      <tile name="io_left" port="reset" x="-1" y="-1"/>
+    </global_port>
   </tile_annotations>
   <pb_type_annotations>
     <!-- physical pb_type binding in complex block IO -->
     <pb_type name="io" physical_mode_name="physical" idle_mode_name="inpad"/>
-    <!-- IMPORTANT: must set unused I/Os to operating in INPUT mode !!! -->
+    <!-- IMPORTANT: unused I/Os must be set to operate in INPUT mode !!! -->
     <pb_type name="io[physical].iopad">
       <interconnect name="mux1" circuit_model_name="mux_1level_io"/>
       <interconnect name="mux2" circuit_model_name="mux_1level_io"/>
@@ -414,9 +398,9 @@
     <pb_type name="io[physical].iopad.ff" circuit_model_name="sky130_fd_sc_hd__sdfrtp_1"/>
     <pb_type name="io[io_input].io_input.ff" physical_pb_type_name="io[physical].iopad.ff"/>
     <pb_type name="io[io_output].io_output.ff" physical_pb_type_name="io[physical].iopad.ff"/>
-    <!-- End physical pb_type binding in complex block IO -->
+      <!-- End physical pb_type binding in complex block IO -->
 
-    <!-- physical pb_type binding in complex block CLB -->
+      <!-- physical pb_type binding in complex block CLB -->
     <pb_type name="clb.fle[physical].fabric">
       <!-- Binding interconnect to circuit models as their physical implementation, if not defined, we use the default model -->
       <interconnect name="mux1" circuit_model_name="mux_1level_fabric"/>
@@ -427,36 +411,23 @@
     </pb_type>	
     <!-- physical mode will be the default mode if not specified -->
     <pb_type name="clb.fle" physical_mode_name="physical"/>
-    <!--pb_type name="clb.fle[physical].fabric.frac_logic.frac_lut4" circuit_model_name="frac_lut4" mode_bits="0"/-->
-	<pb_type name="clb.fle[physical].fabric.frac_logic.frac_lut4" circuit_model_name="frac_lut4"/>
+    <pb_type name="clb.fle[physical].fabric.frac_logic.frac_lut4" circuit_model_name="frac_lut4"/>
     <pb_type name="clb.fle[physical].fabric.frac_logic.carry_follower" circuit_model_name="sky130_fd_sc_hd__mux2_1_wrapper"/>
     <pb_type name="clb.fle[physical].fabric.ff" circuit_model_name="sky130_fd_sc_hd__sdfrtp_1"/>
     <!-- Binding operating pb_type to physical pb_type -->
-    <!--pb_type name="clb.fle[n2_lut3].lut3inter.ble3.lut3" physical_pb_type_name="clb.fle[physical].fabric.frac_logic.frac_lut4" mode_bits="1" physical_pb_type_index_factor="0.5"-->
-      <!-- Binding the lut3 to the first 3 inputs of fracturable lut4 -->
-      <!--port name="in" physical_mode_port="in[0:2]"/-->
-      <!--port name="out" physical_mode_port="lut3_out[0:0]" physical_mode_pin_rotate_offset="1"/-->
-    <!--/pb_type-->
-    <!--pb_type name="clb.fle[n2_lut3].lut3inter.ble3.ff" physical_pb_type_name="clb.fle[physical].fabric.ff"/-->
-    <!-- Binding operating pb_types in mode 'ble4' -->
-    <!--pb_type name="clb.fle[n1_lut4].ble4.lut4" physical_pb_type_name="clb.fle[physical].fabric.frac_logic.frac_lut4" mode_bits="0"-->
-	<pb_type name="clb.fle[n1_lut4].ble4.lut4" physical_pb_type_name="clb.fle[physical].fabric.frac_logic.frac_lut4">
+    <pb_type name="clb.fle[n1_lut4].ble4.lut4" physical_pb_type_name="clb.fle[physical].fabric.frac_logic.frac_lut4">
       <!-- Binding the lut4 to the first 4 inputs of fracturable lut4 -->
       <port name="in" physical_mode_port="in[0:3]"/>
       <port name="out" physical_mode_port="lut4_out"/>
     </pb_type>
     <pb_type name="clb.fle[n1_lut4].ble4.ff" physical_pb_type_name="clb.fle[physical].fabric.ff"/>
-    <!-- Binding operating pb_types in mode 'shift_register' -->
+      <!-- Binding operating pb_types in mode 'shift_register' -->
     <pb_type name="clb.fle[shift_register].shift_reg.ff" physical_pb_type_name="clb.fle[physical].fabric.ff"/>
-	<!-- kliao 2021-0112-->
-	<pb_type name="clb.fle[soft_adder].ble4.lut4" physical_pb_type_name="clb.fle[physical].fabric.frac_logic.frac_lut4">
+    <pb_type name="clb.fle[soft_adder].ble4.lut4" physical_pb_type_name="clb.fle[physical].fabric.frac_logic.frac_lut4">
       <port name="in" physical_mode_port="in[0:3]"/>
       <port name="out" physical_mode_port="lut4_out"/>
     </pb_type>
     <pb_type name="clb.fle[soft_adder].ble4.ff" physical_pb_type_name="clb.fle[physical].fabric.ff"/>
-	
-	
-	
     <!-- End physical pb_type binding in complex block IO -->
   </pb_type_annotations>
 </openfpga_architecture>
diff --git a/ARCH/vpr_arch/k4_N8_tileable_reset_softadder_register_scan_chain_nonLR_caravel_io_skywater130nm.xml b/ARCH/vpr_arch/k4_N8_tileable_reset_softadder_register_scan_chain_nonLR_caravel_io_skywater130nm.xml
index d110543..f141b76 100644
--- a/ARCH/vpr_arch/k4_N8_tileable_reset_softadder_register_scan_chain_nonLR_caravel_io_skywater130nm.xml
+++ b/ARCH/vpr_arch/k4_N8_tileable_reset_softadder_register_scan_chain_nonLR_caravel_io_skywater130nm.xml
@@ -1,27 +1,27 @@
 <!-- 
-  Low-cost homogeneous FPGA Architecture.
+Low-cost homogeneous FPGA Architecture.
 
-  - Skywater 130 nm technology
-  - General purpose logic block: 
-    K = 4, N = 8, fracturable 4 LUTs (can operate as one 4-LUT or two 3-LUTs with all 3 inputs shared) 
-    with optionally registered outputs
-  - Routing architecture:
-      - 10% L = 1, fc_in = 0.15, Fc_out = 0.10
-      - 10% L = 2, fc_in = 0.15, Fc_out = 0.10
-      - 80% L = 4, fc_in = 0.15, Fc_out = 0.10
-      - 100 routing tracks per channel
+- Skywater 130 nm technology
+- General purpose logic block: 
+K = 4, N = 8, fracturable 4 LUTs (can operate as one 4-LUT or two 3-LUTs with all 3 inputs shared) 
+with optionally registered outputs
+- Routing architecture:
+- 10% L = 1, fc_in = 0.15, Fc_out = 0.10
+- 10% L = 2, fc_in = 0.15, Fc_out = 0.10
+- 80% L = 4, fc_in = 0.15, Fc_out = 0.10
+- 100 routing tracks per channel
 
-  Authors: Xifan Tang
+Authors: Xifan Tang
 -->
 <architecture>
   <!-- 
-       ODIN II specific config begins 
-       Describes the types of user-specified netlist blocks (in blif, this corresponds to 
-       ".model [type_of_block]") that this architecture supports.
+    ODIN II specific config begins 
+    Describes the types of user-specified netlist blocks (in blif, this corresponds to 
+    ".model [type_of_block]") that this architecture supports.
 
-       Note: Basic LUTs, I/Os, and flip-flops are not included here as there are 
-       already special structures in blif (.names, .input, .output, and .latch) 
-       that describe them.
+    Note: Basic LUTs, I/Os, and flip-flops are not included here as there are 
+    already special structures in blif (.names, .input, .output, and .latch) 
+    that describe them.
   -->
   <models>
     <!-- A virtual model for I/O to be used in the physical mode of io block -->
@@ -68,19 +68,17 @@
   </models>
   <tiles>
     <!-- Do NOT add clock pins to I/O here!!! VPR does not build clock network in the way that OpenFPGA can support
-         If you need to register the I/O, define clocks in the circuit models
-         These clocks can be handled in back-end
-     -->
+      If you need to register the I/O, define clocks in the circuit models
+      These clocks can be handled in back-end
+    -->
     <!-- Top-side has 1 I/O per tile -->
     <tile name="io_top" capacity="16" area="0">
       <equivalent_sites>
         <site pb_type="io"/>
       </equivalent_sites>
       <clock name="clk" num_pins="1"/>
-      <!--input name="a2f_i" num_pins="1"/-->
       <input name="f2a_i" num_pins="1"/>
       <output name="a2f_o" num_pins="1"/>
-      <!--output name="f2a_o" num_pins="1"/-->
       <input name="sc_in" num_pins="1"/>
       <output name="sc_out" num_pins="1"/>
       <input name="reset" num_pins="1" is_non_clock_global="true"/>
@@ -202,7 +200,7 @@
       <col type="io_left" startx="0" priority="100"/>
       <col type="io_right" startx="W-1" priority="100"/>
       <corners type="EMPTY" priority="101"/>
-      <!--Fill with 'clb'-->
+        <!--Fill with 'clb'-->
       <fill type="clb" priority="10"/>
     </auto_layout>
     <fixed_layout name="2x2" width="4" height="4">
@@ -212,7 +210,7 @@
       <col type="io_left" startx="0" priority="100"/>
       <col type="io_right" startx="W-1" priority="100"/>
       <corners type="EMPTY" priority="101"/>
-      <!--Fill with 'clb'-->
+        <!--Fill with 'clb'-->
       <fill type="clb" priority="10"/>
     </fixed_layout>
     <fixed_layout name="12x12" width="14" height="14">
@@ -222,7 +220,7 @@
       <col type="io_left" startx="0" priority="100"/>
       <col type="io_right" startx="W-1" priority="100"/>
       <corners type="EMPTY" priority="101"/>
-      <!--Fill with 'clb'-->
+        <!--Fill with 'clb'-->
       <fill type="clb" priority="10"/>
     </fixed_layout>
     <fixed_layout name="32x32" width="34" height="34">
@@ -232,30 +230,30 @@
       <col type="io_left" startx="0" priority="100"/>
       <col type="io_right" startx="W-1" priority="100"/>
       <corners type="EMPTY" priority="101"/>
-      <!--Fill with 'clb'-->
+        <!--Fill with 'clb'-->
       <fill type="clb" priority="10"/>
     </fixed_layout>
   </layout>
   <device>
     <!-- VB & JL: Using Ian Kuon's transistor sizing and drive strength data for routing, at 40 nm. Ian used BPTM 
-			     models. We are modifying the delay values however, to include metal C and R, which allows more architecture
-			     experimentation. We are also modifying the relative resistance of PMOS to be 1.8x that of NMOS
-			     (vs. Ian's 3x) as 1.8x lines up with Jeff G's data from a 45 nm process (and is more typical of 
-			     45 nm in general). I'm upping the Rmin_nmos from Ian's just over 6k to nearly 9k, and dropping 
-			     RminW_pmos from 18k to 16k to hit this 1.8x ratio, while keeping the delays of buffers approximately
-			     lined up with Stratix IV. 
-			     We are using Jeff G.'s capacitance data for 45 nm (in tech/ptm_45nm).
-			     Jeff's tables list C in for transistors with widths in multiples of the minimum feature size (45 nm).
-			     The minimum contactable transistor is 2.5 * 45 nm, so I need to multiply drive strength sizes in this file
-	                     by 2.5x when looking up in Jeff's tables.
-			     The delay values are lined up with Stratix IV, which has an architecture similar to this
-			     proposed FPGA, and which is also 40 nm 
-			     C_ipin_cblock: input capacitance of a track buffer, which VPR assumes is a single-stage
-			     4x minimum drive strength buffer. -->
+      models. We are modifying the delay values however, to include metal C and R, which allows more architecture
+      experimentation. We are also modifying the relative resistance of PMOS to be 1.8x that of NMOS
+      (vs. Ian's 3x) as 1.8x lines up with Jeff G's data from a 45 nm process (and is more typical of 
+      45 nm in general). I'm upping the Rmin_nmos from Ian's just over 6k to nearly 9k, and dropping 
+      RminW_pmos from 18k to 16k to hit this 1.8x ratio, while keeping the delays of buffers approximately
+      lined up with Stratix IV. 
+      We are using Jeff G.'s capacitance data for 45 nm (in tech/ptm_45nm).
+      Jeff's tables list C in for transistors with widths in multiples of the minimum feature size (45 nm).
+      The minimum contactable transistor is 2.5 * 45 nm, so I need to multiply drive strength sizes in this file
+      by 2.5x when looking up in Jeff's tables.
+      The delay values are lined up with Stratix IV, which has an architecture similar to this
+      proposed FPGA, and which is also 40 nm 
+      C_ipin_cblock: input capacitance of a track buffer, which VPR assumes is a single-stage
+      4x minimum drive strength buffer. -->
     <sizing R_minW_nmos="8926" R_minW_pmos="16067"/>
-    <!-- The grid_logic_tile_area below will be used for all blocks that do not explicitly set their own (non-routing)
-     	  area; set to 0 since we explicitly set the area of all blocks currently in this architecture file.
-	  -->
+    <!-- The grid_logic_tile_area below will be used for all blocks that do not explicitly set their own (non-routing)
+      area; set to 0 since we explicitly set the area of all blocks currently in this architecture file.
+    -->
     <area grid_logic_tile_area="0"/>
     <chan_width_distr>
       <x distr="uniform" peak="1.000000"/>
@@ -266,28 +264,28 @@
   </device>
   <switchlist>
     <!-- VB: the mux_trans_size and buf_size data below is in minimum width transistor *areas*, assuming the purple
-	       book area formula. This means the mux transistors are about 5x minimum drive strength.
-	       We assume the first stage of the buffer is 3x min drive strength to be reasonable given the large 
-	       mux transistors, and this gives a reasonable stage ratio of a bit over 5x to the second stage. We assume
-	       the n and p transistors in the first stage are equal-sized to lower the buffer trip point, since it's fed
-	       by a pass transistor mux. We can then reverse engineer the buffer second stage to hit the specified 
-	       buf_size (really buffer area) - 16.2x minimum drive nmos and 1.8*16.2 = 29.2x minimum drive.
-	       I then took the data from Jeff G.'s PTM modeling of 45 nm to get the Cin (gate of first stage) and Cout 
-	       (diff of second stage) listed below.  Jeff's models are in tech/ptm_45nm, and are in min feature multiples.
-	       The minimum contactable transistor is 2.5 * 45 nm, so I need to multiply the drive strength sizes above by 
-	       2.5x when looking up in Jeff's tables.
-	       Finally, we choose a switch delay (58 ps) that leads to length 4 wires having a delay equal to that of SIV of 126 ps.
-	       This also leads to the switch being 46% of the total wire delay, which is reasonable. -->
+      book area formula. This means the mux transistors are about 5x minimum drive strength.
+      We assume the first stage of the buffer is 3x min drive strength to be reasonable given the large 
+      mux transistors, and this gives a reasonable stage ratio of a bit over 5x to the second stage. We assume
+      the n and p transistors in the first stage are equal-sized to lower the buffer trip point, since it's fed
+      by a pass transistor mux. We can then reverse engineer the buffer second stage to hit the specified 
+      buf_size (really buffer area) - 16.2x minimum drive nmos and 1.8*16.2 = 29.2x minimum drive.
+      I then took the data from Jeff G.'s PTM modeling of 45 nm to get the Cin (gate of first stage) and Cout 
+      (diff of second stage) listed below.  Jeff's models are in tech/ptm_45nm, and are in min feature multiples.
+      The minimum contactable transistor is 2.5 * 45 nm, so I need to multiply the drive strength sizes above by 
+      2.5x when looking up in Jeff's tables.
+      Finally, we choose a switch delay (58 ps) that leads to length 4 wires having a delay equal to that of SIV of 126 ps.
+      This also leads to the switch being 46% of the total wire delay, which is reasonable. -->
     <switch type="mux" name="L1_mux" R="551" Cin=".77e-15" Cout="4e-15" Tdel="58e-12" mux_trans_size="2.630740" buf_size="27.645901"/>
     <switch type="mux" name="L2_mux" R="551" Cin=".77e-15" Cout="4e-15" Tdel="58e-12" mux_trans_size="2.630740" buf_size="27.645901"/>
     <switch type="mux" name="L4_mux" R="551" Cin=".77e-15" Cout="4e-15" Tdel="58e-12" mux_trans_size="2.630740" buf_size="27.645901"/>
-    <!--switch ipin_cblock resistance set to yeild for 4x minimum drive strength buffer-->
+    <!-- switch ipin_cblock resistance set to yield for 4x minimum drive strength buffer -->
     <switch type="mux" name="ipin_cblock" R="2231.5" Cout="0." Cin="1.47e-15" Tdel="7.247000e-11" mux_trans_size="1.222260" buf_size="auto"/>
   </switchlist>
   <segmentlist>
     <!--- VB & JL: using ITRS metal stack data, 96 nm half pitch wires, which are intermediate metal width/space.  
-			     With the 96 nm half pitch, such wires would take 60 um of height, vs. a 90 nm high (approximated as square) Stratix IV tile so this seems
-			     reasonable. Using a tile length of 90 nm, corresponding to the length of a Stratix IV tile if it were square. -->
+      With the 96 nm half pitch, such wires would take 60 um of height, vs. a 90 nm high (approximated as square) Stratix IV tile so this seems
+      reasonable. Using a tile length of 90 nm, corresponding to the length of a Stratix IV tile if it were square. -->
     <!-- GIVE a specific name for the segment! OpenFPGA appreciate that! -->
     <segment name="L1" freq="0.20" length="1" type="unidir" Rmetal="101" Cmetal="22.5e-15">
       <mux name="L1_mux"/>
@@ -314,21 +312,17 @@
     <!-- Define input pads begin -->
     <pb_type name="io">
       <clock name="clk" num_pins="1"/>
-      <!--input name="a2f_i" num_pins="1"/-->
       <input name="f2a_i" num_pins="1"/>
       <output name="a2f_o" num_pins="1"/>
-      <!--output name="f2a_o" num_pins="1"/-->
       <input name="sc_in" num_pins="1"/>
       <output name="sc_out" num_pins="1"/>
       <input name="reset" num_pins="1" is_non_clock_global="true"/>
-      <!-- Physical mode definition begin (physical implementation of the io) -->
+        <!-- Physical mode definition begin (physical implementation of the io) -->
       <mode name="physical" disabled_in_pack="true">
         <pb_type name="iopad" num_pb="1">
           <clock name="clk" num_pins="1"/>
-          <!--input name="a2f_i" num_pins="1"/-->
           <input name="f2a_i" num_pins="1"/>
           <output name="a2f_o" num_pins="1"/>
-          <!--output name="f2a_o" num_pins="1"/-->
           <input name="sc_in" num_pins="1"/>
           <input name="reset" num_pins="1"/>
           <output name="sc_out" num_pins="1"/>
@@ -351,7 +345,6 @@
             <direct name="ff[0:0]-clk" input="iopad.clk" output="ff[0:0].clk"/>
             <direct name="ff[1:1]-clk" input="iopad.clk" output="ff[1:1].clk"/>
             <direct name="ff[0:0]-D" input="iopad.f2a_i" output="ff[0:0].D" />
-            <!--direct name="ff[1:1]-D" input="iopad.a2f_i" output="ff[1:1].D" /-->
             <direct name="ff[1:1]-D" input="pad.inpad" output="ff[1:1].D"/>
             <direct name="ff[0:0]-DI" input="iopad.sc_in" output="ff[0:0].DI"/>
             <direct name="ff[1:1]-DI" input="ff[0:0].Q" output="ff[1:1].DI"/>
@@ -369,10 +362,8 @@
         </pb_type>
         <interconnect>
           <direct name="direct1" input="io.clk" output="iopad.clk"/>
-          <!--direct name="direct2" input="io.a2f_i" output="iopad.a2f_i"/-->
           <direct name="direct3" input="io.f2a_i" output="iopad.f2a_i"/>
           <direct name="direct4" input="iopad.a2f_o" output="io.a2f_o"/>
-          <!--direct name="direct5" input="iopad.f2a_o" output="io.f2a_o"/-->
           <direct name="direct6" input="io.sc_in" output="iopad.sc_in"/>
           <direct name="direct7" input="iopad.sc_out" output="io.sc_out"/>
           <direct name="direct8" input="io.reset" output="iopad.reset"/>
@@ -441,10 +432,10 @@
     <!-- Define I/O pads ends -->
     <!-- Define general purpose logic block (CLB) begin -->
     <!-- -Due to the absence of local routing, 
-         the 4 inputs of fracturable LUT4 are no longer equivalent, 
-         because the 4th input can not be switched when the dual-LUT3 modes are used.
-         So pin equivalence should be applied to the first 3 inputs only
-	  -->
+      the 4 inputs of fracturable LUT4 are no longer equivalent, 
+      because the 4th input can not be switched when the dual-LUT3 modes are used.
+      So pin equivalence should be applied to the first 3 inputs only
+    -->
     <pb_type name="clb">
       <input name="I" num_pins="24" equivalent="full"/>
       <input name="reg_in" num_pins="1"/>
@@ -457,9 +448,9 @@
       <output name="cout" num_pins="1"/>
       <output name="cout_copy" num_pins="1"/>
       <clock name="clk" num_pins="1"/>
-      <!-- Describe fracturable logic element.  
-             Each fracturable logic element has a 6-LUT that can alternatively operate as two 5-LUTs with shared inputs. 
-             The outputs of the fracturable logic element can be optionally registered
+        <!-- Describe fracturable logic element.  
+          Each fracturable logic element has a 4-LUT that can alternatively operate as two 3-LUTs with shared inputs.
+          The outputs of the fracturable logic element can be optionally registered
         -->
       <pb_type name="fle" num_pb="8">
         <input name="in" num_pins="4"/>
@@ -472,7 +463,7 @@
         <output name="sc_out" num_pins="1"/>
         <output name="cout" num_pins="1"/>
         <clock name="clk" num_pins="1"/>
-        <!-- Physical mode definition begin (physical implementation of the fle) -->
+          <!-- Physical mode definition begin (physical implementation of the fle) -->
         <mode name="physical" disabled_in_pack="true">
           <pb_type name="fabric" num_pb="1">
             <input name="in" num_pins="4"/>
@@ -490,7 +481,7 @@
               <input name="cin" num_pins="1"/>
               <output name="out" num_pins="1"/>
               <output name="cout" num_pins="1"/>
-              <!-- Define LUT -->
+                <!-- Define LUT -->
               <pb_type name="frac_lut4" blif_model=".subckt frac_lut4" num_pb="1">
                 <input name="in" num_pins="4"/>
                 <output name="lut2_out" num_pins="2"/>
@@ -509,7 +500,6 @@
                 <direct name="direct4" input="frac_lut4.lut2_out[1:1]" output="carry_follower.a"/>
                 <direct name="direct5" input="frac_lut4.lut2_out[0:0]" output="carry_follower.cin"/>
                 <direct name="direct6" input="carry_follower.cout" output="frac_logic.cout"/>
-                <!-- Xifan Tang: I use out[0] because the output of lut6 in lut6 mode is wired to the out[0] -->
                 <direct name="direct7" input="frac_lut4.lut4_out" output="frac_logic.out"/>
                 <mux name="mux2" input="frac_logic.cin frac_logic.in[2:2]" output="frac_lut4.in[2:2]"/>
               </interconnect>
@@ -528,8 +518,8 @@
             </pb_type>         
             <interconnect>
               <direct name="direct1" input="fabric.in" output="frac_logic.in"/>
-	      <direct name="direct2" input="fabric.sc_in" output="ff.DI"/>
-	      <direct name="direct3" input="fabric.cin" output="frac_logic.cin"/>
+              <direct name="direct2" input="fabric.sc_in" output="ff.DI"/>
+              <direct name="direct3" input="fabric.cin" output="frac_logic.cin"/>
               <direct name="direct4" input="ff.Q" output="fabric.sc_out"/>
               <direct name="direct5" input="ff.Q" output="fabric.reg_out"/>
               <direct name="direct6" input="frac_logic.cout" output="fabric.cout"/>
@@ -566,20 +556,20 @@
             <input name="in" num_pins="4"/>
             <output name="out" num_pins="1"/>
             <clock name="clk" num_pins="1"/>
-            <!-- Define LUT -->
+              <!-- Define LUT -->
             <pb_type name="lut4" blif_model=".names" num_pb="1" class="lut">
               <input name="in" num_pins="4" port_class="lut_in"/>
               <output name="out" num_pins="1" port_class="lut_out"/>
-              <!-- LUT timing using delay matrix -->
-              <!-- These are the physical delay inputs on a Stratix IV LUT but because VPR cannot do LUT rebalancing,
-                       we instead take the average of these numbers to get more stable results
+                <!-- LUT timing using delay matrix -->
+                <!-- These are the physical delay inputs on a Stratix IV LUT but because VPR cannot do LUT rebalancing,
+                  we instead take the average of these numbers to get more stable results
                   82e-12
                   173e-12
                   261e-12
                   263e-12
                   398e-12
                   397e-12
-                  -->
+                -->
               <delay_matrix type="max" in_port="lut4.in" out_port="lut4.out">
                 261e-12
                 261e-12
@@ -648,13 +638,13 @@
       </pb_type>
       <interconnect>
         <!-- We use direct connections to reduce the area to the most
-             The global local routing is going to compensate the loss in routability
-          -->
+          The global local routing is going to compensate for the loss in routability
+        -->
         <!-- FIXME: The implicit port definition results in I0[0] connected to
-                    in[2]. Such twisted connection is not expected.
-                    I[0] should be connected to in[0]
-	  -->
-	<complete name="crossbar" input="clb.I fle[7:0].out" output="fle[7:0].in">
+          in[2]. Such a twisted connection is not expected.
+          I[0] should be connected to in[0]
+        -->
+        <complete name="crossbar" input="clb.I fle[7:0].out" output="fle[7:0].in">
           <!-- TODO: Timing should be backannotated from post-PnR results -->
         </complete>
         <complete name="clks" input="clb.clk" output="fle[7:0].clk">
@@ -662,24 +652,21 @@
         <complete name="resets" input="clb.reset" output="fle[7:0].reset">
         </complete>
         <!-- This way of specifying direct connection to clb outputs is important because this architecture uses automatic spreading of opins.  
-               By grouping to output pins in this fashion, if a logic block is completely filled by 6-LUTs, 
-               then the outputs those 6-LUTs take get evenly distributed across all four sides of the CLB instead of clumped on two sides (which is what happens with a more
-               naive specification).
-          -->
+          By grouping the output pins in this fashion, if a logic block is completely filled by 4-LUTs,
+          then the outputs those 4-LUTs take get evenly distributed across all four sides of the CLB instead of clumped on two sides (which is what happens with a more
+          naive specification).
+        -->
         <direct name="clbouts1" input="fle[3:0].out" output="clb.O[3:0]"/>
-	<direct name="clbouts2" input="fle[7:4].out" output="clb.O[7:4]"/>
-	<direct name="cout_copy" input="fle[7:7].cout" output="clb.cout_copy"/>
-        <!-- Shift register chain links -->
+        <direct name="clbouts2" input="fle[7:4].out" output="clb.O[7:4]"/>
+        <direct name="cout_copy" input="fle[7:7].cout" output="clb.cout_copy"/>
+          <!-- Shift register chain links -->
         <direct name="shift_register_in" input="clb.reg_in" output="fle[0:0].reg_in">
           <!-- Put all inter-block carry chain delay on this one edge -->
           <delay_constant max="0.16e-9" in_port="clb.reg_in" out_port="fle[0:0].reg_in"/>
-          <!--pack_pattern name="chain" in_port="clb.reg_in" out_port="fle[0:0].reg_in"/-->
         </direct>
         <direct name="shift_register_out" input="fle[7:7].reg_out" output="clb.reg_out">
-          <!--pack_pattern name="chain" in_port="fle[7:7].reg_out" out_port="clb.reg_out"/-->
         </direct>
         <direct name="shift_register_link" input="fle[6:0].reg_out" output="fle[7:1].reg_in">
-          <!--pack_pattern name="chain" in_port="fle[6:0].reg_out" out_port="fle[7:1].reg_in"/-->
         </direct>
         <!-- Scan chain links -->
         <direct name="scan_chain_in" input="clb.sc_in" output="fle[0:0].sc_in">