diff --git a/docs/source/yosys_internals/formats/cell_library.rst b/docs/source/yosys_internals/formats/cell_library.rst
index c80b07402..a4e5adfb7 100644
--- a/docs/source/yosys_internals/formats/cell_library.rst
+++ b/docs/source/yosys_internals/formats/cell_library.rst
@@ -619,6 +619,58 @@ Finite state machines
 
 Add a brief description of the ``$fsm`` cell type.
 
+Coarse arithmetics
+~~~~~~~~~~~~~~~~~~~~~
+
+The ``$macc`` cell type represents a multiply and accumulate block, for summing any number of negated and unnegated signals and arithmetic products of pairs of signals. Cell port A concatenates pairs of signals to be multiplied together. When the second signal in a pair is zero length, a constant 1 is used instead as the second factor. Cell port B concatenates 1-bit-wide signals to also be summed, such as "carry in" in adders.
+
+The cell's ``CONFIG`` parameter determines the layout of cell port ``A``.
+In the terms used for this cell, there are mixed meanings for the term "port". To disambiguate:
+A cell port is for example the A input (it is constructed in C++ as ``cell->setPort(ID::A, ...)``).
+Multiplier ports are pairs of multiplier inputs ("factors").
+If the second signal in such a pair is zero length, no multiplication is necessary, and the first signal is just added to the sum.
+
+In this pseudocode, ``u(foo)`` means an unsigned int that's foo bits long.
+The CONFIG parameter carries the following information:
+
+.. code-block::
+	:force:
+
+	struct CONFIG {
+		u4 num_bits;
+		struct port_field {
+			bool is_signed;
+			bool is_subtract;
+			u(num_bits) factor1_len;
+			u(num_bits) factor2_len;
+		}[num_ports];
+	};
+
+The A cell port carries the following information:
+
+.. code-block::
+	:force:
+
+	struct A {
+		u(CONFIG.port_field[0].factor1_len) port0factor1;
+		u(CONFIG.port_field[0].factor2_len) port0factor2;
+		u(CONFIG.port_field[1].factor1_len) port1factor1;
+		u(CONFIG.port_field[1].factor2_len) port1factor2;
+		...
+	};
+
+No factor1 may have a zero length.
+A factor2 having a zero length implies factor2 is replaced with a constant 1.
+
+Additionally, B is an array of 1-bit-wide unsigned integers to also be summed up.
+Finally, we have:
+
+.. code-block::
+	:force:
+
+	Y = port0factor1 * port0factor2 + port1factor1 * port1factor2 + ...
+		+ B[0] + B[1] + ...
+
 Specify rules
 ~~~~~~~~~~~~~
 
@@ -1152,4 +1198,4 @@ file via ABC using the abc pass.
 
 .. todo:: Add information about ``$lut`` and ``$sop`` cells.
 
-.. todo:: Add information about ``$alu``, ``$macc``, ``$fa``, and ``$lcu`` cells.
+.. todo:: Add information about ``$alu``, ``$fa``, and ``$lcu`` cells.
diff --git a/techlibs/common/simlib.v b/techlibs/common/simlib.v
index 489281f26..1383a2a13 100644
--- a/techlibs/common/simlib.v
+++ b/techlibs/common/simlib.v
@@ -902,18 +902,29 @@ endgenerate
 endmodule
 
 // --------------------------------------------------------
-
+//  |---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|
+//-
+//-     $macc (A, B, Y)
+//-
+//- Multiply and accumulate.
+//- A building block for summing any number of negated and unnegated signals and arithmetic products of pairs of signals. Cell port A concatenates pairs of signals to be multiplied together. When the second signal in a pair is zero length, a constant 1 is used instead as the second factor. Cell port B concatenates 1-bit-wide signals to also be summed, such as "carry in" in adders.
+//- Typically created by the `alumacc` pass, which transforms $add and $mul into $macc cells.
 module \$macc (A, B, Y);
 
 parameter A_WIDTH = 0;
 parameter B_WIDTH = 0;
 parameter Y_WIDTH = 0;
+// CONFIG determines the layout of A, as explained below
 parameter CONFIG = 4'b0000;
 parameter CONFIG_WIDTH = 4;
 
-input [A_WIDTH-1:0] A;
-input [B_WIDTH-1:0] B;
-output reg [Y_WIDTH-1:0] Y;
+// In the terms used for this cell, there are mixed meanings for the term "port". To disambiguate:
+// A cell port is for example the A input (it is constructed in C++ as cell->setPort(ID::A, ...))
+// Multiplier ports are pairs of multiplier inputs ("factors").
+// If the second signal in such a pair is zero length, no multiplication is necessary, and the first signal is just added to the sum.
+input [A_WIDTH-1:0] A; // Cell port A is the concatenation of all arithmetic ports
+input [B_WIDTH-1:0] B; // Cell port B is the concatenation of single-bit unsigned signals to be also added to the sum
+output reg [Y_WIDTH-1:0] Y; // Output sum
 
 // Xilinx XSIM does not like $clog2() below..
 function integer my_clog2;
@@ -929,10 +940,42 @@ function integer my_clog2;
 	end
 endfunction
 
+// Number of bits in each factor length field of CONFIG (one such field per factor in cell port A)
 localparam integer num_bits = CONFIG[3:0] > 0 ? CONFIG[3:0] : 1;
+// Number of multiplier ports
 localparam integer num_ports = (CONFIG_WIDTH-4) / (2 + 2*num_bits);
+// Minimum bit width of an induction variable to iterate over all bits of cell port A
 localparam integer num_abits = my_clog2(A_WIDTH) > 0 ? my_clog2(A_WIDTH) : 1;
 
+// In this pseudocode, u(foo) means an unsigned int that's foo bits long.
+// The CONFIG parameter carries the following information:
+//	struct CONFIG {
+//		u4 num_bits;
+//		struct port_field {
+//			bool is_signed;
+//			bool is_subtract;
+//			u(num_bits) factor1_len;
+//			u(num_bits) factor2_len;
+//		}[num_ports];
+//	};
+
+// The A cell port carries the following information:
+//	struct A {
+//		u(CONFIG.port_field[0].factor1_len) port0factor1;
+//		u(CONFIG.port_field[0].factor2_len) port0factor2;
+//		u(CONFIG.port_field[1].factor1_len) port1factor1;
+//		u(CONFIG.port_field[1].factor2_len) port1factor2;
+//		...
+//	};
+// and num_abits (derived from my_clog2(A_WIDTH), minimum 1) is the number of bits needed to index into A.
+// No factor1 may have a zero length.
+// A factor2 having a zero length implies factor2 is replaced with a constant 1.
+
+// Additionally, B is an array of 1-bit-wide unsigned integers to also be summed up.
+// Finally, we have:
+// Y = port0factor1 * port0factor2 + port1factor1 * port1factor2 + ...
+//     + B[0] + B[1] + ...
+
 function [2*num_ports*num_abits-1:0] get_port_offsets;
 	input [CONFIG_WIDTH-1:0] cfg;
 	integer i, cursor;