Merge branch 'dev' into HEAD

2022-03-30 15:09:59 +02:00
parent 7db428b827 87f552b1d0
commit e09b4a0f7e
35 changed files with 59059 additions and 913 deletions
--- a/dataflow_neuro/cell_lib_async.act
+++ b/dataflow_neuro/cell_lib_async.act
@ -26,6 +26,33 @@

 namespace tmpl {
    namespace dataflow_neuro{
+        // Keeper cell for node y: an inverter y -> _y whose output is fed
+        // back onto y through a second inverter marked weak.
+        export defcell KEEP_X1 (bool y; bool vdd, vss) {
+            bool _y;
+            prs {
+                // feedback inverter, flagged weak
+                [weak=1] _y => y-
+                // sensing inverter
+                y => _y-
+            }
+            sizing {
+                p_n_mode <- 1;
+                leak_adjust <- 1;
+                _y {-1}; y {-1}
+            }
+        }
+
+        // Asymmetric gate with reset inputs pr_B / sr_B and inverted output stage.
+        //   _y is pulled low when c1, n1, n2 and sr_B are all high (y then goes high).
+        //   _y is pulled high when c1 is low or pr_B is low (y then goes low).
+        export defcell A_1C2N_RB_X1 (bool! y; bool? c1,n1,n2,pr_B, sr_B; bool vdd, vss) {
+            bool _y;
+            prs {
+                // pull-down stack
+                c1 & n1 & n2 & sr_B -> _y-
+                // pull-up network
+                (~c1) | ~pr_B -> _y+
+                // output inverter
+                _y => y-
+            }
+            sizing {
+                p_n_mode <- 1;
+                leak_adjust <- 1;
+                _y {-1}; y {-1}
+            }
+        }
+
        export defcell A_1C1P2N_RB_X1 (bool! y; bool? c1,p1,n1,n2,pr_B, sr_B; bool vdd, vss) {
            bool _y;
            prs{
@ -41,6 +68,22 @@ namespace tmpl {
                    }
        }

+
+        // Asymmetric gate with reset inputs pr_B / sr_B and inverted output stage.
+        //   _y is pulled low when c1, c2, n1 and sr_B are all high (y then goes high).
+        //   _y is pulled high when p1, c1 and c2 are all low, or pr_B is low
+        //   (y then goes low).
+        export defcell A_2C1P1N_RB_X1 (bool! y; bool? c1,c2,p1,n1,pr_B,sr_B; bool vdd, vss) {
+            bool _y;
+            prs {
+                // pull-down stack
+                c1 & c2 & n1 & sr_B -> _y-
+                // pull-up network
+                (~p1 & ~c1 & ~c2) | ~pr_B -> _y+
+                // output inverter
+                _y => y-
+            }
+            sizing {
+                p_n_mode <- 1;
+                leak_adjust <- 1;
+                _y {-1}; y {-1}
+            }
+        }
+
        export defcell A_1C1P2N_R_X1 (bool! y; bool? c1,p1,n1,n2,pr_B, sr_B; bool vdd, vss) {
            prs{
                (~p1 & ~c1)|~pr_B -> y-
--- a/dataflow_neuro/coders.act
+++ b/dataflow_neuro/coders.act
@ -48,6 +48,45 @@ open std::data;
 namespace tmpl {
 	namespace dataflow_neuro {

+		/**
+		 * Dualrail decoder: one output line per decoded input value.
+		 * Nc is the number of dualrail input channels (in.d[0] is bit 0).
+		 * N is the number of output lines that are built.
+		 * Every input rail is fanned out N times through a sigbuf.
+		 * Output i is driven by an andtree over Nc rails: for bit j the true
+		 * rail is used when bit j of i is 1, the false rail when it is 0.
+		 */
+		export template<pint Nc, N>
+		defproc decoder_dualrail (Mx1of2<Nc> in; bool! out[N]; power supply) {
+			// signal buffers: an N-way fan-out for each true and each false rail
+			sigbuf<N> in_tX[Nc];
+			sigbuf<N> in_fX[Nc];
+			(i:Nc:
+				in_tX[i].supply = supply;
+				in_tX[i].in = in.d[i].t;
+
+				in_fX[i].supply = supply;
+				in_fX[i].in = in.d[i].f;
+			)
+
+			// AND trees
+			pint bitval;
+			andtree<Nc> atree[N];
+			(i:0..N-1:
+				atree[i].supply = supply;
+				// Each tree drives exactly one output, so connect it once per tree
+				// instead of once per input bit.
+				atree[i].out = out[i];
+				(j:0..Nc-1:
+					bitval = (i & ( 1 << j )) >> j; // Get binary digit of integer i, column j
+					[bitval = 1 ->
+						atree[i].in[j] = in_tX[j].out[i];
+						[]bitval = 0 ->
+						atree[i].in[j] = in_fX[j].out[i];
+						[]bitval >= 2 -> {false : "decoder_dualrail: bitval must be 0 or 1"};
+						]
+					)
+				)
+		}
+
+
 		/**
 		 * 2D decoder which uses a configurable delay from the VCtrees to buffer ack.
 		 *	Nx is the x size of the decoder array
@ -63,9 +102,6 @@ namespace tmpl {

 			// Buffer to recieve concat(x,y) address packet
 			buffer<NxC+NyC> addr_buf(.in = in, .reset_B = reset_B, .supply = supply);
-			// NEED TO BUFFER OUTPUTS FROM BUFFER I RECKON
-
-

 			// Validity trees
 			vtree<NxC> vtree_x (.supply = supply);
@ -91,39 +127,140 @@ namespace tmpl {
 			// FOR TESTING PURPOSES
 			// !!!!!!!!!!!!!!!!

-			// AND trees
-			pint bitval;
-			andtree<NxC> atree_x[Nx];
-			(k:0..Nx-1:atree_x[k].supply = supply;)
-			(i:0..Nx-1:
-				(j:0..NxC-1:
-					bitval = (i & ( 1 << j )) >> j; // Get binary digit of integer i, column j
-					[bitval = 1 -> 
-						atree_x[i].in[j] = addr_buf.out.d.d[j].t;
-						[]bitval = 0 ->
-						atree_x[i].in[j] = addr_buf.out.d.d[j].f;
-						[]bitval >= 2 -> {false : "fuck"};
-						]
-					atree_x[i].out = outx[i];
-					)
-				)

-			andtree<NyC> atree_y[Ny];
-			(k:0..Ny-1:atree_y[k].supply = supply;)
-			(i:0..Ny-1:
-				(j:0..NyC-1:
-					bitval = (i & ( 1 << j )) >> j; // Get binary digit of integer i, column j
-					[bitval = 1 -> 
-						atree_y[i].in[j] = addr_buf.out.d.d[j+NxC].t;
-						[]bitval = 0 ->
-						atree_y[i].in[j] = addr_buf.out.d.d[j+NxC].f;
-						]
-					atree_y[i].out = outy[i];
-					)
-				)
+			// Decoder X/Y And trees
+			decoder_dualrail<NxC,Nx> d_dr_x(.out = outx, .supply = supply);
+			(i:0..NxC-1:d_dr_x.in.d[i] = addr_buf.out.d.d[i];)
+
+			decoder_dualrail<NyC,Ny> d_dr_y(.out = outy, .supply = supply);
+			(i:0..NyC-1:d_dr_y.in.d[i] = addr_buf.out.d.d[i+NxC];)

 		}

+		/**
+		 * Nx by Ny grid of 2-input AND cells.
+		 * out[x + y*Nx] is driven by the AND2_X1 whose inputs are inx[x] and iny[y].
+		 */
+		export template<pint Nx, Ny>
+		defproc and_grid(bool! out[Nx*Ny]; bool? inx[Nx], iny[Ny]; power supply) {
+			AND2_X1 ands[Nx*Ny];
+			// power rails for every cell
+			(k:Nx*Ny:
+				ands[k].vdd = supply.vdd;
+				ands[k].vss = supply.vss;
+			)
+			// row-major wiring: row index selects iny, column index selects inx
+			(row:Ny:
+				(col:Nx:
+					ands[col + row*Nx].y = out[col + row*Nx];
+					ands[col + row*Nx].a = inx[col];
+					ands[col + row*Nx].b = iny[row];
+				)
+			)
+		}
+
+
+		/**
+		 * 2D decoder which uses synapse handshaking using line pulldowns.
+		 *	Nx, Ny are the x and y sizes of the decoder array (Nx*Ny outputs).
+		 * 	NxC, NyC are the numbers of dual-rail bits in the x and y address.
+		 * 	They are separate parameters because (the author's guess) we can't do logs...
+		 * 	The input word is concat(x,y): bits 0..NxC-1 are x, bits NxC..NxC+NyC-1 are y.
+		 * 	The req on a1of1 out[x + Nx*y] is the req to that synapse.
+		 * 	The ack back from each line should go high when the synapse is charged.
+		 *  N_dly is the number of delay cells in front of each line-end pull up.
+		 * 	It can be set to 0.
+		 */
+		export template<pint NxC, NyC, Nx, Ny, N_dly>
+		defproc decoder_2d_hs (avMx1of2<NxC+NyC> in; a1of1 out[Nx*Ny]; bool? reset_B; power supply) {
+
+			// Buffer to receive concat(x,y) address packet
+			buffer<NxC+NyC> addr_buf(.in = in, .reset_B = reset_B, .supply = supply);
+
+			// Decoder X/Y And trees
+			decoder_dualrail<NxC,Nx> d_dr_x(.supply = supply);
+			(i:0..NxC-1:d_dr_x.in.d[i] = addr_buf.out.d.d[i];)
+			decoder_dualrail<NyC,Ny> d_dr_y(.supply = supply);
+			(i:0..NyC-1:d_dr_y.in.d[i] = addr_buf.out.d.d[i+NxC];)
+
+			// Validity: x and y validity trees are combined by C2el into the buffer's out.v
+			vtree<NxC> vtree_x (.supply = supply);
+			vtree<NyC> vtree_y (.supply = supply);
+			(i:0..NxC-1:vtree_x.in.d[i].t = addr_buf.out.d.d[i].t;)
+			(i:0..NxC-1:vtree_x.in.d[i].f = addr_buf.out.d.d[i].f;)
+			(i:0..NyC-1:vtree_y.in.d[i].t = addr_buf.out.d.d[i+NxC].t;)
+			(i:0..NyC-1:vtree_y.in.d[i].f = addr_buf.out.d.d[i+NxC].f;)
+			A_2C_B_X1 C2el(.c1 = vtree_x.out, .c2 = vtree_y.out, .y = addr_buf.out.v,
+				.vdd = supply.vdd, .vss = supply.vss);
+
+
+			// and grid for reqs into synapses
+			and_grid<Nx, Ny> _and_grid(.inx = d_dr_x.out, .iny = d_dr_y.out, .supply = supply);
+			(i:Nx*Ny: out[i].r = _and_grid.out[i];)
+
+			// Acknowledge pull down time
+
+			// PULLDOWN2_X4 cells on the reqB lines, one per synapse (easier to invert). NOTE(review): this comment used to say "Pull UPs" - confirm polarity.
+			bool _out_reqsB[Nx], _out_acksB[Nx]; // One shared reqB line and one ackB line per x column.
+			PULLDOWN2_X4 req_pulldowns[Nx*Ny];
+			pint index;
+			(i:Nx:
+				(j:Ny:
+					index = i + Nx*j;
+					req_pulldowns[index].a = out[index].a;
+					req_pulldowns[index].b = _out_acksB[i];
+					req_pulldowns[index].y = _out_reqsB[i];
+					req_pulldowns[index].vss = supply.vss;
+					req_pulldowns[index].vdd = supply.vdd;
+				)
+			)
+
+			// ReqB keep cells
+			KEEP_X1 req_keeps[Nx];
+			(i:Nx:
+				req_keeps[i].y = _out_reqsB[i];
+				req_keeps[i].vdd = supply.vdd;
+				req_keeps[i].vss = supply.vss;
+			)
+
+			// req-ack buffers
+			sigbuf<Ny> req_bufs[Nx];
+			(i:Nx:
+				req_bufs[i].in = _out_reqsB[i];
+				req_bufs[i].out[0] = _out_acksB[i]; // DANGER DANGER: only output 0 of the Ny-wide sigbuf is used
+				req_bufs[i].supply = supply;
+			)
+
+			// Line end pull UPs (triggered once synapse reqs removed)
+			delay_fifo<N_dly> pu_dlys[Nx];
+			OR2_X1 pu_ORs[Nx];
+			PULLUP_X4 pu[Nx]; // TODO probably replace this with variable strength PU
+			AND2_X1 pu_ANDs[Nx];
+			(i:Nx:
+				pu_dlys[i].in = _out_acksB[i];
+				pu_dlys[i].supply = supply;
+
+				pu_ORs[i].a = pu_dlys[i].out;
+				pu_ORs[i].b = d_dr_x.out[i];
+				pu_ORs[i].vdd = supply.vdd;
+				pu_ORs[i].vss = supply.vss;
+
+				pu_ANDs[i].a = pu_ORs[i].y;
+				pu_ANDs[i].b = reset_B; // TODO buffer
+				pu_ANDs[i].vdd = supply.vdd;
+				pu_ANDs[i].vss = supply.vss;
+
+				pu[i].a = pu_ANDs[i].y;
+				pu[i].y = _out_reqsB[i];
+				pu[i].vdd = supply.vdd;
+				pu[i].vss = supply.vss;
+			)
+
+			// ORtree over the inverted reqB lines, back to the buffer ack.
+			// This is instead of the ack that came from the delayed validity trees,
+			// in decoder_2d_dly.
+			ortree<Nx>  _ortree(.out = addr_buf.out.a, .supply = supply);
+			INV_X1 out_req_invs[Nx];
+			(i:Nx:
+				out_req_invs[i].a = _out_reqsB[i];
+				out_req_invs[i].vdd = supply.vdd;
+				out_req_invs[i].vss = supply.vss;
+
+				_ortree.in[i] = out_req_invs[i].y;
+			)
+		}
+
+


 /*
@ -224,21 +361,6 @@ namespace tmpl {

 		}

-
-		export template<pint Nx, Ny>
-		defproc and_grid(bool! out[Nx*Ny]; bool? inx[Nx], iny[Ny]; power supply) {
-			AND2_X1 ands[Nx*Ny];
-			(i:0..Nx*Ny-1:ands[i].vss = supply.vss; ands[i].vdd = supply.vdd;)
-			(x:0..Nx-1:
-				(y:0..Ny-1:
-					ands[x + y*Nx].a = inx[x];
-					ands[x + y*Nx].b = iny[y];
-					ands[x + y*Nx].y = out[x + y*Nx];
-					)
-				)
-		}
-
-
 		// Generates the OR-trees required to go from 
 		// N one-hot inputs to Nc dual rail binary encoding.
 		export template<pint Nc, N>
@ -348,7 +470,7 @@ namespace tmpl {


 		export template<pint NxC, NyC, Nx, Ny, ACK_STRENGTH>
-		defproc encoder2D(a1of1 x[Nx]; a1of1 y[Ny]; avMx1of2<(NxC + NyC)> out; power supply; bool reset_B)		{
+		defproc encoder2D(a1of1 inx[Nx]; a1of1 iny[Ny]; avMx1of2<(NxC + NyC)> out; power supply; bool reset_B)		{
 			// Reset buffers
 			pint H = 2*(NxC + NyC); //Reset strength? to be investigated
 			bool _reset_BX,_reset_BXX[H];
@ -359,10 +481,10 @@ namespace tmpl {
 			a1of1 _arb_out_x, _arb_out_y;
 			a1of1 _x_temp[Nx],_y_temp[Ny]; // For wiring the reqs to the arbtrees
 			(i:Nx:
-				_x_temp[i].r = x[i].r;				
+				_x_temp[i].r = inx[i].r;				
 			)
 			(i:Ny:
-				_y_temp[i].r = y[i].r;				
+				_y_temp[i].r = iny[i].r;				
 			)
 			arbtree<Nx> Xarb(.in = _x_temp,.out = _arb_out_x,.supply = supply);
 			arbtree<Ny> Yarb(.in = _y_temp,.out = _arb_out_y,.supply = supply);
@ -372,12 +494,12 @@ namespace tmpl {
 			sigbuf_1output<ACK_STRENGTH> y_ack_arb[Ny];
 			(i:Nx:
 				x_ack_arb[i].in = _x_temp[i].a;
-				x_ack_arb[i].out = x[i].a;
+				x_ack_arb[i].out = inx[i].a;
 				x_ack_arb[i].supply = supply;
 			)
 			(i:Ny:
 				y_ack_arb[i].in = _y_temp[i].a;
-				y_ack_arb[i].out = y[i].a;
+				y_ack_arb[i].out = iny[i].a;
 				y_ack_arb[i].supply = supply;
 			)

@ -408,7 +530,7 @@ namespace tmpl {

 			// X_req ORtree
 			bool _x_req_array[Nx], _x_v_B;
-			(i:Nx:_x_req_array[i] = x[i].r;)
+			(i:Nx:_x_req_array[i] = inx[i].r;)
 			ortree<Nx> x_req_ortree(.in = _x_req_array,.out = _x_v,.supply = supply); //todo BUFF
 			INV_X1 not_x_req_ortree(.a = _x_v,.y = _x_v_B);

@ -450,17 +572,17 @@ namespace tmpl {
 			// Encoders
 			bool x_acks[Nx];
 			Mx1of2<NxC> x_enc_out;
-			(i:Nx:x_acks[i] = x[i].a;)
+			(i:Nx:x_acks[i] = inx[i].a;)
 			dualrail_encoder<NxC, Nx> x_encoder(.in = x_acks, .out = x_enc_out, .supply = supply);

-			bool y_acks[Nx];
+			bool y_acks[Ny];
 			Mx1of2<NyC> y_enc_out;
-			(i:Ny:y_acks[i] = y[i].a;)
+			(i:Ny:y_acks[i] = iny[i].a;)
 			dualrail_encoder<NyC, Ny> y_encoder(.in = y_acks, .out = y_enc_out, .supply = supply);

 			// Valid trees
 			vtree<NxC> vtree_x(.in = x_enc_out, .out = _in_x_v, .supply = supply);
-			vtree<NxC> vtree_y(.in = y_enc_out, .out = _in_y_v, .supply = supply);
+			vtree<NyC> vtree_y(.in = y_enc_out, .out = _in_y_v, .supply = supply);

 			// Buffer func thing 
 			Mx1of2<NxC + NyC> into_buffer;
@ -474,6 +596,185 @@ namespace tmpl {



+		/**
+	   * Neuron handshaking.
+	   * Looks for a rising edge on the neuron req (in.r).
+	   * Then performs a 2d handshake, first on outy and then on outx:
+	   * the outx req stage takes outy.a as one of its inputs.
+	   */
+		export
+		defproc nrn_hs_2D(a1of1 in; a1of1 outx; a1of1 outy; power supply; bool reset_B)	{
+			bool _reset_BX;
+			BUF_X2 reset_buf(.a = reset_B, .y = _reset_BX, .vdd = supply.vdd, .vss = supply.vss);
+
+			bool _en, _req;
+			
+			// A_1C2N_RB_X1 A_ack(.c1 = _en, .n1 = _req, .n2 = in.r, .y = in.a,
+			// 	.pr_B = _reset_BX, .sr_B = _reset_BX, .vss = supply.vss, .vdd = supply.vdd);
+
+			// Switched it back to the A_2C1N variant (in.r on a c input instead of an n input).
+			// Because had the problem that if the req was not removed in time,
+			// it would be recounted as a double spike,
+			// since in.req is still high after the out has been dealt with.
+			A_2C1N_RB_X1 A_ack(.c1 = _en, .c2 = in.r, .n1 = _req, .y = in.a,
+				.pr_B = _reset_BX, .sr_B = _reset_BX, .vss = supply.vss, .vdd = supply.vdd);
+
+
+			A_1C1P_X1 A_en(.p1 = _req, .c1 = in.a, .y = _en,
+				.vss = supply.vss, .vdd = supply.vdd);
+
+			bool _y_a_B, _x_a_B;
+			INV_X2 inv_x(.a = outx.a, .y = _x_a_B, .vss = supply.vss, .vdd = supply.vdd);
+			INV_X2 inv_y(.a = outy.a, .y = _y_a_B, .vss = supply.vss, .vdd = supply.vdd);
+
+			A_2C1P1N_RB_X1 A_req(.p1 = _x_a_B, .c1 = _en, .c2 = _y_a_B, .n1 = in.r, .y = _req,
+				.pr_B = _reset_BX, .sr_B = _reset_BX, .vdd = supply.vdd, .vss = supply.vss);
+
+			// y_req pull up: driven from NAND(_y_a_B, _req)
+			NAND2_X1 nand_y(.a = _y_a_B, .b = _req, .vdd = supply.vdd, .vss = supply.vss);
+			PULLUP_X4 pu_y(.a = nand_y.y, .y = outy.r, .vdd = supply.vdd, .vss = supply.vss);
+
+			// x_req pull up: driven from NAND(_x_a_B, _req, outy.a)
+			NAND3_X1 nand_x(.a = _x_a_B, .b = _req, .c = outy.a, .vdd = supply.vdd, .vss = supply.vss);
+			PULLUP_X4 pu_x(.a = nand_x.y, .y = outx.r, .vdd = supply.vdd, .vss = supply.vss);
+		}
+
+
+
+		/**
+		 * Line-end pull down for a shared request line.
+		 * in is passed through two buffers and an inverter; the pull down cell
+		 * is driven by NAND(inverted in, reset_B), i.e. its control input is
+		 * high when in is high or when reset_B is low.
+		 * out is the line that the PULLDOWN_X4 cell acts on.
+		 */
+		export
+    defproc nrn_line_end_pull_down (bool? in; bool? reset_B; power supply; bool! out)
+    {
+      bool _out, __out, nand_out;
+      BUF_X1 buf1(.a=in, .y=_out, .vdd=supply.vdd,.vss=supply.vss);
+      BUF_X1 buf2(.a=_out, .y=__out, .vdd=supply.vdd,.vss=supply.vss);
+      INV_X1 inv(.a = __out, .vdd=supply.vdd,.vss =supply.vss);
+
+      NAND2_X1 aenor(.a=inv.y, .b=reset_B, .y = nand_out, .vdd=supply.vdd,.vss=supply.vss);
+
+      // Supply rails connected like every other cell in this process;
+      // previously the pull down was left without vdd/vss.
+      PULLDOWN_X4 pull_down(.a=nand_out, .y=out, .vdd=supply.vdd, .vss=supply.vss);
+    }
+
+
+
+		/**
+	   * A 2d grid of neuron handshakers.
+	   * Should then slot into the encoder.
+	   * Each neuron has an a1of1 channel (in), which is tripped when a neuron spikes.
+	   * Neuron (i,j) is in[i + j*Nx]; it shares outx[i] with its column and outy[j] with its row.
+	   * N_dly is number of delay elements to add to line pull down, 
+	   * for the purpose of running ACT sims.
+	   * It should probably be set to 0 though.
+	   */
+		export template<pint Nx, Ny, N_dly>
+		defproc nrn_hs_2D_array(a1of1 in[Nx*Ny]; a1of1 outx[Nx], outy[Ny];
+			power supply; bool reset_B) {
+
+			// Reset fan-out: one Ny-way sigbuf feeding Ny Nx-way sigbufs (one output per neuron)
+			sigbuf<Ny> rsbx(.in = reset_B, .supply = supply);
+			sigbuf<Nx> rsb[Ny]; // ResetSigBuf
+			(j:Ny:
+				rsb[j].in = rsbx.out[j];
+				rsb[j].supply = supply;
+			)
+
+			// Add buffers on output req lines
+			a1of1 _outx[Nx], _outy[Ny];
+			BUF_X4 out_req_buf_x[Nx];
+			(i:Nx:
+				out_req_buf_x[i].vss = supply.vss;
+				out_req_buf_x[i].vdd = supply.vdd;
+				out_req_buf_x[i].a = _outx[i].r;
+				out_req_buf_x[i].y = outx[i].r;
+			)
+			BUF_X4 out_req_buf_y[Ny];
+			(i:Ny:
+				out_req_buf_y[i].vss = supply.vss;
+				out_req_buf_y[i].vdd = supply.vdd;
+				out_req_buf_y[i].a = _outy[i].r;
+				out_req_buf_y[i].y = outy[i].r;
+			)
+			// Add buffers on output ack lines
+			// Note that this should be generalised.
+			// And probably won't even be done by ACT/innovus anyway
+			// TODO: do it properly with sigbufs?
+			BUF_X4 out_ack_buf_x[Nx];
+			(i:Nx:
+				out_ack_buf_x[i].vss = supply.vss;
+				out_ack_buf_x[i].vdd = supply.vdd;
+				out_ack_buf_x[i].a = outx[i].a;
+				out_ack_buf_x[i].y = _outx[i].a;
+			)
+			BUF_X4 out_ack_buf_y[Ny];
+			(i:Ny:
+				out_ack_buf_y[i].vss = supply.vss;
+				out_ack_buf_y[i].vdd = supply.vdd;
+				out_ack_buf_y[i].a = outy[i].a;
+				out_ack_buf_y[i].y = _outy[i].a;
+			)
+
+
+
+			// Create handshake grid 
+			pint index;
+			nrn_hs_2D neurons[Nx*Ny];
+			(i:0..Nx-1:
+				(j:0..Ny-1:
+					index = i + j*Nx;
+					neurons[index].supply = supply;
+					neurons[index].reset_B = rsb[j].out[i];
+					neurons[index].in = in[index];
+					neurons[index].outx = _outx[i];
+					neurons[index].outy = _outy[j];
+				)
+			)
+
+			// Create delay fifos to emulate the fact that the line pull downs
+			// are at the end of the line, and thus slow.
+			// Note that if N_dly = 0, delay fifo is just a pipe.
+			delay_fifo<N_dly> dly_x[Nx];
+			delay_fifo<N_dly> dly_y[Ny];	
+			
+			// Create x line req pull downs (fed by the delayed internal x ack)
+			nrn_line_end_pull_down pd_x[Nx];
+			sigbuf<Nx> rsb_pd_x(.in = reset_B, .supply = supply);
+			(i:0..Nx-1:
+				dly_x[i].supply = supply;
+				dly_x[i].in = _outx[i].a;
+				pd_x[i].in = dly_x[i].out;
+				
+				pd_x[i].out = _outx[i].r;
+				pd_x[i].reset_B = rsb_pd_x.out[i];
+				pd_x[i].supply = supply;
+			)
+
+			// Create y line req pull downs (fed by the delayed internal y ack)
+			nrn_line_end_pull_down pd_y[Ny];
+			sigbuf<Ny> rsb_pd_y(.in = reset_B, .supply = supply);
+			(j:0..Ny-1:
+				dly_y[j].supply = supply;
+				dly_y[j].in = _outy[j].a;
+				pd_y[j].in = dly_y[j].out;
+			
+				pd_y[j].out = _outy[j].r;
+				pd_y[j].reset_B = rsb_pd_y.out[j];
+				pd_y[j].supply = supply;
+			)
+
+			// Add keeps on the internal x and y req lines
+			KEEP_X1 keep_x[Nx];
+			(i:Nx:
+				keep_x[i].vdd = supply.vdd;
+				keep_x[i].vss = supply.vss;
+				keep_x[i].y = _outx[i].r;
+			)
+
+			KEEP_X1 keep_y[Ny];
+			(j:Ny:
+				keep_y[j].vdd = supply.vdd;
+				keep_y[j].vss = supply.vss;
+				keep_y[j].y = _outy[j].r;
+			)
+		}
+
+
 	}

 }
--- a/dataflow_neuro/interfaces.act
+++ b/dataflow_neuro/interfaces.act
@ -0,0 +1,167 @@
+/*************************************************************************
+ *
+ *  This file is part of ACT dataflow neuro library
+ *
+ *  Copyright (c) 2022 University of Groningen - Ole Richter
+ *  Copyright (c) 2022 University of Groningen - Michele Mastella
+ *  Copyright (c) 2022 University of Groningen - Hugh Greatorex
+ *  Copyright (c) 2022 University of Groningen - Madison Cotteret
+ * 
+ *
+ *  This source describes Open Hardware and is licensed under the CERN-OHL-W v2 or later
+ *
+ *  You may redistribute and modify this documentation and make products
+ *  using it under the terms of the CERN-OHL-W v2 (https:/cern.ch/cern-ohl).
+ *  This documentation is distributed WITHOUT ANY EXPRESS OR IMPLIED
+ *  WARRANTY, INCLUDING OF MERCHANTABILITY, SATISFACTORY QUALITY
+ *  AND FITNESS FOR A PARTICULAR PURPOSE. Please see the CERN-OHL-W v2
+ *  for applicable conditions.
+ *
+ *  Source location: https://git.web.rug.nl/bics/actlib_dataflow_neuro
+ *
+ *  As per CERN-OHL-W v2 section 4.1, should You produce hardware based on
+ *  these sources, You must maintain the Source Location visible in its
+ *  documentation.
+ *
+ **************************************************************************
+ */
+import "../../dataflow_neuro/cell_lib_async.act";
+import "../../dataflow_neuro/cell_lib_std.act";
+import "../../dataflow_neuro/treegates.act";
+import "../../dataflow_neuro/primitives.act";
+import std::channel;
+open std::channel;
+
+// import std::func;
+open std;
+
+import std::data;
+open std::data;
+
+
+namespace tmpl {
+	namespace dataflow_neuro {
+
+		/**
+		 * Bundled data (non dual rail, with req)
+		 * to
+		 * quasi delay insensitive channel (dual rail).
+		 * Basically a buffer with a bitwise conversion in front of it.
+		 *   N          number of data bits.
+		 *   N_dly_cfg  width of the configuration word of the programmable
+		 *              delay placed on the incoming req.
+		 *   dly_cfg    that configuration word (wired to the delayprog .s port).
+		 */
+		export template<pint N, N_dly_cfg>
+		defproc bd2qdi(bd<N> in; avMx1of2<N> out; bool? dly_cfg[N_dly_cfg]; power supply; bool? reset_B) {
+			// Delay on req_in
+			bool _req;
+			delayprog<N_dly_cfg> dly(.in = in.r, .out = _req, .s = dly_cfg, .supply = supply);
+
+			// sig buff the reset signal
+			bool _reset_BX, _reset_BXX[N];
+			BUF_X4 reset_buf(.a=reset_B, .y=_reset_BX,.vdd=supply.vdd,.vss=supply.vss);
+            sigbuf<N> reset_bufarray(.in=_reset_BX, .out=_reset_BXX, .supply=supply);
+
+            // sig buff the req
+			bool _reqX, _reqXX[N];
+			BUF_X4 req_buf(.a=_req, .y=_reqX,.vdd=supply.vdd,.vss=supply.vss);
+            sigbuf<N> req_bufarray(.in=_reqX, .out=_reqXX, .supply=supply);
+
+            // bd2qdi conversion
+            // Each data line drives the n1 input of its true-rail gate;
+            // its inverse (_inB) drives the n1 input of the false-rail gate.
+            bool _inB[N];
+            INV_X1 input_invs[N];
+            (i:N:
+            	input_invs[i].a = in.d[i];
+            	input_invs[i].y = _inB[i];
+            	input_invs[i].vss = supply.vss;
+            	input_invs[i].vdd = supply.vdd;
+            )
+
+			// BUFFER
+			// Basically the buffer_s but with the validity tree ripped out
+			// and just connected to in_req instead.
+
+			// And probably need a delay on the in_ack to ensure en has time to disable
+			// before the inputs go to another state.
+			// Actually apparently no: there is a fixed, huge delay, already incurred
+			// by communicating with pads-> uC -> windows 95 and back again.
+
+            // Since the input is never invalid, also need a mechanism
+            // for the output to become invalid, when an out_ack is received. 
+
+            //control
+            bool _en;
+            A_3C_RB_X4 inack_ctl(.c1=_en,.c2=_reqX,.c3=out.v,.y=in.a,.pr_B=_reset_BX,.sr_B=_reset_BX,.vdd=supply.vdd,.vss=supply.vss);
+            A_1C1P_X1 en_ctl(.c1=in.a,.p1=out.v,.y=_en,.vdd=supply.vdd,.vss=supply.vss);
+
+            //function
+            bool _out_a_BX_t[N],_out_a_BX_f[N],_out_a_B,_en_X_t[N],_en_X_f[N];
+            A_2C2N_RB_X4 f_buf_func[N];
+            A_2C2N_RB_X4 t_buf_func[N];
+            sigbuf<N> en_buf_t(.in=_en, .out=_en_X_t, .supply=supply);
+            sigbuf<N> en_buf_f(.in=_en, .out=_en_X_f, .supply=supply);
+            INV_X1 out_a_inv(.a=out.a,.y=_out_a_B, .vss = supply.vss, .vdd = supply.vdd);
+            // Instance names now match the nets they drive (_t buffer -> _t net,
+            // _f buffer -> _f net); they were crossed before. Both buffers are
+            // identical and share the same input, so the circuit is unchanged.
+            sigbuf<N> out_a_B_buf_t(.in=_out_a_B,.out=_out_a_BX_t, .supply=supply);
+            sigbuf<N> out_a_B_buf_f(.in=_out_a_B,.out=_out_a_BX_f, .supply=supply);
+            // check if you can also do single var to array connect a=b[N]
+            // and remove them from the loop
+            (i:N: 
+                f_buf_func[i].y=out.d.d[i].f;
+                t_buf_func[i].y=out.d.d[i].t;
+                f_buf_func[i].c1=_en_X_f[i];
+                t_buf_func[i].c1=_en_X_t[i];
+                f_buf_func[i].c2=_out_a_BX_f[i];
+                t_buf_func[i].c2=_out_a_BX_t[i];
+                f_buf_func[i].n1=_inB[i];
+                t_buf_func[i].n1=in.d[i];
+                f_buf_func[i].n2=_reqXX[i];
+                t_buf_func[i].n2=_reqXX[i];
+                f_buf_func[i].vdd=supply.vdd;
+                t_buf_func[i].vdd=supply.vdd;
+                f_buf_func[i].vss=supply.vss;
+                t_buf_func[i].vss=supply.vss;
+                t_buf_func[i].pr_B = _reset_BXX[i];
+                t_buf_func[i].sr_B = _reset_BXX[i];
+                f_buf_func[i].pr_B = _reset_BXX[i];
+                f_buf_func[i].sr_B = _reset_BXX[i];
+            )
+
+
+		}
+
+
+		
+		/**
+		 * quasi delay insensitive channel (dual rail)
+		 * to
+		 * Bundled data (non dual rail, with req).
+		 * The input is taken in by a buffer; a validity tree over the buffer's
+		 * output rails drives both the buffer's out.v and, through a
+		 * programmable delay, the bundled-data req. The data bits are the
+		 * true rails of the buffer output.
+		 *   N          number of data bits.
+		 *   N_dly_cfg  width of the delay configuration word.
+		 *   dly_cfg    that configuration word (wired to the delayprog .s port).
+		 */
+		export template<pint N, N_dly_cfg>
+		defproc qdi2bd(avMx1of2<N> in; bd<N> out; bool? dly_cfg[N_dly_cfg]; power supply; bool? reset_B) {
+
+            // Buffer
+            buffer<N> buf(.in = in, .supply = supply, .reset_B = reset_B);
+            buf.out.a = out.a;
+            
+            // Vtree
+            vtree<N> out_vtree(.supply = supply);
+            (i:N:
+            	out_vtree.in.d[i].t = buf.out.d.d[i].t;
+            	out_vtree.in.d[i].f = buf.out.d.d[i].f;
+            )
+            buf.out.v = out_vtree.out;
+
+            // Delay: the validity signal, delayed, becomes the outgoing req.
+            // (.in is connected here; no separate connection statement is needed.)
+            delayprog<N_dly_cfg> dly(.in = out_vtree.out, .out = out.r, .s = dly_cfg, .supply = supply);
+
+            // Wire output data bits to buffer True lines
+            (i:N:
+            	buf.out.d.d[i].t = out.d[i];
+            )
+
+		}
+
+
+
+	}
+
+}
--- a/dataflow_neuro/primitives.act
+++ b/dataflow_neuro/primitives.act
@ -165,7 +165,7 @@ namespace tmpl {
            fifo_element[i].supply = supply;
            fifo_element[i].reset_B = _reset_BXX[i];
          )
-          fifo_element[N-1].out = out;
+          fifo_element[M-1].out = out;

          // reset buffers
          bool _reset_BX;
@ -685,26 +685,90 @@ namespace tmpl {
            (i:((1<<N)-1):dly[i].vss = supply.vss;)
        }

-        export
-        defproc line_end_pull_up (a1of1 in; bool? reset_B; power supply; bool! out)
-        {
-          bool _out, __out, nor_out;
-          BUF_X4 buf1(.a=in.a, .y=_out, .vdd=supply.vdd,.vss=supply.vss);
-          BUF_X4 buf2(.a=_out, .y=__out, .vdd=supply.vdd,.vss=supply.vss);
+        // Non programmable delays
+        // N is number of DLY4_X1 delays to have in series (not log!!).
+        // With N = 0 no delay cell is built and in is wired straight to out.
+        // Is useful for testing purposes.
+        // But should probably remove before running innovus etc.
+        export template<pint N>
+        defproc delay_fifo (bool out; bool in; power supply) {
+            { N >= 0 : "What?" };
+            [N >= 1 ->
+            DLY4_X1 dly[N];
 
-          NOR2_X1 aenor(.a=_out, .b=reset_B, .y = nor_out, .vdd=supply.vdd,.vss=supply.vss);
+            dly[0].vdd = supply.vdd;
+            dly[0].vss = supply.vss;
+            dly[0].a = in;
 
-          PULLUP_X4 pull_up(.a=nor_out, .y=out);
+            (i:1..N-1:
+                dly[i].vdd = supply.vdd;
+                dly[i].vss = supply.vss;
+                dly[i].a = dly[i-1].y;
+            )
+
+            dly[N-1].vdd = supply.vdd;
+            dly[N-1].vss = supply.vss;
+            dly[N-1].y = out;
+            [] N = 0 ->
+            // Zero-length delay is a plain wire. This guard used to read
+            // N = 1, which the first guard already covers, so N = 0 left
+            // in and out unconnected.
+            in = out;
+            ]
+            
         }

-        defproc line_end_pull_down (a1of1 in; bool? reset_B; power supply; bool! out)
+
+        /**
+         * Appends a hard-coded word "VAL" to an input.
+         * Works by piping through all sigs, but adding
+         * some extra sigs when the input is valid.
+         * N is size of channel to pipe through.
+         * NVAL is size (in bits) of word to be put on output.
+         * VAL is word to be put on output; it must fit in NVAL bits.
+         * Output looks like
+         * 0..............N........N+NVAL-1
+         * --input_data----LSB....MSB
+         *
+         * For every appended bit, the rail matching the bit of VAL follows the
+         * validity of the input word; the other rail is tied low.
+         */
+        export template<pint N, NVAL, VAL>
+        defproc append (avMx1of2<N> in; avMx1of2<N+NVAL> out; power supply)
         {
-          bool _out, __out, nor_out;
-          BUF_X4 buf1(.a=in.a, .y=_out, .vdd=supply.vdd,.vss=supply.vss);
-          BUF_X4 buf2(.a=_out, .y=__out, .vdd=supply.vdd,.vss=supply.vss);
+            { N >= 0 : "What?" };
+            { NVAL >= 0 : "What?" };
+            // VAL has to be representable in NVAL bits (operands were swapped before).
+            { VAL < (1 << NVAL) : "VAL too big!" };
 
-          NOR2_X1 aenor(.a=_out, .b=reset_B, .y = nor_out, .vdd=supply.vdd,.vss=supply.vss);
+            // valid tree
+            vtree<N> in_val(.supply = supply);
+            (i:N:
+                in_val.in.d[i].t = in.d.d[i].t;
+                in_val.in.d[i].f = in.d.d[i].f;
+            )
+
+            // wire through most signals
+            (i:N:
+                in.d.d[i].t = out.d.d[i].t;
+                in.d.d[i].f = out.d.d[i].f;
+            )
+            in.a = out.a;
+            in.v = out.v;
+
+            // appender
+            pint bitval;
+            sigbuf<NVAL> sb(.in = in_val.out, .supply = supply);
+            TIELO_X1 tielows[NVAL];
+            (i:NVAL:tielows[i].vss = supply.vss; tielows[i].vdd = supply.vdd;)
+            (i:0..NVAL-1:
+                bitval = (VAL & ( 1 << i )) >> i;
+                [  bitval = 1 ->
+                    out.d.d[i+N].t = sb.out[i];
+                    out.d.d[i+N].f = tielows[i].y;
+                [] bitval = 0 ->
+                    out.d.d[i+N].f = sb.out[i];
+                    out.d.d[i+N].t = tielows[i].y;
+                [] bitval >= 2 -> {false : "append: bitval must be 0 or 1"};        
+                ]
+            )
 
-          PULLUP_X4 pull_down(.a=nor_out, .y=out);
         }
+
+
+
+
 }}