diff --git a/dataflow_neuro/coders.act b/dataflow_neuro/coders.act
index 94ab938..d1c1ed2 100644
--- a/dataflow_neuro/coders.act
+++ b/dataflow_neuro/coders.act
@@ -134,9 +134,127 @@ namespace tmpl {
 
 			decoder_dualrail<NyC,Ny> d_dr_y(.out = outy, .supply = supply);
 			(i:0..NyC-1:d_dr_y.in.d[i] = addr_buf.out.d.d[i+NxC];)
-			
+
 		}
 
+		// Nx-by-Ny grid of AND2_X1 cells: cell (col,row) sits at col + row*Nx and joins inx[col], iny[row] to out.
+		export template<pint Nx, Ny>
+		defproc and_grid(bool! out[Nx*Ny]; bool? inx[Nx], iny[Ny]; power supply) {
+			AND2_X1 ands[Nx*Ny];
+			(row:Ny:
+				(col:Nx:
+					ands[col + row*Nx].a = inx[col]; ands[col + row*Nx].b = iny[row];
+					ands[col + row*Nx].y = out[col + row*Nx];
+					ands[col + row*Nx].vdd = supply.vdd; ands[col + row*Nx].vss = supply.vss;
+				)
+			)
+		}
+
+
+		/**
+		 * 2D decoder which uses synapse handshaking using line pulldowns.
+		 *  Nx, Ny are the x and y sizes of the decoder array.
+		 *  NxC, NyC are the numbers of dual-rail bits in the x and y parts of
+		 *  the address (given explicitly; presumably because we can't do logs).
+		 *  The req on each a1of1 out is the req to one synapse; out[x + y*Nx]
+		 *  is the synapse at column x, row y. The ack back from each line
+		 *  should go high when the synapse is charged.
+		 *  N_dly is a hard coded delay of the pull down circuit (can be 0).
+		 */
+		export template<pint NxC, NyC, Nx, Ny, N_dly>
+		defproc decoder_2d_hs (avMx1of2<NxC+NyC> in; a1of1 out[Nx*Ny]; bool? reset_B; power supply) {
+
+			// Buffer to receive concat(x,y) address packet
+			buffer<NxC+NyC> addr_buf(.in = in, .reset_B = reset_B, .supply = supply);
+
+			// Decoder X/Y And trees: x uses address bits [0, NxC), y uses [NxC, NxC+NyC)
+			decoder_dualrail<NxC,Nx> d_dr_x(.supply = supply);
+			(i:0..NxC-1:d_dr_x.in.d[i] = addr_buf.out.d.d[i];)
+			decoder_dualrail<NyC,Ny> d_dr_y(.supply = supply);
+			(i:0..NyC-1:d_dr_y.in.d[i] = addr_buf.out.d.d[i+NxC];)
+
+			// Validity: one vtree per address half, combined by C2el onto addr_buf.out.v
+			vtree<NxC> vtree_x (.supply = supply);
+			vtree<NyC> vtree_y (.supply = supply);
+			(i:0..NxC-1:vtree_x.in.d[i].t = addr_buf.out.d.d[i].t;)
+			(i:0..NxC-1:vtree_x.in.d[i].f = addr_buf.out.d.d[i].f;)
+			(i:0..NyC-1:vtree_y.in.d[i].t = addr_buf.out.d.d[i+NxC].t;)
+			(i:0..NyC-1:vtree_y.in.d[i].f = addr_buf.out.d.d[i+NxC].f;)
+			A_2C_B_X1 C2el(.c1 = vtree_x.out, .c2 = vtree_y.out, .y = addr_buf.out.v,
+				.vdd = supply.vdd, .vss = supply.vss);
+
+
+			// and grid for reqs into synapses
+			and_grid<Nx, Ny> _and_grid(.inx = d_dr_x.out, .iny = d_dr_y.out, .supply = supply);
+			(i:Nx*Ny: out[i].r = _and_grid.out[i];)
+
+			// Acknowledge pull down time
+
+			// Pull UPs on the reqB lines by synapses (easier to invert).
+			bool _out_reqsB[Nx], _out_acksB[Nx]; // The vertical output ack lines from each syn.
+			PULLDOWN2_X4 req_pulldowns[Nx*Ny];
+			// Cell (i,j) is element i + Nx*j, the same layout and_grid uses for out[x + y*Nx].
+			(i:Nx:
+				(j:Ny:
+					// Index written inline: a pint is immutable, so it cannot be re-set per iteration.
+					req_pulldowns[i + Nx*j].a = out[i + Nx*j].a;
+					req_pulldowns[i + Nx*j].b = _out_acksB[i];
+					req_pulldowns[i + Nx*j].y = _out_reqsB[i];
+					req_pulldowns[i + Nx*j].vss = supply.vss;
+					req_pulldowns[i + Nx*j].vdd = supply.vdd;
+				)
+			)
+
+			// ReqB keep cells (one per x line)
+			KEEP_X1 req_keeps[Nx];
+			(i:Nx:
+				req_keeps[i].y = _out_reqsB[i];
+				req_keeps[i].vdd = supply.vdd;
+				req_keeps[i].vss = supply.vss;
+			)
+
+			// req-ack buffers: each reqB line drives its own acksB line through a sigbuf
+			sigbuf<Ny> req_bufs[Nx];
+			(i:Nx:
+				req_bufs[i].in = _out_reqsB[i];
+				req_bufs[i].out[0] = _out_acksB[i]; // DANGER DANGER: only out[0] of the sigbuf<Ny> is connected
+				req_bufs[i].supply = supply;
+			)
+
+			// Line end pull UPs (triggered once synapse reqs removed)
+			delay_fifo<N_dly> pu_dlys[Nx];
+			OR2_X1 pu_ORs[Nx];
+			PULLUP_X4 pu[Nx]; // TODO probably replace this with variable strength PU
+			(i:Nx:
+				pu_dlys[i].in = _out_acksB[i];
+				pu_dlys[i].supply = supply;
+
+				pu_ORs[i].a = pu_dlys[i].out;
+				pu_ORs[i].b = d_dr_x.out[i];
+				pu_ORs[i].vdd = supply.vdd;
+				pu_ORs[i].vss = supply.vss;
+
+				pu[i].a = pu_ORs[i].y;
+				pu[i].y = _out_reqsB[i];
+				pu[i].vdd = supply.vdd;
+				pu[i].vss = supply.vss;
+			)
+
+			// ORtree from all output reqs, back to the buffer ack.
+			// This is instead of the ack that came from the delayed validity trees,
+			// in decoder_2d_dly.
+			ortree<Nx>  _ortree(.out = addr_buf.out.a, .supply = supply);
+			INV_X1 out_req_invs[Nx];
+			(i:Nx:
+				out_req_invs[i].a = _out_reqsB[i];
+				out_req_invs[i].vdd = supply.vdd;
+				out_req_invs[i].vss = supply.vss;
+
+				_ortree.in[i] = out_req_invs[i].y;
+			)
+		}
+
+
 
 
 /*
@@ -237,21 +355,6 @@ namespace tmpl {
 
 		}
 
-
-		export template<pint Nx, Ny>
-		defproc and_grid(bool! out[Nx*Ny]; bool? inx[Nx], iny[Ny]; power supply) {
-			AND2_X1 ands[Nx*Ny];
-			(i:0..Nx*Ny-1:ands[i].vss = supply.vss; ands[i].vdd = supply.vdd;)
-			(x:0..Nx-1:
-				(y:0..Ny-1:
-					ands[x + y*Nx].a = inx[x];
-					ands[x + y*Nx].b = iny[y];
-					ands[x + y*Nx].y = out[x + y*Nx];
-					)
-				)
-		}
-
-
 		// Generates the OR-trees required to go from 
 		// N one-hot inputs to Nc dual rail binary encoding.
 		export template<pint Nc, N>
@@ -532,7 +635,7 @@ namespace tmpl {
 
 
 		export
-    defproc line_end_pull_down (bool? in; bool? reset_B; power supply; bool! out)
+    defproc nrn_line_end_pull_down (bool? in; bool? reset_B; power supply; bool! out)
     {
       bool _out, __out, nand_out;
       BUF_X1 buf1(.a=in, .y=_out, .vdd=supply.vdd,.vss=supply.vss);
@@ -624,7 +727,7 @@ namespace tmpl {
 			delay_fifo<N_dly> dly_y[Ny];	
 			
 			// Create x line req pull downs
-			line_end_pull_down pd_x[Nx];
+			nrn_line_end_pull_down pd_x[Nx];
 			sigbuf<Nx> rsb_pd_x(.in = reset_B, .supply = supply);
 			(i:0..Nx-1:
 				dly_x[i].supply = supply;
@@ -637,7 +740,7 @@ namespace tmpl {
 			)
 
 			// Create y line req pull downs
-			line_end_pull_down pd_y[Ny];
+			nrn_line_end_pull_down pd_y[Ny];
 			sigbuf<Ny> rsb_pd_y(.in = reset_B, .supply = supply);
 			(j:0..Ny-1:
 				dly_y[j].supply = supply;