diff --git a/dataflow_neuro/coders.act b/dataflow_neuro/coders.act
index 85d8e49..b896ec3 100644
--- a/dataflow_neuro/coders.act
+++ b/dataflow_neuro/coders.act
@@ -715,7 +715,7 @@ defproc decoder_2d_hybrid (avMx1of2<NxC+NyC> in; a1of1 out[Nx*Ny]; bool? dly_cfg
 			bool _x_a_B2; // sorry
 			
 			bool _en;
-			A_1C3P2P2N_R_X1 x_ack(); // NEEDS BUFFERING TO X4
+			A_1C3P2P2N_R_X1 x_ack();
 			//branch1
 			x_ack.p4 = _in_x_v;
 			x_ack.p5 = _x_v_B;
@@ -803,13 +803,26 @@ defproc decoder_2d_hybrid (avMx1of2<NxC+NyC> in; a1of1 out[Nx*Ny]; bool? dly_cfg
 			A_2C1P1N_RB_X1 A_req(.p1 = _x_a_B, .c1 = _en, .c2 = _y_a_B, .n1 = in.r, .y = _req,
 				.pr_B = _reset_BX, .sr_B = _reset_BX, .vdd = supply.vdd, .vss = supply.vss);
 
+			// Previous y_req pull up (NAND2 + 1-input pull-up), kept commented out for reference:
+			// NAND2_X1 nand_y(.a = _y_a_B, .b = _req, .vdd = supply.vdd, .vss = supply.vss);
+			// A_1P_U_X4 pu_y(.a = nand_y.y, .y = outy.r, .vdd = supply.vdd, .vss = supply.vss);
+ 
+			// Previous x_req pull up (NAND3 + 1-input pull-up), kept commented out for reference:
+			// NAND3_X1 nand_x(.a = _x_a_B, .b = _req, .c = outy.a, .vdd = supply.vdd, .vss = supply.vss);
+			// A_1P_U_X4 pu_x(.a = nand_x.y, .y = outx.r, .vdd = supply.vdd, .vss = supply.vss);
+
+			// Revised version with fewer timing assumptions.
+			// Core change: the out acks (outy.a, outx.a) are wired straight into the pull-up cells, so they stop the pull-ups without any delay.
 			// y_req pull up
-			NAND2_X1 nand_y(.a = _y_a_B, .b = _req, .vdd = supply.vdd, .vss = supply.vss);
-			A_1P_U_X4 pu_y(.a = nand_y.y, .y = outy.r, .vdd = supply.vdd, .vss = supply.vss);
+			bool _reqB;
+			INV_X1 req_inv(.a = _req, .y = _reqB, .vdd= supply.vdd, .vss = supply.vss);
+			A_2P_U_X4 pu_y(.a = _reqB, .b = outy.a, .y = outy.r, .vdd = supply.vdd, .vss = supply.vss);
  
 			// x_req pull up
-			NAND3_X1 nand_x(.a = _x_a_B, .b = _req, .c = outy.a, .vdd = supply.vdd, .vss = supply.vss);
-			A_1P_U_X4 pu_x(.a = nand_x.y, .y = outx.r, .vdd = supply.vdd, .vss = supply.vss);
+			A_3P_U_X4 pu_x(.a = outx.a, .b = _reqB, .c = _y_a_B, .y = outx.r,
+				.vdd = supply.vdd, .vss = supply.vss);
+
+
 		}