diff --git a/dataflow_neuro/cell_lib_async.act b/dataflow_neuro/cell_lib_async.act index 65a7e80..4fcf831 100644 --- a/dataflow_neuro/cell_lib_async.act +++ b/dataflow_neuro/cell_lib_async.act @@ -565,7 +565,7 @@ namespace tmpl { } } export - defproc PULLDOWN_X4(bool? a; bool! y; bool? vdd, vss) + defproc A_1N_U_X4(bool? a; bool! y; bool? vdd, vss) { prs{ [keeper=0] a -> y- @@ -573,7 +573,7 @@ namespace tmpl { } export - defproc PULLDOWN2_X4(bool? a, b; bool! y; bool? vdd, vss) + defproc A_2N_U_X4(bool? a, b; bool! y; bool? vdd, vss) { prs{ [keeper=0] a & b -> y- @@ -581,7 +581,7 @@ namespace tmpl { } export - defproc PULLUP_X4(bool? a; bool! y; bool? vdd, vss) + defproc A_1P_U_X4(bool? a; bool! y; bool? vdd, vss) { prs{ [keeper=0] ~a -> y+ diff --git a/dataflow_neuro/coders.act b/dataflow_neuro/coders.act index 546798f..c656820 100644 --- a/dataflow_neuro/coders.act +++ b/dataflow_neuro/coders.act @@ -202,7 +202,7 @@ defproc decoder_2d_hs (avMx1of2 in; a1of1 out[Nx*Ny]; bool? reset_B; po // Pull DOWNs on the ackB lines by synapses (easier to invert). bool _out_acksB[Nx]; // The vertical output ack lines from each syn. - PULLDOWN2_X4 ack_pulldowns[Nx*Ny]; + A_2N_U_X4 ack_pulldowns[Nx*Ny]; pint index; (i:Nx: (j:Ny: @@ -220,8 +220,8 @@ defproc decoder_2d_hs (avMx1of2 in; a1of1 out[Nx*Ny]; bool? reset_B; po // bc smaller // and bc the delay that an AND induces means that the pullup could // end up fighting a synapse pulldown, as both have the correct req sigs. - PULLUP_X4 pu[Nx]; // TODO probably replace this with variable strength PU - PULLUP_X4 pu_reset[Nx]; + A_1P_U_X4 pu[Nx]; // TODO probably replace this with variable strength PU + A_1P_U_X4 pu_reset[Nx]; (i:Nx: pu[i].a = d_dr_xX[i].out[Ny]; pu[i].y = _out_acksB[i]; @@ -256,6 +256,118 @@ defproc decoder_2d_hs (avMx1of2 in; a1of1 out[Nx*Ny]; bool? reset_B; po } +/** + * 2D decoder which uses either synapse handshaking, or just a delay. + * Controlled by the "hs_en" (handshake_enable) config bit. 
+ * hs_en = 0 -> use delayed version. + * hs_en = 1 -> use synapse handshaking. + * Regardless of which version is used, the final ack going to the buffer + * goes through the prog_delay block. + * Thus, for the handshaking version to be used "correctly", + * dly_cfg should be set to all zeros. + */ +export template +defproc decoder_2d_hybrid (avMx1of2 in; a1of1 out[Nx*Ny]; bool? dly_cfg[N_dly_cfg], hs_en, + reset_B; power supply) { + + // Buffer to receive concat(x,y) address packet + buffer addr_buf(.in = in, .reset_B = reset_B, .supply = supply); + + // Decoder X/Y And trees + decoder_dualrail d_dr_x(.supply = supply); + (i:0..NxC-1:d_dr_x.in.d[i] = addr_buf.out.d.d[i];) + decoder_dualrail d_dr_y(.supply = supply); + (i:0..NyC-1:d_dr_y.in.d[i] = addr_buf.out.d.d[i+NxC];) + + // sig buf for reqx lines, since they go to synapse pull down gates. + sigbuf d_dr_xX[Nx]; + (i:Nx: + d_dr_xX[i].in = d_dr_x.out[i]; + d_dr_xX[i].supply = supply; + ) + + // Validity + vtree vtree_x (.supply = supply); + vtree vtree_y (.supply = supply); + (i:0..NxC-1:vtree_x.in.d[i].t = addr_buf.out.d.d[i].t;) + (i:0..NxC-1:vtree_x.in.d[i].f = addr_buf.out.d.d[i].f;) + (i:0..NyC-1:vtree_y.in.d[i].t = addr_buf.out.d.d[i+NxC].t;) + (i:0..NyC-1:vtree_y.in.d[i].f = addr_buf.out.d.d[i+NxC].f;) + A_2C_B_X1 valid_Cel(.c1 = vtree_x.out, .c2 = vtree_y.out, .y = addr_buf.out.v, + .vdd = supply.vdd, .vss = supply.vss); + + + // and grid for reqs into synapses + and_grid _and_grid(.inx = d_dr_x.out, .iny = d_dr_y.out, .supply = supply); + (i:Nx*Ny: out[i].r = _and_grid.out[i];) + + // Acknowledge pull down time + + // Pull DOWNs on the ackB lines by synapses (easier to invert). + bool _out_acksB[Nx]; // The vertical output ack lines from each syn. 
+ A_2N_U_X4 ack_pulldowns[Nx*Ny]; + pint index; + (i:Nx: + (j:Ny: + index = i + Nx*j; + ack_pulldowns[index].a = out[index].a; + ack_pulldowns[index].b = d_dr_xX[i].out[j]; + ack_pulldowns[index].y = _out_acksB[i]; + ack_pulldowns[index].vss = supply.vss; + ack_pulldowns[index].vdd = supply.vdd; + ) + ) + + // Line end pull UPs (triggered once reqs removed) + // Use two pullups rather than and-pullup + // bc smaller + // and bc the delay that an AND induces means that the pullup could + // end up fighting a synapse pulldown, as both have the correct req sigs. + A_1P_U_X4 pu[Nx]; // TODO probably replace this with variable strength PU + A_1P_U_X4 pu_reset[Nx]; + (i:Nx: + pu[i].a = d_dr_xX[i].out[Ny]; + pu[i].y = _out_acksB[i]; + pu[i].vdd = supply.vdd; + pu[i].vss = supply.vss; + + pu_reset[i].a = reset_B; + pu_reset[i].y = _out_acksB[i]; + pu_reset[i].vdd = supply.vdd; + pu_reset[i].vss = supply.vss; + ) + + // ORtree from all output acks, back to the buffer ack. + // This is instead of the ack that came from the delayed validity trees, + // in decoder_2d_dly. + ortree _ortree(.supply = supply); + INV_X1 out_ack_invs[Nx]; + (i:Nx: + out_ack_invs[i].a = _out_acksB[i]; + out_ack_invs[i].vdd = supply.vdd; + out_ack_invs[i].vss = supply.vss; + + _ortree.in[i] = out_ack_invs[i].y; + ) + + // C element to ensure that the buffer receives an invalid + // _only_ once _both_ ackB has been reset, _and_ its output data + // has been fully invalidated. + // Otherwise run into the issue that ack is removed before data is invalid. + A_2C_B_X1 buf_ack_Cel(.c1 = _ortree.out, .c2 = valid_Cel.y, .y = addr_buf.out.a, + .vdd = supply.vdd, .vss = supply.vss); + + + + + // Programmable delay. NOTE(review): dly.out is connected to addr_buf.out.a, the same node buf_ack_Cel.y is connected to above; no other dly port besides .s/.supply is connected and hs_en is not referenced anywhere in this body -- confirm the intended wiring (TODO). + delayprog dly(.s = dly_cfg, .supply = supply); + dly.out = addr_buf.out.a; + + + +} + /* @@ -626,11 +738,11 @@ defproc decoder_2d_hs (avMx1of2 in; a1of1 out[Nx*Ny]; bool? 
reset_B; po // y_req pull up NAND2_X1 nand_y(.a = _y_a_B, .b = _req, .vdd = supply.vdd, .vss = supply.vss); - PULLUP_X4 pu_y(.a = nand_y.y, .y = outy.r, .vdd = supply.vdd, .vss = supply.vss); + A_1P_U_X4 pu_y(.a = nand_y.y, .y = outy.r, .vdd = supply.vdd, .vss = supply.vss); // x_req pull up NAND3_X1 nand_x(.a = _x_a_B, .b = _req, .c = outy.a, .vdd = supply.vdd, .vss = supply.vss); - PULLUP_X4 pu_x(.a = nand_x.y, .y = outx.r, .vdd = supply.vdd, .vss = supply.vss); + A_1P_U_X4 pu_x(.a = nand_x.y, .y = outx.r, .vdd = supply.vdd, .vss = supply.vss); } @@ -645,7 +757,7 @@ defproc decoder_2d_hs (avMx1of2 in; a1of1 out[Nx*Ny]; bool? reset_B; po NAND2_X1 aenor(.a=inv.y, .b=reset_B, .y = nand_out, .vdd=supply.vdd,.vss=supply.vss); - PULLDOWN_X4 pull_down(.a=nand_out, .y=out); + A_1N_U_X4 pull_down(.a=nand_out, .y=out); }