diff --git a/dataflow_neuro/coders.act b/dataflow_neuro/coders.act index 94ab938..d1c1ed2 100644 --- a/dataflow_neuro/coders.act +++ b/dataflow_neuro/coders.act @@ -134,9 +134,127 @@ namespace tmpl { decoder_dualrail d_dr_y(.out = outy, .supply = supply); (i:0..NyC-1:d_dr_y.in.d[i] = addr_buf.out.d.d[i+NxC];) - + } + export template + defproc and_grid(bool! out[Nx*Ny]; bool? inx[Nx], iny[Ny]; power supply) { + AND2_X1 ands[Nx*Ny]; /* one AND2_X1 per (x, y) pair, stored at flat index x + y*Nx */ + (i:0..Nx*Ny-1:ands[i].vss = supply.vss; ands[i].vdd = supply.vdd;) + (x:0..Nx-1: + (y:0..Ny-1: + ands[x + y*Nx].a = inx[x]; + ands[x + y*Nx].b = iny[y]; + ands[x + y*Nx].y = out[x + y*Nx]; + ) + ) + } + + + /** + * 2D decoder which handshakes with the synapses using line pulldowns. + * Nx is the x size of the decoder array. + * NxC is the number of wires in the x channel; it is passed in explicitly, + * presumably because it cannot be derived from Nx with a log (TODO confirm). + * The req on each a1of1 out is the req to that synapse. + * The ack back from each line should go high when the synapse is charged. + * N_dly is a hard coded delay of the pull down circuit. + * It can be set to 0. + */ + export template + defproc decoder_2d_hs (avMx1of2 in; a1of1 out[Nx*Ny]; bool? 
reset_B; power supply) { + + // Buffer to receive concat(x,y) address packet + buffer addr_buf(.in = in, .reset_B = reset_B, .supply = supply); + + // Decoder X/Y And trees: x decodes address bits 0..NxC-1, y decodes bits NxC..NxC+NyC-1 + decoder_dualrail d_dr_x(.supply = supply); + (i:0..NxC-1:d_dr_x.in.d[i] = addr_buf.out.d.d[i];) + decoder_dualrail d_dr_y(.supply = supply); + (i:0..NyC-1:d_dr_y.in.d[i] = addr_buf.out.d.d[i+NxC];) + + // Validity: vtrees over the .t/.f rails of the x and y address bits, joined by C2el onto addr_buf.out.v + vtree vtree_x (.supply = supply); + vtree vtree_y (.supply = supply); + (i:0..NxC-1:vtree_x.in.d[i].t = addr_buf.out.d.d[i].t;) + (i:0..NxC-1:vtree_x.in.d[i].f = addr_buf.out.d.d[i].f;) + (i:0..NyC-1:vtree_y.in.d[i].t = addr_buf.out.d.d[i+NxC].t;) + (i:0..NyC-1:vtree_y.in.d[i].f = addr_buf.out.d.d[i+NxC].f;) + A_2C_B_X1 C2el(.c1 = vtree_x.out, .c2 = vtree_y.out, .y = addr_buf.out.v, + .vdd = supply.vdd, .vss = supply.vss); + + + // and grid for reqs into synapses: out[x+y*Nx].r pairs d_dr_x.out[x] with d_dr_y.out[y] + and_grid _and_grid(.inx = d_dr_x.out, .iny = d_dr_y.out, .supply = supply); + (i:Nx*Ny: out[i].r = _and_grid.out[i];) + + // Acknowledge pull down time + + // Pull UPs on the reqB lines by synapses (easier to invert). NOTE(review): the cells instantiated for this are PULLDOWN2_X4 - confirm pull-up vs pull-down. + bool _out_reqsB[Nx], _out_acksB[Nx]; // The vertical output ack lines from each syn. 
+ PULLDOWN2_X4 req_pulldowns[Nx*Ny]; + pint index; + (i:Nx: + (j:Ny: + index = i + Nx*j; /* same flat indexing as and_grid (x + y*Nx) */ + req_pulldowns[index].a = out[index].a; + req_pulldowns[index].b = _out_acksB[i]; + req_pulldowns[index].y = _out_reqsB[i]; + req_pulldowns[index].vss = supply.vss; + req_pulldowns[index].vdd = supply.vdd; + ) + ) + + // ReqB keep cells: one KEEP_X1 on each of the Nx _out_reqsB lines + KEEP_X1 req_keeps[Nx]; + (i:Nx: + req_keeps[i].y = _out_reqsB[i]; + req_keeps[i].vdd = supply.vdd; + req_keeps[i].vss = supply.vss; + ) + + // req-ack buffers: each sigbuf takes _out_reqsB[i] as input and has out[0] connected to _out_acksB[i] + sigbuf req_bufs[Nx]; + (i:Nx: + req_bufs[i].in = _out_reqsB[i]; + req_bufs[i].out[0] = _out_acksB[i]; // DANGER DANGER + req_bufs[i].supply = supply; + ) + + // Line end pull UPs (triggered once synapse reqs removed): pu[i].a is the OR2_X1 of the delayed _out_acksB[i] and d_dr_x.out[i] + delay_fifo pu_dlys[Nx]; + OR2_X1 pu_ORs[Nx]; + PULLUP_X4 pu[Nx]; // TODO probably replace this with variable strength PU + (i:Nx: + pu_dlys[i].in = _out_acksB[i]; + pu_dlys[i].supply = supply; + + pu_ORs[i].a = pu_dlys[i].out; + pu_ORs[i].b = d_dr_x.out[i]; + pu_ORs[i].vdd = supply.vdd; + pu_ORs[i].vss = supply.vss; + + pu[i].a = pu_ORs[i].y; + pu[i].y = _out_reqsB[i]; + pu[i].vdd = supply.vdd; + pu[i].vss = supply.vss; + ) + + // ORtree from all output req lines (the INV_X1 outputs of _out_reqsB), back to the buffer ack. + // This is instead of the ack that came from the delayed validity trees, + // in decoder_2d_dly. + ortree _ortree(.out = addr_buf.out.a, .supply = supply); + INV_X1 out_req_invs[Nx]; + (i:Nx: + out_req_invs[i].a = _out_reqsB[i]; + out_req_invs[i].vdd = supply.vdd; + out_req_invs[i].vss = supply.vss; + + _ortree.in[i] = out_req_invs[i].y; + ) + } + + + /* @@ -237,21 +355,6 @@ namespace tmpl { } - - export template - defproc and_grid(bool! out[Nx*Ny]; bool? 
inx[Nx], iny[Ny]; power supply) { - AND2_X1 ands[Nx*Ny]; - (i:0..Nx*Ny-1:ands[i].vss = supply.vss; ands[i].vdd = supply.vdd;) - (x:0..Nx-1: - (y:0..Ny-1: - ands[x + y*Nx].a = inx[x]; - ands[x + y*Nx].b = iny[y]; - ands[x + y*Nx].y = out[x + y*Nx]; - ) - ) - } - - // Generates the OR-trees required to go from // N one-hot inputs to Nc dual rail binary encoding. export template @@ -532,7 +635,7 @@ namespace tmpl { export - defproc line_end_pull_down (bool? in; bool? reset_B; power supply; bool! out) + defproc nrn_line_end_pull_down (bool? in; bool? reset_B; power supply; bool! out) { bool _out, __out, nand_out; BUF_X1 buf1(.a=in, .y=_out, .vdd=supply.vdd,.vss=supply.vss); @@ -624,7 +727,7 @@ namespace tmpl { delay_fifo dly_y[Ny]; // Create x line req pull downs - line_end_pull_down pd_x[Nx]; + nrn_line_end_pull_down pd_x[Nx]; sigbuf rsb_pd_x(.in = reset_B, .supply = supply); (i:0..Nx-1: dly_x[i].supply = supply; @@ -637,7 +740,7 @@ namespace tmpl { ) // Create y line req pull downs - line_end_pull_down pd_y[Ny]; + nrn_line_end_pull_down pd_y[Ny]; sigbuf rsb_pd_y(.in = reset_B, .supply = supply); (j:0..Ny-1: dly_y[j].supply = supply;