vastly improved lazy synapse handshakes
This commit is contained in:
parent
cd5d41d7f8
commit
2e4cdd5029
@ -53,223 +53,186 @@ namespace tmpl {
|
|||||||
* Nc is the number of dualrail input channels.
|
* Nc is the number of dualrail input channels.
|
||||||
* Then builds N output AND gates, connecting to the right input wires.
|
* Then builds N output AND gates, connecting to the right input wires.
|
||||||
*/
|
*/
|
||||||
export template<pint Nc, N>
|
export template<pint Nc, N>
|
||||||
defproc decoder_dualrail (Mx1of2<Nc> in; bool? out[N]; power supply) {
|
defproc decoder_dualrail (Mx1of2<Nc> in; bool? out[N]; power supply) {
|
||||||
// signal buffers
|
// signal buffers
|
||||||
sigbuf<N> in_tX[Nc];
|
sigbuf<N> in_tX[Nc];
|
||||||
sigbuf<N> in_fX[Nc];
|
sigbuf<N> in_fX[Nc];
|
||||||
(i:Nc:
|
(i:Nc:
|
||||||
in_tX[i].supply = supply;
|
in_tX[i].supply = supply;
|
||||||
in_tX[i].in = in.d[i].t;
|
in_tX[i].in = in.d[i].t;
|
||||||
|
|
||||||
in_fX[i].supply = supply;
|
in_fX[i].supply = supply;
|
||||||
in_fX[i].in = in.d[i].f;
|
in_fX[i].in = in.d[i].f;
|
||||||
|
)
|
||||||
|
|
||||||
|
// AND trees
|
||||||
|
pint bitval;
|
||||||
|
andtree<Nc> atree[N];
|
||||||
|
(k:0..N-1:atree[k].supply = supply;)
|
||||||
|
(i:0..N-1:
|
||||||
|
(j:0..Nc-1:
|
||||||
|
bitval = (i & ( 1 << j )) >> j; // Get binary digit of integer i, column j
|
||||||
|
[bitval = 1 ->
|
||||||
|
atree[i].in[j] = in_tX[j].out[i];
|
||||||
|
// atree[i].in[j] = addr_buf.out.d.d[j].t;
|
||||||
|
[]bitval = 0 ->
|
||||||
|
atree[i].in[j] = in_fX[j].out[i];
|
||||||
|
// atree[i].in[j] = addr_buf.out.d.d[j].f;
|
||||||
|
[]bitval >= 2 -> {false : "fuck"};
|
||||||
|
]
|
||||||
|
atree[i].out = out[i];
|
||||||
)
|
)
|
||||||
|
)
|
||||||
// AND trees
|
}
|
||||||
pint bitval;
|
|
||||||
andtree<Nc> atree[N];
|
|
||||||
(k:0..N-1:atree[k].supply = supply;)
|
|
||||||
(i:0..N-1:
|
|
||||||
(j:0..Nc-1:
|
|
||||||
bitval = (i & ( 1 << j )) >> j; // Get binary digit of integer i, column j
|
|
||||||
[bitval = 1 ->
|
|
||||||
atree[i].in[j] = in_tX[j].out[i];
|
|
||||||
// atree[i].in[j] = addr_buf.out.d.d[j].t;
|
|
||||||
[]bitval = 0 ->
|
|
||||||
atree[i].in[j] = in_fX[j].out[i];
|
|
||||||
// atree[i].in[j] = addr_buf.out.d.d[j].f;
|
|
||||||
[]bitval >= 2 -> {false : "fuck"};
|
|
||||||
]
|
|
||||||
atree[i].out = out[i];
|
|
||||||
)
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 2D decoder which uses a configurable delay from the VCtrees to buffer ack.
|
* 2D decoder which uses a configurable delay from the VCtrees to buffer ack.
|
||||||
* Nx is the x size of the decoder array
|
* Nx is the x size of the decoder array
|
||||||
* NxC is the number of wires in the x channel.
|
* NxC is the number of wires in the x channel.
|
||||||
* Thus NxC should be something like NxC = ceil(log2(Nx))
|
* Thus NxC should be something like NxC = ceil(log2(Nx))
|
||||||
* but my guess is that we can't do logs...
|
* but my guess is that we can't do logs...
|
||||||
* N_dly_cfg is the number of config bits in the ACK delay line,
|
* N_dly_cfg is the number of config bits in the ACK delay line,
|
||||||
* with all bits high corresponding to 2**N_dly_cfg -1 DLY4_X1 cells.
|
* with all bits high corresponding to 2**N_dly_cfg -1 DLY4_X1 cells.
|
||||||
*/
|
*/
|
||||||
export template<pint NxC, NyC, Nx, Ny, N_dly_cfg>
|
export template<pint NxC, NyC, Nx, Ny, N_dly_cfg>
|
||||||
defproc decoder_2d_dly (avMx1of2<NxC+NyC> in; bool? outx[Nx], outy[Ny],
|
defproc decoder_2d_dly (avMx1of2<NxC+NyC> in; bool? outx[Nx], outy[Ny],
|
||||||
dly_cfg[N_dly_cfg], reset_B; power supply) {
|
dly_cfg[N_dly_cfg], reset_B; power supply) {
|
||||||
|
|
||||||
// Buffer to receive concat(x,y) address packet
|
// Buffer to receive concat(x,y) address packet
|
||||||
buffer<NxC+NyC> addr_buf(.in = in, .reset_B = reset_B, .supply = supply);
|
buffer<NxC+NyC> addr_buf(.in = in, .reset_B = reset_B, .supply = supply);
|
||||||
|
|
||||||
// Validity trees
|
// Validity trees
|
||||||
vtree<NxC> vtree_x (.supply = supply);
|
vtree<NxC> vtree_x (.supply = supply);
|
||||||
vtree<NyC> vtree_y (.supply = supply);
|
vtree<NyC> vtree_y (.supply = supply);
|
||||||
(i:0..NxC-1:vtree_x.in.d[i].t = addr_buf.out.d.d[i].t;)
|
(i:0..NxC-1:vtree_x.in.d[i].t = addr_buf.out.d.d[i].t;)
|
||||||
(i:0..NxC-1:vtree_x.in.d[i].f = addr_buf.out.d.d[i].f;)
|
(i:0..NxC-1:vtree_x.in.d[i].f = addr_buf.out.d.d[i].f;)
|
||||||
(i:0..NyC-1:vtree_y.in.d[i].t = addr_buf.out.d.d[i+NxC].t;)
|
(i:0..NyC-1:vtree_y.in.d[i].t = addr_buf.out.d.d[i+NxC].t;)
|
||||||
(i:0..NyC-1:vtree_y.in.d[i].f = addr_buf.out.d.d[i+NxC].f;)
|
(i:0..NyC-1:vtree_y.in.d[i].f = addr_buf.out.d.d[i+NxC].f;)
|
||||||
|
|
||||||
|
|
||||||
// Delay ack line. Ack line is delayed (but not the val)
|
// Delay ack line. Ack line is delayed (but not the val)
|
||||||
A_2C_B_X1 C2el(.c1 = vtree_x.out, .c2 = vtree_y.out, .vdd = supply.vdd, .vss = supply.vss);
|
A_2C_B_X1 C2el(.c1 = vtree_x.out, .c2 = vtree_y.out, .vdd = supply.vdd, .vss = supply.vss);
|
||||||
addr_buf.out.v = C2el.y;
|
addr_buf.out.v = C2el.y;
|
||||||
|
|
||||||
// delayprog<N_dly_cfg> dly(.in = tielow.y, .s = dly_cfg, .supply = supply);
|
// delayprog<N_dly_cfg> dly(.in = tielow.y, .s = dly_cfg, .supply = supply);
|
||||||
delayprog<N_dly_cfg> dly(.in = C2el.y, .s = dly_cfg, .supply = supply);
|
delayprog<N_dly_cfg> dly(.in = C2el.y, .s = dly_cfg, .supply = supply);
|
||||||
|
|
||||||
// ACK MAY HAVE BEEN DISCONNECTED HERE
|
// ACK MAY HAVE BEEN DISCONNECTED HERE
|
||||||
// FOR TESTING PURPOSES
|
// FOR TESTING PURPOSES
|
||||||
// !!!!!!!!!!!!!!!!
|
// !!!!!!!!!!!!!!!!
|
||||||
dly.out = addr_buf.out.a;
|
dly.out = addr_buf.out.a;
|
||||||
// ACK MAY HAVE BEEN DISCONNECTED HERE
|
// ACK MAY HAVE BEEN DISCONNECTED HERE
|
||||||
// FOR TESTING PURPOSES
|
// FOR TESTING PURPOSES
|
||||||
// !!!!!!!!!!!!!!!!
|
// !!!!!!!!!!!!!!!!
|
||||||
|
|
||||||
|
|
||||||
// Decoder X/Y And trees
|
// Decoder X/Y And trees
|
||||||
decoder_dualrail<NxC,Nx> d_dr_x(.out = outx, .supply = supply);
|
decoder_dualrail<NxC,Nx> d_dr_x(.out = outx, .supply = supply);
|
||||||
(i:0..NxC-1:d_dr_x.in.d[i] = addr_buf.out.d.d[i];)
|
(i:0..NxC-1:d_dr_x.in.d[i] = addr_buf.out.d.d[i];)
|
||||||
|
|
||||||
decoder_dualrail<NyC,Ny> d_dr_y(.out = outy, .supply = supply);
|
decoder_dualrail<NyC,Ny> d_dr_y(.out = outy, .supply = supply);
|
||||||
(i:0..NyC-1:d_dr_y.in.d[i] = addr_buf.out.d.d[i+NxC];)
|
(i:0..NyC-1:d_dr_y.in.d[i] = addr_buf.out.d.d[i+NxC];)
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
export template<pint Nx, Ny>
|
export template<pint Nx, Ny>
|
||||||
defproc and_grid(bool! out[Nx*Ny]; bool? inx[Nx], iny[Ny]; power supply) {
|
defproc and_grid(bool! out[Nx*Ny]; bool? inx[Nx], iny[Ny]; power supply) {
|
||||||
AND2_X1 ands[Nx*Ny];
|
AND2_X1 ands[Nx*Ny];
|
||||||
(i:0..Nx*Ny-1:ands[i].vss = supply.vss; ands[i].vdd = supply.vdd;)
|
(i:0..Nx*Ny-1:ands[i].vss = supply.vss; ands[i].vdd = supply.vdd;)
|
||||||
(x:0..Nx-1:
|
(x:0..Nx-1:
|
||||||
(y:0..Ny-1:
|
(y:0..Ny-1:
|
||||||
ands[x + y*Nx].a = inx[x];
|
ands[x + y*Nx].a = inx[x];
|
||||||
ands[x + y*Nx].b = iny[y];
|
ands[x + y*Nx].b = iny[y];
|
||||||
ands[x + y*Nx].y = out[x + y*Nx];
|
ands[x + y*Nx].y = out[x + y*Nx];
|
||||||
)
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* 2D decoder which uses synapse handshaking using line pulldowns.
|
|
||||||
* Nx is the x size of the decoder array
|
|
||||||
* NxC is the number of wires in the x channel.
|
|
||||||
* but my guess is that we can't do logs...
|
|
||||||
* the req on a1of1 out is the req to each synapse.
|
|
||||||
* The ack back from each line should go high when the synapse is charged.
|
|
||||||
* N_dly is a hard coded delay of the pull down circuit.
|
|
||||||
* It can be set to 0.
|
|
||||||
*/
|
|
||||||
export template<pint NxC, NyC, Nx, Ny, N_dly>
|
|
||||||
defproc decoder_2d_hs (avMx1of2<NxC+NyC> in; a1of1 out[Nx*Ny]; bool? reset_B; power supply) {
|
|
||||||
|
|
||||||
// Buffer to receive concat(x,y) address packet
|
|
||||||
buffer<NxC+NyC> addr_buf(.in = in, .reset_B = reset_B, .supply = supply);
|
|
||||||
|
|
||||||
// Decoder X/Y And trees
|
|
||||||
decoder_dualrail<NxC,Nx> d_dr_x(.supply = supply);
|
|
||||||
(i:0..NxC-1:d_dr_x.in.d[i] = addr_buf.out.d.d[i];)
|
|
||||||
decoder_dualrail<NyC,Ny> d_dr_y(.supply = supply);
|
|
||||||
(i:0..NyC-1:d_dr_y.in.d[i] = addr_buf.out.d.d[i+NxC];)
|
|
||||||
|
|
||||||
// Validity
|
|
||||||
vtree<NxC> vtree_x (.supply = supply);
|
|
||||||
vtree<NyC> vtree_y (.supply = supply);
|
|
||||||
(i:0..NxC-1:vtree_x.in.d[i].t = addr_buf.out.d.d[i].t;)
|
|
||||||
(i:0..NxC-1:vtree_x.in.d[i].f = addr_buf.out.d.d[i].f;)
|
|
||||||
(i:0..NyC-1:vtree_y.in.d[i].t = addr_buf.out.d.d[i+NxC].t;)
|
|
||||||
(i:0..NyC-1:vtree_y.in.d[i].f = addr_buf.out.d.d[i+NxC].f;)
|
|
||||||
A_2C_B_X1 C2el(.c1 = vtree_x.out, .c2 = vtree_y.out, .y = addr_buf.out.v,
|
|
||||||
.vdd = supply.vdd, .vss = supply.vss);
|
|
||||||
|
|
||||||
|
|
||||||
// and grid for reqs into synapses
|
|
||||||
and_grid<Nx, Ny> _and_grid(.inx = d_dr_x.out, .iny = d_dr_y.out, .supply = supply);
|
|
||||||
(i:Nx*Ny: out[i].r = _and_grid.out[i];)
|
|
||||||
|
|
||||||
// Acknowledge pull down time
|
|
||||||
|
|
||||||
// Pull DOWNs on the reqB lines by synapses (easier to invert).
|
|
||||||
bool _out_reqsB[Nx], _out_acksB[Nx]; // The vertical output ack lines from each syn.
|
|
||||||
PULLDOWN2_X4 req_pulldowns[Nx*Ny];
|
|
||||||
pint index;
|
|
||||||
(i:Nx:
|
|
||||||
(j:Ny:
|
|
||||||
index = i + Nx*j;
|
|
||||||
req_pulldowns[index].a = out[index].a;
|
|
||||||
req_pulldowns[index].b = _out_acksB[i];
|
|
||||||
req_pulldowns[index].y = _out_reqsB[i];
|
|
||||||
req_pulldowns[index].vss = supply.vss;
|
|
||||||
req_pulldowns[index].vdd = supply.vdd;
|
|
||||||
)
|
|
||||||
)
|
)
|
||||||
|
)
|
||||||
// ReqB keep cells
|
}
|
||||||
KEEP_X1 req_keeps[Nx];
|
|
||||||
(i:Nx:
|
|
||||||
req_keeps[i].y = _out_reqsB[i];
|
|
||||||
req_keeps[i].vdd = supply.vdd;
|
|
||||||
req_keeps[i].vss = supply.vss;
|
|
||||||
)
|
|
||||||
|
|
||||||
// req-ack buffers
|
|
||||||
// Delay needed here, since otherwise the pull up of reqB happens too quickly.
|
|
||||||
// Means that the pull up may start fighting the synapse,
|
|
||||||
// since the synapse has not yet retracted its ack.
|
|
||||||
// Also there is the possibility, if really fast, that the line pull up block
|
|
||||||
// doesn't yet see that the input is valid, and starts pulling up.
|
|
||||||
// In any case, this delay is important.
|
|
||||||
sigbuf<Ny> req_bufs[Nx];
|
|
||||||
delay_chain<N_dly> ack_delays[Nx];
|
|
||||||
(i:Nx:
|
|
||||||
ack_delays[i].in = _out_reqsB[i];
|
|
||||||
ack_delays[i].supply = supply;
|
|
||||||
|
|
||||||
// req_bufs[i].in = _out_reqsB[i];
|
|
||||||
req_bufs[i].in = ack_delays[i].out;
|
|
||||||
req_bufs[i].out[0] = _out_acksB[i]; // DANGER DANGER
|
|
||||||
req_bufs[i].supply = supply;
|
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 2D decoder which uses synapse handshaking using line pulldowns.
|
||||||
|
* Nx is the x size of the decoder array
|
||||||
|
* NxC is the number of wires in the x channel.
|
||||||
|
* but my guess is that we can't do logs...
|
||||||
|
* the req on a1of1 out is the req to each synapse.
|
||||||
|
* The ack back from each line should go high when the synapse is charged.
|
||||||
|
* N_dly is a hard coded delay of the pull down circuit.
|
||||||
|
* It can be set to 0.
|
||||||
|
*/
|
||||||
|
export template<pint NxC, NyC, Nx, Ny, N_dly>
|
||||||
|
defproc decoder_2d_hs (avMx1of2<NxC+NyC> in; a1of1 out[Nx*Ny]; bool? reset_B; power supply) {
|
||||||
|
|
||||||
)
|
// Buffer to receive concat(x,y) address packet
|
||||||
|
buffer<NxC+NyC> addr_buf(.in = in, .reset_B = reset_B, .supply = supply);
|
||||||
|
|
||||||
// Line end pull UPs (triggered once synapse reqs removed)
|
// Decoder X/Y And trees
|
||||||
OR2_X1 pu_ORs[Nx];
|
decoder_dualrail<NxC,Nx> d_dr_x(.supply = supply);
|
||||||
PULLUP_X4 pu[Nx]; // TODO probably replace this with variable strength PU
|
(i:0..NxC-1:d_dr_x.in.d[i] = addr_buf.out.d.d[i];)
|
||||||
AND2_X1 pu_ANDs[Nx];
|
decoder_dualrail<NyC,Ny> d_dr_y(.supply = supply);
|
||||||
(i:Nx:
|
(i:0..NyC-1:d_dr_y.in.d[i] = addr_buf.out.d.d[i+NxC];)
|
||||||
pu_ORs[i].a = _out_acksB[i];
|
|
||||||
pu_ORs[i].b = d_dr_x.out[i];
|
|
||||||
pu_ORs[i].vdd = supply.vdd;
|
|
||||||
pu_ORs[i].vss = supply.vss;
|
|
||||||
|
|
||||||
pu_ANDs[i].a = pu_ORs[i].y;
|
// Validity
|
||||||
pu_ANDs[i].b = reset_B; // TODO buffer
|
vtree<NxC> vtree_x (.supply = supply);
|
||||||
pu_ANDs[i].vdd = supply.vdd;
|
vtree<NyC> vtree_y (.supply = supply);
|
||||||
pu_ANDs[i].vss = supply.vss;
|
(i:0..NxC-1:vtree_x.in.d[i].t = addr_buf.out.d.d[i].t;)
|
||||||
|
(i:0..NxC-1:vtree_x.in.d[i].f = addr_buf.out.d.d[i].f;)
|
||||||
|
(i:0..NyC-1:vtree_y.in.d[i].t = addr_buf.out.d.d[i+NxC].t;)
|
||||||
|
(i:0..NyC-1:vtree_y.in.d[i].f = addr_buf.out.d.d[i+NxC].f;)
|
||||||
|
A_2C_B_X1 C2el(.c1 = vtree_x.out, .c2 = vtree_y.out, .y = addr_buf.out.v,
|
||||||
|
.vdd = supply.vdd, .vss = supply.vss);
|
||||||
|
|
||||||
pu[i].a = pu_ANDs[i].y;
|
|
||||||
pu[i].y = _out_reqsB[i];
|
|
||||||
pu[i].vdd = supply.vdd;
|
|
||||||
pu[i].vss = supply.vss;
|
|
||||||
)
|
|
||||||
|
|
||||||
// ORtree from all output reqs, back to the buffer ack.
|
// and grid for reqs into synapses
|
||||||
// This is instead of the ack that came from the delayed validity trees,
|
and_grid<Nx, Ny> _and_grid(.inx = d_dr_x.out, .iny = d_dr_y.out, .supply = supply);
|
||||||
// in decoder_2d_dly.
|
(i:Nx*Ny: out[i].r = _and_grid.out[i];)
|
||||||
ortree<Nx> _ortree(.out = addr_buf.out.a, .supply = supply);
|
|
||||||
INV_X1 out_req_invs[Nx];
|
|
||||||
(i:Nx:
|
|
||||||
out_req_invs[i].a = _out_reqsB[i];
|
|
||||||
out_req_invs[i].vdd = supply.vdd;
|
|
||||||
out_req_invs[i].vss = supply.vss;
|
|
||||||
|
|
||||||
_ortree.in[i] = out_req_invs[i].y;
|
// Acknowledge pull down time
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
|
// Pull DOWNs on the ackB lines by synapses (easier to invert).
|
||||||
|
bool _out_acksB[Nx]; // The vertical output ack lines from each syn.
|
||||||
|
PULLDOWN2_X4 ack_pulldowns[Nx*Ny];
|
||||||
|
pint index;
|
||||||
|
(i:Nx:
|
||||||
|
(j:Ny:
|
||||||
|
index = i + Nx*j;
|
||||||
|
ack_pulldowns[index].a = out[index].a;
|
||||||
|
ack_pulldowns[index].b = d_dr_x.out[i];
|
||||||
|
ack_pulldowns[index].y = _out_acksB[i];
|
||||||
|
ack_pulldowns[index].vss = supply.vss;
|
||||||
|
ack_pulldowns[index].vdd = supply.vdd;
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
// Line end pull UPs (triggered once reqs removed)
|
||||||
|
PULLUP_X4 pu[Nx]; // TODO probably replace this with variable strength PU
|
||||||
|
AND2_X1 pu_ANDs[Nx];
|
||||||
|
(i:Nx:
|
||||||
|
pu_ANDs[i].a = d_dr_x.out[i];
|
||||||
|
pu_ANDs[i].b = reset_B; // TODO buffer
|
||||||
|
pu_ANDs[i].vdd = supply.vdd;
|
||||||
|
pu_ANDs[i].vss = supply.vss;
|
||||||
|
|
||||||
|
pu[i].a = pu_ANDs[i].y;
|
||||||
|
pu[i].y = _out_acksB[i];
|
||||||
|
pu[i].vdd = supply.vdd;
|
||||||
|
pu[i].vss = supply.vss;
|
||||||
|
)
|
||||||
|
|
||||||
|
// ORtree from all output acks, back to the buffer ack.
|
||||||
|
// This is instead of the ack that came from the delayed validity trees,
|
||||||
|
// in decoder_2d_dly.
|
||||||
|
ortree<Nx> _ortree(.out = addr_buf.out.a, .supply = supply);
|
||||||
|
INV_X1 out_ack_invs[Nx];
|
||||||
|
(i:Nx:
|
||||||
|
out_ack_invs[i].a = _out_acksB[i];
|
||||||
|
out_ack_invs[i].vdd = supply.vdd;
|
||||||
|
out_ack_invs[i].vss = supply.vss;
|
||||||
|
|
||||||
|
_ortree.in[i] = out_ack_invs[i].y;
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user