vastly improved lazy synapse handshakes

This commit is contained in:
alexmadison 2022-03-31 12:44:00 +02:00
parent cd5d41d7f8
commit 2e4cdd5029
1 changed files with 156 additions and 193 deletions

View File

@ -53,8 +53,8 @@ namespace tmpl {
* Nc is the number of dualrail input channels.
* Then builds N output AND gates, connecting to the right input wires.
*/
export template<pint Nc, N>
defproc decoder_dualrail (Mx1of2<Nc> in; bool? out[N]; power supply) {
export template<pint Nc, N>
defproc decoder_dualrail (Mx1of2<Nc> in; bool? out[N]; power supply) {
// signal buffers
sigbuf<N> in_tX[Nc];
sigbuf<N> in_fX[Nc];
@ -84,10 +84,10 @@ namespace tmpl {
atree[i].out = out[i];
)
)
}
}
/**
/**
* 2D decoder which uses a configurable delay from the VCtrees to buffer ack.
* Nx is the x size of the decoder array
* NxC is the number of wires in the x channel.
@ -96,8 +96,8 @@ namespace tmpl {
* N_dly_cfg is the number of config bits in the ACK delay line,
* with all bits high corresponding to 2**N_dly_cfg -1 DLY4_X1 cells.
*/
export template<pint NxC, NyC, Nx, Ny, N_dly_cfg>
defproc decoder_2d_dly (avMx1of2<NxC+NyC> in; bool? outx[Nx], outy[Ny],
export template<pint NxC, NyC, Nx, Ny, N_dly_cfg>
defproc decoder_2d_dly (avMx1of2<NxC+NyC> in; bool? outx[Nx], outy[Ny],
dly_cfg[N_dly_cfg], reset_B; power supply) {
// Buffer to receive concat(x,y) address packet
@ -135,10 +135,10 @@ namespace tmpl {
decoder_dualrail<NyC,Ny> d_dr_y(.out = outy, .supply = supply);
(i:0..NyC-1:d_dr_y.in.d[i] = addr_buf.out.d.d[i+NxC];)
}
}
export template<pint Nx, Ny>
defproc and_grid(bool! out[Nx*Ny]; bool? inx[Nx], iny[Ny]; power supply) {
export template<pint Nx, Ny>
defproc and_grid(bool! out[Nx*Ny]; bool? inx[Nx], iny[Ny]; power supply) {
AND2_X1 ands[Nx*Ny];
(i:0..Nx*Ny-1:ands[i].vss = supply.vss; ands[i].vdd = supply.vdd;)
(x:0..Nx-1:
@ -148,10 +148,10 @@ namespace tmpl {
ands[x + y*Nx].y = out[x + y*Nx];
)
)
}
}
/**
/**
* 2D decoder which uses synapse handshaking using line pulldowns.
* Nx is the x size of the decoder array
* NxC is the number of wires in the x channel.
@ -161,8 +161,8 @@ namespace tmpl {
* N_dly is a hard coded delay of the pull down circuit.
* It can be set to 0.
*/
export template<pint NxC, NyC, Nx, Ny, N_dly>
defproc decoder_2d_hs (avMx1of2<NxC+NyC> in; a1of1 out[Nx*Ny]; bool? reset_B; power supply) {
export template<pint NxC, NyC, Nx, Ny, N_dly>
defproc decoder_2d_hs (avMx1of2<NxC+NyC> in; a1of1 out[Nx*Ny]; bool? reset_B; power supply) {
// Buffer to receive concat(x,y) address packet
buffer<NxC+NyC> addr_buf(.in = in, .reset_B = reset_B, .supply = supply);
@ -190,86 +190,49 @@ namespace tmpl {
// Acknowledge pull down time
// Pull DOWNs on the reqB lines by synapses (easier to invert).
bool _out_reqsB[Nx], _out_acksB[Nx]; // The vertical output ack lines from each syn.
PULLDOWN2_X4 req_pulldowns[Nx*Ny];
// Pull DOWNs on the ackB lines by synapses (easier to invert).
bool _out_acksB[Nx]; // The vertical output ack lines from each syn.
PULLDOWN2_X4 ack_pulldowns[Nx*Ny];
pint index;
(i:Nx:
(j:Ny:
index = i + Nx*j;
req_pulldowns[index].a = out[index].a;
req_pulldowns[index].b = _out_acksB[i];
req_pulldowns[index].y = _out_reqsB[i];
req_pulldowns[index].vss = supply.vss;
req_pulldowns[index].vdd = supply.vdd;
ack_pulldowns[index].a = out[index].a;
ack_pulldowns[index].b = d_dr_x.out[i];
ack_pulldowns[index].y = _out_acksB[i];
ack_pulldowns[index].vss = supply.vss;
ack_pulldowns[index].vdd = supply.vdd;
)
)
// ReqB keep cells
KEEP_X1 req_keeps[Nx];
(i:Nx:
req_keeps[i].y = _out_reqsB[i];
req_keeps[i].vdd = supply.vdd;
req_keeps[i].vss = supply.vss;
)
// req-ack buffers
// A delay is needed here, since otherwise the pull-up of reqB happens too quickly.
// The pull-up may then start fighting the synapse,
// since the synapse has not yet retracted its ack.
// There is also the possibility, if it is really fast, that the line pull-up block
// does not yet see that the input is valid, and starts pulling up.
// In any case, this delay is important.
sigbuf<Ny> req_bufs[Nx];
delay_chain<N_dly> ack_delays[Nx];
(i:Nx:
ack_delays[i].in = _out_reqsB[i];
ack_delays[i].supply = supply;
// req_bufs[i].in = _out_reqsB[i];
req_bufs[i].in = ack_delays[i].out;
req_bufs[i].out[0] = _out_acksB[i]; // DANGER DANGER
req_bufs[i].supply = supply;
)
// Line end pull UPs (triggered once synapse reqs removed)
OR2_X1 pu_ORs[Nx];
// Line end pull UPs (triggered once reqs removed)
PULLUP_X4 pu[Nx]; // TODO probably replace this with variable strength PU
AND2_X1 pu_ANDs[Nx];
(i:Nx:
pu_ORs[i].a = _out_acksB[i];
pu_ORs[i].b = d_dr_x.out[i];
pu_ORs[i].vdd = supply.vdd;
pu_ORs[i].vss = supply.vss;
pu_ANDs[i].a = pu_ORs[i].y;
pu_ANDs[i].a = d_dr_x.out[i];
pu_ANDs[i].b = reset_B; // TODO buffer
pu_ANDs[i].vdd = supply.vdd;
pu_ANDs[i].vss = supply.vss;
pu[i].a = pu_ANDs[i].y;
pu[i].y = _out_reqsB[i];
pu[i].y = _out_acksB[i];
pu[i].vdd = supply.vdd;
pu[i].vss = supply.vss;
)
// ORtree from all output reqs, back to the buffer ack.
// ORtree from all output acks, back to the buffer ack.
// This is instead of the ack that came from the delayed validity trees,
// in decoder_2d_dly.
ortree<Nx> _ortree(.out = addr_buf.out.a, .supply = supply);
INV_X1 out_req_invs[Nx];
INV_X1 out_ack_invs[Nx];
(i:Nx:
out_req_invs[i].a = _out_reqsB[i];
out_req_invs[i].vdd = supply.vdd;
out_req_invs[i].vss = supply.vss;
out_ack_invs[i].a = _out_acksB[i];
out_ack_invs[i].vdd = supply.vdd;
out_ack_invs[i].vss = supply.vss;
_ortree.in[i] = out_req_invs[i].y;
_ortree.in[i] = out_ack_invs[i].y;
)
}
}