Merge branch 'dev' into HEAD
This commit is contained in:
@ -26,6 +26,33 @@
|
||||
|
||||
namespace tmpl {
|
||||
namespace dataflow_neuro{
|
||||
/**
 * Keeper cell for a single node.
 * The node y is inverted onto the internal node _y, and a weak
 * ([weak=1]) inverter drives _y back onto y.
 */
export defcell KEEP_X1 (bool y; bool vdd, vss) {
    bool _y;
    prs {
        // Forward inverter y -> _y, written as its two explicit rules.
        y -> _y-
        ~y -> _y+

        // Weak feedback inverter _y -> y, written as its two explicit rules.
        [weak=1] _y -> y-
        [weak=1] ~_y -> y+
    }
    sizing {
        leak_adjust <- 1;
        p_n_mode <- 1;
        y {-1}; _y{-1}
    }
}
|
||||
|
||||
/**
 * State-holding gate with an inverted internal node _y and output y.
 *   _y is pulled low  when c1, n1, n2 and sr_B are all high.
 *   _y is pulled high when c1 is low, or when pr_B is low.
 *   y is the inverse of _y.
 */
export defcell A_1C2N_RB_X1 (bool! y; bool? c1,n1,n2,pr_B, sr_B; bool vdd, vss) {
    bool _y;
    prs {
        // Pull-down of the internal node.
        c1 & n1 & n2 & sr_B -> _y-
        // Pull-up of the internal node.
        ~c1 | ~pr_B -> _y+

        // Output inverter, written as its two explicit rules.
        _y -> y-
        ~_y -> y+
    }
    sizing {
        leak_adjust <- 1;
        p_n_mode <- 1;
        y {-1}; _y{-1}
    }
}
|
||||
|
||||
export defcell A_1C1P2N_RB_X1 (bool! y; bool? c1,p1,n1,n2,pr_B, sr_B; bool vdd, vss) {
|
||||
bool _y;
|
||||
prs{
|
||||
@ -41,6 +68,22 @@ namespace tmpl {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
 * State-holding gate with an inverted internal node _y and output y.
 *   _y is pulled low  when c1, c2, n1 and sr_B are all high.
 *   _y is pulled high when p1, c1 and c2 are all low, or when pr_B is low.
 *   y is the inverse of _y.
 */
export defcell A_2C1P1N_RB_X1 (bool! y; bool? c1,c2,p1,n1,pr_B,sr_B; bool vdd, vss) {
    bool _y;
    prs {
        // Pull-down of the internal node.
        c1 & c2 & n1 & sr_B -> _y-
        // Pull-up of the internal node.
        (~p1 & ~c1 & ~c2) | ~pr_B -> _y+

        // Output inverter, written as its two explicit rules.
        _y -> y-
        ~_y -> y+
    }
    sizing {
        leak_adjust <- 1;
        p_n_mode <- 1;
        y {-1}; _y{-1}
    }
}
|
||||
|
||||
export defcell A_1C1P2N_R_X1 (bool! y; bool? c1,p1,n1,n2,pr_B, sr_B; bool vdd, vss) {
|
||||
prs{
|
||||
(~p1 & ~c1)|~pr_B -> y-
|
||||
|
@ -48,6 +48,45 @@ open std::data;
|
||||
namespace tmpl {
|
||||
namespace dataflow_neuro {
|
||||
|
||||
/**
 * Dualrail decoder.
 * Nc is the number of dualrail input channels (address bits).
 * N is the number of decoded outputs.
 * Builds N AND trees of Nc inputs each. Input j of tree i is wired to the
 * buffered true rail of input bit j when bit j of the integer i is 1, and
 * to the buffered false rail when that bit is 0. Tree i drives out[i].
 *
 * NOTE(review): out is declared `bool?` although it is connected to the
 * AND tree outputs; `bool!` looks intended - confirm against callers
 * before changing the port direction.
 */
export template<pint Nc, N>
defproc decoder_dualrail (Mx1of2<Nc> in; bool? out[N]; power supply) {
    // Signal buffers: each input rail gets a sigbuf with N outputs,
    // one per AND tree.
    sigbuf<N> in_tX[Nc];
    sigbuf<N> in_fX[Nc];
    (i:Nc:
        in_tX[i].supply = supply;
        in_tX[i].in = in.d[i].t;

        in_fX[i].supply = supply;
        in_fX[i].in = in.d[i].f;
    )

    // AND trees
    pint bitval;
    andtree<Nc> atree[N];
    (i:0..N-1:
        atree[i].supply = supply;
        // The output is connected once per tree (it does not depend on j).
        atree[i].out = out[i];
        (j:0..Nc-1:
            bitval = (i & ( 1 << j )) >> j; // Get binary digit of integer i, column j
            [bitval = 1 ->
                atree[i].in[j] = in_tX[j].out[i];
            []bitval = 0 ->
                atree[i].in[j] = in_fX[j].out[i];
            []bitval >= 2 -> {false : "decoder_dualrail: extracted address bit must be 0 or 1"};
            ]
        )
    )
}
|
||||
|
||||
|
||||
/**
|
||||
* 2D decoder which uses a configurable delay from the VCtrees to buffer ack.
|
||||
* Nx is the x size of the decoder array
|
||||
@ -63,9 +102,6 @@ namespace tmpl {
|
||||
|
||||
// Buffer to receive concat(x,y) address packet
|
||||
buffer<NxC+NyC> addr_buf(.in = in, .reset_B = reset_B, .supply = supply);
|
||||
// NEED TO BUFFER OUTPUTS FROM BUFFER I RECKON
|
||||
|
||||
|
||||
|
||||
// Validity trees
|
||||
vtree<NxC> vtree_x (.supply = supply);
|
||||
@ -91,39 +127,140 @@ namespace tmpl {
|
||||
// FOR TESTING PURPOSES
|
||||
// !!!!!!!!!!!!!!!!
|
||||
|
||||
// AND trees
|
||||
pint bitval;
|
||||
andtree<NxC> atree_x[Nx];
|
||||
(k:0..Nx-1:atree_x[k].supply = supply;)
|
||||
(i:0..Nx-1:
|
||||
(j:0..NxC-1:
|
||||
bitval = (i & ( 1 << j )) >> j; // Get binary digit of integer i, column j
|
||||
[bitval = 1 ->
|
||||
atree_x[i].in[j] = addr_buf.out.d.d[j].t;
|
||||
[]bitval = 0 ->
|
||||
atree_x[i].in[j] = addr_buf.out.d.d[j].f;
|
||||
[]bitval >= 2 -> {false : "fuck"};
|
||||
]
|
||||
atree_x[i].out = outx[i];
|
||||
)
|
||||
)
|
||||
|
||||
andtree<NyC> atree_y[Ny];
|
||||
(k:0..Ny-1:atree_y[k].supply = supply;)
|
||||
(i:0..Ny-1:
|
||||
(j:0..NyC-1:
|
||||
bitval = (i & ( 1 << j )) >> j; // Get binary digit of integer i, column j
|
||||
[bitval = 1 ->
|
||||
atree_y[i].in[j] = addr_buf.out.d.d[j+NxC].t;
|
||||
[]bitval = 0 ->
|
||||
atree_y[i].in[j] = addr_buf.out.d.d[j+NxC].f;
|
||||
]
|
||||
atree_y[i].out = outy[i];
|
||||
)
|
||||
)
|
||||
// Decoder X/Y And trees
|
||||
decoder_dualrail<NxC,Nx> d_dr_x(.out = outx, .supply = supply);
|
||||
(i:0..NxC-1:d_dr_x.in.d[i] = addr_buf.out.d.d[i];)
|
||||
|
||||
decoder_dualrail<NyC,Ny> d_dr_y(.out = outy, .supply = supply);
|
||||
(i:0..NyC-1:d_dr_y.in.d[i] = addr_buf.out.d.d[i+NxC];)
|
||||
|
||||
}
|
||||
|
||||
/**
 * Nx-by-Ny grid of 2-input AND gates.
 * The gate for column x and row y lives at flat index x + y*Nx;
 * it ANDs inx[x] with iny[y] and drives out[x + y*Nx].
 */
export template<pint Nx, Ny>
defproc and_grid(bool! out[Nx*Ny]; bool? inx[Nx], iny[Ny]; power supply) {
    AND2_X1 ands[Nx*Ny];
    (row:0..Ny-1:
        (col:0..Nx-1:
            // power
            ands[col + row*Nx].vdd = supply.vdd;
            ands[col + row*Nx].vss = supply.vss;
            // signals
            ands[col + row*Nx].a = inx[col];
            ands[col + row*Nx].b = iny[row];
            ands[col + row*Nx].y = out[col + row*Nx];
        )
    )
}
|
||||
|
||||
|
||||
/**
 * 2D decoder which uses synapse handshaking using line pulldowns.
 * Nx, Ny are the x and y sizes of the decoder array.
 * NxC, NyC are the numbers of dual-rail bits of the x and y parts of the
 * address (passed explicitly; the original authors note that they probably
 * cannot be derived with a log).
 * The req on each a1of1 out is the req to the corresponding synapse.
 * The ack back from each line is expected to go high when the synapse
 * is charged.
 * N_dly is a hard coded delay of the pull down circuit. It can be set to 0.
 */
export template<pint NxC, NyC, Nx, Ny, N_dly>
defproc decoder_2d_hs (avMx1of2<NxC+NyC> in; a1of1 out[Nx*Ny]; bool? reset_B; power supply) {

    // Buffer to receive the concat(x,y) address packet.
    buffer<NxC+NyC> addr_buf(.in = in, .reset_B = reset_B, .supply = supply);

    // X/Y one-hot decoders and validity trees, all fed from the buffer output.
    // Bits 0..NxC-1 are the x address, bits NxC..NxC+NyC-1 the y address.
    decoder_dualrail<NxC,Nx> d_dr_x(.supply = supply);
    decoder_dualrail<NyC,Ny> d_dr_y(.supply = supply);
    vtree<NxC> vtree_x (.supply = supply);
    vtree<NyC> vtree_y (.supply = supply);
    (i:0..NxC-1:
        d_dr_x.in.d[i] = addr_buf.out.d.d[i];
        vtree_x.in.d[i].t = addr_buf.out.d.d[i].t;
        vtree_x.in.d[i].f = addr_buf.out.d.d[i].f;
    )
    (i:0..NyC-1:
        d_dr_y.in.d[i] = addr_buf.out.d.d[i+NxC];
        vtree_y.in.d[i].t = addr_buf.out.d.d[i+NxC].t;
        vtree_y.in.d[i].f = addr_buf.out.d.d[i+NxC].f;
    )

    // Combine both validity trees into the buffer's valid signal.
    A_2C_B_X1 C2el(.c1 = vtree_x.out, .c2 = vtree_y.out, .y = addr_buf.out.v,
                   .vdd = supply.vdd, .vss = supply.vss);

    // AND grid producing the reqs into the synapses.
    and_grid<Nx, Ny> _and_grid(.inx = d_dr_x.out, .iny = d_dr_y.out, .supply = supply);
    (i:Nx*Ny: out[i].r = _and_grid.out[i];)

    // Per-column (x) shared lines and the cells hanging off them.
    bool _out_reqsB[Nx], _out_acksB[Nx];
    PULLDOWN2_X4 req_pulldowns[Nx*Ny];  // one per synapse
    KEEP_X1 req_keeps[Nx];              // keep cells on the reqB lines
    sigbuf<Ny> req_bufs[Nx];            // reqB -> ackB buffers
    delay_fifo<N_dly> pu_dlys[Nx];      // delay before the line-end pull up
    OR2_X1 pu_ORs[Nx];
    AND2_X1 pu_ANDs[Nx];
    PULLUP_X4 pu[Nx];                   // TODO probably replace this with variable strength PU
    INV_X1 out_req_invs[Nx];

    // ORtree from all output reqs, back to the buffer ack.
    // This is instead of the ack that came from the delayed validity trees,
    // in decoder_2d_dly.
    ortree<Nx> _ortree(.out = addr_buf.out.a, .supply = supply);

    (i:Nx:
        // Synapse cells on column i: synapse (i, j) is at flat index i + Nx*j.
        // NOTE(review): the original comment called these "pull UPs on the
        // reqB lines", but the cells are PULLDOWN2_X4 - confirm polarity.
        (j:Ny:
            req_pulldowns[i + Nx*j].a = out[i + Nx*j].a;
            req_pulldowns[i + Nx*j].b = _out_acksB[i];
            req_pulldowns[i + Nx*j].y = _out_reqsB[i];
            req_pulldowns[i + Nx*j].vdd = supply.vdd;
            req_pulldowns[i + Nx*j].vss = supply.vss;
        )

        // Keep cell on the reqB line.
        req_keeps[i].vdd = supply.vdd;
        req_keeps[i].vss = supply.vss;
        req_keeps[i].y = _out_reqsB[i];

        // reqB -> ackB buffer.
        // NOTE(review): only out[0] of the Ny-wide sigbuf is used; the
        // original flagged this connection with "DANGER DANGER".
        req_bufs[i].supply = supply;
        req_bufs[i].in = _out_reqsB[i];
        req_bufs[i].out[0] = _out_acksB[i];

        // Line end pull up (triggered once synapse reqs removed):
        // delayed ackB OR'ed with the x decoder output, gated by reset_B.
        pu_dlys[i].supply = supply;
        pu_dlys[i].in = _out_acksB[i];

        pu_ORs[i].vdd = supply.vdd;
        pu_ORs[i].vss = supply.vss;
        pu_ORs[i].a = pu_dlys[i].out;
        pu_ORs[i].b = d_dr_x.out[i];

        pu_ANDs[i].vdd = supply.vdd;
        pu_ANDs[i].vss = supply.vss;
        pu_ANDs[i].a = pu_ORs[i].y;
        pu_ANDs[i].b = reset_B; // TODO buffer

        pu[i].vdd = supply.vdd;
        pu[i].vss = supply.vss;
        pu[i].a = pu_ANDs[i].y;
        pu[i].y = _out_reqsB[i];

        // Inverted reqB feeds the OR tree that acknowledges the buffer.
        out_req_invs[i].vdd = supply.vdd;
        out_req_invs[i].vss = supply.vss;
        out_req_invs[i].a = _out_reqsB[i];
        _ortree.in[i] = out_req_invs[i].y;
    )
}
|
||||
|
||||
|
||||
|
||||
|
||||
/*
|
||||
@ -224,21 +361,6 @@ namespace tmpl {
|
||||
|
||||
}
|
||||
|
||||
|
||||
/**
 * Nx-by-Ny grid of 2-input AND gates.
 * The gate for column x and row y lives at flat index x + y*Nx;
 * it ANDs inx[x] with iny[y] and drives out[x + y*Nx].
 */
export template<pint Nx, Ny>
defproc and_grid(bool! out[Nx*Ny]; bool? inx[Nx], iny[Ny]; power supply) {
    AND2_X1 ands[Nx*Ny];
    (row:0..Ny-1:
        (col:0..Nx-1:
            // power
            ands[col + row*Nx].vdd = supply.vdd;
            ands[col + row*Nx].vss = supply.vss;
            // signals
            ands[col + row*Nx].a = inx[col];
            ands[col + row*Nx].b = iny[row];
            ands[col + row*Nx].y = out[col + row*Nx];
        )
    )
}
|
||||
|
||||
|
||||
// Generates the OR-trees required to go from
|
||||
// N one-hot inputs to Nc dual rail binary encoding.
|
||||
export template<pint Nc, N>
|
||||
@ -348,7 +470,7 @@ namespace tmpl {
|
||||
|
||||
|
||||
export template<pint NxC, NyC, Nx, Ny, ACK_STRENGTH>
|
||||
defproc encoder2D(a1of1 x[Nx]; a1of1 y[Ny]; avMx1of2<(NxC + NyC)> out; power supply; bool reset_B) {
|
||||
defproc encoder2D(a1of1 inx[Nx]; a1of1 iny[Ny]; avMx1of2<(NxC + NyC)> out; power supply; bool reset_B) {
|
||||
// Reset buffers
|
||||
pint H = 2*(NxC + NyC); //Reset strength? to be investigated
|
||||
bool _reset_BX,_reset_BXX[H];
|
||||
@ -359,10 +481,10 @@ namespace tmpl {
|
||||
a1of1 _arb_out_x, _arb_out_y;
|
||||
a1of1 _x_temp[Nx],_y_temp[Ny]; // For wiring the reqs to the arbtrees
|
||||
(i:Nx:
|
||||
_x_temp[i].r = x[i].r;
|
||||
_x_temp[i].r = inx[i].r;
|
||||
)
|
||||
(i:Ny:
|
||||
_y_temp[i].r = y[i].r;
|
||||
_y_temp[i].r = iny[i].r;
|
||||
)
|
||||
arbtree<Nx> Xarb(.in = _x_temp,.out = _arb_out_x,.supply = supply);
|
||||
arbtree<Ny> Yarb(.in = _y_temp,.out = _arb_out_y,.supply = supply);
|
||||
@ -372,12 +494,12 @@ namespace tmpl {
|
||||
sigbuf_1output<ACK_STRENGTH> y_ack_arb[Ny];
|
||||
(i:Nx:
|
||||
x_ack_arb[i].in = _x_temp[i].a;
|
||||
x_ack_arb[i].out = x[i].a;
|
||||
x_ack_arb[i].out = inx[i].a;
|
||||
x_ack_arb[i].supply = supply;
|
||||
)
|
||||
(i:Ny:
|
||||
y_ack_arb[i].in = _y_temp[i].a;
|
||||
y_ack_arb[i].out = y[i].a;
|
||||
y_ack_arb[i].out = iny[i].a;
|
||||
y_ack_arb[i].supply = supply;
|
||||
)
|
||||
|
||||
@ -408,7 +530,7 @@ namespace tmpl {
|
||||
|
||||
// X_req ORtree
|
||||
bool _x_req_array[Nx], _x_v_B;
|
||||
(i:Nx:_x_req_array[i] = x[i].r;)
|
||||
(i:Nx:_x_req_array[i] = inx[i].r;)
|
||||
ortree<Nx> x_req_ortree(.in = _x_req_array,.out = _x_v,.supply = supply); //todo BUFF
|
||||
INV_X1 not_x_req_ortree(.a = _x_v,.y = _x_v_B);
|
||||
|
||||
@ -450,17 +572,17 @@ namespace tmpl {
|
||||
// Encoders
|
||||
bool x_acks[Nx];
|
||||
Mx1of2<NxC> x_enc_out;
|
||||
(i:Nx:x_acks[i] = x[i].a;)
|
||||
(i:Nx:x_acks[i] = inx[i].a;)
|
||||
dualrail_encoder<NxC, Nx> x_encoder(.in = x_acks, .out = x_enc_out, .supply = supply);
|
||||
|
||||
bool y_acks[Nx];
|
||||
bool y_acks[Ny];
|
||||
Mx1of2<NyC> y_enc_out;
|
||||
(i:Ny:y_acks[i] = y[i].a;)
|
||||
(i:Ny:y_acks[i] = iny[i].a;)
|
||||
dualrail_encoder<NyC, Ny> y_encoder(.in = y_acks, .out = y_enc_out, .supply = supply);
|
||||
|
||||
// Valid trees
|
||||
vtree<NxC> vtree_x(.in = x_enc_out, .out = _in_x_v, .supply = supply);
|
||||
vtree<NxC> vtree_y(.in = y_enc_out, .out = _in_y_v, .supply = supply);
|
||||
vtree<NyC> vtree_y(.in = y_enc_out, .out = _in_y_v, .supply = supply);
|
||||
|
||||
// Buffer func thing
|
||||
Mx1of2<NxC + NyC> into_buffer;
|
||||
@ -474,6 +596,185 @@ namespace tmpl {
|
||||
|
||||
|
||||
|
||||
/**
 * Neuron handshaking.
 * Looks for a rising edge on the neuron req (in.r).
 * Then performs a 2d handshake, first on outy and then on outx:
 * the x request pull-up is additionally gated by outy.a.
 */
export
defproc nrn_hs_2D(a1of1 in; a1of1 outx; a1of1 outy; power supply; bool reset_B) {
    // Locally buffered reset.
    bool _reset_BX;
    BUF_X2 reset_buf(.vdd = supply.vdd, .vss = supply.vss, .a = reset_B, .y = _reset_BX);

    // Inverted acks of the two output lines.
    bool _x_a_B, _y_a_B;
    INV_X2 inv_y(.vdd = supply.vdd, .vss = supply.vss, .a = outy.a, .y = _y_a_B);
    INV_X2 inv_x(.vdd = supply.vdd, .vss = supply.vss, .a = outx.a, .y = _x_a_B);

    bool _en, _req;

    // Ack back to the neuron.
    // in.r is on a c-input (A_2C1N_RB_X1) rather than an n-input
    // (A_1C2N_RB_X1, with n1 = _req and n2 = in.r). The authors switched back
    // to this form because, if the req was not removed in time, in.r was still
    // high after the out had been dealt with and was recounted as a double spike.
    A_2C1N_RB_X1 A_ack(.vdd = supply.vdd, .vss = supply.vss,
                       .pr_B = _reset_BX, .sr_B = _reset_BX,
                       .c1 = _en, .c2 = in.r, .n1 = _req, .y = in.a);

    // Enable signal.
    A_1C1P_X1 A_en(.vdd = supply.vdd, .vss = supply.vss,
                   .c1 = in.a, .p1 = _req, .y = _en);

    // Internal request.
    A_2C1P1N_RB_X1 A_req(.vdd = supply.vdd, .vss = supply.vss,
                         .pr_B = _reset_BX, .sr_B = _reset_BX,
                         .c1 = _en, .c2 = _y_a_B, .p1 = _x_a_B, .n1 = in.r, .y = _req);

    // y_req pull up
    NAND2_X1 nand_y(.vdd = supply.vdd, .vss = supply.vss, .a = _y_a_B, .b = _req);
    PULLUP_X4 pu_y(.vdd = supply.vdd, .vss = supply.vss, .a = nand_y.y, .y = outy.r);

    // x_req pull up (also requires outy.a)
    NAND3_X1 nand_x(.vdd = supply.vdd, .vss = supply.vss, .a = _x_a_B, .b = _req, .c = outy.a);
    PULLUP_X4 pu_x(.vdd = supply.vdd, .vss = supply.vss, .a = nand_x.y, .y = outx.r);
}
|
||||
|
||||
|
||||
|
||||
/**
 * Line-end pull down for a neuron handshake line.
 * in is passed through two buffers and an inverter; the inverted value is
 * NANDed with reset_B, and the NAND output drives a PULLDOWN_X4 whose
 * output is out.
 */
export
defproc nrn_line_end_pull_down (bool? in; bool? reset_B; power supply; bool! out)
{
    bool _out, __out, nand_out;
    BUF_X1 buf1(.a=in, .y=_out, .vdd=supply.vdd,.vss=supply.vss);
    BUF_X1 buf2(.a=_out, .y=__out, .vdd=supply.vdd,.vss=supply.vss);
    INV_X1 inv(.a = __out, .vdd=supply.vdd,.vss =supply.vss);

    // Instance is named aenor but is a NAND2; name kept so existing
    // hierarchical references stay valid.
    NAND2_X1 aenor(.a=inv.y, .b=reset_B, .y = nand_out, .vdd=supply.vdd,.vss=supply.vss);

    // Supply pins connected like every other cell here (they were left unconnected).
    PULLDOWN_X4 pull_down(.a=nand_out, .y=out, .vdd=supply.vdd,.vss=supply.vss);
}
|
||||
|
||||
|
||||
|
||||
/**
 * A 2d grid of neuron handshakers.
 * Should then slot into the encoder.
 * Each neuron has an a1of1 channel (in), which is tripped when a neuron spikes.
 * The neuron in column i and row j is at flat index i + j*Nx and shares
 * the x line i and the y line j.
 * N_dly is the number of delay elements to add to the line pull downs,
 * for the purpose of running ACT sims.
 * It should probably be set to 0 though.
 */
export template<pint Nx, Ny, N_dly>
defproc nrn_hs_2D_array(a1of1 in[Nx*Ny]; a1of1 outx[Nx], outy[Ny];
                        power supply; bool reset_B) {

    // Reset distribution for the neurons: one sigbuf with Ny outputs,
    // each of which feeds a per-row sigbuf with Nx outputs.
    sigbuf<Ny> rsbx(.in = reset_B, .supply = supply);
    sigbuf<Nx> rsb[Ny]; // ResetSigBuf

    // Internal line channels; the external outx/outy are buffered copies.
    a1of1 _outx[Nx], _outy[Ny];

    // Note on the ack buffers below: this should be generalised,
    // and probably won't even be done by ACT/innovus anyway.
    // TODO: do it properly with sigbufs?

    // Delay fifos emulate the fact that the line pull downs are at the end
    // of the line, and thus slow. Per the original note, with N_dly = 0 the
    // delay fifo is meant to be just a pipe.

    // ---- x lines ----
    BUF_X4 out_req_buf_x[Nx];
    BUF_X4 out_ack_buf_x[Nx];
    delay_fifo<N_dly> dly_x[Nx];
    nrn_line_end_pull_down pd_x[Nx];
    sigbuf<Nx> rsb_pd_x(.in = reset_B, .supply = supply);
    KEEP_X1 keep_x[Nx];
    (i:Nx:
        // req buffer: internal -> external
        out_req_buf_x[i].vdd = supply.vdd;
        out_req_buf_x[i].vss = supply.vss;
        out_req_buf_x[i].a = _outx[i].r;
        out_req_buf_x[i].y = outx[i].r;

        // ack buffer: external -> internal
        out_ack_buf_x[i].vdd = supply.vdd;
        out_ack_buf_x[i].vss = supply.vss;
        out_ack_buf_x[i].a = outx[i].a;
        out_ack_buf_x[i].y = _outx[i].a;

        // delayed ack drives the line req pull down
        dly_x[i].supply = supply;
        dly_x[i].in = _outx[i].a;
        pd_x[i].supply = supply;
        pd_x[i].reset_B = rsb_pd_x.out[i];
        pd_x[i].in = dly_x[i].out;
        pd_x[i].out = _outx[i].r;

        // keep on the internal req line
        keep_x[i].vdd = supply.vdd;
        keep_x[i].vss = supply.vss;
        keep_x[i].y = _outx[i].r;
    )

    // ---- y lines ----
    BUF_X4 out_req_buf_y[Ny];
    BUF_X4 out_ack_buf_y[Ny];
    delay_fifo<N_dly> dly_y[Ny];
    nrn_line_end_pull_down pd_y[Ny];
    sigbuf<Ny> rsb_pd_y(.in = reset_B, .supply = supply);
    KEEP_X1 keep_y[Ny];
    (j:Ny:
        // per-row reset buffer
        rsb[j].supply = supply;
        rsb[j].in = rsbx.out[j];

        // req buffer: internal -> external
        out_req_buf_y[j].vdd = supply.vdd;
        out_req_buf_y[j].vss = supply.vss;
        out_req_buf_y[j].a = _outy[j].r;
        out_req_buf_y[j].y = outy[j].r;

        // ack buffer: external -> internal
        out_ack_buf_y[j].vdd = supply.vdd;
        out_ack_buf_y[j].vss = supply.vss;
        out_ack_buf_y[j].a = outy[j].a;
        out_ack_buf_y[j].y = _outy[j].a;

        // delayed ack drives the line req pull down
        dly_y[j].supply = supply;
        dly_y[j].in = _outy[j].a;
        pd_y[j].supply = supply;
        pd_y[j].reset_B = rsb_pd_y.out[j];
        pd_y[j].in = dly_y[j].out;
        pd_y[j].out = _outy[j].r;

        // keep on the internal req line
        keep_y[j].vdd = supply.vdd;
        keep_y[j].vss = supply.vss;
        keep_y[j].y = _outy[j].r;
    )

    // ---- handshake grid ----
    nrn_hs_2D neurons[Nx*Ny];
    (j:0..Ny-1:
        (i:0..Nx-1:
            neurons[i + j*Nx].supply = supply;
            neurons[i + j*Nx].reset_B = rsb[j].out[i];
            neurons[i + j*Nx].in = in[i + j*Nx];
            neurons[i + j*Nx].outx = _outx[i];
            neurons[i + j*Nx].outy = _outy[j];
        )
    )
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
}
|
167
dataflow_neuro/interfaces.act
Normal file
167
dataflow_neuro/interfaces.act
Normal file
@ -0,0 +1,167 @@
|
||||
/*************************************************************************
|
||||
*
|
||||
* This file is part of ACT dataflow neuro library
|
||||
*
|
||||
* Copyright (c) 2022 University of Groningen - Ole Richter
|
||||
* Copyright (c) 2022 University of Groningen - Michele Mastella
|
||||
* Copyright (c) 2022 University of Groningen - Hugh Greatorex
|
||||
* Copyright (c) 2022 University of Groningen - Madison Cotteret
|
||||
*
|
||||
*
|
||||
* This source describes Open Hardware and is licensed under the CERN-OHL-W v2 or later
|
||||
*
|
||||
* You may redistribute and modify this documentation and make products
|
||||
* using it under the terms of the CERN-OHL-W v2 (https:/cern.ch/cern-ohl).
|
||||
* This documentation is distributed WITHOUT ANY EXPRESS OR IMPLIED
|
||||
* WARRANTY, INCLUDING OF MERCHANTABILITY, SATISFACTORY QUALITY
|
||||
* AND FITNESS FOR A PARTICULAR PURPOSE. Please see the CERN-OHL-W v2
|
||||
* for applicable conditions.
|
||||
*
|
||||
* Source location: https://git.web.rug.nl/bics/actlib_dataflow_neuro
|
||||
*
|
||||
* As per CERN-OHL-W v2 section 4.1, should You produce hardware based on
|
||||
* these sources, You must maintain the Source Location visible in its
|
||||
* documentation.
|
||||
*
|
||||
**************************************************************************
|
||||
*/
|
||||
import "../../dataflow_neuro/cell_lib_async.act";
|
||||
import "../../dataflow_neuro/cell_lib_std.act";
|
||||
import "../../dataflow_neuro/treegates.act";
|
||||
import "../../dataflow_neuro/primitives.act";
|
||||
import std::channel;
|
||||
open std::channel;
|
||||
|
||||
// import std::func;
|
||||
open std;
|
||||
|
||||
import std::data;
|
||||
open std::data;
|
||||
|
||||
|
||||
namespace tmpl {
|
||||
namespace dataflow_neuro {
|
||||
|
||||
/**
 * Bundled data (non dual rail, with req)
 * to
 * quasi delay insensitive channel (dual rail).
 * Basically a buffer with a bitwise conversion in front of it.
 *
 * N is the number of data bits.
 * N_dly_cfg is the width of the dly_cfg select input of the programmable
 * delay placed on the incoming req.
 */
export template<pint N, N_dly_cfg>
defproc bd2qdi(bd<N> in; avMx1of2<N> out; bool? dly_cfg[N_dly_cfg]; power supply; bool? reset_B) {
    // Delay on req_in
    bool _req;
    delayprog<N_dly_cfg> dly(.in = in.r, .out = _req, .s = dly_cfg, .supply = supply);

    // sig buff the reset signal
    bool _reset_BX, _reset_BXX[N];
    BUF_X4 reset_buf(.a=reset_B, .y=_reset_BX,.vdd=supply.vdd,.vss=supply.vss);
    sigbuf<N> reset_bufarray(.in=_reset_BX, .out=_reset_BXX, .supply=supply);

    // sig buff the req
    bool _reqX, _reqXX[N];
    BUF_X4 req_buf(.a=_req, .y=_reqX,.vdd=supply.vdd,.vss=supply.vss);
    sigbuf<N> req_bufarray(.in=_reqX, .out=_reqXX, .supply=supply);

    // bd2qdi conversion
    // Each data line drives the t rail cell directly; its inverse (_inB)
    // drives the f rail cell.
    bool _inB[N];
    INV_X1 input_invs[N];
    (i:N:
        input_invs[i].a = in.d[i];
        input_invs[i].y = _inB[i];
        input_invs[i].vss = supply.vss;
        input_invs[i].vdd = supply.vdd;
    )

    // BUFFER
    // Basically the buffer_s but with the validity tree ripped out
    // and just connected to in_req instead.

    // Authors' note: a delay on the in_ack (so that en has time to disable
    // before the inputs change) was considered unnecessary, because the
    // off-chip path (pads -> uC -> host and back) already incurs a large
    // fixed delay.

    // Since the input is never invalid, also need a mechanism
    // for the output to become invalid, when an out_ack is received.

    // control
    bool _en;
    A_3C_RB_X4 inack_ctl(.c1=_en,.c2=_reqX,.c3=out.v,.y=in.a,.pr_B=_reset_BX,.sr_B=_reset_BX,.vdd=supply.vdd,.vss=supply.vss);
    A_1C1P_X1 en_ctl(.c1=in.a,.p1=out.v,.y=_en,.vdd=supply.vdd,.vss=supply.vss);

    // function
    bool _out_a_BX_t[N],_out_a_BX_f[N],_out_a_B,_en_X_t[N],_en_X_f[N];
    A_2C2N_RB_X4 f_buf_func[N];
    A_2C2N_RB_X4 t_buf_func[N];
    sigbuf<N> en_buf_t(.in=_en, .out=_en_X_t, .supply=supply);
    sigbuf<N> en_buf_f(.in=_en, .out=_en_X_f, .supply=supply);
    INV_X1 out_a_inv(.a=out.a,.y=_out_a_B, .vss = supply.vss, .vdd = supply.vdd);
    // The _f buffer drives the _f array and the _t buffer the _t array
    // (they were cross-wired; both carry the same signal _out_a_B).
    sigbuf<N> out_a_B_buf_f(.in=_out_a_B,.out=_out_a_BX_f, .supply=supply);
    sigbuf<N> out_a_B_buf_t(.in=_out_a_B,.out=_out_a_BX_t, .supply=supply);

    // check if you can also do single var to array connect a=b[N]
    // and remove them from the loop
    (i:N:
        // outputs
        f_buf_func[i].y=out.d.d[i].f;
        t_buf_func[i].y=out.d.d[i].t;
        // enable
        f_buf_func[i].c1=_en_X_f[i];
        t_buf_func[i].c1=_en_X_t[i];
        // inverted output ack
        f_buf_func[i].c2=_out_a_BX_f[i];
        t_buf_func[i].c2=_out_a_BX_t[i];
        // data: inverted bit to f, plain bit to t
        f_buf_func[i].n1=_inB[i];
        t_buf_func[i].n1=in.d[i];
        // delayed, buffered req
        f_buf_func[i].n2=_reqXX[i];
        t_buf_func[i].n2=_reqXX[i];
        // power
        f_buf_func[i].vdd=supply.vdd;
        t_buf_func[i].vdd=supply.vdd;
        f_buf_func[i].vss=supply.vss;
        t_buf_func[i].vss=supply.vss;
        // resets
        t_buf_func[i].pr_B = _reset_BXX[i];
        t_buf_func[i].sr_B = _reset_BXX[i];
        f_buf_func[i].pr_B = _reset_BXX[i];
        f_buf_func[i].sr_B = _reset_BXX[i];
    )
}
|
||||
|
||||
|
||||
|
||||
/**
 * quasi delay insensitive channel (dual rail)
 * to
 * Bundled data (non dual rail, with req).
 *
 * The input goes through a buffer. A validity tree over the buffer output
 * provides the buffer's valid signal and, through a programmable delay,
 * the bundled-data req. The true rail of each buffered bit is the
 * bundled-data bit.
 *
 * N is the number of data bits.
 * N_dly_cfg is the width of the dly_cfg select input of the programmable delay.
 */
export template<pint N, N_dly_cfg>
defproc qdi2bd(avMx1of2<N> in; bd<N> out; bool? dly_cfg[N_dly_cfg]; power supply; bool? reset_B) {

    // Buffer
    buffer<N> buf(.in = in, .supply = supply, .reset_B = reset_B);
    buf.out.a = out.a;

    // Vtree over the buffer output
    vtree<N> out_vtree(.supply = supply);
    (i:N:
        out_vtree.in.d[i].t = buf.out.d.d[i].t;
        out_vtree.in.d[i].f = buf.out.d.d[i].f;
    )
    buf.out.v = out_vtree.out;

    // Delay from validity to out.r.
    // The input is connected once, via the port list.
    delayprog<N_dly_cfg> dly(.in = out_vtree.out, .out = out.r, .s = dly_cfg, .supply = supply);

    // Wire output data bits to buffer True lines
    (i:N:
        buf.out.d.d[i].t = out.d[i];
    )
}
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
||||
}
|
@ -165,7 +165,7 @@ namespace tmpl {
|
||||
fifo_element[i].supply = supply;
|
||||
fifo_element[i].reset_B = _reset_BXX[i];
|
||||
)
|
||||
fifo_element[N-1].out = out;
|
||||
fifo_element[M-1].out = out;
|
||||
|
||||
// reset buffers
|
||||
bool _reset_BX;
|
||||
@ -685,26 +685,90 @@ namespace tmpl {
|
||||
(i:((1<<N)-1):dly[i].vss = supply.vss;)
|
||||
}
|
||||
|
||||
export
|
||||
defproc line_end_pull_up (a1of1 in; bool? reset_B; power supply; bool! out)
|
||||
{
|
||||
bool _out, __out, nor_out;
|
||||
BUF_X4 buf1(.a=in.a, .y=_out, .vdd=supply.vdd,.vss=supply.vss);
|
||||
BUF_X4 buf2(.a=_out, .y=__out, .vdd=supply.vdd,.vss=supply.vss);
|
||||
// Non programmable delays
|
||||
// N is number of delays to have in series (not log!!).
|
||||
// Is useful for testing purposes.
|
||||
// But should probably remove before running innovus etc.
|
||||
export template<pint N>
|
||||
defproc delay_fifo (bool out; bool in; power supply) {
|
||||
{ N >= 0 : "What?" };
|
||||
[N >= 1 ->
|
||||
DLY4_X1 dly[N];
|
||||
|
||||
NOR2_X1 aenor(.a=_out, .b=reset_B, .y = nor_out, .vdd=supply.vdd,.vss=supply.vss);
|
||||
dly[0].vdd = supply.vdd;
|
||||
dly[0].vss = supply.vss;
|
||||
dly[0].a = in;
|
||||
|
||||
PULLUP_X4 pull_up(.a=nor_out, .y=out);
|
||||
(i:1..N-1:
|
||||
dly[i].vdd = supply.vdd;
|
||||
dly[i].vss = supply.vss;
|
||||
dly[i].a = dly[i-1].y;
|
||||
)
|
||||
|
||||
dly[N-1].vdd = supply.vdd;
|
||||
dly[N-1].vss = supply.vss;
|
||||
dly[N-1].y = out;
|
||||
[] N = 1 ->
|
||||
in = out;
|
||||
]
|
||||
|
||||
}
|
||||
|
||||
defproc line_end_pull_down (a1of1 in; bool? reset_B; power supply; bool! out)
|
||||
|
||||
/**
|
||||
* Appends a hard-coded word "VAL" to an input.
|
||||
* Works by piping through all sigs, but adding
|
||||
* some extra sigs when the input is valid.
|
||||
* N is size of channel to pipe through.
|
||||
* NVAL is size of word to be put on output.
|
||||
* VAL is word to be put on output.
|
||||
* Output looks like
|
||||
* 0..............N........N+NVAL-1
|
||||
* --input_data----LSB....MSB
|
||||
*
|
||||
*/
|
||||
export template<pint N, NVAL, VAL>
|
||||
defproc append (avMx1of2<N> in; avMx1of2<N+NVAL> out; power supply)
|
||||
{
|
||||
bool _out, __out, nor_out;
|
||||
BUF_X4 buf1(.a=in.a, .y=_out, .vdd=supply.vdd,.vss=supply.vss);
|
||||
BUF_X4 buf2(.a=_out, .y=__out, .vdd=supply.vdd,.vss=supply.vss);
|
||||
{ N >= 0 : "What?" };
|
||||
{ NVAL >= 0 : "What?" };
|
||||
{ NVAL < 1<<VAL : "VAL too big!" };
|
||||
|
||||
NOR2_X1 aenor(.a=_out, .b=reset_B, .y = nor_out, .vdd=supply.vdd,.vss=supply.vss);
|
||||
// valid tree
|
||||
vtree<N> in_val(.supply = supply);
|
||||
(i:N:
|
||||
in_val.in.d[i].t = in.d.d[i].t;
|
||||
in_val.in.d[i].f = in.d.d[i].f;
|
||||
)
|
||||
|
||||
// wire through most signals
|
||||
(i:N:
|
||||
in.d.d[i].t = out.d.d[i].t;
|
||||
in.d.d[i].f = out.d.d[i].f;
|
||||
)
|
||||
in.a = out.a;
|
||||
in.v = out.v;
|
||||
|
||||
// appender
|
||||
pint bitval;
|
||||
sigbuf<NVAL> sb(.in = in_val.out, .supply = supply);
|
||||
TIELO_X1 tielows[NVAL];
|
||||
(i:NVAL:tielows[i].vss = supply.vss; tielows[i].vdd = supply.vdd;)
|
||||
(i:0..NVAL-1:
|
||||
bitval = (VAL & ( 1 << i )) >> i;
|
||||
[ bitval = 1 ->
|
||||
out.d.d[i+N].t = sb.out[i];
|
||||
out.d.d[i+N].f = tielows[i].y;
|
||||
[] bitval = 0 ->
|
||||
out.d.d[i+N].f = sb.out[i];
|
||||
out.d.d[i+N].t = tielows[i].y;
|
||||
[] bitval >= 2 -> {false : "fuck"};
|
||||
]
|
||||
)
|
||||
|
||||
PULLUP_X4 pull_down(.a=nor_out, .y=out);
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
}}
|
||||
|
Reference in New Issue
Block a user