Merge branch 'dev' into HEAD

This commit is contained in:
2022-03-30 15:09:59 +02:00
35 changed files with 59059 additions and 913 deletions

View File

@ -26,6 +26,33 @@
namespace tmpl {
namespace dataflow_neuro{
// KEEP_X1: keeper cell for node y, built from two cross-coupled inverters.
// y drives the internal node _y through a normal inverter rule, and _y drives
// y back through an inverter rule tagged [weak=1].
// NOTE(review): the weak feedback is presumably there so that any external
// driver on y can overpower the keeper - confirm against the cell layout.
// y carries no direction flag: it is both read and (weakly) driven here.
export defcell KEEP_X1 (bool y; bool vdd, vss) {
bool _y;
prs{
// forward inverter: _y is the complement of y
y => _y-
// weak feedback inverter: restores y from _y
[weak=1] _y => y-
}
// sizing directives, identical to the other X1 cells visible in this file
sizing {
leak_adjust <- 1;
p_n_mode <- 1;
y {-1}; _y{-1}
}
}
// A_1C2N_RB_X1: asymmetric gate with one "c" input (c1, used in both the
// pull-up and pull-down of _y), two pull-down-only inputs (n1, n2) and two
// active-low reset inputs (pr_B in the pull-up, sr_B in the pull-down).
// Seen from the output y (the inverse of the internal node _y):
//   y goes high when c1, n1, n2 and sr_B are all high;
//   y goes low  when c1 is low or pr_B is low.
// In every other input combination no rule for _y fires.
export defcell A_1C2N_RB_X1 (bool! y; bool? c1,n1,n2,pr_B, sr_B; bool vdd, vss) {
bool _y;
prs{
// pull-up of _y: c1 low, or reset (pr_B low)
(~c1)|~pr_B -> _y+
// pull-down of _y: all inputs high and not in reset (sr_B high)
c1 & n1 & n2 & sr_B -> _y-
// output inverter
_y => y-
}
sizing {
leak_adjust <- 1;
p_n_mode <- 1;
y {-1}; _y{-1}
}
}
export defcell A_1C1P2N_RB_X1 (bool! y; bool? c1,p1,n1,n2,pr_B, sr_B; bool vdd, vss) {
bool _y;
prs{
@ -41,6 +68,22 @@ namespace tmpl {
}
}
// A_2C1P1N_RB_X1: asymmetric gate with two "c" inputs (c1, c2, used in both
// networks), one pull-up-only input (p1), one pull-down-only input (n1) and
// two active-low reset inputs (pr_B in the pull-up, sr_B in the pull-down).
// Seen from the output y (the inverse of the internal node _y):
//   y goes high when c1, c2, n1 and sr_B are all high;
//   y goes low  when p1, c1 and c2 are all low, or when pr_B is low.
// In every other input combination no rule for _y fires.
export defcell A_2C1P1N_RB_X1 (bool! y; bool? c1,c2,p1,n1,pr_B,sr_B; bool vdd, vss) {
bool _y;
prs{
// pull-up of _y: p1, c1, c2 all low, or reset (pr_B low)
(~p1 & ~c1 & ~c2)|~pr_B -> _y+
// pull-down of _y: c1, c2, n1 all high and not in reset (sr_B high)
c1 & c2 & n1 & sr_B -> _y-
// output inverter
_y => y-
}
sizing {
leak_adjust <- 1;
p_n_mode <- 1;
y {-1}; _y{-1}
}
}
export defcell A_1C1P2N_R_X1 (bool! y; bool? c1,p1,n1,n2,pr_B, sr_B; bool vdd, vss) {
prs{
(~p1 & ~c1)|~pr_B -> y-

View File

@ -48,6 +48,45 @@ open std::data;
namespace tmpl {
namespace dataflow_neuro {
/**
 * Dualrail decoder.
 * Nc is the number of dualrail input channels (address bits).
 * N is the number of decoded outputs.
 * Every input rail is fanned out through a sigbuf<N>. Output i is produced by
 * an Nc-input AND tree whose input j is the true rail of address bit j when
 * bit j of i is 1, and the false rail otherwise.
 * NOTE(review): out is driven by the AND trees but is declared with '?';
 * the direction flag is left as-is so the port list stays unchanged.
 */
export template<pint Nc, N>
defproc decoder_dualrail (Mx1of2<Nc> in; bool? out[N]; power supply) {
    // Signal buffers: one N-way fanout per input rail.
    sigbuf<N> in_tX[Nc];
    sigbuf<N> in_fX[Nc];
    (i:Nc:
        in_tX[i].supply = supply;
        in_tX[i].in = in.d[i].t;
        in_fX[i].supply = supply;
        in_fX[i].in = in.d[i].f;
    )

    // AND trees, one per output.
    pint bitval;
    andtree<Nc> atree[N];
    (k:0..N-1:atree[k].supply = supply;)
    // The output connection does not depend on the address bit, so it is made
    // once per tree instead of once per (tree, bit) pair.
    (k:0..N-1:atree[k].out = out[k];)
    (i:0..N-1:
        (j:0..Nc-1:
            bitval = (i & ( 1 << j )) >> j; // Get binary digit of integer i, column j
            [bitval = 1 ->
                atree[i].in[j] = in_tX[j].out[i];
            []bitval = 0 ->
                atree[i].in[j] = in_fX[j].out[i];
            []bitval >= 2 -> {false : "decoder_dualrail: extracted address bit is not 0 or 1"};
            ]
        )
    )
}
/**
* 2D decoder which uses a configurable delay from the VCtrees to buffer ack.
* Nx is the x size of the decoder array
@ -63,9 +102,6 @@ namespace tmpl {
// Buffer to receive concat(x,y) address packet
buffer<NxC+NyC> addr_buf(.in = in, .reset_B = reset_B, .supply = supply);
// NEED TO BUFFER OUTPUTS FROM BUFFER I RECKON
// Validity trees
vtree<NxC> vtree_x (.supply = supply);
@ -91,39 +127,140 @@ namespace tmpl {
// FOR TESTING PURPOSES
// !!!!!!!!!!!!!!!!
// AND trees
pint bitval;
andtree<NxC> atree_x[Nx];
(k:0..Nx-1:atree_x[k].supply = supply;)
(i:0..Nx-1:
(j:0..NxC-1:
bitval = (i & ( 1 << j )) >> j; // Get binary digit of integer i, column j
[bitval = 1 ->
atree_x[i].in[j] = addr_buf.out.d.d[j].t;
[]bitval = 0 ->
atree_x[i].in[j] = addr_buf.out.d.d[j].f;
[]bitval >= 2 -> {false : "fuck"};
]
atree_x[i].out = outx[i];
)
)
andtree<NyC> atree_y[Ny];
(k:0..Ny-1:atree_y[k].supply = supply;)
(i:0..Ny-1:
(j:0..NyC-1:
bitval = (i & ( 1 << j )) >> j; // Get binary digit of integer i, column j
[bitval = 1 ->
atree_y[i].in[j] = addr_buf.out.d.d[j+NxC].t;
[]bitval = 0 ->
atree_y[i].in[j] = addr_buf.out.d.d[j+NxC].f;
]
atree_y[i].out = outy[i];
)
)
// Decoder X/Y And trees
decoder_dualrail<NxC,Nx> d_dr_x(.out = outx, .supply = supply);
(i:0..NxC-1:d_dr_x.in.d[i] = addr_buf.out.d.d[i];)
decoder_dualrail<NyC,Ny> d_dr_y(.out = outy, .supply = supply);
(i:0..NyC-1:d_dr_y.in.d[i] = addr_buf.out.d.d[i+NxC];)
}
// Grid of 2-input AND gates: out[x + y*Nx] = inx[x] & iny[y]
// for every x in 0..Nx-1 and y in 0..Ny-1.
export template<pint Nx, Ny>
defproc and_grid(bool! out[Nx*Ny]; bool? inx[Nx], iny[Ny]; power supply) {
    AND2_X1 ands[Nx*Ny];

    // Power for every gate.
    (k:Nx*Ny:
        ands[k].vdd = supply.vdd;
        ands[k].vss = supply.vss;
    )

    // Row-major wiring: row index is y, column index is x.
    (row:Ny:
        (col:Nx:
            ands[col + row*Nx].y = out[col + row*Nx];
            ands[col + row*Nx].a = inx[col];
            ands[col + row*Nx].b = iny[row];
        )
    )
}
/**
* 2D decoder which uses synapse handshaking using line pulldowns.
* NxC / NyC are the numbers of dualrail address bits for x / y; the input
* channel is NxC+NyC bits wide with the x bits first.
* Nx / Ny are the x / y sizes of the decoder array. There are Nx*Ny outputs;
* the pull-down wiring below uses index x + Nx*y for position (x,y).
* NOTE(review): Nx and Ny are presumably 2^NxC and 2^NyC; they are passed in
* separately rather than derived (original note: "we can't do logs...").
* The req on a1of1 out is the req to each synapse.
* The ack back from each line should go high when the synapse is charged.
* N_dly is the number of delay elements placed in front of each line-end
* pull-up.
* It can be set to 0.
*/
export template<pint NxC, NyC, Nx, Ny, N_dly>
defproc decoder_2d_hs (avMx1of2<NxC+NyC> in; a1of1 out[Nx*Ny]; bool? reset_B; power supply) {
// Buffer to receive concat(x,y) address packet
buffer<NxC+NyC> addr_buf(.in = in, .reset_B = reset_B, .supply = supply);
// Decoder X/Y And trees
// x decoder takes address bits 0..NxC-1, y decoder takes bits NxC..NxC+NyC-1
decoder_dualrail<NxC,Nx> d_dr_x(.supply = supply);
(i:0..NxC-1:d_dr_x.in.d[i] = addr_buf.out.d.d[i];)
decoder_dualrail<NyC,Ny> d_dr_y(.supply = supply);
(i:0..NyC-1:d_dr_y.in.d[i] = addr_buf.out.d.d[i+NxC];)
// Validity
// Separate validity trees for the x and y halves of the address;
// C2el combines their outputs into the buffer's output-valid signal.
vtree<NxC> vtree_x (.supply = supply);
vtree<NyC> vtree_y (.supply = supply);
(i:0..NxC-1:vtree_x.in.d[i].t = addr_buf.out.d.d[i].t;)
(i:0..NxC-1:vtree_x.in.d[i].f = addr_buf.out.d.d[i].f;)
(i:0..NyC-1:vtree_y.in.d[i].t = addr_buf.out.d.d[i+NxC].t;)
(i:0..NyC-1:vtree_y.in.d[i].f = addr_buf.out.d.d[i+NxC].f;)
A_2C_B_X1 C2el(.c1 = vtree_x.out, .c2 = vtree_y.out, .y = addr_buf.out.v,
.vdd = supply.vdd, .vss = supply.vss);
// and grid for reqs into synapses
and_grid<Nx, Ny> _and_grid(.inx = d_dr_x.out, .iny = d_dr_y.out, .supply = supply);
(i:Nx*Ny: out[i].r = _and_grid.out[i];)
// Acknowledge pull down time
// Pull UPs on the reqB lines by synapses (easier to invert).
bool _out_reqsB[Nx], _out_acksB[Nx]; // The vertical output ack lines from each syn.
// One two-input pull-down per synapse; all Ny pull-downs of column i share
// the same line _out_reqsB[i] and the same second input _out_acksB[i].
PULLDOWN2_X4 req_pulldowns[Nx*Ny];
pint index;
(i:Nx:
(j:Ny:
index = i + Nx*j;
req_pulldowns[index].a = out[index].a;
req_pulldowns[index].b = _out_acksB[i];
req_pulldowns[index].y = _out_reqsB[i];
req_pulldowns[index].vss = supply.vss;
req_pulldowns[index].vdd = supply.vdd;
)
)
// ReqB keep cells
// One keeper per column line, holding _out_reqsB[i] between pulls.
KEEP_X1 req_keeps[Nx];
(i:Nx:
req_keeps[i].y = _out_reqsB[i];
req_keeps[i].vdd = supply.vdd;
req_keeps[i].vss = supply.vss;
)
// req-ack buffers
// Each column line is buffered back onto its own _out_acksB[i].
// Only output 0 of each Ny-wide sigbuf is connected (hence the warning below).
sigbuf<Ny> req_bufs[Nx];
(i:Nx:
req_bufs[i].in = _out_reqsB[i];
req_bufs[i].out[0] = _out_acksB[i]; // DANGER DANGER
req_bufs[i].supply = supply;
)
// Line end pull UPs (triggered once synapse reqs removed)
// Per column: pull-up input = (delayed _out_acksB[i] OR x-decoder output i) AND reset_B.
delay_fifo<N_dly> pu_dlys[Nx];
OR2_X1 pu_ORs[Nx];
PULLUP_X4 pu[Nx]; // TODO probably replace this with variable strength PU
AND2_X1 pu_ANDs[Nx];
(i:Nx:
pu_dlys[i].in = _out_acksB[i];
pu_dlys[i].supply = supply;
pu_ORs[i].a = pu_dlys[i].out;
pu_ORs[i].b = d_dr_x.out[i];
pu_ORs[i].vdd = supply.vdd;
pu_ORs[i].vss = supply.vss;
pu_ANDs[i].a = pu_ORs[i].y;
pu_ANDs[i].b = reset_B; // TODO buffer
pu_ANDs[i].vdd = supply.vdd;
pu_ANDs[i].vss = supply.vss;
pu[i].a = pu_ANDs[i].y;
pu[i].y = _out_reqsB[i];
pu[i].vdd = supply.vdd;
pu[i].vss = supply.vss;
)
// ORtree from all output reqs, back to the buffer ack.
// This is instead of the ack that came from the delayed validity trees,
// in decoder_2d_dly.
// Each column line is inverted (reqB -> req) before entering the OR tree.
ortree<Nx> _ortree(.out = addr_buf.out.a, .supply = supply);
INV_X1 out_req_invs[Nx];
(i:Nx:
out_req_invs[i].a = _out_reqsB[i];
out_req_invs[i].vdd = supply.vdd;
out_req_invs[i].vss = supply.vss;
_ortree.in[i] = out_req_invs[i].y;
)
}
/*
@ -224,21 +361,6 @@ namespace tmpl {
}
// Grid of 2-input AND gates: out[x + y*Nx] = inx[x] & iny[y]
// for every x in 0..Nx-1 and y in 0..Ny-1.
export template<pint Nx, Ny>
defproc and_grid(bool! out[Nx*Ny]; bool? inx[Nx], iny[Ny]; power supply) {
    AND2_X1 ands[Nx*Ny];

    // Power for every gate.
    (k:Nx*Ny:
        ands[k].vdd = supply.vdd;
        ands[k].vss = supply.vss;
    )

    // Row-major wiring: row index is y, column index is x.
    (row:Ny:
        (col:Nx:
            ands[col + row*Nx].y = out[col + row*Nx];
            ands[col + row*Nx].a = inx[col];
            ands[col + row*Nx].b = iny[row];
        )
    )
}
// Generates the OR-trees required to go from
// N one-hot inputs to Nc dual rail binary encoding.
export template<pint Nc, N>
@ -348,7 +470,7 @@ namespace tmpl {
export template<pint NxC, NyC, Nx, Ny, ACK_STRENGTH>
defproc encoder2D(a1of1 x[Nx]; a1of1 y[Ny]; avMx1of2<(NxC + NyC)> out; power supply; bool reset_B) {
defproc encoder2D(a1of1 inx[Nx]; a1of1 iny[Ny]; avMx1of2<(NxC + NyC)> out; power supply; bool reset_B) {
// Reset buffers
pint H = 2*(NxC + NyC); //Reset strength? to be investigated
bool _reset_BX,_reset_BXX[H];
@ -359,10 +481,10 @@ namespace tmpl {
a1of1 _arb_out_x, _arb_out_y;
a1of1 _x_temp[Nx],_y_temp[Ny]; // For wiring the reqs to the arbtrees
(i:Nx:
_x_temp[i].r = x[i].r;
_x_temp[i].r = inx[i].r;
)
(i:Ny:
_y_temp[i].r = y[i].r;
_y_temp[i].r = iny[i].r;
)
arbtree<Nx> Xarb(.in = _x_temp,.out = _arb_out_x,.supply = supply);
arbtree<Ny> Yarb(.in = _y_temp,.out = _arb_out_y,.supply = supply);
@ -372,12 +494,12 @@ namespace tmpl {
sigbuf_1output<ACK_STRENGTH> y_ack_arb[Ny];
(i:Nx:
x_ack_arb[i].in = _x_temp[i].a;
x_ack_arb[i].out = x[i].a;
x_ack_arb[i].out = inx[i].a;
x_ack_arb[i].supply = supply;
)
(i:Ny:
y_ack_arb[i].in = _y_temp[i].a;
y_ack_arb[i].out = y[i].a;
y_ack_arb[i].out = iny[i].a;
y_ack_arb[i].supply = supply;
)
@ -408,7 +530,7 @@ namespace tmpl {
// X_req ORtree
bool _x_req_array[Nx], _x_v_B;
(i:Nx:_x_req_array[i] = x[i].r;)
(i:Nx:_x_req_array[i] = inx[i].r;)
ortree<Nx> x_req_ortree(.in = _x_req_array,.out = _x_v,.supply = supply); //todo BUFF
INV_X1 not_x_req_ortree(.a = _x_v,.y = _x_v_B);
@ -450,17 +572,17 @@ namespace tmpl {
// Encoders
bool x_acks[Nx];
Mx1of2<NxC> x_enc_out;
(i:Nx:x_acks[i] = x[i].a;)
(i:Nx:x_acks[i] = inx[i].a;)
dualrail_encoder<NxC, Nx> x_encoder(.in = x_acks, .out = x_enc_out, .supply = supply);
bool y_acks[Nx];
bool y_acks[Ny];
Mx1of2<NyC> y_enc_out;
(i:Ny:y_acks[i] = y[i].a;)
(i:Ny:y_acks[i] = iny[i].a;)
dualrail_encoder<NyC, Ny> y_encoder(.in = y_acks, .out = y_enc_out, .supply = supply);
// Valid trees
vtree<NxC> vtree_x(.in = x_enc_out, .out = _in_x_v, .supply = supply);
vtree<NxC> vtree_y(.in = y_enc_out, .out = _in_y_v, .supply = supply);
vtree<NyC> vtree_y(.in = y_enc_out, .out = _in_y_v, .supply = supply);
// Buffer func thing
Mx1of2<NxC + NyC> into_buffer;
@ -474,6 +596,185 @@ namespace tmpl {
/**
* Neuron handshaking.
* Looks for a rising edge on the neuron req.
* Then performs a 2d handshake on outy, then on outx
* (the x request gate below additionally requires outy.a).
* in is the channel from the neuron; outx / outy are the shared column / row
* lines. This cell only pulls the outx.r / outy.r lines through PULLUP_X4
* cells; nothing in this process pulls them back down.
*/
export
defproc nrn_hs_2D(a1of1 in; a1of1 outx; a1of1 outy; power supply; bool reset_B) {
// Locally buffered reset, used by both resettable gates below.
bool _reset_BX;
BUF_X2 reset_buf(.a = reset_B, .y = _reset_BX, .vdd = supply.vdd, .vss = supply.vss);
bool _en, _req;
// A_1C2N_RB_X1 A_ack(.c1 = _en, .n1 = _req, .n2 = in.r, .y = in.a,
// .pr_B = _reset_BX, .sr_B = _reset_BX, .vss = supply.vss, .vdd = supply.vdd);
// Switched it back
// Because had the problem that if the req was not removed in time,
// it would be recounted as a double spike,
// since in.req is still high after the out has been dealt with.
// Input acknowledge: driven from _en, in.r and _req.
A_2C1N_RB_X1 A_ack(.c1 = _en, .c2 = in.r, .n1 = _req, .y = in.a,
.pr_B = _reset_BX, .sr_B = _reset_BX, .vss = supply.vss, .vdd = supply.vdd);
// Enable: driven from in.a and _req.
A_1C1P_X1 A_en(.p1 = _req, .c1 = in.a, .y = _en,
.vss = supply.vss, .vdd = supply.vdd);
// Inverted copies of the two output acks.
bool _y_a_B, _x_a_B;
INV_X2 inv_x(.a = outx.a, .y = _x_a_B, .vss = supply.vss, .vdd = supply.vdd);
INV_X2 inv_y(.a = outy.a, .y = _y_a_B, .vss = supply.vss, .vdd = supply.vdd);
// Internal request: per the cell's rules, _req rises when _en, ~outy.a and
// in.r are all high, and falls when ~outx.a, _en and ~outy.a are all low
// (or on reset).
A_2C1P1N_RB_X1 A_req(.p1 = _x_a_B, .c1 = _en, .c2 = _y_a_B, .n1 = in.r, .y = _req,
.pr_B = _reset_BX, .sr_B = _reset_BX, .vdd = supply.vdd, .vss = supply.vss);
// y_req pull up
// NAND output is low only while _req is high and outy.a is still low.
// NOTE(review): PULLUP_X4 presumably pulls its output high while its input is low - confirm.
NAND2_X1 nand_y(.a = _y_a_B, .b = _req, .vdd = supply.vdd, .vss = supply.vss);
PULLUP_X4 pu_y(.a = nand_y.y, .y = outy.r, .vdd = supply.vdd, .vss = supply.vss);
// x_req pull up
// Same as above for x, but additionally gated by outy.a, so x follows y.
NAND3_X1 nand_x(.a = _x_a_B, .b = _req, .c = outy.a, .vdd = supply.vdd, .vss = supply.vss);
PULLUP_X4 pu_x(.a = nand_x.y, .y = outx.r, .vdd = supply.vdd, .vss = supply.vss);
}
// Line-end pull-down for a shared request line.
// in is passed through two buffers and an inverter, then NANDed with
// reset_B, so the pull-down input is (buffered in) OR (NOT reset_B):
// it is high while in is high or while reset is asserted (reset_B low).
// NOTE(review): PULLDOWN_X4 presumably pulls out low while its input is
// high - confirm against the cell library.
export
defproc nrn_line_end_pull_down (bool? in; bool? reset_B; power supply; bool! out)
{
    bool _out, __out, nand_out;
    BUF_X1 buf1(.a=in, .y=_out, .vdd=supply.vdd,.vss=supply.vss);
    BUF_X1 buf2(.a=_out, .y=__out, .vdd=supply.vdd,.vss=supply.vss);
    INV_X1 inv(.a = __out, .vdd=supply.vdd,.vss =supply.vss);
    NAND2_X1 aenor(.a=inv.y, .b=reset_B, .y = nand_out, .vdd=supply.vdd,.vss=supply.vss);
    // The pull-down cell gets its supply rails connected like every other
    // cell instance in this process; previously they were left unconnected.
    PULLDOWN_X4 pull_down(.a=nand_out, .y=out, .vdd=supply.vdd,.vss=supply.vss);
}
/**
* A 2d grid of neuron handshakers.
* Should then slot into the encoder.
* Each neuron has an a1of1 channel (in), which is tripped when a neuron spikes.
* The neuron at column i, row j uses in[i + j*Nx] and shares the column line
* outx[i] and the row line outy[j] with the other neurons on that column / row.
* N_dly is number of delay elements to add to line pull down,
* for the purpose of running ACT sims.
* It should probably be set to 0 though.
*/
export template<pint Nx, Ny, N_dly>
defproc nrn_hs_2D_array(a1of1 in[Nx*Ny]; a1of1 outx[Nx], outy[Ny];
power supply; bool reset_B) {
// Make hella signal buffers
// Two-level reset fanout: rsbx splits reset_B into one copy per row,
// then rsb[j] splits row j's copy into one copy per neuron in that row.
sigbuf<Ny> rsbx(.in = reset_B, .supply = supply);
sigbuf<Nx> rsb[Ny]; // ResetSigBuf
(j:Ny:
rsb[j].in = rsbx.out[j];
rsb[j].supply = supply;
)
// Add buffers on output req lines
// _outx / _outy are the internal shared lines; the external outx / outy
// ports only see buffered copies of their reqs.
a1of1 _outx[Nx], _outy[Ny];
BUF_X4 out_req_buf_x[Nx];
(i:Nx:
out_req_buf_x[i].vss = supply.vss;
out_req_buf_x[i].vdd = supply.vdd;
out_req_buf_x[i].a = _outx[i].r;
out_req_buf_x[i].y = outx[i].r;
)
BUF_X4 out_req_buf_y[Ny];
(i:Ny:
out_req_buf_y[i].vss = supply.vss;
out_req_buf_y[i].vdd = supply.vdd;
out_req_buf_y[i].a = _outy[i].r;
out_req_buf_y[i].y = outy[i].r;
)
// Add buffers on output ack lines
// Note that this should be generalised.
// And probably won't even be done by ACT/innovus anyway
// TODO: do it properly with sigbufs?
// External acks are buffered onto the internal lines (opposite direction to the reqs).
BUF_X4 out_ack_buf_x[Nx];
(i:Nx:
out_ack_buf_x[i].vss = supply.vss;
out_ack_buf_x[i].vdd = supply.vdd;
out_ack_buf_x[i].a = outx[i].a;
out_ack_buf_x[i].y = _outx[i].a;
)
BUF_X4 out_ack_buf_y[Ny];
(i:Ny:
out_ack_buf_y[i].vss = supply.vss;
out_ack_buf_y[i].vdd = supply.vdd;
out_ack_buf_y[i].a = outy[i].a;
out_ack_buf_y[i].y = _outy[i].a;
)
// Create handshake grid
// Row-major indexing: index = i + j*Nx, with i the column (x) and j the row (y).
pint index;
nrn_hs_2D neurons[Nx*Ny];
(i:0..Nx-1:
(j:0..Ny-1:
index = i + j*Nx;
neurons[index].supply = supply;
neurons[index].reset_B = rsb[j].out[i];
neurons[index].in = in[index];
neurons[index].outx = _outx[i];
neurons[index].outy = _outy[j];
)
)
// Create delay fifos to emulate the fact that the line pull downs
// are at the end of the line, and thus slow.
// Note that if N_dly = 0, delay fifo is just a pipe.
delay_fifo<N_dly> dly_x[Nx];
delay_fifo<N_dly> dly_y[Ny];
// Create x line req pull downs
// Each column line's (delayed) internal ack drives the pull-down on that line's req.
// The pull-downs get their own reset fanout (rsb_pd_x), separate from the neurons'.
nrn_line_end_pull_down pd_x[Nx];
sigbuf<Nx> rsb_pd_x(.in = reset_B, .supply = supply);
(i:0..Nx-1:
dly_x[i].supply = supply;
dly_x[i].in = _outx[i].a;
pd_x[i].in = dly_x[i].out;
pd_x[i].out = _outx[i].r;
pd_x[i].reset_B = rsb_pd_x.out[i];
pd_x[i].supply = supply;
)
// Create y line req pull downs
// Same structure as the x lines, one pull-down per row line.
nrn_line_end_pull_down pd_y[Ny];
sigbuf<Ny> rsb_pd_y(.in = reset_B, .supply = supply);
(j:0..Ny-1:
dly_y[j].supply = supply;
dly_y[j].in = _outy[j].a;
pd_y[j].in = dly_y[j].out;
pd_y[j].out = _outy[j].r;
pd_y[j].reset_B = rsb_pd_y.out[j];
pd_y[j].supply = supply;
)
// Add keeps
// One keeper per internal req line, holding it between pull-ups and pull-downs.
KEEP_X1 keep_x[Nx];
(i:Nx:
keep_x[i].vdd = supply.vdd;
keep_x[i].vss = supply.vss;
keep_x[i].y = _outx[i].r;
)
KEEP_X1 keep_y[Ny];
(j:Ny:
keep_y[j].vdd = supply.vdd;
keep_y[j].vss = supply.vss;
keep_y[j].y = _outy[j].r;
)
}
}
}

View File

@ -0,0 +1,167 @@
/*************************************************************************
*
* This file is part of ACT dataflow neuro library
*
* Copyright (c) 2022 University of Groningen - Ole Richter
* Copyright (c) 2022 University of Groningen - Michele Mastella
* Copyright (c) 2022 University of Groningen - Hugh Greatorex
* Copyright (c) 2022 University of Groningen - Madison Cotteret
*
*
* This source describes Open Hardware and is licensed under the CERN-OHL-W v2 or later
*
* You may redistribute and modify this documentation and make products
* using it under the terms of the CERN-OHL-W v2 (https:/cern.ch/cern-ohl).
* This documentation is distributed WITHOUT ANY EXPRESS OR IMPLIED
* WARRANTY, INCLUDING OF MERCHANTABILITY, SATISFACTORY QUALITY
* AND FITNESS FOR A PARTICULAR PURPOSE. Please see the CERN-OHL-W v2
* for applicable conditions.
*
* Source location: https://git.web.rug.nl/bics/actlib_dataflow_neuro
*
* As per CERN-OHL-W v2 section 4.1, should You produce hardware based on
* these sources, You must maintain the Source Location visible in its
* documentation.
*
**************************************************************************
*/
import "../../dataflow_neuro/cell_lib_async.act";
import "../../dataflow_neuro/cell_lib_std.act";
import "../../dataflow_neuro/treegates.act";
import "../../dataflow_neuro/primitives.act";
import std::channel;
open std::channel;
// import std::func;
open std;
import std::data;
open std::data;
namespace tmpl {
namespace dataflow_neuro {
/**
* Bundled data (non dual rail, with req)
* to
* quasi delay insensitive channel (dual rail).
* Basically a buffer with a bitwise conversion in front of it.
* N is the data width; N_dly_cfg is the width of the configuration word
* (dly_cfg) of the programmable delay placed on the incoming req.
*/
export template<pint N, N_dly_cfg>
defproc bd2qdi(bd<N> in; avMx1of2<N> out; bool? dly_cfg[N_dly_cfg]; power supply; bool? reset_B) {
// Delay on req_in
bool _req;
delayprog<N_dly_cfg> dly(.in = in.r, .out = _req, .s = dly_cfg, .supply = supply);
// sig buff the reset signal
// _reset_BX feeds the control gate; _reset_BXX[i] feeds the two function cells of bit i.
bool _reset_BX, _reset_BXX[N];
BUF_X4 reset_buf(.a=reset_B, .y=_reset_BX,.vdd=supply.vdd,.vss=supply.vss);
sigbuf<N> reset_bufarray(.in=_reset_BX, .out=_reset_BXX, .supply=supply);
// sig buff the req
// _reqX feeds the control gate; _reqXX[i] feeds the two function cells of bit i.
bool _reqX, _reqXX[N];
BUF_X4 req_buf(.a=_req, .y=_reqX,.vdd=supply.vdd,.vss=supply.vss);
sigbuf<N> req_bufarray(.in=_reqX, .out=_reqXX, .supply=supply);
// bd2qdi conversion
// Each data line drives the true-rail cell directly; its inverse (_inB)
// drives the false-rail cell.
bool _inB[N];
INV_X1 input_invs[N];
(i:N:
input_invs[i].a = in.d[i];
input_invs[i].y = _inB[i];
input_invs[i].vss = supply.vss;
input_invs[i].vdd = supply.vdd;
)
// BUFFER
// Basically the buffer_s but with the validity tree ripped out
// and just connected to in_req instead.
// And probably need a delay on the in_ack to ensure en has time to disable
// before the inputs go to another state.
// Actually apparently no: there is a fixed, huge delay, already incurred
// by communicating with pads-> uC -> windows 95 and back again.
// Since the input is never invalid, also need a mechanism
// for the output to become invalid, when an out_ack is received.
//control
bool _en;
A_3C_RB_X4 inack_ctl(.c1=_en,.c2=_reqX,.c3=out.v,.y=in.a,.pr_B=_reset_BX,.sr_B=_reset_BX,.vdd=supply.vdd,.vss=supply.vss);
A_1C1P_X1 en_ctl(.c1=in.a,.p1=out.v,.y=_en,.vdd=supply.vdd,.vss=supply.vss);
//function
bool _out_a_BX_t[N],_out_a_BX_f[N],_out_a_B,_en_X_t[N],_en_X_f[N];
A_2C2N_RB_X4 f_buf_func[N];
A_2C2N_RB_X4 t_buf_func[N];
sigbuf<N> en_buf_t(.in=_en, .out=_en_X_t, .supply=supply);
sigbuf<N> en_buf_f(.in=_en, .out=_en_X_f, .supply=supply);
INV_X1 out_a_inv(.a=out.a,.y=_out_a_B, .vss = supply.vss, .vdd = supply.vdd);
// NOTE(review): the instance names below are crossed with respect to the
// arrays they drive (..._buf_f drives ..._BX_t and vice versa). Both buffers
// have the same input, so the wiring is equivalent; only the names mislead.
sigbuf<N> out_a_B_buf_f(.in=_out_a_B,.out=_out_a_BX_t, .supply=supply);
sigbuf<N> out_a_B_buf_t(.in=_out_a_B,.out=_out_a_BX_f, .supply=supply);
// check if you can also do single var to array connect a=b[N]
// and remove them from the loop
// Per bit: c1 = enable, c2 = inverted output ack, n1 = data (or its
// inverse for the false rail), n2 = delayed req.
(i:N:
f_buf_func[i].y=out.d.d[i].f;
t_buf_func[i].y=out.d.d[i].t;
f_buf_func[i].c1=_en_X_f[i];
t_buf_func[i].c1=_en_X_t[i];
f_buf_func[i].c2=_out_a_BX_f[i];
t_buf_func[i].c2=_out_a_BX_t[i];
f_buf_func[i].n1=_inB[i];
t_buf_func[i].n1=in.d[i];
f_buf_func[i].n2=_reqXX[i];
t_buf_func[i].n2=_reqXX[i];
f_buf_func[i].vdd=supply.vdd;
t_buf_func[i].vdd=supply.vdd;
f_buf_func[i].vss=supply.vss;
t_buf_func[i].vss=supply.vss;
t_buf_func[i].pr_B = _reset_BXX[i];
t_buf_func[i].sr_B = _reset_BXX[i];
f_buf_func[i].pr_B = _reset_BXX[i];
f_buf_func[i].sr_B = _reset_BXX[i];
)
}
/**
 * Quasi delay insensitive channel (dual rail)
 * to
 * bundled data (non dual rail, with req).
 * N is the data width; N_dly_cfg is the width of the configuration word
 * (dly_cfg) of the programmable delay.
 * The input is buffered, a validity tree over the buffered data produces
 * both the buffer's output-valid signal and (after the programmable delay)
 * the bundled-data req. The bundled-data bits are the true rails.
 */
export template<pint N, N_dly_cfg>
defproc qdi2bd(avMx1of2<N> in; bd<N> out; bool? dly_cfg[N_dly_cfg]; power supply; bool? reset_B) {
    // Buffer; the bundled-data ack is used directly as the buffer's output ack.
    buffer<N> buf(.in = in, .supply = supply, .reset_B = reset_B);
    buf.out.a = out.a;

    // Vtree over the buffered dual-rail data.
    vtree<N> out_vtree(.supply = supply);
    (i:N:
        out_vtree.in.d[i].t = buf.out.d.d[i].t;
        out_vtree.in.d[i].f = buf.out.d.d[i].f;
    )
    buf.out.v = out_vtree.out;

    // Delay: validity -> req. The input is connected in the port list only;
    // the former extra "out_vtree.out = dly.in" repeated that same connection.
    delayprog<N_dly_cfg> dly(.in = out_vtree.out, .out = out.r, .s = dly_cfg, .supply = supply);

    // Wire output data bits to buffer True lines
    (i:N:
        buf.out.d.d[i].t = out.d[i];
    )
}
}
}

View File

@ -165,7 +165,7 @@ namespace tmpl {
fifo_element[i].supply = supply;
fifo_element[i].reset_B = _reset_BXX[i];
)
fifo_element[N-1].out = out;
fifo_element[M-1].out = out;
// reset buffers
bool _reset_BX;
@ -685,26 +685,90 @@ namespace tmpl {
(i:((1<<N)-1):dly[i].vss = supply.vss;)
}
export
defproc line_end_pull_up (a1of1 in; bool? reset_B; power supply; bool! out)
{
bool _out, __out, nor_out;
BUF_X4 buf1(.a=in.a, .y=_out, .vdd=supply.vdd,.vss=supply.vss);
BUF_X4 buf2(.a=_out, .y=__out, .vdd=supply.vdd,.vss=supply.vss);
// Non-programmable delay line.
// N is the number of DLY4_X1 delay cells to chain in series (not log!!).
// With N = 0 the process is a plain wire: in is connected straight to out.
// Is useful for testing purposes.
// But should probably remove before running innovus etc.
export template<pint N>
defproc delay_fifo (bool out; bool in; power supply) {
    { N >= 0 : "What?" };
    [ N >= 1 ->
        DLY4_X1 dly[N];
        // Power for every stage.
        (i:N:
            dly[i].vdd = supply.vdd;
            dly[i].vss = supply.vss;
        )
        // Chain: in -> dly[0] -> ... -> dly[N-1] -> out.
        dly[0].a = in;
        (i:1..N-1:
            dly[i].a = dly[i-1].y;
        )
        dly[N-1].y = out;
    [] N = 0 ->
        // Zero-length delay: pass-through. This guard used to read "N = 1",
        // which could never be selected, so N = 0 left out undriven.
        in = out;
    ]
}
defproc line_end_pull_down (a1of1 in; bool? reset_B; power supply; bool! out)
/**
 * Appends a hard-coded word "VAL" to an input.
 * Works by piping through all sigs, but adding
 * some extra sigs when the input is valid.
 * N is size of channel to pipe through.
 * NVAL is size (in bits) of word to be put on output.
 * VAL is word to be put on output; it must fit in NVAL bits.
 * Output looks like
 * 0..............N........N+NVAL-1
 * --input_data----LSB....MSB
 *
 * For each appended bit, the rail selected by VAL is driven by the input's
 * validity signal and the other rail is tied low.
 */
export template<pint N, NVAL, VAL>
defproc append (avMx1of2<N> in; avMx1of2<N+NVAL> out; power supply)
{
    { N >= 0 : "What?" };
    { NVAL >= 0 : "What?" };
    // VAL must be representable in NVAL bits. The operands of this check
    // used to be swapped (NVAL < 1<<VAL), which accepted oversized words
    // and rejected valid small ones.
    { VAL < (1 << NVAL) : "VAL too big!" };

    // valid tree
    vtree<N> in_val(.supply = supply);
    (i:N:
        in_val.in.d[i].t = in.d.d[i].t;
        in_val.in.d[i].f = in.d.d[i].f;
    )

    // wire through most signals
    (i:N:
        in.d.d[i].t = out.d.d[i].t;
        in.d.d[i].f = out.d.d[i].f;
    )
    in.a = out.a;
    in.v = out.v;

    // appender
    pint bitval;
    sigbuf<NVAL> sb(.in = in_val.out, .supply = supply);
    TIELO_X1 tielows[NVAL];
    (i:NVAL:tielows[i].vss = supply.vss; tielows[i].vdd = supply.vdd;)
    (i:0..NVAL-1:
        bitval = (VAL & ( 1 << i )) >> i; // Binary digit i of VAL
        [ bitval = 1 ->
            out.d.d[i+N].t = sb.out[i];
            out.d.d[i+N].f = tielows[i].y;
        [] bitval = 0 ->
            out.d.d[i+N].f = sb.out[i];
            out.d.d[i+N].t = tielows[i].y;
        [] bitval >= 2 -> {false : "append: extracted bit of VAL is not 0 or 1"};
        ]
    )
}
}}