Merge remote-tracking branch 'origin/dev' into dev

This commit is contained in:
Greatorex 2022-04-12 17:05:26 +02:00
commit 4af8fc9008
3 changed files with 100 additions and 47 deletions

View File

@ -47,7 +47,8 @@ N_NRN_X, N_NRN_Y, N_SYN_X, N_SYN_Y, // Number of neurons / synapses
NC_NRN_X, NC_NRN_Y, NC_SYN_X, NC_SYN_Y,
N_SYN_DLY_CFG,
N_NRN_MON_X, N_NRN_MON_Y, N_SYN_MON_X, N_SYN_MON_Y,
N_MON_AMZO_PER_SYN, // Number of signals that each synapse outputs to be monitored.
N_MON_AMZO_PER_SYN, N_MON_AMZO_PER_NRN, // Number of signals that each synapse outputs to be monitored.
N_FLAGS_PER_SYN, N_FLAGS_PER_NRN, // Number of signals that each nrn/syn recieves from the register.
N_BUFFERS,
N_LINE_PD_DLY, // Number of dummy delays to add line pull down
N_BD_DLY_CFG, N_BD_DLY_CFG2,
@ -57,78 +58,84 @@ defproc chip_texel (bd<N_IN> in, out;
Mx1of2<REG_NCW> reg_data[REG_M];
a1of1 synapses[N_SYN_X * N_SYN_Y];
a1of1 neurons[N_NRN_X * N_NRN_Y];
bool! nrn_mon_x[N_NRN_MON_X], nrn_mon_y[N_NRN_MON_Y];
bool! syn_mon_x[N_SYN_MON_X], syn_mon_y[N_SYN_MON_Y];
bool? syn_mon_AMZI[N_SYN_X * N_MON_AMZO_PER_SYN]; // Synapse column monitor outputs
bool? syn_mon_AMO[N_MON_AMZO_PER_SYN];
bool? syn_mon_AMZI[N_SYN_X * N_MON_AMZO_PER_SYN], nrn_mon_AMZI[N_NRN_X * N_MON_AMZO_PER_NRN];
bool! syn_mon_AMZO[N_MON_AMZO_PER_SYN], nrn_mon_AMZO[N_MON_AMZO_PER_NRN];
bool! syn_flags_EFO[N_FLAGS_PER_SYN], nrn_flags_EFO[N_FLAGS_PER_NRN];
bool? bd_dly_cfg[N_BD_DLY_CFG], bd_dly_cfg2[N_BD_DLY_CFG2];
bool? loopback_en;
power supply;
bool? reset_B){
bool _reset_BX;
BUF_X12 reset_buf(.a = reset_B, .y = _reset_BX, .vdd = supply.vdd, .vss = supply.vss);
pint index = 0; // Just useful
bd2qdi<N_IN, N_BD_DLY_CFG, N_BD_DLY_CFG2> _bd2qdi(.in = in, .dly_cfg = bd_dly_cfg, .dly_cfg2 = bd_dly_cfg2,
.reset_B = reset_B, .supply = supply);
fifo<N_IN,N_BUFFERS> fifo_in2fork(.in = _bd2qdi.out, .reset_B = reset_B, .supply = supply);
.reset_B = _reset_BX, .supply = supply);
fifo<N_IN,N_BUFFERS> fifo_in2fork(.in = _bd2qdi.out, .reset_B = _reset_BX, .supply = supply);
fork<N_IN> _fork(.in = fifo_in2fork.out, .reset_B = reset_B, .supply = supply);
fork<N_IN> _fork(.in = fifo_in2fork.out, .reset_B = _reset_BX, .supply = supply);
// Loopback
fifo<N_IN,N_BUFFERS> fifo_fork2drop(.in = _fork.out1, .reset_B = reset_B, .supply = supply);
fifo<N_IN,N_BUFFERS> fifo_fork2drop(.in = _fork.out1, .reset_B = _reset_BX, .supply = supply);
dropper_static<N_IN, false> _loopback_dropper(.in = fifo_fork2drop.out, .cond = loopback_en,
.supply = supply);
// Onwards
fifo<N_IN,N_BUFFERS> fifo_fork2dmx(.in = _fork.out2, .reset_B = reset_B, .supply = supply);
demux_bit_msb<N_IN-1> _demux(.in = fifo_fork2dmx.out, .reset_B = reset_B, .supply = supply);
fifo<N_IN,N_BUFFERS> fifo_fork2dmx(.in = _fork.out2, .reset_B = _reset_BX, .supply = supply);
demux_bit_msb<N_IN-1> _demux(.in = fifo_fork2dmx.out, .reset_B = _reset_BX, .supply = supply);
// Register
fifo<N_IN-1,N_BUFFERS> fifo_dmx2reg(.in = _demux.out2, .reset_B = reset_B, .supply = supply);
fifo<N_IN-1,N_BUFFERS> fifo_dmx2reg(.in = _demux.out2, .reset_B = _reset_BX, .supply = supply);
register_wr_array<REG_NCA, REG_NCW, REG_M> register(.in = fifo_dmx2reg.out, .data = reg_data,
.supply = supply, .reset_B = reset_B);
fifo<N_IN-2,N_BUFFERS> fifo_reg2mrg(.in = register.out, .reset_B = reset_B, .supply = supply);
.supply = supply, .reset_B = _reset_BX);
fifo<N_IN-2,N_BUFFERS> fifo_reg2mrg(.in = register.out, .reset_B = _reset_BX, .supply = supply);
// Spike Decoder
pint NC_SYN;
NC_SYN = NC_SYN_X + NC_SYN_Y;
slice_data<N_IN-1, 0, NC_SYN> slice_pre_dec(.in = _demux.out1, .supply = supply);
fifo<NC_SYN,N_BUFFERS> fifo_dmx2dec(.in = slice_pre_dec.out, .reset_B = reset_B, .supply = supply);
fifo<NC_SYN,N_BUFFERS> fifo_dmx2dec(.in = slice_pre_dec.out, .reset_B = _reset_BX, .supply = supply);
decoder_2d_hybrid<NC_SYN_X, NC_SYN_Y, N_SYN_X, N_SYN_Y, N_SYN_DLY_CFG> decoder(.in = fifo_dmx2dec.out,
.out = synapses,
.hs_en = register.data[0].d[0].t, // Defaults to handshake disable
.supply = supply, .reset_B = reset_B);
.supply = supply, .reset_B = _reset_BX);
(i:N_SYN_DLY_CFG: decoder.dly_cfg[i] = register.data[0].d[1 + i].f;) // Defaults to max delay
// Neurons + encoder
pint NC_NRN;
NC_NRN = NC_NRN_X + NC_NRN_Y;
nrn_hs_2d_array<N_NRN_X,N_NRN_Y,N_LINE_PD_DLY> nrn_grid(.in = neurons,
.supply = supply, .reset_B = reset_B);
.supply = supply, .reset_B = _reset_BX);
encoder2d_simple<NC_NRN_X, NC_NRN_Y, N_NRN_X, N_NRN_Y> encoder(
.inx = nrn_grid.outx,
.iny = nrn_grid.outy,
.reset_B = reset_B, .supply = supply
.reset_B = _reset_BX, .supply = supply
);
fifo<NC_NRN, N_BUFFERS> fifo_enc2mrg(.in = encoder.out,
.reset_B = reset_B, .supply = supply);
.reset_B = _reset_BX, .supply = supply);
// Merge
append<NC_NRN, N_IN-NC_NRN, 0> append_enc(.in = fifo_enc2mrg.out, .supply = supply);
append<N_IN-2, 2, 0> append_reg(.in = fifo_reg2mrg.out, .supply = supply);
merge<N_IN> merge_enc8reg(.in1 = append_enc.out, .in2 = append_reg.out,
.supply = supply, .reset_B = reset_B);
.supply = supply, .reset_B = _reset_BX);
merge<N_IN> merge_loop8mrg(.in1 = merge_enc8reg.out, .in2 = _loopback_dropper.out,
.reset_B = reset_B, .supply = supply);
.reset_B = _reset_BX, .supply = supply);
// qdi2bd
fifo<N_IN, N_BUFFERS> fifo_mrg2bd(.in = merge_loop8mrg.out,
.reset_B = reset_B, .supply = supply);
.reset_B = _reset_BX, .supply = supply);
qdi2bd<N_IN, N_BD_DLY_CFG> _qdi2bd(.in = fifo_mrg2bd.out, .out = out, .dly_cfg = bd_dly_cfg,
.reset_B = reset_B, .supply = supply);
.reset_B = _reset_BX, .supply = supply);
@ -143,14 +150,14 @@ defproc chip_texel (bd<N_IN> in, out;
(i:NC_NRN_MON_X:
nrn_mon_dec_x.in.d[i] = register.data[2].d[i];
)
sigbuf_boolarray<N_NRN_MON_X, 40> nrn_mon_x_buf(.in = nrn_mon_dec_x.out, .out = nrn_mon_x, .supply = supply);
sigbuf_boolarray<N_NRN_MON_X, 16> nrn_mon_x_buf(.in = nrn_mon_dec_x.out, .out = nrn_mon_x, .supply = supply);
decoder_dualrail_en<NC_NRN_MON_Y, N_NRN_MON_Y> nrn_mon_dec_y(.supply = supply);
nrn_mon_dec_y.en = register.data[1].d[0].t;
(i:NC_NRN_MON_Y:
nrn_mon_dec_y.in.d[i] = register.data[2].d[i+NC_NRN_MON_X];
)
sigbuf_boolarray<N_NRN_MON_Y, 40> nrn_mon_y_buf(.in = nrn_mon_dec_y.out, .out = nrn_mon_y, .supply = supply);
sigbuf_boolarray<N_NRN_MON_Y, 16> nrn_mon_y_buf(.in = nrn_mon_dec_y.out, .out = nrn_mon_y, .supply = supply);
decoder_dualrail_en<NC_SYN_MON_X, N_SYN_MON_X> syn_mon_dec_x(
.supply = supply);
@ -158,14 +165,14 @@ defproc chip_texel (bd<N_IN> in, out;
(i:NC_SYN_MON_X:
syn_mon_dec_x.in.d[i] = register.data[3].d[i];
)
sigbuf_boolarray<N_SYN_MON_X, 40> syn_mon_x_buf(.out = syn_mon_x, .supply = supply);
sigbuf_boolarray<N_SYN_MON_X, 16> syn_mon_x_buf(.out = syn_mon_x, .supply = supply);
decoder_dualrail_en<NC_SYN_MON_Y, N_SYN_MON_Y> syn_mon_dec_y(.supply = supply);
syn_mon_dec_y.en = register.data[1].d[1].t;
(i:NC_SYN_MON_Y:
syn_mon_dec_y.in.d[i] = register.data[3].d[i+NC_SYN_MON_X];
)
sigbuf_boolarray<N_SYN_MON_Y,40> syn_mon_y_buf(.out = syn_mon_y, .in = syn_mon_dec_y.out, .supply = supply);
sigbuf_boolarray<N_SYN_MON_Y,16> syn_mon_y_buf(.out = syn_mon_y, .in = syn_mon_dec_y.out, .supply = supply);
// Device debug hard-wired safety (reg0, b05 = DEV_DEBUG)
// Stops the possibility of dev_mon being high while some other sig is high.
@ -193,19 +200,59 @@ defproc chip_texel (bd<N_IN> in, out;
]
// Create TBUFs for each synapse column,
// ctrl wired to mon line (0'th in each 4).
pint N_TBUF = N_SYN_X * N_MON_AMZO_PER_SYN;
TBUF_X4 syn_x_AMZI_tbuf[N_TBUF];
(i:N_SYN_X:
(j:N_MON_AMZO_PER_SYN:
// ctrl wired to mon line (first in each 4).
TBUF_X4 syn_x_AMZI_tbuf[N_SYN_X * N_MON_AMZO_PER_SYN];
sigbuf_boolarray<N_MON_AMZO_PER_SYN, 40> syn_mon_AMZO_sb(.out = syn_mon_AMZO, .supply = supply);
(j:N_MON_AMZO_PER_SYN:
(i:N_SYN_X:
index = i*N_MON_AMZO_PER_SYN + j;
syn_x_AMZI_tbuf[index].a = syn_mon_AMZI[index];
syn_x_AMZI_tbuf[index].en = syn_mon_x[i*4];
syn_x_AMZI_tbuf[index].y = syn_mon_AMO[j];
syn_x_AMZI_tbuf[index].y = syn_mon_AMZO_sb.in[j];
)
)
// Create TBUFs for each neuron column,
// ctrl wired to mon line (first in each 4).
TBUF_X4 nrn_x_AMZI_tbuf[N_NRN_X * N_MON_AMZO_PER_NRN];
sigbuf_boolarray<N_MON_AMZO_PER_NRN, 40> nrn_mon_AMZO_sb(.out = nrn_mon_AMZO, .supply = supply);
(j:N_MON_AMZO_PER_NRN:
(i:N_NRN_X:
index = i*N_MON_AMZO_PER_NRN + j;
nrn_x_AMZI_tbuf[index].a = nrn_mon_AMZI[index];
nrn_x_AMZI_tbuf[index].en = nrn_mon_x[i*2];
nrn_x_AMZI_tbuf[index].y = nrn_mon_AMZO_sb.in[j];
)
)
// Create NON buffered signals from register to nrns.
(i:N_FLAGS_PER_NRN:
nrn_flags_EFO[i] = register.data[5].d[i].t;
)
// Create NON buffered signals from register to synapses.
// Includes safety on the first 3 flags with dev mon.
(i:3..N_FLAGS_PER_SYN-1:
syn_flags_EFO[i] = register.data[4].d[i].t;
)
AND2_X1 syn_flags_dev_safety[3];
BUF_X4 syn_flags_dev_safety_sb[3];
(i:0..2:
syn_flags_dev_safety[i].a = register.data[4].d[i].t; // syn flag bit
syn_flags_dev_safety[i].b = register.data[0].d[5].f; // no device is being monitored.
syn_flags_dev_safety_sb[i].a = syn_flags_dev_safety[i].y;
syn_flags_dev_safety_sb[i].y = syn_flags_EFO[i];
syn_flags_dev_safety[i].vdd = supply.vdd;
syn_flags_dev_safety[i].vss = supply.vss;
syn_flags_dev_safety_sb[i].vdd = supply.vdd;
syn_flags_dev_safety_sb[i].vss = supply.vss;
)

View File

@ -260,14 +260,14 @@ namespace tmpl {
bool _out1_a_B;
A_2C2N_RB_X4 out1_f_buf_func[N];
A_2C2N_RB_X4 out1_t_buf_func[N];
sigbuf<N*2> out1_en_buf(.in=_en, .supply=supply);
sigbuf<N*4> out_en_buf(.in=_en, .supply=supply);
INV_X1 out1_a_inv(.a=out1.a,.y=_out1_a_B, .vdd = supply.vdd, .vss = supply.vss);
sigbuf<N*2> out1_a_B_buf(.in=_out1_a_B, .supply=supply);
(i:N:
out1_f_buf_func[i].y=out1.d.d[i].f;
out1_t_buf_func[i].y=out1.d.d[i].t;
out1_f_buf_func[i].c1=out1_en_buf.out[i];
out1_t_buf_func[i].c1=out1_en_buf.out[i+N];
out1_f_buf_func[i].c1=out_en_buf.out[i];
out1_t_buf_func[i].c1=out_en_buf.out[i+N];
out1_f_buf_func[i].c2=out1_a_B_buf.out[i];
out1_t_buf_func[i].c2=out1_a_B_buf.out[i+N];
out1_f_buf_func[i].n1=in.d.d[i].f;
@ -288,14 +288,14 @@ namespace tmpl {
bool _out2_a_B;
A_2C2N_RB_X4 out2_f_buf_func[N];
A_2C2N_RB_X4 out2_t_buf_func[N];
sigbuf<N*2> out2_en_buf(.in=_en, .supply=supply);
// sigbuf<N*2> out2_en_buf(.in=_en, .supply=supply);
INV_X1 out2_a_inv(.a=out2.a,.y=_out2_a_B, .vdd = supply.vdd, .vss = supply.vss);
sigbuf<N*2> out2_a_B_buf(.in=_out2_a_B);
(i:N:
out2_f_buf_func[i].y=out2.d.d[i].f;
out2_t_buf_func[i].y=out2.d.d[i].t;
out2_f_buf_func[i].c1=out2_en_buf.out[i];
out2_t_buf_func[i].c1=out2_en_buf.out[i+N];
out2_f_buf_func[i].c1=out_en_buf.out[i+2*N];
out2_t_buf_func[i].c1=out_en_buf.out[i+3*N];
out2_f_buf_func[i].c2=out2_a_B_buf.out[i];
out2_t_buf_func[i].c2=out2_a_B_buf.out[i+N];
out2_f_buf_func[i].n1=in.d.d[i].f;

View File

@ -151,13 +151,14 @@ A_1C2N_R_X1 A_en2(.c1 = _w, .n1 = _en2, .n2 = _out_vB, .y = _en2,
// Pass to let data into the buffer
NOR2_X1 pass(.a = _en2, .b = _flush, .vss = supply.vss, .vdd = supply.vdd);
sigbuf<N*2> passX(.in = pass.y, .supply = supply);
AND2_X1 gandalf_t[N];
AND2_X1 gandalf_f[N];
(i:0..N-1:
gandalf_t[i].a = in.d.d[i].t;
gandalf_f[i].a = in.d.d[i].f;
gandalf_t[i].b = pass.y;
gandalf_f[i].b = pass.y;
gandalf_t[i].b = passX.out[i];
gandalf_f[i].b = passX.out[i+N];
gandalf_t[i].y = buf.in.d.d[i].t;
gandalf_f[i].y = buf.in.d.d[i].f;
@ -261,8 +262,6 @@ export template<pint NcA, NcW, M>
defproc register_wr_array(avMx1of2<NcA + NcW + 1> in; Mx1of2<NcW> data[M]; avMx1of2<NcA+NcW> out;
bool? reset_B; power supply) {
// BIG TODO
// I HAVE NOT BOTHERED WITH ANY SIGNAL BUFFERING IN HERE YET
// Input valid tree
vtree<NcA + NcW + 1> input_valid(.in = in.d, .out = in.v,
@ -292,9 +291,11 @@ A_2C_B_X1 ack_safety(.c1 = ack_rw_or.y, .c2 = in.v, .y = in.a);
// Write bit selector
bool _w = in.d.d[NcA+NcW].t;
bool _wX[M];
sigbuf<M> _w_sb(.in = _w, .out = _wX, .supply = supply);
A_2C_B_X1 write_selectors[M];
(i:M:
write_selectors[i].c1 = _w;
write_selectors[i].c1 = _wX[i];
write_selectors[i].c2 = decoder.out[i];
write_selectors[i].vdd = supply.vdd;
write_selectors[i].vss = supply.vss;
@ -328,12 +329,17 @@ TIELO_X1 tielow_writebit_f[M];
// Read bit selector
bool _r = in.d.d[NcA+NcW].f;
bool _rX[M+NcA];
sigbuf<M+NcA> _r_sb(.in = _r, .out = _rX, .supply = supply);
A_2C_B_X1 read_selectors[M];
sigbuf_boolarray<M, NcW*2> read_selectorsX(.supply = supply);
(i:M:
read_selectors[i].c1 = _r;
read_selectors[i].c1 = _rX[i];
read_selectors[i].c2 = decoder.out[i];
read_selectors[i].vdd = supply.vdd;
read_selectors[i].vss = supply.vss;
read_selectorsX.in[i] = read_selectors[i].y;
)
// OrTrees for each output word bit on read
@ -357,9 +363,9 @@ pint index;
index = i + j*NcW;
and_reads_t[index].a = data[j].d[i].t;
and_reads_t[index].b = read_selectors[j].y;
and_reads_t[index].b = read_selectorsX.out[j];
and_reads_f[index].a = data[j].d[i].f;
and_reads_f[index].b = read_selectors[j].y;
and_reads_f[index].b = read_selectorsX.out[j];
and_reads_t[index].y = out_ortrees_t[i].in[j];
and_reads_f[index].y = out_ortrees_f[i].in[j];
@ -379,8 +385,8 @@ A_2C_B_X1 addr_read_f[NcA];
addr_read_t[i].c1 = in.d.d[i].t;
addr_read_f[i].c1 = in.d.d[i].f;
addr_read_t[i].c2 = _r;
addr_read_f[i].c2 = _r;
addr_read_t[i].c2 = _rX[M+i];
addr_read_f[i].c2 = _rX[M+i];
addr_read_t[i].y = out.d.d[i].t;
addr_read_f[i].y = out.d.d[i].f;