Merge remote-tracking branch 'origin/dev' into dev

This commit is contained in:
Greatorex 2022-04-15 12:48:27 +02:00
commit c41d7f4fdf
6 changed files with 577016 additions and 92 deletions

View File

@ -42,7 +42,8 @@ open std::channel;
namespace tmpl {
namespace dataflow_neuro {
export template<pint N_IN, // Size of input data from outside world
export template<pint N_IN, // Size of input data from outside world
N_NRN_X, N_NRN_Y, N_SYN_X, N_SYN_Y, // Number of neurons / synapses
NC_NRN_X, NC_NRN_Y, NC_SYN_X, NC_SYN_Y,
N_SYN_DLY_CFG,
@ -51,13 +52,13 @@ N_MON_AMZO_PER_SYN, N_MON_AMZO_PER_NRN, // Number of signals that each synapse o
N_FLAGS_PER_SYN, N_FLAGS_PER_NRN, // Number of signals that each nrn/syn recieves from the register.
N_BUFFERS,
N_LINE_PD_DLY, // Number of dummy delays to add line pull down
N_BD_DLY_CFG, N_BD_DLY_CFG2,
// N_BD_DLY_CFG, N_BD_DLY_CFG2,
REG_NCA, REG_NCW, REG_M>
defproc chip_texel (bd<N_IN> in, out;
Mx1of2<REG_NCW> reg_data[REG_M];
a1of1 synapses[N_SYN_X * N_SYN_Y];
a1of1 neurons[N_NRN_X * N_NRN_Y];
defproc texel_core (avMx1of2<N_IN> in, out;
Mx1of2<REG_NCW> reg_data[REG_M];
a1of1 synapses[N_SYN_X * N_SYN_Y];
a1of1 neurons[N_NRN_X * N_NRN_Y];
bool! nrn_mon_x[N_NRN_MON_X], nrn_mon_y[N_NRN_MON_Y];
bool! syn_mon_x[N_SYN_MON_X], syn_mon_y[N_SYN_MON_Y];
@ -65,76 +66,68 @@ defproc chip_texel (bd<N_IN> in, out;
bool! syn_mon_AMZO[N_MON_AMZO_PER_SYN], nrn_mon_AMZO[N_MON_AMZO_PER_NRN];
bool! syn_flags_EFO[N_FLAGS_PER_SYN], nrn_flags_EFO[N_FLAGS_PER_NRN];
bool? bd_dly_cfg[N_BD_DLY_CFG], bd_dly_cfg2[N_BD_DLY_CFG2];
bool? loopback_en;
power supply;
bool? reset_B){
// bool? bd_dly_cfg[N_BD_DLY_CFG], bd_dly_cfg2[N_BD_DLY_CFG2];
// bool? loopback_en;
power supply;
bool? reset_B){
bool _reset_BX;
BUF_X12 reset_buf(.a = reset_B, .y = _reset_BX, .vdd = supply.vdd, .vss = supply.vss);
pint index = 0; // Just useful
bd2qdi<N_IN, N_BD_DLY_CFG, N_BD_DLY_CFG2> _bd2qdi(.in = in, .dly_cfg = bd_dly_cfg, .dly_cfg2 = bd_dly_cfg2,
.reset_B = _reset_BX, .supply = supply);
fifo<N_IN,N_BUFFERS> fifo_in2fork(.in = _bd2qdi.out, .reset_B = _reset_BX, .supply = supply);
// Onwards
fifo<N_IN,N_BUFFERS> fifo_in(.in = in, .reset_B = _reset_BX, .supply = supply);
demux_bit_msb<N_IN-1> _demux(.in = fifo_in.out, .reset_B = _reset_BX, .supply = supply);
fork<N_IN> _fork(.in = fifo_in2fork.out, .reset_B = _reset_BX, .supply = supply);
// Loopback
fifo<N_IN,N_BUFFERS> fifo_fork2drop(.in = _fork.out1, .reset_B = _reset_BX, .supply = supply);
dropper_static<N_IN, false> _loopback_dropper(.in = fifo_fork2drop.out, .cond = loopback_en,
.supply = supply);
// Onwards
fifo<N_IN,N_BUFFERS> fifo_fork2dmx(.in = _fork.out2, .reset_B = _reset_BX, .supply = supply);
demux_bit_msb<N_IN-1> _demux(.in = fifo_fork2dmx.out, .reset_B = _reset_BX, .supply = supply);
// Register
fifo<N_IN-1,N_BUFFERS> fifo_dmx2reg(.in = _demux.out2, .reset_B = _reset_BX, .supply = supply);
register_wr_array<REG_NCA, REG_NCW, REG_M> register(.in = fifo_dmx2reg.out, .data = reg_data,
.supply = supply, .reset_B = _reset_BX);
fifo<N_IN-2,N_BUFFERS> fifo_reg2mrg(.in = register.out, .reset_B = _reset_BX, .supply = supply);
// Register
fifo<N_IN-1,N_BUFFERS> fifo_dmx2reg(.in = _demux.out2, .reset_B = _reset_BX, .supply = supply);
register_wr_array<REG_NCA, REG_NCW, REG_M> register(.in = fifo_dmx2reg.out, .data = reg_data,
.supply = supply, .reset_B = _reset_BX);
fifo<N_IN-2,N_BUFFERS> fifo_reg2mrg(.in = register.out, .reset_B = _reset_BX, .supply = supply);
// Spike Decoder
// Spike Decoder
pint NC_SYN;
NC_SYN = NC_SYN_X + NC_SYN_Y;
slice_data<N_IN-1, 0, NC_SYN> slice_pre_dec(.in = _demux.out1, .supply = supply);
fifo<NC_SYN,N_BUFFERS> fifo_dmx2dec(.in = slice_pre_dec.out, .reset_B = _reset_BX, .supply = supply);
decoder_2d_hybrid<NC_SYN_X, NC_SYN_Y, N_SYN_X, N_SYN_Y, N_SYN_DLY_CFG> decoder(.in = fifo_dmx2dec.out,
.out = synapses,
.hs_en = register.data[0].d[0].t, // Defaults to handshake disable
.supply = supply, .reset_B = _reset_BX);
(i:N_SYN_DLY_CFG: decoder.dly_cfg[i] = register.data[0].d[1 + i].f;) // Defaults to max delay
fifo<NC_SYN,N_BUFFERS> fifo_dmx2dec(.in = slice_pre_dec.out, .reset_B = _reset_BX, .supply = supply);
decoder_2d_hybrid<NC_SYN_X, NC_SYN_Y, N_SYN_X, N_SYN_Y, N_SYN_DLY_CFG> decoder(.in = fifo_dmx2dec.out,
.out = synapses,
.hs_en = register.data[0].d[0].t, // Defaults to handshake disable
.ack_disable = register.data[1].d[2].t, // Defaults to ack enabled
.supply = supply, .reset_B = _reset_BX);
INV_X1 dly_cfg_inverters[N_SYN_DLY_CFG];
(i:N_SYN_DLY_CFG:
dly_cfg_inverters[i].a = register.data[0].d[1+i].t; // iff t is high, is the delay disabled.
dly_cfg_inverters[i].vdd = supply.vdd;
dly_cfg_inverters[i].vss = supply.vss;
decoder.dly_cfg[i] = dly_cfg_inverters[i].y;
) // This sexy hack means that the inverters are max delay throughout the register flush operations.
// Neurons + encoder
// Neurons + encoder
pint NC_NRN;
NC_NRN = NC_NRN_X + NC_NRN_Y;
nrn_hs_2d_array<N_NRN_X,N_NRN_Y,N_LINE_PD_DLY> nrn_grid(.in = neurons,
.supply = supply, .reset_B = _reset_BX);
encoder2d_simple<NC_NRN_X, NC_NRN_Y, N_NRN_X, N_NRN_Y> encoder(
.inx = nrn_grid.outx,
.iny = nrn_grid.outy,
.reset_B = _reset_BX, .supply = supply
);
fifo<NC_NRN, N_BUFFERS> fifo_enc2mrg(.in = encoder.out,
.reset_B = _reset_BX, .supply = supply);
nrn_hs_2d_array<N_NRN_X,N_NRN_Y,N_LINE_PD_DLY> nrn_grid(.in = neurons,
.supply = supply, .reset_B = _reset_BX);
encoder2d_simple<NC_NRN_X, NC_NRN_Y, N_NRN_X, N_NRN_Y> encoder(
.inx = nrn_grid.outx,
.iny = nrn_grid.outy,
.reset_B = _reset_BX, .supply = supply
);
fifo<NC_NRN, N_BUFFERS> fifo_enc2mrg(.in = encoder.out,
.reset_B = _reset_BX, .supply = supply);
// Merge
append<NC_NRN, N_IN-NC_NRN, 0> append_enc(.in = fifo_enc2mrg.out, .supply = supply);
append<N_IN-2, 2, 0> append_reg(.in = fifo_reg2mrg.out, .supply = supply);
append<N_IN-2, 2, 2> append_reg(.in = fifo_reg2mrg.out, .supply = supply);
merge<N_IN> merge_enc8reg(.in1 = append_enc.out, .in2 = append_reg.out,
.supply = supply, .reset_B = _reset_BX);
merge<N_IN> merge_loop8mrg(.in1 = merge_enc8reg.out, .in2 = _loopback_dropper.out,
.reset_B = _reset_BX, .supply = supply);
// qdi2bd
fifo<N_IN, N_BUFFERS> fifo_mrg2bd(.in = merge_loop8mrg.out,
.reset_B = _reset_BX, .supply = supply);
qdi2bd<N_IN, N_BD_DLY_CFG> _qdi2bd(.in = fifo_mrg2bd.out, .out = out, .dly_cfg = bd_dly_cfg,
// Output
fifo<N_IN, N_BUFFERS> fifo_out(.in = merge_enc8reg.out, .out = out,
.reset_B = _reset_BX, .supply = supply);
@ -150,14 +143,14 @@ defproc chip_texel (bd<N_IN> in, out;
(i:NC_NRN_MON_X:
nrn_mon_dec_x.in.d[i] = register.data[2].d[i];
)
sigbuf_boolarray<N_NRN_MON_X, 16> nrn_mon_x_buf(.in = nrn_mon_dec_x.out, .out = nrn_mon_x, .supply = supply);
sigbuf_boolarray<N_NRN_MON_X, 13> nrn_mon_x_buf(.in = nrn_mon_dec_x.out, .out = nrn_mon_x, .supply = supply);
decoder_dualrail_en<NC_NRN_MON_Y, N_NRN_MON_Y> nrn_mon_dec_y(.supply = supply);
nrn_mon_dec_y.en = register.data[1].d[0].t;
(i:NC_NRN_MON_Y:
nrn_mon_dec_y.in.d[i] = register.data[2].d[i+NC_NRN_MON_X];
)
sigbuf_boolarray<N_NRN_MON_Y, 16> nrn_mon_y_buf(.in = nrn_mon_dec_y.out, .out = nrn_mon_y, .supply = supply);
sigbuf_boolarray<N_NRN_MON_Y, 48> nrn_mon_y_buf(.in = nrn_mon_dec_y.out, .out = nrn_mon_y, .supply = supply);
decoder_dualrail_en<NC_SYN_MON_X, N_SYN_MON_X> syn_mon_dec_x(
.supply = supply);
@ -165,14 +158,14 @@ defproc chip_texel (bd<N_IN> in, out;
(i:NC_SYN_MON_X:
syn_mon_dec_x.in.d[i] = register.data[3].d[i];
)
sigbuf_boolarray<N_SYN_MON_X, 16> syn_mon_x_buf(.out = syn_mon_x, .supply = supply);
sigbuf_boolarray<N_SYN_MON_X, 13> syn_mon_x_buf(.out = syn_mon_x, .supply = supply);
decoder_dualrail_en<NC_SYN_MON_Y, N_SYN_MON_Y> syn_mon_dec_y(.supply = supply);
syn_mon_dec_y.en = register.data[1].d[1].t;
(i:NC_SYN_MON_Y:
syn_mon_dec_y.in.d[i] = register.data[3].d[i+NC_SYN_MON_X];
)
sigbuf_boolarray<N_SYN_MON_Y,16> syn_mon_y_buf(.out = syn_mon_y, .in = syn_mon_dec_y.out, .supply = supply);
sigbuf_boolarray<N_SYN_MON_Y, 48> syn_mon_y_buf(.out = syn_mon_y, .in = syn_mon_dec_y.out, .supply = supply);
// Device debug hard-wired safety (reg0, b05 = DEV_DEBUG)
// Stops the possibility of dev_mon being high while some other sig is high.
@ -250,14 +243,212 @@ defproc chip_texel (bd<N_IN> in, out;
syn_flags_dev_safety_sb[i].vss = supply.vss;
)
}
export template<pint N_IN, // Size of input data from outside world
N_NRN_X, N_NRN_Y, N_SYN_X, N_SYN_Y, // Number of neurons / synapses
NC_NRN_X, NC_NRN_Y, NC_SYN_X, NC_SYN_Y,
N_SYN_DLY_CFG,
N_NRN_MON_X, N_NRN_MON_Y, N_SYN_MON_X, N_SYN_MON_Y,
N_MON_AMZO_PER_SYN, N_MON_AMZO_PER_NRN, // Number of signals that each synapse outputs to be monitored.
N_FLAGS_PER_SYN, N_FLAGS_PER_NRN, // Number of signals that each nrn/syn recieves from the register.
N_BUFFERS,
N_LINE_PD_DLY, // Number of dummy delays to add line pull down
N_BD_DLY_CFG, N_BD_DLY_CFG2,
REG_NCA, REG_NCW, REG_M>
defproc texel_singlecore (bd<N_IN> in, out;
Mx1of2<REG_NCW> reg_data[REG_M];
a1of1 synapses[N_SYN_X * N_SYN_Y];
a1of1 neurons[N_NRN_X * N_NRN_Y];
bool! nrn_mon_x[N_NRN_MON_X], nrn_mon_y[N_NRN_MON_Y];
bool! syn_mon_x[N_SYN_MON_X], syn_mon_y[N_SYN_MON_Y];
bool? syn_mon_AMZI[N_SYN_X * N_MON_AMZO_PER_SYN], nrn_mon_AMZI[N_NRN_X * N_MON_AMZO_PER_NRN];
bool! syn_mon_AMZO[N_MON_AMZO_PER_SYN], nrn_mon_AMZO[N_MON_AMZO_PER_NRN];
bool! syn_flags_EFO[N_FLAGS_PER_SYN], nrn_flags_EFO[N_FLAGS_PER_NRN];
bool? bd_dly_cfg[N_BD_DLY_CFG], bd_dly_cfg2[N_BD_DLY_CFG2];
bool? loopback_en;
power supply;
bool? reset_B){
bool _reset_BX;
BUF_X12 reset_buf(.a = reset_B, .y = _reset_BX, .vdd = supply.vdd, .vss = supply.vss);
pint index = 0; // Just useful
bd2qdi<N_IN, N_BD_DLY_CFG, N_BD_DLY_CFG2> _bd2qdi(.in = in, .dly_cfg = bd_dly_cfg, .dly_cfg2 = bd_dly_cfg2,
.reset_B = _reset_BX, .supply = supply);
fifo<N_IN,N_BUFFERS> fifo_in2fork(.in = _bd2qdi.out, .reset_B = _reset_BX, .supply = supply);
fork<N_IN> _fork(.in = fifo_in2fork.out, .reset_B = _reset_BX, .supply = supply);
// Loopback
fifo<N_IN,N_BUFFERS> fifo_fork2drop(.in = _fork.out1, .reset_B = _reset_BX, .supply = supply);
dropper_static<N_IN, false> _loopback_dropper(.in = fifo_fork2drop.out, .cond = loopback_en,
.supply = supply);
fifo<N_IN,N_BUFFERS> fifo_drop2mrg(.in = _loopback_dropper.out, .reset_B = _reset_BX, .supply = supply);
// Onwards to core
fifo<N_IN,N_BUFFERS> fifo_fork2core(.in = _fork.out2, .reset_B = _reset_BX, .supply = supply);
texel_core<N_IN,N_NRN_X, N_NRN_Y, N_SYN_X, N_SYN_Y,NC_NRN_X, NC_NRN_Y, NC_SYN_X, NC_SYN_Y,N_SYN_DLY_CFG,N_NRN_MON_X, N_NRN_MON_Y, N_SYN_MON_X, N_SYN_MON_Y,N_MON_AMZO_PER_SYN, N_MON_AMZO_PER_NRN,N_FLAGS_PER_SYN, N_FLAGS_PER_NRN,N_BUFFERS,N_LINE_PD_DLY, REG_NCA, REG_NCW, REG_M>
core(.in = fifo_fork2core.out,
.reg_data = reg_data,
.synapses = synapses,
.neurons = neurons,
.nrn_mon_x = nrn_mon_x, .nrn_mon_y = nrn_mon_y,
.syn_mon_x = syn_mon_x, .syn_mon_y = syn_mon_y,
.syn_mon_AMZI = syn_mon_AMZI, .nrn_mon_AMZI = nrn_mon_AMZI,
.syn_mon_AMZO = syn_mon_AMZO, .nrn_mon_AMZO = nrn_mon_AMZO,
.syn_flags_EFO = syn_flags_EFO, .nrn_flags_EFO = nrn_flags_EFO,
.reset_B = _reset_BX,
.supply = supply
);
// qdi2bd
fifo<N_IN, N_BUFFERS> fifo_core2mrg(.in = core.out,
.reset_B = _reset_BX, .supply = supply);
// merge core output and loopback
merge<N_IN> merge_drop8core(.in1 = fifo_core2mrg.out, .in2 = fifo_drop2mrg.out,
.supply = supply, .reset_B = _reset_BX);
qdi2bd<N_IN, N_BD_DLY_CFG> _qdi2bd(.in = merge_drop8core.out, .out = out, .dly_cfg = bd_dly_cfg,
.reset_B = _reset_BX, .supply = supply);
}
export template<pint N_IN, // Size of input data from outside world
N_NRN_X, N_NRN_Y, N_SYN_X, N_SYN_Y, // Number of neurons / synapses
NC_NRN_X, NC_NRN_Y, NC_SYN_X, NC_SYN_Y,
N_SYN_DLY_CFG,
N_NRN_MON_X, N_NRN_MON_Y, N_SYN_MON_X, N_SYN_MON_Y,
N_MON_AMZO_PER_SYN, N_MON_AMZO_PER_NRN, // Number of signals that each synapse outputs to be monitored.
N_FLAGS_PER_SYN, N_FLAGS_PER_NRN, // Number of signals that each nrn/syn recieves from the register.
N_BUFFERS,
N_LINE_PD_DLY, // Number of dummy delays to add line pull down
N_BD_DLY_CFG, N_BD_DLY_CFG2,
REG_NCA, REG_NCW, REG_M>
defproc texel_dualcore (bd<N_IN> in, out;
Mx1of2<REG_NCW> c1_reg_data[REG_M];
a1of1 c1_synapses[N_SYN_X * N_SYN_Y];
a1of1 c1_neurons[N_NRN_X * N_NRN_Y];
bool! c1_nrn_mon_x[N_NRN_MON_X], c1_nrn_mon_y[N_NRN_MON_Y];
bool! c1_syn_mon_x[N_SYN_MON_X], c1_syn_mon_y[N_SYN_MON_Y];
bool? c1_syn_mon_AMZI[N_SYN_X * N_MON_AMZO_PER_SYN], c1_nrn_mon_AMZI[N_NRN_X * N_MON_AMZO_PER_NRN];
bool! c1_syn_mon_AMZO[N_MON_AMZO_PER_SYN], c1_nrn_mon_AMZO[N_MON_AMZO_PER_NRN];
bool! c1_syn_flags_EFO[N_FLAGS_PER_SYN], c1_nrn_flags_EFO[N_FLAGS_PER_NRN];
Mx1of2<REG_NCW> c2_reg_data[REG_M];
a1of1 c2_synapses[N_SYN_X * N_SYN_Y];
a1of1 c2_neurons[N_NRN_X * N_NRN_Y];
bool! c2_nrn_mon_x[N_NRN_MON_X], c2_nrn_mon_y[N_NRN_MON_Y];
bool! c2_syn_mon_x[N_SYN_MON_X], c2_syn_mon_y[N_SYN_MON_Y];
bool? c2_syn_mon_AMZI[N_SYN_X * N_MON_AMZO_PER_SYN], c2_nrn_mon_AMZI[N_NRN_X * N_MON_AMZO_PER_NRN];
bool! c2_syn_mon_AMZO[N_MON_AMZO_PER_SYN], c2_nrn_mon_AMZO[N_MON_AMZO_PER_NRN];
bool! c2_syn_flags_EFO[N_FLAGS_PER_SYN], c2_nrn_flags_EFO[N_FLAGS_PER_NRN];
bool? bd_dly_cfg[N_BD_DLY_CFG], bd_dly_cfg2[N_BD_DLY_CFG2];
bool? loopback_en;
power supply;
bool? reset_B){
// Reset buffers
bool _reset_BX;
BUF_X12 reset_buf(.a = reset_B, .y = _reset_BX, .vdd = supply.vdd, .vss = supply.vss);
bd2qdi<N_IN, N_BD_DLY_CFG, N_BD_DLY_CFG2> _bd2qdi(.in = in, .dly_cfg = bd_dly_cfg, .dly_cfg2 = bd_dly_cfg2,
.reset_B = _reset_BX, .supply = supply);
fifo<N_IN,N_BUFFERS> fifo_in2fork(.in = _bd2qdi.out, .reset_B = _reset_BX, .supply = supply);
fork<N_IN> _fork(.in = fifo_in2fork.out, .reset_B = _reset_BX, .supply = supply);
// Loopback
fifo<N_IN,N_BUFFERS> fifo_fork2drop(.in = _fork.out1, .reset_B = _reset_BX, .supply = supply);
dropper_static<N_IN, false> _loopback_dropper(.in = fifo_fork2drop.out, .cond = loopback_en,
.supply = supply);
fifo<N_IN,N_BUFFERS> fifo_drop2mrg(.in = _loopback_dropper.out, .reset_B = _reset_BX, .supply = supply);
// Onwards to core demux
fifo<N_IN,N_BUFFERS> fifo_fork2dmx(.in = _fork.out2, .reset_B = _reset_BX, .supply = supply);
demux_bit_msb<N_IN-1> core_dmx(.in = fifo_fork2dmx.out, .reset_B = _reset_BX, .supply = supply);
fifo<N_IN-1,N_BUFFERS> fifo_dmx2core1(.in = core_dmx.out1, .reset_B = _reset_BX, .supply = supply);
fifo<N_IN-1,N_BUFFERS> fifo_dmx2core2(.in = core_dmx.out2, .reset_B = _reset_BX, .supply = supply);
// Cores
texel_core<N_IN-1,N_NRN_X, N_NRN_Y, N_SYN_X, N_SYN_Y,NC_NRN_X, NC_NRN_Y, NC_SYN_X, NC_SYN_Y,N_SYN_DLY_CFG,N_NRN_MON_X, N_NRN_MON_Y, N_SYN_MON_X, N_SYN_MON_Y,N_MON_AMZO_PER_SYN, N_MON_AMZO_PER_NRN,N_FLAGS_PER_SYN, N_FLAGS_PER_NRN,N_BUFFERS,N_LINE_PD_DLY, REG_NCA, REG_NCW, REG_M>
core1(.in = fifo_dmx2core1.out,
.reg_data = c1_reg_data,
.synapses = c1_synapses,
.neurons = c1_neurons,
.nrn_mon_x = c1_nrn_mon_x, .nrn_mon_y = c1_nrn_mon_y,
.syn_mon_x = c1_syn_mon_x, .syn_mon_y = c1_syn_mon_y,
.syn_mon_AMZI = c1_syn_mon_AMZI, .nrn_mon_AMZI = c1_nrn_mon_AMZI,
.syn_mon_AMZO = c1_syn_mon_AMZO, .nrn_mon_AMZO = c1_nrn_mon_AMZO,
.syn_flags_EFO = c1_syn_flags_EFO, .nrn_flags_EFO = c1_nrn_flags_EFO,
.reset_B = _reset_BX,
.supply = supply
);
texel_core<N_IN-1,N_NRN_X, N_NRN_Y, N_SYN_X, N_SYN_Y,NC_NRN_X, NC_NRN_Y, NC_SYN_X, NC_SYN_Y,N_SYN_DLY_CFG,N_NRN_MON_X, N_NRN_MON_Y, N_SYN_MON_X, N_SYN_MON_Y,N_MON_AMZO_PER_SYN, N_MON_AMZO_PER_NRN,N_FLAGS_PER_SYN, N_FLAGS_PER_NRN,N_BUFFERS,N_LINE_PD_DLY, REG_NCA, REG_NCW, REG_M>
core2(.in = fifo_dmx2core2.out,
.reg_data = c2_reg_data,
.synapses = c2_synapses,
.neurons = c2_neurons,
.nrn_mon_x = c2_nrn_mon_x, .nrn_mon_y = c2_nrn_mon_y,
.syn_mon_x = c2_syn_mon_x, .syn_mon_y = c2_syn_mon_y,
.syn_mon_AMZI = c2_syn_mon_AMZI, .nrn_mon_AMZI = c2_nrn_mon_AMZI,
.syn_mon_AMZO = c2_syn_mon_AMZO, .nrn_mon_AMZO = c2_nrn_mon_AMZO,
.syn_flags_EFO = c2_syn_flags_EFO, .nrn_flags_EFO = c2_nrn_flags_EFO,
.reset_B = _reset_BX,
.supply = supply
);
fifo<N_IN-1,N_BUFFERS> fifo_core1out(.in = core1.out, .reset_B = _reset_BX, .supply = supply);
fifo<N_IN-1,N_BUFFERS> fifo_core2out(.in = core2.out, .reset_B = _reset_BX, .supply = supply);
// Merge cores
append<N_IN-1, 1, 0> append_core1(.in = fifo_core1out.out, .supply = supply);
append<N_IN-1, 1, 1> append_core2(.in = fifo_core2out.out, .supply = supply);
merge<N_IN> merge_core1x2(.in1 = append_core1.out, .in2 = append_core2.out,
.supply = supply, .reset_B = _reset_BX);
// Merge cores and loopback
merge<N_IN> merge_drop8core(.in1 = merge_core1x2.out, .in2 = fifo_drop2mrg.out,
.reset_B = _reset_BX, .supply = supply);
// qdi2bd
fifo<N_IN, N_BUFFERS> fifo_mrg2bd(.in = merge_drop8core.out,
.reset_B = _reset_BX, .supply = supply);
qdi2bd<N_IN, N_BD_DLY_CFG> _qdi2bd(.in = fifo_mrg2bd.out, .out = out, .dly_cfg = bd_dly_cfg,
.reset_B = _reset_BX, .supply = supply);
}
}
}
}

View File

@ -86,6 +86,66 @@ defproc decoder_dualrail (Mx1of2<Nc> in; bool? out[N]; power supply) {
)
}
/**
* Dualrail decoder, but the signals to the decoders are refreshed every 48 gates.
* final_refresh is signal at the end of the refresh line.
* Is needed for doing validity checking etc, since it is the laggiest signal.
*/
export template<pint Nc, N>
defproc decoder_dualrail_refresh (Mx1of2<Nc> in; bool? out[N]; Mx1of2<Nc> final_refresh; power supply) {
// signal buffers
pint index;
pint NUM_OUTS_PER_BUF = 96;
pint NUM_REFRESH = N/(NUM_OUTS_PER_BUF); // x2 bc only half the output bits look for it.
// NUM_REFRESH = 0;
BUF_X12 in_tX[Nc*(NUM_REFRESH+1)];
BUF_X12 in_fX[Nc*(NUM_REFRESH+1)];
(i:Nc:
// Connect start
in_tX[i].a = in.d[i].t;
in_fX[i].a = in.d[i].f;
// Connect mid bois
(j:NUM_REFRESH:
index = i + (1+j)*Nc;
in_tX[index].a = in_tX[index-Nc].y;
in_fX[index].a = in_fX[index-Nc].y;
)
// Connect end
in_tX[i+NUM_REFRESH*Nc].y = final_refresh.d[i].t;
in_fX[i+NUM_REFRESH*Nc].y = final_refresh.d[i].f;
)
(i:Nc*(NUM_REFRESH+1):
in_tX[i].vdd = supply.vdd;
in_tX[i].vss = supply.vss;
in_fX[i].vdd = supply.vdd;
in_fX[i].vss = supply.vss;
)
// AND trees
pint bitval;
andtree<Nc> atree[N];
(k:0..N-1:atree[k].supply = supply;)
(i:0..N-1:
(j:0..Nc-1:
bitval = (i & ( 1 << j )) >> j; // Get binary digit of integer i, column j
[bitval = 1 ->
atree[i].in[j] = in_tX[j+((i/NUM_OUTS_PER_BUF)*Nc)].y;
// atree[i].in[j] = addr_buf.out.d.d[j].t;
[]bitval = 0 ->
atree[i].in[j] = in_fX[j+((i/NUM_OUTS_PER_BUF)*Nc)].y;
// atree[i].in[j] = addr_buf.out.d.d[j].f;
[]bitval >= 2 -> {false : "fuck"};
]
atree[i].out = out[i];
)
)
}
/**
* Dualrail decoder with buffered outputs.
* Be careful of out[] indexing.
@ -110,19 +170,38 @@ defproc decoder_dualrail_x(Mx1of2<Nc> in; bool? out[N]; power supply) {
*/
export template<pint Nc, N>
defproc decoder_dualrail_en(Mx1of2<Nc> in; bool? en, out[N]; power supply) {
decoder_dualrail<Nc, N> decoder(.in = in, .supply = supply);
sigbuf<N> sb_en(.in = en, .supply = supply);
AND2_X1 en_ands[N];
(i:N:
en_ands[i].a = decoder.out[i];
en_ands[i].b = sb_en.out[i];
decoder_dualrail_refresh<Nc, N> decoder(.out = out, .supply = supply);
en_ands[i].vdd = supply.vdd;
en_ands[i].vss = supply.vss;
sigbuf<Nc*2> sb_en(.in = en, .supply = supply);
// AND2_X1 en_ands[N];
// (i:N:
// en_ands[i].a = decoder.out[i];
// en_ands[i].b = sb_en.out[i];
en_ands[i].y = out[i];
// en_ands[i].vdd = supply.vdd;
// en_ands[i].vss = supply.vss;
// en_ands[i].y = out[i];
// )
AND2_X1 en_ands_t[Nc];
AND2_X1 en_ands_f[Nc];
(i:Nc:
en_ands_t[i].a = in.d[i].t;
en_ands_f[i].a = in.d[i].f;
en_ands_t[i].b = sb_en.out[i];
en_ands_f[i].b = sb_en.out[i+Nc];
en_ands_t[i].y = decoder.in.d[i].t;
en_ands_f[i].y = decoder.in.d[i].f;
en_ands_t[i].vdd = supply.vdd;
en_ands_t[i].vss = supply.vss;
en_ands_f[i].vdd = supply.vdd;
en_ands_f[i].vss = supply.vss;
)
}
@ -173,8 +252,10 @@ defproc decoder_2d_dly (avMx1of2<NxC+NyC> in; bool? outx[Nx], outy[Ny],
export template<pint Nx, Ny>
defproc and_grid(bool! out[Nx*Ny]; bool? inx[Nx], iny[Ny]; power supply) {
// Buffer inputs
sigbuf<Ny> xbuf[Nx];
sigbuf<Nx> ybuf[Ny];
// sigbuf<Ny> xbuf[Nx];
// sigbuf<Nx> ybuf[Ny];
sigbuf<47> xbuf[Nx]; // BUFFERING DISABLED FOR NOW
sigbuf<47> ybuf[Ny]; // CUS GET BUFFERED IN THE CORE
(i:Nx:
xbuf[i].in = inx[i];
xbuf[i].supply = supply;
@ -188,8 +269,8 @@ defproc and_grid(bool! out[Nx*Ny]; bool? inx[Nx], iny[Ny]; power supply) {
(i:0..Nx*Ny-1:ands[i].vss = supply.vss; ands[i].vdd = supply.vdd;)
(x:0..Nx-1:
(y:0..Ny-1:
ands[x + y*Nx].a = xbuf[x].out[y];
ands[x + y*Nx].b = ybuf[y].out[x];
ands[x + y*Nx].a = xbuf[x].out[0];
ands[x + y*Nx].b = ybuf[y].out[0];
ands[x + y*Nx].y = out[x + y*Nx];
)
)
@ -310,9 +391,11 @@ defproc decoder_2d_hs (avMx1of2<NxC+NyC> in; a1of1 out[Nx*Ny]; bool? reset_B; po
* goes through the prog_delay block.
* Thus, for the handshaking version to be used "correctly",
* dly_cfg should be set to all zeros.
* ack_disable blocks the ack being returned to the buffer.
* Is needed in case there are instabilities while we fiddle with delays.
*/
export template<pint NxC, NyC, Nx, Ny, N_dly_cfg>
defproc decoder_2d_hybrid (avMx1of2<NxC+NyC> in; a1of1 out[Nx*Ny]; bool? dly_cfg[N_dly_cfg], hs_en,
defproc decoder_2d_hybrid (avMx1of2<NxC+NyC> in; a1of1 out[Nx*Ny]; bool? dly_cfg[N_dly_cfg], hs_en, ack_disable,
reset_B; power supply) {
bool _reset_BX[Nx];
@ -325,32 +408,33 @@ defproc decoder_2d_hybrid (avMx1of2<NxC+NyC> in; a1of1 out[Nx*Ny]; bool? dly_cfg
buffer<NxC+NyC> addr_buf(.in = in, .reset_B = reset_B, .supply = supply);
// Decoder X/Y And trees
decoder_dualrail<NxC,Nx> d_dr_x(.supply = supply);
decoder_dualrail_refresh<NxC,Nx> d_dr_x(.supply = supply);
(i:0..NxC-1:d_dr_x.in.d[i] = addr_buf.out.d.d[i];)
decoder_dualrail<NyC,Ny> d_dr_y(.supply = supply);
decoder_dualrail_refresh<NyC,Ny> d_dr_y(.supply = supply);
(i:0..NyC-1:d_dr_y.in.d[i] = addr_buf.out.d.d[i+NxC];)
// sig buf for reqx lines, since they go to synapse pull down gates.
// Signals to the and-grid are buffered therein.
sigbuf<Ny+1> d_dr_xX[Nx];
(i:Nx:
d_dr_xX[i].in = d_dr_x.out[i];
d_dr_xX[i].supply = supply;
)
sigbuf_boolarray<Nx,15> d_dr_xX(.in = d_dr_x.out, .supply = supply);
// sigbuf<15> d_dr_xX[Nx]; // GET REFRESHED IN CORE
// (i:Nx:
// d_dr_xX[i].in = d_dr_x.out[i];
// d_dr_xX[i].supply = supply;
// )
sigbuf_boolarray<Ny,47> d_dr_yX(.in = d_dr_y.out, .supply = supply);
// Validity
vtree<NxC> vtree_x (.supply = supply);
vtree<NyC> vtree_y (.supply = supply);
(i:0..NxC-1:vtree_x.in.d[i].t = addr_buf.out.d.d[i].t;)
(i:0..NxC-1:vtree_x.in.d[i].f = addr_buf.out.d.d[i].f;)
(i:0..NyC-1:vtree_y.in.d[i].t = addr_buf.out.d.d[i+NxC].t;)
(i:0..NyC-1:vtree_y.in.d[i].f = addr_buf.out.d.d[i+NxC].f;)
vtree<NxC> vtree_x (.in = d_dr_x.final_refresh, .supply = supply);
vtree<NyC> vtree_y (.in = d_dr_y.final_refresh, .supply = supply);
A_2C_B_X1 valid_Cel(.c1 = vtree_x.out, .c2 = vtree_y.out, .y = addr_buf.out.v,
.vdd = supply.vdd, .vss = supply.vss);
// and grid for reqs into synapses
and_grid<Nx, Ny> _and_grid(.inx = d_dr_x.out, .iny = d_dr_y.out, .supply = supply);
and_grid<Nx, Ny> _and_grid(.inx = d_dr_xX.out, .iny = d_dr_yX.out, .supply = supply);
(i:Nx*Ny: out[i].r = _and_grid.out[i];)
// Acknowledge pull down time
@ -363,7 +447,7 @@ defproc decoder_2d_hybrid (avMx1of2<NxC+NyC> in; a1of1 out[Nx*Ny]; bool? dly_cfg
(j:Ny:
index = i + Nx*j;
ack_pulldowns[index].n1 = out[index].a;
ack_pulldowns[index].n2 = d_dr_xX[i].out[j];
ack_pulldowns[index].n2 = d_dr_xX.out[i]; // GET REFRHRESED IN CORE
ack_pulldowns[index].y = _out_acksB[i];
ack_pulldowns[index].vss = supply.vss;
ack_pulldowns[index].vdd = supply.vdd;
@ -378,7 +462,7 @@ defproc decoder_2d_hybrid (avMx1of2<NxC+NyC> in; a1of1 out[Nx*Ny]; bool? dly_cfg
A_2P_U_X4 pu[Nx]; // TODO probably replace this with variable strength PU
A_1P_U_X4 pu_reset[Nx];
(i:Nx:
pu[i].p1 = d_dr_xX[i].out[Ny];
pu[i].p1 = d_dr_xX.out[i];
pu[i].p2 = hs_enB;
pu[i].y = _out_acksB[i];
pu[i].vdd = supply.vdd;
@ -423,7 +507,13 @@ defproc decoder_2d_hybrid (avMx1of2<NxC+NyC> in; a1of1 out[Nx*Ny]; bool? dly_cfg
.vdd = supply.vdd, .vss = supply.vss);
// Programmable delay
delayprog<N_dly_cfg> dly(.in = ack_mux.y, .out = addr_buf.out.a, .s = dly_cfg, .supply = supply);
delayprog<N_dly_cfg> dly(.in = ack_mux.y, .s = dly_cfg, .supply = supply);
// Final switch from register to maybe block the ack
INV_X1 ack_disableB(.a = ack_disable, .vdd = supply.vdd, .vss = supply.vss);
AND2_X1 ack_block(.a = dly.out, .b = ack_disableB.y, .y = addr_buf.out.a,
.vdd = supply.vdd, .vss = supply.vss);
}

View File

@ -418,7 +418,7 @@ defproc sigbuf (bool? in; bool! out[N]; power supply)
{
{ N >= 0 : "sigbuf: parameter error" };
{ N <= 43 : "sigbuf: parameter error, N too big" };
{ N <= 128 : "sigbuf: parameter error, N too big" };
/* -- just use in sized driver here -- */
[ N <= 4 ->
@ -433,8 +433,18 @@ defproc sigbuf (bool? in; bool! out[N]; power supply)
BUF_X6 buf6 (.a = in, .y = out[0], .vdd = supply.vdd, .vss = supply.vss);
[] N >= 19 & N <= 29 ->
BUF_X8 buf8 (.a = in, .y = out[0], .vdd = supply.vdd, .vss = supply.vss);
[] N >= 30 & N <= 42 ->
[] N >= 30 & N <= 48->
BUF_X12 buf12 (.a = in, .y = out[0], .vdd = supply.vdd, .vss = supply.vss);
[] N >= 49 & N <= 64 ->
BUF_X16 buf16 (.a = in, .y = out[0], .vdd = supply.vdd, .vss = supply.vss);
[] N >= 65 & N <= 96 ->
BUF_X24 buf24 (.a = in, .y = out[0], .vdd = supply.vdd, .vss = supply.vss);
[] N >= 97 & N <=128 ->
BUF_X32 buf32 (.a = in, .y = out[0], .vdd = supply.vdd, .vss = supply.vss);
// [] N >= 129 & N <=192 ->
// BUF_X48 buf48 (.a = in, .y = out[0], .vdd = supply.vdd, .vss = supply.vss);
// [] N >= 193 & N <= 256->
// BUF_X64 buf64 (.a = in, .y = out[0], .vdd = supply.vdd, .vss = supply.vss);
]
(i:1..N-1:out[i]=out[0];)
}
@ -445,7 +455,7 @@ defproc sigbuf_1output (bool? in; bool! out; power supply)
{
{ N >= 0 : "sigbuf: parameter error" };
{ N <= 43 : "sigbuf: parameter error, N too big" };
{ N <= 43 : "sigbuf: parameter error, N too big" };
/* -- just use in sized driver here -- */
[ N <= 4 ->

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,159 @@
/*************************************************************************
*
* This file is part of ACT dataflow neuro library.
* It's the testing facility for cell_lib_std.act
*
* Copyright (c) 2022 University of Groningen - Ole Richter
* Copyright (c) 2022 University of Groningen - Hugh Greatorex
* Copyright (c) 2022 University of Groningen - Michele Mastella
* Copyright (c) 2022 University of Groningen - Madison Cotteret
*
* This source describes Open Hardware and is licensed under the CERN-OHL-W v2 or later
*
* You may redistribute and modify this documentation and make products
* using it under the terms of the CERN-OHL-W v2 (https:/cern.ch/cern-ohl).
* This documentation is distributed WITHOUT ANY EXPRESS OR IMPLIED
* WARRANTY, INCLUDING OF MERCHANTABILITY, SATISFACTORY QUALITY
* AND FITNESS FOR A PARTICULAR PURPOSE. Please see the CERN-OHL-W v2
* for applicable conditions.
*
* Source location: https://git.web.rug.nl/bics/actlib_dataflow_neuro
*
* As per CERN-OHL-W v2 section 4.1, should You produce hardware based on
* these sources, You must maintain the Source Location visible in its
* documentation.
*
**************************************************************************
*/
import "../../dataflow_neuro/coders.act";
import "../../dataflow_neuro/primitives.act";
import "../../dataflow_neuro/chips.act";
import globals;
import std::data;
open std::data;
open tmpl::dataflow_neuro;
pint N_IN = 32;
pint N_NRN_X = 15;
pint N_NRN_Y = 6;
pint NC_NRN_X = 4;
pint NC_NRN_Y = 3;
pint N_SYN_X = 15;
pint N_SYN_Y = 348;
pint NC_SYN_X = 6;
pint NC_SYN_Y = 9;
pint N_SYN_DLY_CFG = 4;
pint N_BD_DLY_CFG = 4;
pint N_BD_DLY_CFG2 = 2;
pint N_NRN_MON_X = N_NRN_X*2; // [mon,kill]*N
pint N_NRN_MON_Y = N_NRN_Y; // [mon]*N
pint N_SYN_MON_X = N_SYN_X*4; // [mon, dev_mon, set, reset]*N
pint N_SYN_MON_Y = N_SYN_Y; // [mon]*N
pint N_MON_AMZO_PER_SYN = 5;
pint N_MON_AMZO_PER_NRN = 7;
pint N_FLAGS_PER_SYN = 4; // Syn: Must be at least 3 (since those ones have special safety)
pint N_FLAGS_PER_NRN = 9; // and leq than the number of bits in a reg, since have presumed only needs one.
pint N_BUFFERS = 3;
pint N_LINE_PD_DLY = 3;
pint REG_NCA = 6;
pint REG_M = 1<<REG_NCA;
pint REG_NCW = 23;
defproc chip_texel_dualcore (bd<N_IN> in, out;
Mx1of2<REG_NCW> c1_reg_data[REG_M];
a1of1 c1_synapses[N_SYN_X * N_SYN_Y];
a1of1 c1_neurons[N_NRN_X * N_NRN_Y];
// bool c1_syn_r[N_SYN_X * N_SYN_Y];
// bool c1_syn_a[N_SYN_X * N_SYN_Y];
// bool c1_nrn_r[N_NRN_X * N_NRN_Y];
// bool c1_nrn_a[N_NRN_X * N_NRN_Y];
bool! c1_nrn_mon_x[N_NRN_MON_X], c1_nrn_mon_y[N_NRN_MON_Y];
bool! c1_syn_mon_x[N_SYN_MON_X], c1_syn_mon_y[N_SYN_MON_Y];
bool? c1_syn_mon_AMZI[N_SYN_X * N_MON_AMZO_PER_SYN], c1_nrn_mon_AMZI[N_NRN_X * N_MON_AMZO_PER_NRN];
bool! c1_syn_mon_AMZO[N_MON_AMZO_PER_SYN], c1_nrn_mon_AMZO[N_MON_AMZO_PER_NRN];
bool! c1_syn_flags_EFO[N_FLAGS_PER_SYN], c1_nrn_flags_EFO[N_FLAGS_PER_NRN];
Mx1of2<REG_NCW> c2_reg_data[REG_M];
a1of1 c2_synapses[N_SYN_X * N_SYN_Y];
a1of1 c2_neurons[N_NRN_X * N_NRN_Y];
// bool c2_syn_r[N_SYN_X * N_SYN_Y];
// bool c2_syn_a[N_SYN_X * N_SYN_Y];
// bool c2_nrn_r[N_NRN_X * N_NRN_Y];
// bool c2_nrn_a[N_NRN_X * N_NRN_Y];
bool! c2_nrn_mon_x[N_NRN_MON_X], c2_nrn_mon_y[N_NRN_MON_Y];
bool! c2_syn_mon_x[N_SYN_MON_X], c2_syn_mon_y[N_SYN_MON_Y];
bool? c2_syn_mon_AMZI[N_SYN_X * N_MON_AMZO_PER_SYN], c2_nrn_mon_AMZI[N_NRN_X * N_MON_AMZO_PER_NRN];
bool! c2_syn_mon_AMZO[N_MON_AMZO_PER_SYN], c2_nrn_mon_AMZO[N_MON_AMZO_PER_NRN];
bool! c2_syn_flags_EFO[N_FLAGS_PER_SYN], c2_nrn_flags_EFO[N_FLAGS_PER_NRN];
bool? bd_dly_cfg[N_BD_DLY_CFG], bd_dly_cfg2[N_BD_DLY_CFG2];
bool? loopback_en){
bool _reset_B;
prs {
Reset => _reset_B-
}
power supply;
supply.vdd = Vdd;
supply.vss = GND;
// a1of1 c1_synapses[N_SYN_X * N_SYN_Y];
// a1of1 c1_neurons[N_NRN_X * N_NRN_Y];
// a1of1 c2_synapses[N_SYN_X * N_SYN_Y];
// a1of1 c2_neurons[N_NRN_X * N_NRN_Y];
// (i:N_SYN_X * N_SYN_Y:
// c1_synapses[i].r = c1_syn_r[i];
// c2_synapses[i].r = c2_syn_r[i];
// c1_synapses[i].a = c1_syn_a[i];
// c2_synapses[i].a = c2_syn_a[i];
// )
// (i:N_NRN_X * N_NRN_Y:
// c1_neurons[i].r = c1_nrn_r[i];
// c2_neurons[i].r = c2_nrn_r[i];
// c1_neurons[i].a = c1_nrn_a[i];
// c2_neurons[i].a = c2_nrn_a[i];
// )
texel_dualcore<N_IN,
N_NRN_X, N_NRN_Y, N_SYN_X, N_SYN_Y,
NC_NRN_X, NC_NRN_Y, NC_SYN_X, NC_SYN_Y,
N_SYN_DLY_CFG,
N_NRN_MON_X, N_NRN_MON_Y, N_SYN_MON_X, N_SYN_MON_Y,
N_MON_AMZO_PER_SYN, N_MON_AMZO_PER_NRN,
N_FLAGS_PER_SYN, N_FLAGS_PER_NRN,
N_BUFFERS,
N_LINE_PD_DLY,
N_BD_DLY_CFG, N_BD_DLY_CFG2,
REG_NCA, REG_NCW, REG_M> c(.in = in, .out = out,
.c1_reg_data = c1_reg_data, .c1_synapses = c1_synapses, .c1_neurons = c1_neurons, .c1_nrn_mon_x = c1_nrn_mon_x, .c1_nrn_mon_y = c1_nrn_mon_y, .c1_syn_mon_x = c1_syn_mon_x, .c1_syn_mon_y = c1_syn_mon_y, .c1_syn_mon_AMZI = c1_syn_mon_AMZI, .c1_nrn_mon_AMZI = c1_nrn_mon_AMZI, .c1_syn_mon_AMZO = c1_syn_mon_AMZO, .c1_nrn_mon_AMZO = c1_nrn_mon_AMZO, .c1_syn_flags_EFO = c1_syn_flags_EFO, .c1_nrn_flags_EFO = c1_nrn_flags_EFO, .c2_reg_data = c2_reg_data, .c2_synapses = c2_synapses, .c2_neurons = c2_neurons, .c2_nrn_mon_x = c2_nrn_mon_x, .c2_nrn_mon_y = c2_nrn_mon_y, .c2_syn_mon_x = c2_syn_mon_x, .c2_syn_mon_y = c2_syn_mon_y, .c2_syn_mon_AMZI = c2_syn_mon_AMZI, .c2_nrn_mon_AMZI = c2_nrn_mon_AMZI, .c2_syn_mon_AMZO = c2_syn_mon_AMZO, .c2_nrn_mon_AMZO = c2_nrn_mon_AMZO, .c2_syn_flags_EFO = c2_syn_flags_EFO, .c2_nrn_flags_EFO = c2_nrn_flags_EFO, .bd_dly_cfg = bd_dly_cfg, .bd_dly_cfg2 = bd_dly_cfg2,
.loopback_en = loopback_en, .supply = supply, .reset_B = _reset_B);
}
// fifo_decoder_neurons_encoder_fifo e;
chip_texel_dualcore c;

File diff suppressed because it is too large Load Diff