From 351b3ea151534d7a9f71bc5ed0c28128025cf2b6 Mon Sep 17 00:00:00 2001 From: alexmadison Date: Wed, 20 Apr 2022 18:24:43 +0200 Subject: [PATCH] disentangled synapse handshake circuits, passing tests --- dataflow_neuro/chips.act | 12 +++++- dataflow_neuro/coders.act | 88 +++++++++++++++++++++++---------------- 2 files changed, 63 insertions(+), 37 deletions(-) diff --git a/dataflow_neuro/chips.act b/dataflow_neuro/chips.act index 105db85..f03be1f 100644 --- a/dataflow_neuro/chips.act +++ b/dataflow_neuro/chips.act @@ -93,7 +93,6 @@ defproc texel_core (avMx1of2 in, out; slice_data slice_pre_dec(.in = _demux.out1, .supply = supply); fifo fifo_dmx2dec(.in = slice_pre_dec.out, .reset_B = _reset_BX, .supply = supply); decoder_2d_hybrid decoder(.in = fifo_dmx2dec.out, - .out = synapses, .hs_en = register.data[0].d[0].t, // Defaults to handshake disable .ack_disable = register.data[1].d[2].t, // Defaults to ack enabled .supply = supply, .reset_B = _reset_BX); @@ -103,7 +102,16 @@ defproc texel_core (avMx1of2 in, out; dly_cfg_inverters[i].vdd = supply.vdd; dly_cfg_inverters[i].vss = supply.vss; decoder.dly_cfg[i] = dly_cfg_inverters[i].y; - ) // This sexy hack means that the inverters are max delay throughout the register flush operations. + ) + + // Synapse handshake circuits, to be removed for innovus + decoder_2d_synapse_hs _synapses( + .synapses = synapses, + .in_req_x = decoder.out_req_x, .in_req_y = decoder.out_req_y, + .in_ackB_pullups = decoder.out_ackB_pullups, + .out_req_x_pullups = decoder.in_req_x_pullups, + .out_ackB_decoder = decoder.in_ackB_decoder, + .supply = supply); // Neurons + encoder pint NC_NRN; diff --git a/dataflow_neuro/coders.act b/dataflow_neuro/coders.act index 4768a87..1176f7b 100644 --- a/dataflow_neuro/coders.act +++ b/dataflow_neuro/coders.act @@ -382,6 +382,45 @@ defproc decoder_2d_hs (avMx1of2 in; a1of1 out[Nx*Ny]; bool? 
reset_B; po } + +/** + * Synapse handshake circuits which live in the core, and so will not be instantiated + * when running innovus on the periphery. + */ +export template +defproc decoder_2d_synapse_hs (bool? in_req_x[Nx], in_req_y[Ny]; a1of1 synapses[Nx*Ny]; + bool out_ackB_decoder[Nx]; + bool in_ackB_pullups[Nx]; + bool out_req_x_pullups[Nx]; + power supply) { + + + // AND grid for reqs into synapses + and_grid _and_grid(.inx = in_req_x, .iny = in_req_y, .supply = supply); + (i:Nx*Ny: synapses[i].r = _and_grid.out[i];) + + + // Pull DOWNs on the ackB lines by synapses (easier to invert). + A_2N_U_X4 ack_pulldowns[Nx*Ny]; + pint index; + (i:Nx: + (j:Ny: + index = i + Nx*j; + ack_pulldowns[index].n1 = synapses[index].a; + ack_pulldowns[index].n2 = in_req_x[i]; // GET REFRESHED IN CORE + ack_pulldowns[index].y = out_ackB_decoder[i]; + ack_pulldowns[index].vss = supply.vss; + ack_pulldowns[index].vdd = supply.vdd; + ) + ) + + // Connect the ackB lines together + (i:Nx: out_ackB_decoder[i] = in_ackB_pullups[i];) + + // Pipe req x lines down to the ackB pullups + (i:Nx: out_req_x_pullups[i] = in_req_x[i];) +} + /** * 2D decoder which uses either synapse handshaking, or just a delay. * Controlled by the "hs_en" (handshake_enable) config bit. @@ -395,8 +434,11 @@ defproc decoder_2d_hs (avMx1of2 in; a1of1 out[Nx*Ny]; bool? reset_B; po * Is needed in case there are instabilities while we fiddle with delays. */ export template -defproc decoder_2d_hybrid (avMx1of2 in; a1of1 out[Nx*Ny]; bool? dly_cfg[N_dly_cfg], hs_en, ack_disable, - reset_B; power supply) { +defproc decoder_2d_hybrid (avMx1of2 in; bool! out_req_x[Nx], out_req_y[Ny]; bool? dly_cfg[N_dly_cfg], hs_en, ack_disable; + bool in_ackB_decoder[Nx]; // AckB lines back to the decoder for handshaking + bool out_ackB_pullups[Nx]; // AckB lines from the line end pull ups + bool in_req_x_pullups[Nx]; // req x lines going to the line pull ups + bool? 
reset_B; power supply) { bool _reset_BX[Nx]; sigbuf reset_sb(.in = reset_B, .out = _reset_BX, .supply = supply); @@ -416,13 +458,9 @@ defproc decoder_2d_hybrid (avMx1of2 in; a1of1 out[Nx*Ny]; bool? dly_cfg // sig buf for reqx lines, since they go to synapse pull down gates. // Signals to the and-grid are buffered therein. sigbuf_boolarray d_dr_xX(.in = d_dr_x.out, .supply = supply); - - // sigbuf<15> d_dr_xX[Nx]; // GET REFRESHED IN CORE - // (i:Nx: - // d_dr_xX[i].in = d_dr_x.out[i]; - // d_dr_xX[i].supply = supply; - // ) + d_dr_xX.out = out_req_x; sigbuf_boolarray d_dr_yX(.in = d_dr_y.out, .supply = supply); + d_dr_yX.out = out_req_y; // Validity @@ -432,28 +470,7 @@ defproc decoder_2d_hybrid (avMx1of2 in; a1of1 out[Nx*Ny]; bool? dly_cfg .vdd = supply.vdd, .vss = supply.vss); - // and grid for reqs into synapses - - and_grid _and_grid(.inx = d_dr_xX.out, .iny = d_dr_yX.out, .supply = supply); - (i:Nx*Ny: out[i].r = _and_grid.out[i];) - - // Acknowledge pull down time - - // Pull DOWNs on the ackB lines by synapses (easier to invert). - bool _out_acksB[Nx]; // The vertical output ack lines from each syn. - A_2N_U_X4 ack_pulldowns[Nx*Ny]; - pint index; - (i:Nx: - (j:Ny: - index = i + Nx*j; - ack_pulldowns[index].n1 = out[index].a; - ack_pulldowns[index].n2 = d_dr_xX.out[i]; // GET REFRHRESED IN CORE - ack_pulldowns[index].y = _out_acksB[i]; - ack_pulldowns[index].vss = supply.vss; - ack_pulldowns[index].vdd = supply.vdd; - ) - ) - + // Line end pull UPs (triggered once reqs removed) // Use two pullups rather than and-pullup // bc smaller @@ -462,14 +479,14 @@ defproc decoder_2d_hybrid (avMx1of2 in; a1of1 out[Nx*Ny]; bool? 
dly_cfg A_2P_U_X4 pu[Nx]; // TODO probably replace this with variable strength PU A_1P_U_X4 pu_reset[Nx]; (i:Nx: - pu[i].p1 = d_dr_xX.out[i]; + pu[i].p1 = in_req_x_pullups[i]; pu[i].p2 = hs_enB; - pu[i].y = _out_acksB[i]; + pu[i].y = out_ackB_pullups[i]; pu[i].vdd = supply.vdd; pu[i].vss = supply.vss; pu_reset[i].p1 = _reset_BX[i]; - pu_reset[i].y = _out_acksB[i]; + pu_reset[i].y = out_ackB_pullups[i]; pu_reset[i].vdd = supply.vdd; pu_reset[i].vss = supply.vss; ) @@ -479,16 +496,17 @@ defproc decoder_2d_hybrid (avMx1of2 in; a1of1 out[Nx*Ny]; bool? dly_cfg (i:Nx: keeps[i].vdd = supply.vdd; keeps[i].vss = supply.vss; - keeps[i].y = _out_acksB[i]; + keeps[i].y = out_ackB_pullups[i]; ) + // ORtree from all output acks, back to the buffer ack. // This is instead of the ack that came from the delayed validity trees, // in decoder_2d_dly. ortree _ortree(.supply = supply); INV_X1 out_ack_invs[Nx]; (i:Nx: - out_ack_invs[i].a = _out_acksB[i]; + out_ack_invs[i].a = in_ackB_decoder[i]; out_ack_invs[i].vdd = supply.vdd; out_ack_invs[i].vss = supply.vss;