/*************************************************************************
 *
 * This file is part of ACT dataflow neuro library
 *
 * Copyright (c) 2022 University of Groningen - Ole Richter
 * Copyright (c) 2022 University of Groningen - Michele Mastella
 * Copyright (c) 2022 University of Groningen - Hugh Greatorex
 * Copyright (c) 2022 University of Groningen - Madison Cotteret
 *
 *
 * This source describes Open Hardware and is licensed under the CERN-OHL-W v2 or later
 *
 * You may redistribute and modify this documentation and make products
 * using it under the terms of the CERN-OHL-W v2 (https:/cern.ch/cern-ohl).
 * This documentation is distributed WITHOUT ANY EXPRESS OR IMPLIED
 * WARRANTY, INCLUDING OF MERCHANTABILITY, SATISFACTORY QUALITY
 * AND FITNESS FOR A PARTICULAR PURPOSE. Please see the CERN-OHL-W v2
 * for applicable conditions.
 *
 * Source location: https://git.web.rug.nl/bics/actlib_dataflow_neuro
 *
 * As per CERN-OHL-W v2 section 4.1, should You produce hardware based on
 * these sources, You must maintain the Source Location visible in its
 * documentation.
 *
 **************************************************************************
 */

import "../../dataflow_neuro/cell_lib_async.act";
import "../../dataflow_neuro/cell_lib_std.act";
import "../../dataflow_neuro/treegates.act";
import "../../dataflow_neuro/primitives.act";
// import tmpl::dataflow_neuro;
// import tmpl::dataflow_neuro;
import std::channel;
open std::channel;
import std::data;
open std::data;
// import dev::channel;
// open dev::channel;

namespace tmpl {
namespace dataflow_neuro {

/**
 * 2D decoder which uses a configurable delay from the validity trees to
 * the buffer ack.
 *
 * Template parameters:
 *   NxC, NyC   number of dual-rail bits in the x / y part of the address.
 *              NxC should be something like ceil(log2(Nx)); it is passed
 *              explicitly rather than computed here.
 *   Nx, Ny     x / y size of the decoder array (number of one-hot outputs).
 *   N_dly_cfg  number of config bits in the ACK delay line, with all bits
 *              high corresponding to 2**N_dly_cfg - 1 DLY4_X1 cells.
 *
 * Ports:
 *   in         address packet, concat(x, y): bits [0, NxC) are x and bits
 *              [NxC, NxC+NyC) are y.
 *   outx/outy  decoded select lines; line i is the AND of the rails that
 *              spell i in binary.
 *   dly_cfg    select bits for the programmable ack delay.
 *
 * NOTE(review): the template header and the <...> arguments on buffer,
 * vtree, delayprog and andtree are reconstructed from how the names are
 * used in the body -- confirm the parameter order against callers.
 */
export template<pint NxC, NyC, Nx, Ny, N_dly_cfg>
defproc decoder_2d_dly (avMx1of2<NxC+NyC> in; bool? outx[Nx], outy[Ny],
                        dly_cfg[N_dly_cfg], reset_B; power supply) {

    // Buffer to receive the concat(x,y) address packet
    buffer<NxC+NyC> addr_buf(.in = in, .reset_B = reset_B, .supply = supply);

    // TODO: the outputs of addr_buf probably need buffering before fan-out.

    // Validity trees, one per address half
    vtree<NxC> vtree_x (.supply = supply);
    vtree<NyC> vtree_y (.supply = supply);
    (i:0..NxC-1:vtree_x.in.d[i].t = addr_buf.out.d.d[i].t;)
    (i:0..NxC-1:vtree_x.in.d[i].f = addr_buf.out.d.d[i].f;)
    (i:0..NyC-1:vtree_y.in.d[i].t = addr_buf.out.d.d[i+NxC].t;)
    (i:0..NyC-1:vtree_y.in.d[i].f = addr_buf.out.d.d[i+NxC].f;)

    // Combine both validities. The result drives the valid line directly;
    // only the ack is routed through the programmable delay.
    A_2C_B_X1 C2el(.c1 = vtree_x.out, .c2 = vtree_y.out,
                   .vdd = supply.vdd, .vss = supply.vss);
    addr_buf.out.v = C2el.y;

    // delayprog dly(.in = tielow.y, .s = dly_cfg, .supply = supply);
    delayprog<N_dly_cfg> dly(.in = C2el.y, .s = dly_cfg, .supply = supply);

    // WARNING: this ack connection has been disconnected in the past for
    // testing purposes -- make sure it is present before using the block.
    dly.out = addr_buf.out.a;

    // AND trees: output i is asserted when every address bit matches the
    // binary representation of i.
    pint bitval;

    andtree<NxC> atree_x[Nx];
    (k:0..Nx-1:atree_x[k].supply = supply;)
    (i:0..Nx-1:
        (j:0..NxC-1:
            bitval = (i & ( 1 << j )) >> j; // binary digit j of integer i (0 or 1)
            [ bitval = 1 ->
                atree_x[i].in[j] = addr_buf.out.d.d[j].t;
            [] bitval = 0 ->
                atree_x[i].in[j] = addr_buf.out.d.d[j].f;
            ]
        )
        // One output connection per tree (not one per address bit).
        atree_x[i].out = outx[i];
    )

    andtree<NyC> atree_y[Ny];
    (k:0..Ny-1:atree_y[k].supply = supply;)
    (i:0..Ny-1:
        (j:0..NyC-1:
            bitval = (i & ( 1 << j )) >> j; // binary digit j of integer i (0 or 1)
            [ bitval = 1 ->
                atree_y[i].in[j] = addr_buf.out.d.d[j+NxC].t;
            [] bitval = 0 ->
                atree_y[i].in[j] = addr_buf.out.d.d[j+NxC].f;
            ]
        )
        atree_y[i].out = outy[i];
    )
}

/**
 * Build a binary tree of arbiter_handshake cells that merges N a1of1
 * request channels into a single a1of1 channel.
 *
 * Nodes are combined pairwise layer by layer; when a layer has an odd
 * number of nodes the last one is passed through to the next layer
 * unchanged.
 *
 * NOTE(review): template<pint N> is reconstructed from the use of N in the
 * port list.
 */
export template<pint N>
defproc arbtree (a1of1 in[N]; a1of1 out; power supply) {

    { N > 0 : "What?" };

    pint i, end, j;
    i = 0;
    end = N-1;

    /* First pass: walk the tree shape without instantiating anything, to
     * find out how many arbiters and how many tree nodes are required.
     * The second pass below follows exactly the same traversal. */
    pint arbCount;
    arbCount = 0;
    *[ i != end ->
        j = 0;
        *[ i <= end ->
            j = j + 1;
            [ i = end ->
                /* odd one out: passed through, no arbiter */
                i = end+1;
            [] i+1 = end ->
                /* last pair of this layer */
                i = end+1;
                arbCount = arbCount + 1;
            [] else ->
                i = i + 2;
                arbCount = arbCount + 1;
            ]
        ]
        /*-- update range that has to be combined --*/
        /* i already equals end+1 here */
        end = end+j;
    ]

    /* array that holds ALL the nodes in the arbiter tree; after the first
     * pass, end is the index of the root node */
    a1of1 tmp[end+1];

    // Connecting the first nodes to the input
    (l:N: tmp[l] = in[l]; )

    /* array to hold the actual arbiters (none are needed when N = 1) */
    [ arbCount > 0 ->
        arbiter_handshake arbs[arbCount];
    ]
    (h:arbCount:arbs[h].supply = supply;)

    /* Second pass: reset the traversal variables and build the tree */
    i = 0;
    end = N-1;
    j = 0;
    pint arbIndex = 0;

    /* Invariant: i <= end */
    *[ i != end ->
        /*
         * Invariant: tmp[i..end] holds the channels of the current layer
         * that still need to be combined.
         */
        j = 0;
        *[ i <= end ->
            /*-- there are still channels that need to be combined --*/
            j = j + 1;
            [ i = end ->
                /*-- last piece: pipe input through to next layer --*/
                tmp[end+j] = tmp[i];
                i = end+1;
            [] i+1 = end ->
                /*-- last piece: final two-input arbiter of this layer --*/
                arbs[arbIndex].in1 = tmp[i];
                arbs[arbIndex].in2 = tmp[i+1];
                arbs[arbIndex].out = tmp[end+j];
                arbIndex = arbIndex + 1;
                i = end+1;
            [] else ->
                /*-- more to come: two-input arbiter, advance by a pair --*/
                arbs[arbIndex].in1 = tmp[i];
                arbs[arbIndex].in2 = tmp[i+1];
                arbs[arbIndex].out = tmp[end+j];
                arbIndex = arbIndex + 1;
                i = i + 2;
            ]
        ]
        /*-- update range that has to be combined --*/
        i = end+1;
        end = end+j;
        j = 0;
    ]

    out = tmp[end];
}

/**
 * Grid of Nx*Ny two-input AND gates.
 * out[x + y*Nx] is the AND of inx[x] and iny[y].
 *
 * NOTE(review): template<pint Nx, Ny> is reconstructed from the port list.
 */
export template<pint Nx, Ny>
defproc and_grid(bool! out[Nx*Ny]; bool? inx[Nx], iny[Ny]; power supply) {
    AND2_X1 ands[Nx*Ny];
    (i:0..Nx*Ny-1:ands[i].vss = supply.vss; ands[i].vdd = supply.vdd;)
    (x:0..Nx-1:
        (y:0..Ny-1:
            ands[x + y*Nx].a = inx[x];
            ands[x + y*Nx].b = iny[y];
            ands[x + y*Nx].y = out[x + y*Nx];
        )
    )
}

/**
 * Generates the OR-trees required to go from N one-hot inputs to an
 * Nc-bit dual-rail binary encoding.
 *
 * For output bit i, input j feeds the true-rail OR tree when bit i of j
 * is 1 and the false-rail OR tree otherwise. Each tree has 2**Nc / 2
 * inputs; slots whose index j is >= N are tied low.
 *
 * NOTE(review): the template header, the assertion and the definition of
 * _N were lost from the source text and are reconstructed from the uses of
 * Nc, N and _N below. The assertion message in particular is new --
 * restore the original wording if it is known.
 */
export template<pint Nc, N>
defproc dualrail_encoder(bool? in[N]; Mx1of2<Nc> out; power supply) {

    { N <= (1 << Nc) : "dualrail_encoder: N inputs do not fit into Nc address bits" };

    // Number of addresses representable with Nc bits
    pint _N = 1 << Nc;

    ortree<_N/2> ors_t[Nc];
    ortree<_N/2> ors_f[Nc];
    (i:Nc:ors_t[i].supply = supply; ors_t[i].out = out.d[i].t;)
    (i:Nc:ors_f[i].supply = supply; ors_f[i].out = out.d[i].f;)

    pint num_connected_t; // Number of inputs already connected to the current true-rail OR tree
    pint num_connected_f; // Same for the current false-rail OR tree

    // Constant low used to pad OR-tree inputs that have no matching in[j]
    TIELO_X1 tielo(.vdd = supply.vdd, .vss = supply.vss);

    pint bitval;
    (i:0..Nc-1: // For each output line
        num_connected_t = 0;
        num_connected_f = 0;
        (j:0.. _N-1:
            bitval = (j & ( 1 << i )) >> i; // binary digit i of integer j
            [ bitval = 1 & j <= N-1 ->
                ors_t[i].in[num_connected_t] = in[j];
                num_connected_t = num_connected_t + 1;
            [] bitval = 0 & j <= N-1 ->
                ors_f[i].in[num_connected_f] = in[j];
                num_connected_f = num_connected_f + 1;
            [] bitval = 1 & j > N-1 ->
                ors_t[i].in[num_connected_t] = tielo.y;
                num_connected_t = num_connected_t + 1;
            [] bitval = 0 & j > N-1 ->
                ors_f[i].in[num_connected_f] = tielo.y;
                num_connected_f = num_connected_f + 1;
            ]
        )
    )
}

/**
 * 2D encoder: arbitrates between N x-requests and M y-requests and drives
 * an address onto the addr channel. WORK IN PROGRESS.
 *
 * NOTE(review): this process is unfinished. The template header and the
 * <...> arguments on avMx1of2, arbtree, ortree, sigbuf and vtree are
 * reconstructed from how the sizes are used in the body. Open points that
 * could not be resolved from this file are marked NOTE(review) inline.
 */
template<pint N, M, address_size>
defproc encoder2D(a1of1 x[N]; a1of1 y[M]; avMx1of2<address_size> addr;
                  power supply; bool reset_B) {

    // Internal nets (declared up front; several are shared between stages)
    bool _x_v, _x_v_B;          // OR of all x requests, and its inverse
    bool _in_x_v, _in_y_v;      // validity of the internal x / y address
    bool _x_a, _x_a_B;          // x-stage acknowledge and its inverse
    bool _en;                   // enable for the output buffer stage
    bool _out_a_B;              // inverted output acknowledge
    bool _en_X_t[address_size], _en_X_f[address_size];
    bool _out_a_BX_t[address_size], _out_a_BX_f[address_size];

    // Reset buffers. Slots 0..2 feed the handshake cells; slots
    // 3..address_size+2 feed the false-rail output cells and the remaining
    // address_size slots feed the true-rail output cells.
    bool _reset_BX, _reset_BXX[2*address_size+3];
    BUF_X1 reset_buf(.a=reset_B, .y=_reset_BX, .vdd=supply.vdd, .vss=supply.vss);
    sigbuf<2*address_size+3> reset_bufarray(.in=_reset_BX, .out=_reset_BXX, .supply=supply);

    // Arbiters
    a1of1 _out_arb_X, _out_arb_Y;
    a1of1 _x_temp[N];
    a1of1 _y_temp[M];
    (i:N: _x_temp[i].r = x[i].r; )
    (i:M: _y_temp[i].r = y[i].r; )
    arbtree<N> Xarb(.in = _x_temp, .out = _out_arb_X, .supply = supply);
    arbtree<M> Yarb(.in = _y_temp, .out = _out_arb_Y, .supply = supply);

    // Sigbufs for strong acknowledge signals
    // NOTE(review): the drive-strength arguments were lost from the source
    // text; <M> / <N> are placeholders -- confirm. Also confirm whether
    // sigbuf_1output.out is an array (used as out[0] here) or a single bool
    // (used that way for addr_validity_x below); one of the two is wrong.
    sigbuf_1output<M> x_ack_arb[N];
    sigbuf_1output<N> y_ack_arb[M];
    (i:N:
        x_ack_arb[i].in = _x_temp[i].a;
        x_ack_arb[i].out[0] = x[i].a;
        x_ack_arb[i].supply = supply;
    )
    (i:M:
        y_ack_arb[i].in = _y_temp[i].a;
        y_ack_arb[i].out[0] = y[i].a;
        y_ack_arb[i].supply = supply;
    )

    // This block checks that the input is valid and that the arbiter made
    // a choice, then activates the ack of the Y arbiter
    A_2C2P_RB_X1 Y_ack_confirm();
    Y_ack_confirm.p1 = _x_v;
    Y_ack_confirm.p2 = _in_x_v;
    Y_ack_confirm.c1 = _out_arb_Y.r;
    Y_ack_confirm.c2 = _x_a_B;
    Y_ack_confirm.y = _out_arb_Y.a;
    Y_ack_confirm.vdd = supply.vdd;
    Y_ack_confirm.vss = supply.vss;
    Y_ack_confirm.reset_B = _reset_BXX[0];

    // This block checks that the input is valid and that the arbiter made
    // a choice, then activates the ack of the X arbiter
    A_2C_RB X_ack_confirm();
    X_ack_confirm.c1 = _out_arb_X.r;
    X_ack_confirm.c2 = _x_a_B;
    // NOTE(review): the output was left unconnected, so the X arbiter was
    // never acknowledged; connected by analogy with Y_ack_confirm -- confirm.
    X_ack_confirm.y = _out_arb_X.a;
    X_ack_confirm.vdd = supply.vdd;
    X_ack_confirm.vss = supply.vss;
    X_ack_confirm.reset_B = _reset_BXX[1];

    // X_REQ validation: _x_v is high while any x request is pending
    bool _x_req_array[N];
    (i:N:_x_req_array[i] = x[i].r;)
    ortree<N> x_req_ortree(.in = _x_req_array, .out = _x_v, .supply = supply);
    // INV_X1 is connected through .a/.y, as for out_a_inv below.
    // NOTE(review): supply pins added assuming INV_X1 has vdd/vss like
    // BUF_X1 and AND2_X1 -- confirm against the cell library.
    INV_X1 not_x_req_ortree(.a = _x_v, .y = _x_v_B, .vdd = supply.vdd, .vss = supply.vss);

    //
    A_1C3P2P2N_R_X1 x_ack();
    // NEEDS BUFFERING TO X4
    //branch1
    x_ack.p1 = _in_x_v;
    x_ack.p2 = _x_v_B;
    //branch2
    x_ack.p3 = _in_x_v;
    x_ack.p4 = _in_y_v;
    x_ack.p5 = _x_v;
    //
    x_ack.c1 = _en;
    x_ack.n1 = addr.v;
    x_ack.n2 = _in_x_v;
    //
    x_ack.y = _x_a_B;
    //
    x_ack.vdd = supply.vdd;
    x_ack.vss = supply.vss;
    x_ack.reset_B = _reset_BXX[2];

    INV_X1 not_x_ack(.a = _x_a_B, .y = _x_a, .vdd = supply.vdd, .vss = supply.vss);

    A_1C2P enabling(.p1 = addr.a, .p2 = addr.v, .c1 = _x_a, .y = _en,
                    .vdd = supply.vdd, .vss = supply.vss);

    avMx1of2<address_size> _in_x;
    // NOTE(review): placeholder -- _in is a single dualrail, while _in_x.d
    // is indexed below as an address_size-bit bundle, and nothing in this
    // process drives _in or _in_y_v yet. Left as in the original.
    dualrail _in;
    _in_x.d = _in.d;
    _in_x.v = _in_x_v;

    // buffer_func_s: one cell per output rail
    A_2C2N_RB buffer_func_s_f[address_size];
    A_2C2N_RB buffer_func_s_t[address_size];
    sigbuf<address_size> en_buf_t(.in=_en, .out=_en_X_t, .supply=supply);
    sigbuf<address_size> en_buf_f(.in=_en, .out=_en_X_f, .supply=supply);
    INV_X1 out_a_inv(.a=addr.a, .y=_out_a_B, .vdd = supply.vdd, .vss = supply.vss);
    sigbuf<address_size> out_a_B_buf_f(.in=_out_a_B, .out=_out_a_BX_f, .supply=supply);
    sigbuf<address_size> out_a_B_buf_t(.in=_out_a_B, .out=_out_a_BX_t, .supply=supply);
    (i:address_size:
        // false rail of bit i
        buffer_func_s_f[i].c1 = _en_X_f[i];
        buffer_func_s_f[i].c2 = _out_a_BX_f[i];
        buffer_func_s_f[i].n1 = _in_x.d.d[i].f;
        buffer_func_s_f[i].n2 = _in_x.v;
        buffer_func_s_f[i].vdd = supply.vdd;
        buffer_func_s_f[i].vss = supply.vss;
        buffer_func_s_f[i].pr_B = _reset_BXX[i+3];
        buffer_func_s_f[i].sr_B = _reset_BXX[i+3];
        buffer_func_s_f[i].y = addr.d.d[i].f;
        // true rail of bit i
        buffer_func_s_t[i].c1 = _en_X_t[i];
        buffer_func_s_t[i].c2 = _out_a_BX_t[i];
        buffer_func_s_t[i].n1 = _in_x.d.d[i].t;
        buffer_func_s_t[i].n2 = _in_x.v;
        buffer_func_s_t[i].vdd = supply.vdd;
        buffer_func_s_t[i].vss = supply.vss;
        buffer_func_s_t[i].pr_B = _reset_BXX[i+3+address_size];
        buffer_func_s_t[i].sr_B = _reset_BXX[i+3+address_size];
        buffer_func_s_t[i].y = addr.d.d[i].t;
    )

    // Output validity: the validity tree works on the data rails (addr.d);
    // its result is buffered onto addr.v.
    bool _addr_v;
    vtree<address_size> addr_validity(.in = addr.d, .out = _addr_v);
    sigbuf_1output<4> addr_validity_x(.in = _addr_v, .out = addr.v);
    addr_validity.supply = supply;
    addr_validity_x.supply = supply;
}

}
}