2022-03-02 09:48:41 +01:00
|
|
|
/*************************************************************************
|
|
|
|
*
|
|
|
|
* This file is part of ACT dataflow neuro library
|
|
|
|
*
|
|
|
|
* Copyright (c) 2022 University of Groningen - Ole Richter
|
|
|
|
* Copyright (c) 2022 University of Groningen - Michele Mastella
|
|
|
|
* Copyright (c) 2022 University of Groningen - Hugh Greatorex
|
|
|
|
* Copyright (c) 2022 University of Groningen - Madison Cotteret
|
|
|
|
*
|
|
|
|
*
|
|
|
|
* This source describes Open Hardware and is licensed under the CERN-OHL-W v2 or later
|
|
|
|
*
|
|
|
|
* You may redistribute and modify this documentation and make products
|
|
|
|
* using it under the terms of the CERN-OHL-W v2 (https://cern.ch/cern-ohl).
|
|
|
|
* This documentation is distributed WITHOUT ANY EXPRESS OR IMPLIED
|
|
|
|
* WARRANTY, INCLUDING OF MERCHANTABILITY, SATISFACTORY QUALITY
|
|
|
|
* AND FITNESS FOR A PARTICULAR PURPOSE. Please see the CERN-OHL-W v2
|
|
|
|
* for applicable conditions.
|
|
|
|
*
|
|
|
|
* Source location: https://git.web.rug.nl/bics/actlib_dataflow_neuro
|
|
|
|
*
|
|
|
|
* As per CERN-OHL-W v2 section 4.1, should You produce hardware based on
|
|
|
|
* these sources, You must maintain the Source Location visible in its
|
|
|
|
* documentation.
|
|
|
|
*
|
|
|
|
**************************************************************************
|
|
|
|
*/
|
|
|
|
import "../../dataflow_neuro/cell_lib_async.act";
|
|
|
|
import "../../dataflow_neuro/cell_lib_std.act";
|
|
|
|
import "../../dataflow_neuro/treegates.act";
|
|
|
|
import "../../dataflow_neuro/primitives.act";
|
|
|
|
// import tmpl::dataflow_neuro;
|
|
|
|
// import tmpl::dataflow_neuro;
|
|
|
|
import std::channel;
|
|
|
|
open std::channel;
|
|
|
|
|
2022-03-04 14:10:15 +01:00
|
|
|
import std::data;
|
|
|
|
open std::data;
|
|
|
|
|
|
|
|
|
|
|
|
// import dev::channel;
|
|
|
|
// open dev::channel;
|
|
|
|
|
|
|
|
|
2022-03-02 09:48:41 +01:00
|
|
|
namespace tmpl {
|
|
|
|
namespace dataflow_neuro {
|
|
|
|
|
|
|
|
/**
 * 2D address decoder with a programmable acknowledge delay.
 *
 * An incoming dual-rail packet concat(x,y) is latched in a buffer. Data bits
 * 0..NxC-1 carry the x address and bits NxC..NxC+NyC-1 carry the y address.
 * For every x index i an andtree combines, per address bit j, the true rail
 * (if bit j of i is 1) or the false rail (if bit j of i is 0); its output
 * drives outx[i]. outy is generated the same way from the y bits.
 *
 * The two validity trees are merged by a 2-input C-element. Its output is
 * fed straight back as the buffer's valid (.v) and, through the programmable
 * delay line, as the buffer's acknowledge (.a): only the ack is delayed.
 *
 * Template parameters:
 *   NxC, NyC   number of dual-rail wires (bits) of the x / y address.
 *              NxC should be ceil(log2(Nx)) (likewise for y); this is not
 *              computed here and is not checked.
 *   Nx, Ny     number of decoded x / y output lines.
 *   N_dly_cfg  number of config bits of the ACK delay line, with all bits
 *              high corresponding to 2**N_dly_cfg - 1 DLY4_X1 cells.
 *
 * Ports:
 *   in         address input channel (x bits first, then y bits).
 *   outx,outy  decoded select lines, driven by this process.
 *   dly_cfg    delay-line configuration bits.
 *   reset_B    reset passed to the input buffer
 *              (presumably active low, going by the name -- confirm).
 *   supply     power rails.
 */
export template<pint NxC, NyC, Nx, Ny, N_dly_cfg>
defproc decoder_2d_dly (avMx1of2<NxC+NyC> in; bool! outx[Nx], outy[Ny];
                        bool? dly_cfg[N_dly_cfg], reset_B; power supply) {

    // Buffer to receive the concat(x,y) address packet
    buffer<NxC+NyC> addr_buf(.in = in, .reset_B = reset_B, .supply = supply);
    // TODO: the buffer outputs fan out to all AND trees and probably need
    // additional signal buffering.

    // Validity trees, one per address dimension
    vtree<NxC> vtree_x (.supply = supply);
    vtree<NyC> vtree_y (.supply = supply);
    (i:0..NxC-1:
        vtree_x.in.d[i].t = addr_buf.out.d.d[i].t;
        vtree_x.in.d[i].f = addr_buf.out.d.d[i].f;
    )
    (i:0..NyC-1:
        vtree_y.in.d[i].t = addr_buf.out.d.d[i+NxC].t;
        vtree_y.in.d[i].f = addr_buf.out.d.d[i+NxC].f;
    )

    // Combine both validities; the result is the (undelayed) valid signal
    A_2C_B_X1 C2el(.c1 = vtree_x.out, .c2 = vtree_y.out, .vdd = supply.vdd, .vss = supply.vss);
    addr_buf.out.v = C2el.y;

    // Delay the ack line (but not the valid).
    // Testing variant that disconnects the ack from the validity:
    // delayprog<N_dly_cfg> dly(.in = tielow.y, .s = dly_cfg, .supply = supply);
    delayprog<N_dly_cfg> dly(.in = C2el.y, .s = dly_cfg, .supply = supply);
    dly.out = addr_buf.out.a;

    // AND trees
    pint bitval;

    andtree<NxC> atree_x[Nx];
    (i:0..Nx-1:
        atree_x[i].supply = supply;
        atree_x[i].out = outx[i];
        (j:0..NxC-1:
            bitval = (i & ( 1 << j )) >> j; // binary digit j of index i (0 or 1)
            [ bitval = 1 ->
                atree_x[i].in[j] = addr_buf.out.d.d[j].t;
            [] bitval = 0 ->
                atree_x[i].in[j] = addr_buf.out.d.d[j].f;
            ]
        )
    )

    andtree<NyC> atree_y[Ny];
    (i:0..Ny-1:
        atree_y[i].supply = supply;
        atree_y[i].out = outy[i];
        (j:0..NyC-1:
            bitval = (i & ( 1 << j )) >> j; // binary digit j of index i (0 or 1)
            [ bitval = 1 ->
                atree_y[i].in[j] = addr_buf.out.d.d[j+NxC].t;
            [] bitval = 0 ->
                atree_y[i].in[j] = addr_buf.out.d.d[j+NxC].f;
            ]
        )
    )

}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/*
 * Build an arbiter_handshake tree.
 *
 * The N input channels are combined pairwise by arbiter_handshake cells,
 * layer by layer, until a single channel remains, which is connected to
 * `out`. When a layer holds an odd number of channels, the last one is
 * passed through unchanged to the next layer. For N = 1 no arbiter is
 * instantiated and `in[0]` is connected directly to `out`.
 *
 *   N       number of input channels, must be > 0.
 *   in      request/acknowledge channels competing for the output.
 *   out     the single channel at the root of the tree.
 *   supply  power rails, connected to every arbiter.
 */
export template<pint N>
defproc arbtree (a1of1 in[N]; a1of1 out; power supply)
{
    { N > 0 : "What?" };

    pint i, end, j;
    i = 0;
    end = N-1;

    pint arbCount;
    arbCount = 0;

    /*
     * Dry run of the construction loop further down: it performs the same
     * index arithmetic without creating any hardware, in order to find
     *   - arbCount: the number of arbiters required, and
     *   - end:      the index of the last node of the tree.
     */
    *[ i != end ->
        j = 0;
        *[ i <= end ->
            j = j + 1;
            [ i = end ->
                /*-- odd one out: passed through, needs no arbiter --*/
                i = end+1;
            [] else ->
                /*-- a pair: consumes two nodes and one arbiter --*/
                i = i + 2;
                arbCount = arbCount + 1;
            ]
        ]
        /*-- the inner loop always exits with i = end+1, the first node
             of the next layer; j nodes were produced for that layer --*/
        end = end+j;
    ]

    /* array that holds ALL the nodes in the arbiter tree */
    a1of1 tmp[end+1];

    // Connecting the first nodes to the input
    (l:N:
        tmp[l] = in[l];
    )

    /* array to hold the actual arbiter cells */
    [arbCount > 0 ->
        arbiter_handshake arbs[arbCount];
    ]
    (h:arbCount:arbs[h].supply = supply;)

    /* Reset the loop variables used by the dry run */
    i = 0;
    end = N-1;
    j = 0;
    pint arbIndex = 0;

    /* Invariant: i <= end */
    *[ i != end ->
        /*
         * Invariant: tmp[i..end] holds the channels of the current layer
         * that still have to be combined.
         */
        j = 0;
        *[ i <= end ->
            /*-- there are still channels that need to be combined --*/
            j = j + 1;
            [ i = end ->
                /*-- last piece: pipe input through to next layer --*/
                tmp[end+j] = tmp[i];
                i = end+1;
            [] else ->
                /*-- arbitrate between two neighbouring channels --*/
                arbs[arbIndex].in1 = tmp[i];
                arbs[arbIndex].in2 = tmp[i+1];
                arbs[arbIndex].out = tmp[end+j];
                arbIndex = arbIndex + 1;
                i = i + 2;
            ]
        ]
        /*-- update range that has to be combined --*/
        i = end+1;
        end = end+j;
        j = 0;
    ]

    out = tmp[end];

}
|
|
|
|
|
|
|
|
|
2022-03-03 17:09:00 +01:00
|
|
|
/**
 * Nx-by-Ny grid of AND2_X1 cells.
 *
 * The cell for column x and row y lives at flat index x + y*Nx. Its inputs
 * are inx[x] and iny[y]; its output drives out[x + y*Nx].
 *
 *   Nx, Ny  number of column / row lines.
 *   out     one output per (x,y) pair, row-major with x running fastest.
 *   inx     column lines.
 *   iny     row lines.
 *   supply  power rails, connected to every cell.
 */
export template<pint Nx, Ny>
defproc and_grid(bool! out[Nx*Ny]; bool? inx[Nx], iny[Ny]; power supply) {
    AND2_X1 ands[Nx*Ny];

    (row:Ny:
        (col:Nx:
            // power
            ands[col + row*Nx].vdd = supply.vdd;
            ands[col + row*Nx].vss = supply.vss;
            // logic
            ands[col + row*Nx].a = inx[col];
            ands[col + row*Nx].b = iny[row];
            ands[col + row*Nx].y = out[col + row*Nx];
        )
    )
}
|
|
|
|
|
|
|
|
|
2022-03-04 14:10:15 +01:00
|
|
|
|
|
|
|
// Generates the OR-trees required to go from
// N one-hot inputs to Nc dual rail binary encoding.
//
// For output bit i, the true rail is the OR of every input whose index has
// bit i set, and the false rail is the OR of every input whose index has
// bit i clear.
//
// Every OR tree is sized for the full code space of 2**Nc indices, i.e.
// 2**Nc / 2 inputs per rail. Indices j >= N have no matching input; their
// OR-tree inputs are connected to a shared tie-low cell instead.
//
//   Nc      number of dual-rail output bits.
//   N       number of one-hot inputs, must satisfy N <= 2**Nc.
//   in      one-hot input lines.
//   out     dual-rail encoded index.
//   supply  power rails.
export template<pint Nc, N>
defproc encoder(bool? in[N]; Mx1of2<Nc> out; power supply) {
    {N <= 1<<Nc : "Num inputs too wide for encoding channel!"};

    pint n_codes; // N rounded up to a power of 2
    n_codes = (1<<Nc);

    ortree<n_codes/2> ors_t[Nc];
    ortree<n_codes/2> ors_f[Nc];

    // Constant low for the OR-tree inputs that have no matching input line
    TIELO_X1 tielo(.vdd = supply.vdd, .vss = supply.vss);

    pint bitval;
    pint used_t; // inputs already connected on the current true-rail tree
    pint used_f; // inputs already connected on the current false-rail tree

    (i:Nc: // For each output bit
        ors_t[i].supply = supply;
        ors_f[i].supply = supply;
        ors_t[i].out = out.d[i].t;
        ors_f[i].out = out.d[i].f;

        used_t = 0;
        used_f = 0;
        (j:n_codes:
            bitval = (j & ( 1 << i )) >> i; // binary digit i of index j
            [ bitval = 1 ->
                [ j <= N-1 ->
                    ors_t[i].in[used_t] = in[j];
                [] else ->
                    ors_t[i].in[used_t] = tielo.y;
                ]
                used_t = used_t + 1;
            [] bitval = 0 ->
                [ j <= N-1 ->
                    ors_f[i].in[used_f] = in[j];
                [] else ->
                    ors_f[i].in[used_f] = tielo.y;
                ]
                used_f = used_f + 1;
            ]
        )
    )
}
|
2022-03-03 11:56:34 +01:00
|
|
|
|
|
|
|
|
2022-03-04 14:10:15 +01:00
|
|
|
|
|
|
|
|
|
|
|
// template<pint N, pint M, ACK_STRENGTH>
|
|
|
|
// defproc encoder2D(a1of1 x[N]; a1of1 y[M] ;avMx1of2<X> addr; bool! out_a; power supply)
|
|
|
|
// {
|
|
|
|
// // Arbiters
|
|
|
|
// a1of1 _out_arb_x,_out_arb_y;
|
|
|
|
// a1of1 _x_temp[N];
|
|
|
|
// (i:N:
|
|
|
|
// _x_temp[i].r = x[i].r;
|
|
|
|
// )
|
|
|
|
// (i:M:
|
|
|
|
// _y_temp[i].r = y[i].r;
|
|
|
|
// )
|
|
|
|
// arbtree<N> Xarb(.in = _x_temp,.out = _out_arb_X,.supply = supply);
|
|
|
|
// arbtree<M> Yarb(.in = _y_temp,.out = _out_arb_Y,.supply = supply);
|
|
|
|
|
|
|
|
// sigbuf<ACK_STRENGTH> x_ack_arb[N];
|
|
|
|
// sigbuf<ACK_STRENGTH> y_ack_arb[M];
|
|
|
|
// (i:N:
|
|
|
|
// x_ack_arb[i].in = _x_temp[i].a;
|
|
|
|
// x_ack_arb[i].out[0] = x[i].a;
|
|
|
|
// x_ack_arb[i].supply = supply;
|
|
|
|
// )
|
|
|
|
// (i:M:
|
|
|
|
// y_ack_arb[i].in = _y_temp[i].a;
|
|
|
|
// y_ack_arb[i].out[0] = y[i].a;
|
|
|
|
// y_ack_arb[i].supply = supply;
|
|
|
|
// )
|
|
|
|
|
|
|
|
|
|
|
|
// }
|
|
|
|
|
|
|
|
|
2022-03-02 09:48:41 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
}
|