actlib_dataflow_neuro/dataflow_neuro/treegates.act

230 lines
5.7 KiB
Plaintext

/*************************************************************************
*
* This file is part of ACT dataflow neuro library
*
* Copyright (c) 2022 University of Groningen - Ole Richter
* Copyright (c) 2021 Rajit Manohar
*
* This source describes Open Hardware and is licensed under the CERN-OHL-W v2 or later
*
* You may redistribute and modify this documentation and make products
* using it under the terms of the CERN-OHL-W v2 (https:/cern.ch/cern-ohl).
* This documentation is distributed WITHOUT ANY EXPRESS OR IMPLIED
* WARRANTY, INCLUDING OF MERCHANTABILITY, SATISFACTORY QUALITY
* AND FITNESS FOR A PARTICULAR PURPOSE. Please see the CERN-OHL-W v2
* for applicable conditions.
*
* Source location: https://git.web.rug.nl/bics/actlib_dataflow_neuro
*
* As per CERN-OHL-W v2 section 4.1, should You produce hardware based on
* these sources, You must maintain the Source Location visible in its
* documentation.
*
**************************************************************************/
namespace std {
export namespace gates {
/*
* Build an OR-gate tree (NOR/NAND/optional INV)
*
* The inputs are combined in layers. Every layer is inverting, so the
* layers alternate between NOR gates (signals in true sense) and NAND
* gates (signals in inverted sense). A final inverter is added only when
* the sense at the root does not match the requested one.
*
* Parameters:
*   N      - number of inputs, must be > 0
*   invert - false: out is the OR of the inputs
*            true:  out is the complement of the OR of the inputs
*
* Ports:
*   in[N]  - tree inputs
*   out    - tree output
*/
export template<pint N; pbool invert>
defproc ortree (bool? in[N]; bool out)
{
bool tout; /* NOTE(review): never referenced in this process */
{ N > 0 : "What?" };
/* i..end is the index range in tmp[] of the signals still to be combined
* in the current layer; j counts the nodes created for the next layer */
pint i, end, j;
/* true when tmp[i..end] carry the inverted sense of the OR */
pbool isinv;
isinv = false;
i = 0;
end = N-1;
/* dynamic (sparse) array that holds all the nodes in the OR tree;
* the first N entries are aliases of the inputs */
bool tmp[N];
(k:N:tmp[k] = in[k];)
/* Invariant: i <= end */
/* one iteration of this loop builds one layer; it stops once a single
* signal (the root) is left */
*[ i != end ->
/*
* Invariant: tmp[i..end] has the current signals that need to be
* combined together, and "isinv" specifies if they are the inverted
* sense or not
*/
j = 0;
*[ i < end ->
/*-- there are still signals that need to be combined --*/
j = j + 1;
/* extend tmp[] by one node: the output of the gate created below */
bool tmp[end+j..end+j];
[ i+2 >= end ->
/*-- last piece: use either a 2 or 3 input NAND/NOR gate --*/
[isinv ->
/* inverted-sense inputs: NAND */
prs { (&k:i..end:tmp[k]) => tmp[end+j]- }
[] else ->
/* true-sense inputs: NOR */
prs { (|k:i..end:tmp[k]) => tmp[end+j]- }
]
i = end;
[] else ->
/*-- more to come; so use a two input NAND/NOR gate --*/
[isinv ->
prs { (&k:i..i+1:tmp[k]) => tmp[end+j]- }
[] else ->
prs { (|k:i..i+1:tmp[k]) => tmp[end+j]- }
]
i = i + 2;
]
/* sizing directives for the gate output that was just created */
sizing {
leak_adjust <- 1;
p_n_mode <- 1;
tmp[end+j]{-1}
}
]
/*-- we just added an inverting layer --*/
isinv = ~isinv;
/*-- update range that has to be combined --*/
i = end+1;
end = end+j;
j = 0;
]
/* from here on "isinv" means: an output inverter is required, i.e. the
* sense of the root tmp[end] differs from the sense selected by "invert" */
isinv = invert ? ~isinv : isinv;
/*-- invert the signal if needed --*/
[isinv -> prs { tmp[end] => out- }
[] else -> tmp[end] = out;
]
/* sizing directives for the output inverter, only when one was created */
[isinv ->
sizing {
leak_adjust <- 1;
p_n_mode <- 1;
out{-1}
}
]
}
/*
 * Build a completion tree using a combination of 2-input (A_2C_B_X1)
 * and 3-input (A_3C_B_X1) C-element cells.
 *
 * Parameters:
 *   N      - number of inputs, must be > 0
 *   invert - when true an inverter is placed between the root of the
 *            tree and "out"; when false the root is connected to "out"
 *
 * Ports:
 *   in[N]  - tree inputs
 *   out    - tree output
 *
 * NOTE(review): no per-layer polarity is tracked, i.e. the C-element
 * cells are assumed to be non-inverting -- confirm against the cell
 * library.
 * NOTE(review): only the c1/c2/c3/y ports of the cells are connected
 * here; if the cells have supply ports they are left unconnected, and
 * this process has no supply port to wire them to -- confirm.
 */
export template<pint N; pbool invert>
defproc ctree (bool? in[N]; bool out)
{
  { N > 0 : "What?" };

  /* i..end is the index range of the signals still to be combined in
   * the current layer; j counts the nodes created for the next layer */
  pint i, end, j;

  /*
   * Pass 1: walk the tree shape without instantiating anything, to find
   * the number of 2- and 3-input cells and the index of the root node.
   * The traversal is identical to the one in pass 2 below.
   */
  pint lenTree2Count, lenTree3Count;
  lenTree2Count = 0;
  lenTree3Count = 0;
  i = 0;
  end = N-1;
  *[ i != end ->
     j = 0;
     *[ i < end ->
        j = j + 1;
        [ i+1 >= end ->
            /*-- exactly two signals left in this layer --*/
            lenTree2Count = lenTree2Count + 1;
            i = end;
        [] i+2 >= end ->
            /*-- exactly three signals left in this layer --*/
            lenTree3Count = lenTree3Count + 1;
            i = end;
        [] else ->
            /*-- more than three left: pair up the first two --*/
            lenTree2Count = lenTree2Count + 1;
            i = i + 2;
        ]
     ]
     /*-- update range that has to be combined --*/
     i = end+1;
     end = end+j;
     j = 0;
  ]

  /*
   * "end" is now the index of the root, so the tree has nodes 0..end.
   * Allocate all of them at once; the first N are aliases of the inputs.
   */
  bool tmp[end+1];
  (k:N:tmp[k] = in[k];)

  /* arrays holding the C-elements; skipped when none of a kind is needed
   * so that no zero-sized array is declared */
  [ lenTree2Count > 0 -> A_2C_B_X1 C2Els[lenTree2Count]; ]
  [ lenTree3Count > 0 -> A_3C_B_X1 C3Els[lenTree3Count]; ]

  /*
   * Pass 2: same traversal, now instantiating the connections.
   */
  pint tree2Index, tree3Index;
  tree2Index = 0;
  tree3Index = 0;
  i = 0;
  end = N-1;
  j = 0;

  /* Invariant: i <= end */
  *[ i != end ->
     /*
      * Invariant: tmp[i..end] has the current signals that need to be
      * combined together
      */
     j = 0;
     *[ i < end ->
        /*-- there are still signals that need to be combined --*/
        j = j + 1;
        [ i+1 >= end ->
            /*-- last piece: two signals left, 2-input C-element --*/
            C2Els[tree2Index](.c1 = tmp[i], .c2 = tmp[i+1], .y = tmp[end+j]);
            tree2Index = tree2Index + 1;
            i = end;
        [] i+2 >= end ->
            /*-- last piece: three signals left, 3-input C-element --*/
            C3Els[tree3Index](.c1 = tmp[i], .c2 = tmp[i+1], .c3 = tmp[i+2], .y = tmp[end+j]);
            tree3Index = tree3Index + 1;
            i = end;
        [] else ->
            /*-- more to come; so use a two input C-element --*/
            C2Els[tree2Index](.c1 = tmp[i], .c2 = tmp[i+1], .y = tmp[end+j]);
            tree2Index = tree2Index + 1;
            i = i + 2;
        ]
     ]
     /*-- update range that has to be combined --*/
     i = end+1;
     end = end+j;
     j = 0;
  ]

  /*-- drive the output from the root, inverting the signal if requested --*/
  [ invert ->
      prs { tmp[end] => out- }
      sizing {
        leak_adjust <- 1;
        p_n_mode <- 1;
        out{-1}
      }
  [] else ->
      tmp[end] = out;
  ]
}
/*
 * sigbuf: buffer a signal with a single driver cell whose strength is
 * selected from N.
 *
 * Parameters:
 *   N - selects the driver size; must be in 0..42
 *       (presumably the fanout to be driven -- confirm with callers)
 *
 * Ports:
 *   in     - signal to buffer
 *   out    - buffered signal
 *   supply - power supply; its vdd/vss are wired to the driver cell
 */
export template<pint N>
defproc sigbuf (bool? in; bool! out; power supply)
{
  { N >= 0 : "sigbuf: parameter error" };
  /* the size table below ends at 42; reject anything larger here so that
   * the failure is reported by this check instead of by a missing "buf" */
  { N <= 42 : "sigbuf: parameter error, N too big" };

  /* -- just use a sized driver here -- */
  [ N <= 4 ->
      BUF_X1 buf;
  [] N >= 5 & N <= 7 ->
      BUF_X2 buf;
  [] N >= 8 & N <= 10 ->
      BUF_X3 buf;
  [] N >= 11 & N <= 14 ->
      BUF_X4 buf;
  [] N >= 15 & N <= 18 ->
      BUF_X6 buf;
  [] N >= 19 & N <= 29 ->
      BUF_X8 buf;
  [] N >= 30 & N <= 42 ->
      BUF_X12 buf;
  ]

  /* whichever cell was selected, it is wired up the same way */
  buf.a = in;
  buf.y = out;
  buf.vdd = supply.vdd;
  buf.vss = supply.vss;
}