actlib_dataflow_neuro/dataflow_neuro/treegates.act

/*************************************************************************
 *
 *  This file is part of ACT dataflow neuro library
 *
 *  Copyright (c) 2022 University of Groningen - Ole Richter
 *  Copyright (c) 2021 Rajit Manohar
 *
 *  This source describes Open Hardware and is licensed under the CERN-OHL-W v2 or later
 *
 *  You may redistribute and modify this documentation and make products
 *  using it under the terms of the CERN-OHL-W v2 (https://cern.ch/cern-ohl).
 *  This documentation is distributed WITHOUT ANY EXPRESS OR IMPLIED
 *  WARRANTY, INCLUDING OF MERCHANTABILITY, SATISFACTORY QUALITY
 *  AND FITNESS FOR A PARTICULAR PURPOSE. Please see the CERN-OHL-W v2
 *  for applicable conditions.
 *
 *  Source location: https://git.web.rug.nl/bics/actlib_dataflow_neuro
 *
 *  As per CERN-OHL-W v2 section 4.1, should You produce hardware based on
 *  these sources, You must maintain the Source Location visible in its
 *  documentation.
 *
 **************************************************************************
namespace std {

export namespace gates {

/*
 * Build an OR-gate tree (NOR/NAND/optional INV)
 */
export template<pint N; pbool invert>
defproc ortree (bool? in[N]; bool out)
{
  /*
   * OR-reduction of in[0..N-1], built layer by layer from inverting
   * gates.  A layer whose inputs are in the true sense uses NOR gates
   * (pull-down on the OR of its inputs); a layer whose inputs are in
   * the inverted sense uses NAND gates (pull-down on the AND of its
   * inputs).  Gates take two inputs each, except the final gate of a
   * layer, which takes the two or three signals that remain.
   *
   *   N      : number of inputs, asserted to be > 0
   *   invert : when true, "out" carries the inverted sense of the OR
   *   out    : result; driven through an extra inverter only when the
   *            sense at the root of the tree is not the requested one
   */

  /* not referenced anywhere below; kept so the declared nodes are unchanged */
  bool tout;

  { N > 0 : "What?" };

  /*
   * lo..hi : window of tmp[] indices that still has to be merged
   * made   : number of gates emitted so far in the current layer
   * top    : index of the last input of the gate being emitted
   * nxt    : value "lo" takes once that gate has been emitted
   */
  pint lo, hi, made, top, nxt;

  /* neg: true when the signals in tmp[lo..hi] are in the inverted sense */
  pbool neg;

  neg = false;
  lo = 0;
  hi = N-1;

  /* node array; starts as an alias of the inputs and is extended by one
     entry for every gate output created below */
  bool tmp[N];
  (k:N:tmp[k] = in[k];)

  /* one pass of the outer loop per gate layer; done when a single
     signal is left (lo == hi) */
  *[ lo != hi ->
     made = 0;
     *[ lo < hi ->
        /*-- allocate the output node for the next gate of this layer --*/
        made = made + 1;
        bool tmp[hi+made..hi+made];

        /*-- pick the inputs: everything that is left (2 or 3 signals)
             if this is the final gate of the layer, otherwise a pair --*/
        [ lo+2 >= hi ->
          top = hi;
          nxt = hi;
        [] else ->
          top = lo + 1;
          nxt = lo + 2;
        ]

        /*-- NAND for inverted-sense inputs, NOR for true-sense inputs --*/
        [ neg ->
          prs { (&k:lo..top:tmp[k]) => tmp[hi+made]- }
        [] else ->
          prs { (|k:lo..top:tmp[k]) => tmp[hi+made]- }
        ]

        sizing {
          leak_adjust <- 1;
          p_n_mode <- 1;
          tmp[hi+made]{-1}
        }

        lo = nxt;
      ]

      /*-- every layer is inverting, so the sense flips --*/
      neg = ~neg;

      /*-- the outputs just created become the next window --*/
      lo = hi+1;
      hi = hi+made;
  ]

  /*-- from here on "neg" means: a final inverter is required --*/
  [ invert -> neg = ~neg; ]

  [ neg ->
    prs { tmp[hi] => out- }
    sizing {
      leak_adjust <- 1;
      p_n_mode <- 1;
      out{-1}
    }
  [] else ->
    tmp[hi] = out;
  ]
}

/*
 * Build a completion tree using a combination of 2-input and 3-input
 * C-elements.
 *
 * Template parameters:
 *   N      : number of inputs, asserted to be > 0
 *   invert : when true, "out" is the inverted sense of the tree root
 *
 * Ports:
 *   in[N]  : signals to be combined
 *   out    : root of the tree; connected directly to the root node, or
 *            driven from it through an inverter when "invert" is set
 *
 * Inputs are combined in pairs, layer by layer; the final element of a
 * layer is a 3-input one when three signals are left over.
 *
 * NOTE(review): A_2C_B_X1 / A_3C_B_X1 are declared outside this file
 * section.  This code assumes their "y" output has the non-inverted
 * sense and connects only c1/c2/c3/y; if the cells have supply ports
 * they are left unconnected here -- confirm against the cell library.
 */
export template<pint N; pbool invert>
defproc ctree (bool? in[N]; bool out)
{
  { N > 0 : "What?" };

  pint i, end, j;
  i = 0;
  end = N-1;

  /*
   * Dry run of the construction loop below that only counts how many
   * 2-input and 3-input elements will be needed, so that the cell
   * arrays can be declared with the right size.  The counters must
   * start from a defined value before they are incremented.
   */
  pint lenTree2Count, lenTree3Count;
  lenTree2Count = 0;
  lenTree3Count = 0;

  *[ i != end ->
     j = 0;
     *[ i < end ->
        j = j + 1;
        [ i+1 >= end ->
          /*-- exactly two signals left in this layer --*/
          i = end;
          lenTree2Count = lenTree2Count + 1;
        [] i+2 >= end ->
          /*-- exactly three signals left in this layer --*/
          i = end;
          lenTree3Count = lenTree3Count + 1;
        [] else ->
          /*-- more than three left: consume a pair --*/
          i = i + 2;
          lenTree2Count = lenTree2Count + 1;
        ]
      ]
      /*-- update range that has to be combined --*/
      i = end+1;
      end = end+j;
  ]

  /*
   * Node array: starts as an alias of the inputs and is extended by one
   * entry per C-element output in the loop below.  It is deliberately
   * NOT pre-sized for the whole tree, because the loop extends it with
   * tmp[end+j..end+j] and those indices must not already exist.
   */
  bool tmp[N];
  (k:N:tmp[k] = in[k];)

  /*
   * Arrays holding the actual C-elements.  The declarations are guarded
   * so that no empty array is declared when a kind is not needed
   * (e.g. N = 1 needs neither, N = 2 needs no 3-input element).
   */
  [ lenTree2Count > 0 -> A_2C_B_X1 C2Els[lenTree2Count]; ]
  [ lenTree3Count > 0 -> A_3C_B_X1 C3Els[lenTree3Count]; ]

  /* Restart the walk; this time the elements are actually connected */
  i = 0;
  end = N-1;
  j = 0;
  pint tree2Index, tree3Index;
  tree2Index = 0;
  tree3Index = 0;

  /* Invariant: i <= end */

  *[ i != end ->
     /*
      * Invariant: tmp[i..end] has the current signals that need to be
      * combined together
      */
     j = 0;
     *[ i < end ->
        /*-- there are still signals that need to be combined --*/
        j = j + 1;
        bool tmp[end+j..end+j];
        [ i+1 >= end ->
          /*-- last piece, two signals left: 2-input C-element --*/
          C2Els[tree2Index](.c1 = tmp[i], .c2 = tmp[i+1], .y = tmp[end+j]);
          tree2Index = tree2Index + 1;
          i = end;
        [] i+2 >= end ->
          /*-- last piece, three signals left: 3-input C-element --*/
          C3Els[tree3Index](.c1 = tmp[i], .c2 = tmp[i+1], .c3 = tmp[i+2], .y = tmp[end+j]);
          tree3Index = tree3Index + 1;
          i = end;
        [] else ->
          /*-- more to come; so use a two input C-element --*/
          C2Els[tree2Index](.c1 = tmp[i], .c2 = tmp[i+1], .y = tmp[end+j]);
          tree2Index = tree2Index + 1;
          i = i + 2;
        ]
      ]
      /*-- update range that has to be combined --*/
      i = end+1;
      end = end+j;
  ]

  /*
   * tmp[end] is now the root of the tree (for N = 1 it is in[0]).
   * Drive the output from it, through an inverter if requested; the
   * inverter and its sizing follow the same pattern as ortree.
   */
  [ invert ->
    prs { tmp[end] => out- }
    sizing {
      leak_adjust <- 1;
      p_n_mode <- 1;
      out{-1}
    }
  [] else ->
    tmp[end] = out;
  ]
}

/*
 * Signal buffer: drives "out" from "in" through one BUF_X* cell whose
 * drive-strength variant is selected by the template parameter N.
 *
 *   N      : selects the buffer variant; asserted to be in 0..43
 *            (presumably the load / fanout to be driven -- confirm
 *            against the callers)
 *   in     : buffer input  (connected to buf.a)
 *   out    : buffer output (connected to buf.y)
 *   supply : power rails; supply.vdd / supply.vss go to buf.vdd / buf.vss
 *
 * Selection table:
 *    0..4  -> BUF_X1      5..7  -> BUF_X2      8..10 -> BUF_X3
 *   11..14 -> BUF_X4     15..18 -> BUF_X6     19..29 -> BUF_X8
 *   30..43 -> BUF_X12
 */
export template<pint N>
defproc sigbuf (bool? in; bool! out; power supply)
{

  { N >= 0 : "sigbuf: parameter error" };
  { N <= 43 : "sigbuf: parameter error, N too big" };

  /* -- just use a sized driver here -- */
  [ N <= 4 ->
    BUF_X1 buf;
  [] N >= 5 & N <= 7 ->
    BUF_X2 buf;
  [] N >= 8 & N <= 10 ->
    BUF_X3 buf;
  [] N >= 11 & N <= 14 ->
    BUF_X4 buf;
  [] N >= 15 & N <= 18 ->
    BUF_X6 buf;
  [] N >= 19 & N <= 29 ->
    BUF_X8 buf;
  [] N >= 30 ->
    /*
     * No upper bound here on purpose: the assertion above already
     * limits N, and every value it admits must select a buffer.
     * With an explicit "N <= 42" the admitted value N = 43 matched no
     * branch, leaving "buf" undeclared for the connections below.
     */
    BUF_X12 buf;
  ]

  buf.a = in;
  buf.y = out;
  buf.vdd = supply.vdd;
  buf.vss = supply.vss;
}