actlib_dataflow_neuro/dataflow_neuro/coders.act

/*************************************************************************
 *
 *  This file is part of ACT dataflow neuro library
 *
 *  Copyright (c) 2022 University of Groningen - Ole Richter
 *  Copyright (c) 2022 University of Groningen - Michele Mastella
 *  Copyright (c) 2022 University of Groningen - Hugh Greatorex
 *  Copyright (c) 2022 University of Groningen - Madison Cotteret
 *
 *
 *  This source describes Open Hardware and is licensed under the CERN-OHL-W v2 or later
 *
 *  You may redistribute and modify this documentation and make products
 *  using it under the terms of the CERN-OHL-W v2 (https://cern.ch/cern-ohl).
 *  This documentation is distributed WITHOUT ANY EXPRESS OR IMPLIED
 *  WARRANTY, INCLUDING OF MERCHANTABILITY, SATISFACTORY QUALITY
 *  AND FITNESS FOR A PARTICULAR PURPOSE. Please see the CERN-OHL-W v2
 *  for applicable conditions.
 *
 *  Source location: https://git.web.rug.nl/bics/actlib_dataflow_neuro
 *
 *  As per CERN-OHL-W v2 section 4.1, should You produce hardware based on
 *  these sources, You must maintain the Source Location visible in its
 *  documentation.
 *
 **************************************************************************
 */
import "../../dataflow_neuro/cell_lib_async.act";
import "../../dataflow_neuro/cell_lib_std.act";
import "../../dataflow_neuro/treegates.act";
import "../../dataflow_neuro/primitives.act";
// import tmpl::dataflow_neuro;
// import tmpl::dataflow_neuro;
import std::channel;
open std::channel;

namespace tmpl {
	namespace dataflow_neuro {

		/**
		 * 2D address decoder with a programmable delay on the acknowledge path.
		 *
		 * The input packet is the concatenation of an x and a y address in
		 * dual-rail form: bits [0, NxC) are the x address and bits
		 * [NxC, NxC+NyC) are the y address.
		 *
		 * Template parameters:
		 *	NxC        number of dual-rail bits in the x address
		 *	NyC        number of dual-rail bits in the y address
		 *	Nx         number of x select lines (size of outx)
		 *	Ny         number of y select lines (size of outy)
		 *	N_dly_cfg  number of config bits of the ack delay line; all bits
		 *	           high corresponds to 2**N_dly_cfg - 1 DLY1_X4 cells
		 *
		 * NxC is expected to be ceil(log2(Nx)) (likewise NyC for Ny); it is a
		 * separate parameter because it is not computed here. Nothing in this
		 * process checks that relation.
		 *
		 * Ports:
		 *	in       address packet, x bits first, then y bits
		 *	outx     x select lines; outx[i] is the output of the AND tree for x = i
		 *	outy     y select lines; outy[i] is the output of the AND tree for y = i
		 *	dly_cfg  select bits of the ack delay line
		 *	reset_B  reset, passed to the input buffer
		 *	supply   power rails
		 *
		 * NOTE(review): outx and outy are declared "bool?" although they are
		 * connected to the AND tree outputs in this process - confirm whether
		 * the intended direction is "bool!".
		 */
		export template<pint NxC, NyC, Nx, Ny, N_dly_cfg>
		defproc decoder_2d_dly (avMx1of2<NxC+NyC> in; bool? outx[Nx], outy[Ny],
			dly_cfg[N_dly_cfg], reset_B; power supply) {

			// Buffer that receives the concat(x,y) address packet
			buffer<NxC+NyC> addr_buf(.in = in, .reset_B = reset_B, .supply = supply);
			// TODO: the data outputs of addr_buf fan out to every AND tree below
			// and probably need to be buffered.

			// Validity trees: one over the x bits, one over the y bits
			vtree<NxC> vtree_x (.supply = supply);
			vtree<NyC> vtree_y (.supply = supply);
			(i:0..NxC-1:vtree_x.in.d[i].t = addr_buf.out.d.d[i].t;)
			(i:0..NxC-1:vtree_x.in.d[i].f = addr_buf.out.d.d[i].f;)
			(i:0..NyC-1:vtree_y.in.d[i].t = addr_buf.out.d.d[i+NxC].t;)
			(i:0..NyC-1:vtree_y.in.d[i].f = addr_buf.out.d.d[i+NxC].f;)

			// Combine both validity signals. The combined signal is fed back
			// to the buffer undelayed as its valid input...
			A_2C_B_X1 C2el(.c1 = vtree_x.out, .c2 = vtree_y.out, .vdd = supply.vdd, .vss = supply.vss);
			addr_buf.out.v = C2el.y;

			// ...and through the programmable delay line as its acknowledge.
			// Only the ack is delayed, not the valid.
			delayprog<N_dly_cfg> dly(.in = C2el.y, .s = dly_cfg, .supply = supply);
			dly.out = addr_buf.out.a;

			// AND trees: tree i selects, for every address bit j, the true rail
			// if bit j of i is 1 and the false rail if it is 0.
			pint bitval;
			andtree<NxC> atree_x[Nx];
			(k:0..Nx-1:atree_x[k].supply = supply;)
			(i:0..Nx-1:
				(j:0..NxC-1:
					bitval = (i & ( 1 << j )) >> j; // binary digit j of integer i, always 0 or 1
					[bitval = 1 ->
						atree_x[i].in[j] = addr_buf.out.d.d[j].t;
						[]bitval = 0 ->
						atree_x[i].in[j] = addr_buf.out.d.d[j].f;
						]
					)
				// one output connection per tree
				atree_x[i].out = outx[i];
				)

			// Same decode for y; the y bits start at offset NxC in the packet.
			andtree<NyC> atree_y[Ny];
			(k:0..Ny-1:atree_y[k].supply = supply;)
			(i:0..Ny-1:
				(j:0..NyC-1:
					bitval = (i & ( 1 << j )) >> j; // binary digit j of integer i, always 0 or 1
					[bitval = 1 ->
						atree_y[i].in[j] = addr_buf.out.d.d[j+NxC].t;
						[]bitval = 0 ->
						atree_y[i].in[j] = addr_buf.out.d.d[j+NxC].f;
						]
					)
				// one output connection per tree
				atree_y[i].out = outy[i];
				)

		}


/*
 * Build a binary tree of arbiter_handshake cells that reduces the N
 * a1of1 inputs "in" to the single a1of1 output "out".
 *
 * The tree is built layer by layer. In each layer neighbouring nodes are
 * paired into one arbiter_handshake (in1, in2 -> out); if the layer has an
 * odd number of nodes, the last node is passed through unchanged to the
 * next layer. This repeats until a single node is left.
 *
 * N must be greater than 0. For N = 1 no arbiter is instantiated and "in[0]"
 * is connected straight to "out".
 */
		export template<pint N>
		defproc arbtree (a1of1 in[N]; a1of1 out; power supply)
		{
			{ N > 0 : "What?" };

			/*
			 * i..end is the index range of the nodes of the current layer,
			 * j counts the nodes produced for the next layer.
			 */
			pint i, end, j;
			i = 0;
			end = N-1;

			pint arbCount;
			arbCount = 0;

	/*
	 * Pass 1: dry run of the construction loop below. It only counts the
	 * arbiters (arbCount) and the total number of nodes (end+1), because
	 * both arrays have to be sized before they can be connected.
	 */
			*[ i != end ->
				j = 0;
				*[ i <= end ->
					j = j + 1;
					[ i = end ->
				/*-- odd node out: passed through, no arbiter needed --*/
						i = end+1;
						[] else ->
				/*-- a pair: one arbiter; when i+1 = end this leaves i = end+1 --*/
						arbCount = arbCount + 1;
						i = i + 2;
						]
					]
			/*-- here i = end+1; the next layer is the j nodes after end --*/
				end = end+j;
				]

	/* array that holds ALL the nodes of the tree: inputs, internal nodes, root */
			a1of1 tmp[end+1];

	// The first N nodes are the inputs
			(l:N:
				tmp[l] = in[l];
				)

	/* array that holds the arbiters; not instantiated at all when N = 1 */
			[arbCount > 0 ->
				arbiter_handshake arbs[arbCount];
				]
			(h:arbCount:arbs[h].supply = supply;)

	/* Pass 2: restart from the input layer and make the connections */
			i = 0;
			end = N-1;
			pint arbIndex = 0;

	/* Invariant: i <= end */

			*[ i != end ->
		 /*
			* Invariant: tmp[i..end] holds the nodes of the current layer
			* that still have to be combined
			*/
				j = 0;
				*[ i <= end ->
				/*-- there are still nodes that need to be combined --*/
					j = j + 1;
					[ i = end ->
				/*-- odd node out: pipe it through to the next layer --*/
						tmp[end+j] = tmp[i];
						i = end+1;
						[] else ->
				/*-- a pair: arbitrate between tmp[i] and tmp[i+1] --*/
						arbs[arbIndex].in1 = tmp[i];
						arbs[arbIndex].in2 = tmp[i+1];
						arbs[arbIndex].out = tmp[end+j];
						arbIndex = arbIndex + 1;
						i = i + 2;
						]
					]
				/*-- here i = end+1; the next layer is the j nodes after end --*/
				end = end+j;
				]

	/* the last node is the root of the tree */
			out = tmp[end];

		}


	}

}