/************************************************************************* * * This file is part of ACT dataflow neuro library * * Copyright (c) 2022 University of Groningen - Ole Richter * Copyright (c) 2022 University of Groningen - Michele Mastella * Copyright (c) 2022 University of Groningen - Hugh Greatorex * Copyright (c) 2022 University of Groningen - Madison Cotteret * * * This source describes Open Hardware and is licensed under the CERN-OHL-W v2 or later * * You may redistribute and modify this documentation and make products * using it under the terms of the CERN-OHL-W v2 (https:/cern.ch/cern-ohl). * This documentation is distributed WITHOUT ANY EXPRESS OR IMPLIED * WARRANTY, INCLUDING OF MERCHANTABILITY, SATISFACTORY QUALITY * AND FITNESS FOR A PARTICULAR PURPOSE. Please see the CERN-OHL-W v2 * for applicable conditions. * * Source location: https://git.web.rug.nl/bics/actlib_dataflow_neuro * * As per CERN-OHL-W v2 section 4.1, should You produce hardware based on * these sources, You must maintain the Source Location visible in its * documentation. 
* ************************************************************************** */
import "../../dataflow_neuro/cell_lib_async.act";
import "../../dataflow_neuro/cell_lib_std.act";
import "../../dataflow_neuro/treegates.act";
import "../../dataflow_neuro/primitives.act";
import "../../dataflow_neuro/coders.act";
// import tmpl::dataflow_neuro;
// import tmpl::dataflow_neuro;
import std::channel;
open std::channel;

namespace tmpl {
namespace dataflow_neuro {

// register_w: write-only register bank addressed over AER.
//
// Parameters (as documented by the original author):
//   lognw     -> log2(number of words), i.e. how many parameters can be stored
//   wl        -> word length, number of bits in each word
//   N_dly_cfg -> number of config bits in the ACK delay line
// Pins:
//   in   -> input data, 1+lognw+wl dual-rail bits:
//           - bits [0, wl)        : the word to write
//           - bits [wl, wl+lognw) : the word address (decoded below)
//           - the remaining bit is described as write/read_B; in the text
//             visible here it is only used by the validity tree
//   data -> flip-flop outputs, one wl-bit word per address (wl x nw)
//
// Structure visible in this body:
//   1. all input rails feed a validity tree; its output drives in.v
//   2. the validity signal goes through clk_dly, is inverted and buffered
//      to form _clock
//   3. _clock goes through ack_dly and a buffer to form in.a
//   4. for each word, an AND tree over the matching address rails is ANDed
//      with _clock and fanned out to the clk_B pins of that word's flops
//
// NOTE(review): the next line is truncated in this copy of the file. The
// parameter list after `template`, the width after `d1of`, and everything
// between `data[1<` and `_in_temp` are missing. The lost text must declare
// nw, supply, dly_cfg, reset_B, reset_mem_B and the local nets _in_v_temp,
// _in_a_temp, _clock_temp, _clock_temp_inv and _clock, all of which are used
// below. Restore it from the repository before compiling.
export template defproc register_w (avMx1of2<1+lognw+wl> in; d1of data[1< _in_temp;
    // Input validity: alias every input bit into _in_temp and reduce it with a
    // validity tree; the buffered result is the channel's valid signal.
    (i:1+lognw+wl:_in_temp.d[i] = in.d.d[i];)
    vtree<1+lognw+wl> val_input(.in = _in_temp,.out = _in_v_temp, .supply = supply);
    sigbuf_1output<4> val_input_X(.in = _in_v_temp,.out = in.v,.supply = supply);

    // Generation of the fake clock pulse from the validity signal
    // (inverted because the flip-flop clocks are active low).
    delayprog clk_dly(.in = _in_v_temp, .out = _clock_temp,.s = dly_cfg, .supply = supply);
    INV_X1 inv_clk(.a = _clock_temp,.y = _clock_temp_inv,.vdd = supply.vdd,.vss = supply.vss);
    sigbuf_1output<4> clk_X(.in = _clock_temp_inv,.out = _clock,.supply = supply);

    // Acknowledge: the clock, delayed once more (same dly_cfg) and buffered,
    // is sent back as in.a.
    delayprog ack_dly(.in = _clock, .out = _in_a_temp,.s = dly_cfg, .supply = supply);
    sigbuf_1output<4> ack_input_X(.in = _in_a_temp,.out = in.a,.supply = supply);

    // Reset buffers: reset_mem_B is buffered and fanned out, one entry per flop.
    // NOTE(review): _reset_BX is driven here but not read anywhere in the
    // visible body.
    bool _reset_BX,_reset_mem_BX,_reset_mem_BXX[nw*wl];
    BUF_X1 reset_buf_BX(.a=reset_B, .y=_reset_BX,.vdd=supply.vdd,.vss=supply.vss);
    BUF_X1 reset_buf_BXX(.a=reset_mem_B, .y=_reset_mem_BX,.vdd=supply.vdd,.vss=supply.vss);
    // NOTE(review): the template width of this sigbuf is missing in this copy.
    sigbuf reset_bufarray(.in=_reset_mem_BX, .out=_reset_mem_BXX,.supply=supply);

    // Creating the flip-flop array and the per-word address decoders.
    // NOTE(review): _clock_word and _clock_buffer_out are declared but not
    // referenced in the visible body. The template widths of andtree and
    // sigbuf are missing in this copy.
    bool _out_encoder[nw],_clock_word_temp[nw],_clock_word[nw],_clock_buffer_out[nw*wl];
    andtree atree[nw];
    AND2_X1 and_encoder[nw];
    sigbuf clock_buffer[nw];
    DFFQ_R_X1 ff[nw*wl];
    pint bitval;
    (k:nw:atree[k].supply = supply;)
    (word_idx:nw:
        // Address decoding: for each address bit, pick the true or false rail
        // depending on the corresponding binary digit of word_idx.
        (pin_idx:lognw:
            bitval = (word_idx & ( 1 << pin_idx )) >> pin_idx; // Binary digit pin_idx of word_idx (0 or 1)
            [bitval = 1 -> atree[word_idx].in[pin_idx] = in.d.d[pin_idx+wl].t;
            [] bitval = 0 -> atree[word_idx].in[pin_idx] = in.d.d[pin_idx+wl].f;
            // Guard branch: bitval is a single masked-and-shifted bit, so this
            // cannot be selected.
            // NOTE(review): replace the assertion message with a descriptive one.
            []bitval >= 2 -> {false : "fuck"};
            ]
        )
        // Activating the fake clock only for the addressed word.
        atree[word_idx].out = _out_encoder[word_idx];
        and_encoder[word_idx].a = _out_encoder[word_idx];
        and_encoder[word_idx].b = _clock;
        and_encoder[word_idx].y = _clock_word_temp[word_idx];
        and_encoder[word_idx].vdd = supply.vdd;
        and_encoder[word_idx].vss = supply.vss;
        clock_buffer[word_idx].in = _clock_word_temp[word_idx];
        clock_buffer[word_idx].supply = supply;
        // Flip-flops of this word: flop (bit_idx + word_idx*wl) stores the
        // true rail of input bit bit_idx and drives data[word_idx].d[bit_idx].
        (bit_idx:wl:
            ff[bit_idx+word_idx*(wl)].clk_B = clock_buffer[word_idx].out[bit_idx];
            ff[bit_idx+word_idx*(wl)].d = in.d.d[bit_idx].t;
            ff[bit_idx+word_idx*(wl)].q = data[word_idx].d[bit_idx];
            ff[bit_idx+word_idx*(wl)].reset_B = _reset_mem_BXX[bit_idx+word_idx*(wl)];
            ff[bit_idx+word_idx*(wl)].vdd = supply.vdd;
            ff[bit_idx+word_idx*(wl)].vss = supply.vss;
        )
    )
}

// register_rw: register bank that can be written and read back over AER.
//
// Parameters (as documented by the original author):
//   lognw     -> log2(number of words), i.e. how many parameters can be stored
//   wl        -> word length, number of bits in each word
//   N_dly_cfg -> number of config bits in the ACK delay line
// Pins:
//   in   -> input data, 1+lognw+wl dual-rail bits:
//           - the MSB (bit lognw+wl) is the write/read_B flag
//           - the next MSBs (size lognw, bits [wl, wl+lognw)) are the address
//           - the LSBs (size wl) are the word to write
//   out  -> when a read is requested, shows the stored data:
//           - the MSBs (size lognw) tell which register was read
//           - the LSBs (size wl) carry the word that was read
//   data -> flip-flop outputs, one wl-bit word per address (wl x nw)
//
// Structure visible in this body:
//   - the flag bit steers the remaining lognw+wl bits through a demux into a
//     read branch (_in_read) and a write branch (_in_write)
//   - write branch: validity tree -> delay -> inverter -> buffer gives _clock,
//     which is gated per word by the address decoder and clocks the flops;
//     _clock, delayed again, acknowledges the write branch
//   - read branch: each stored bit is gated onto out by an AND3; the validity
//     of the low wl bits of out (_ff_v) acknowledges the read branch
//   - in.a is the AND of the demux input acknowledge and _ff_v
//
// NOTE(review): the next line is truncated in this copy of the file. The
// parameter list after `template`, the widths after `avMx1of2` (out) and
// `d1of`, and everything between `data[1<` and `_in_temp2` are missing. The
// lost text must declare nw, supply, dly_cfg, reset_B, reset_mem_B and the
// local nets _ff_v, _clock, _clock_temp and _clock_temp_inv used below.
// Restore it from the repository before compiling.
export template defproc register_rw (avMx1of2<1+lognw+wl> in; avMx1of2 out; d1of data[1< _in_temp2,_in_read,_in_write;
    avMx1of2<1>_in_flag; // Read or write?
    // The input is acknowledged only when the demux has acknowledged and the
    // read-out data is valid.
    AND2_X1 ack_and(.a = _in_temp2.a,.b = _ff_v,.y = in.a,.vdd = supply.vdd,.vss = supply.vss);
    in.v = _in_temp2.v;
    // Split the input: the top bit is the demux condition, the rest is payload.
    _in_flag.d.d[0] = in.d.d[lognw+wl];
    (i:lognw+wl:_in_temp2.d.d[i] = in.d.d[i];)
    // NOTE(review): the template width of demux is missing in this copy.
    demux read_write_demux(.in = _in_temp2,.out1 = _in_read, .out2 = _in_write, .cond = _in_flag,.reset_B = reset_B);
    read_write_demux.supply= supply;

    // WRITE PATH
    // Validation of the write branch.
    // NOTE(review): the widths of Mx1of2 and vtree are missing in this copy.
    Mx1of2 _in_write_temp;
    (i:lognw+wl:_in_write_temp.d[i] = _in_write.d.d[i];)
    vtree val_input_write(.in = _in_write_temp,.out = _in_write.v, .supply = supply);
    // Acknowledgment: the delayed clock acknowledges the write branch.
    delayprog ack_dly(.in = _clock, .out = _in_write.a,.s = dly_cfg, .supply = supply);
    // Generation of the fake clock pulse from the write-branch validity
    // (inverted because the flip-flop clocks are active low).
    delayprog clk_dly(.in = _in_write.v, .out = _clock_temp,.s = dly_cfg, .supply = supply);
    INV_X1 inv_clk(.a = _clock_temp,.y = _clock_temp_inv,.vdd = supply.vdd,.vss = supply.vss);
    sigbuf_1output<4> clk_X(.in = _clock_temp_inv,.out = _clock,.supply = supply);

    // READ PATH
    // Validation of the read branch.
    Mx1of2 _in_read_temp;
    (i:lognw+wl:_in_read_temp.d[i] = _in_read.d.d[i];)
    vtree val_input_read(.in = _in_read_temp,.out = _in_read.v, .supply = supply);
    // Validity of the read-out word: a validity tree over the low wl bits of out.
    vtree ff_validator;
    Mx1of2 _out_temp;
    (i:wl:_out_temp.d[i] = out.d.d[i];)
    ff_validator.in = _out_temp;
    ff_validator.out = _ff_v;
    ff_validator.supply = supply;
    // Acknowledgment
    _in_read.a = _ff_v; // The read branch is acknowledged when the flip-flop data are valid

    // Reset buffers: reset_mem_B is buffered and fanned out to nw*wl*2 entries.
    // NOTE(review): _reset_BX is driven here but not read anywhere in the
    // visible body.
    bool _reset_BX,_reset_mem_BX,_reset_mem_BXX[nw*wl*2];
    BUF_X1 reset_buf_BX(.a=reset_B, .y=_reset_BX,.vdd=supply.vdd,.vss=supply.vss);
    BUF_X1 reset_buf_BXX(.a=reset_mem_B, .y=_reset_mem_BX,.vdd=supply.vdd,.vss=supply.vss);
    sigbuf reset_bufarray(.in=_reset_mem_BX, .out=_reset_mem_BXX,.supply=supply);

    // Creating the flip-flop arrays (one flop per rail: ff_t and ff_f), the
    // per-word address decoders and the read-out gates.
    // NOTE(review): _clock_word, _clock_buffer_out, _data_f, ff_val and
    // __ffout_dualrail are declared but not referenced in the visible body.
    bool _out_encoder[nw],_clock_word_temp[nw],_clock_word[nw],_clock_buffer_out[nw*wl];
    andtree atree[nw];
    d1of _data_f;
    AND2_X1 and_encoder[nw];
    AND3_X1 reading_activator_t[nw*wl],reading_activator_f[nw*wl];
    sigbuf clock_buffer[nw];
    DFFQ_R_X1 ff_t[nw*wl],ff_f[nw*wl];
    OR2_X1 ff_val[wl];
    // Forward input bits onto out.
    // NOTE(review): the range wl..lognw looks wrong. The header comment says
    // out's upper lognw bits carry the address, which would be the range
    // wl..wl+lognw-1. Confirm against ACT loop-range semantics and fix.
    (i:wl..lognw:out.d.d[i] = in.d.d[i];)
    bool __ffout_dualrail[nw*wl];
    pint bitval;
    (k:nw:atree[k].supply = supply;)
    (word_idx:nw:
        // Address decoding: for each address bit, pick the true or false rail
        // depending on the corresponding binary digit of word_idx.
        (pin_idx:lognw:
            bitval = (word_idx & ( 1 << pin_idx )) >> pin_idx; // Binary digit pin_idx of word_idx (0 or 1)
            [bitval = 1 -> atree[word_idx].in[pin_idx] = in.d.d[pin_idx+wl].t;
            [] bitval = 0 -> atree[word_idx].in[pin_idx] = in.d.d[pin_idx+wl].f;
            // Guard branch: bitval is a single masked-and-shifted bit, so this
            // cannot be selected.
            // NOTE(review): replace the assertion message with a descriptive one.
            []bitval >= 2 -> {false : "fuck"};
            ]
        )
        // Encode which word is the addressed one.
        atree[word_idx].out = _out_encoder[word_idx];
        // READ: the decoder output selects which word is gated onto out.
        // WRITE: the decoder output gates the fake clock for the right word.
        and_encoder[word_idx].a = _out_encoder[word_idx];
        and_encoder[word_idx].b = _clock;
        and_encoder[word_idx].y = _clock_word_temp[word_idx];
        and_encoder[word_idx].vdd = supply.vdd;
        and_encoder[word_idx].vss = supply.vss;
        clock_buffer[word_idx].in = _clock_word_temp[word_idx];
        clock_buffer[word_idx].supply = supply;
        // Flip-flops of this word and their connections.
        (bit_idx:wl:
            // True-rail flop: stores in.d.d[bit_idx].t and drives both data
            // and the read-out gate.
            ff_t[bit_idx+word_idx*(wl)].clk_B = clock_buffer[word_idx].out[bit_idx];
            ff_t[bit_idx+word_idx*(wl)].d = in.d.d[bit_idx].t;
            ff_t[bit_idx+word_idx*(wl)].q = data[word_idx].d[bit_idx];
            ff_t[bit_idx+word_idx*(wl)].reset_B = _reset_mem_BXX[bit_idx+word_idx*(wl)];
            ff_t[bit_idx+word_idx*(wl)].vdd = supply.vdd;
            ff_t[bit_idx+word_idx*(wl)].vss = supply.vss;
            // False-rail flop: stores in.d.d[bit_idx].f; its q only feeds the
            // read-out gate.
            // NOTE(review): the clock index bit_idx+wl-1 reuses the tap of the
            // last ff_t bit when bit_idx = 0. bit_idx+wl may have been meant;
            // the sigbuf width is not visible here, so confirm.
            ff_f[bit_idx+word_idx*(wl)].clk_B = clock_buffer[word_idx].out[bit_idx+wl-1];
            ff_f[bit_idx+word_idx*(wl)].d = in.d.d[bit_idx].f;
            // NOTE(review): the offset nw-1 overlaps the indices already used
            // by ff_t and leaves the upper part of the nw*wl*2 array unused.
            // An offset of nw*wl may have been meant; confirm.
            ff_f[bit_idx+word_idx*(wl)].reset_B = _reset_mem_BXX[bit_idx+word_idx*(wl)+nw-1];
            ff_f[bit_idx+word_idx*(wl)].vdd = supply.vdd;
            ff_f[bit_idx+word_idx*(wl)].vss = supply.vss;
            // Read-out gates: flag rail AND stored bit AND word select.
            // NOTE(review): the .t gate is enabled by the flag's true rail and
            // the .f gate by its false rail. Confirm both should not use the
            // same "read" rail.
            // NOTE(review): the gates of every word drive the same
            // out.d.d[bit_idx] rail, so for nw > 1 several AND3 outputs share
            // one node. Confirm this is intended.
            reading_activator_t[bit_idx+word_idx*(wl)].a = _in_flag.d.d[0].t;
            reading_activator_t[bit_idx+word_idx*(wl)].b = ff_t[bit_idx+word_idx*(wl)].q;
            reading_activator_t[bit_idx+word_idx*(wl)].c = _out_encoder[word_idx];
            reading_activator_t[bit_idx+word_idx*(wl)].y = out.d.d[bit_idx].t;
            reading_activator_t[bit_idx+word_idx*(wl)].vdd = supply.vdd;
            reading_activator_t[bit_idx+word_idx*(wl)].vss = supply.vss;
            reading_activator_f[bit_idx+word_idx*(wl)].a = _in_flag.d.d[0].f;
            reading_activator_f[bit_idx+word_idx*(wl)].b = ff_f[bit_idx+word_idx*(wl)].q;
            reading_activator_f[bit_idx+word_idx*(wl)].y = out.d.d[bit_idx].f;
            reading_activator_f[bit_idx+word_idx*(wl)].vdd = supply.vdd;
            reading_activator_f[bit_idx+word_idx*(wl)].vss = supply.vss;
            reading_activator_f[bit_idx+word_idx*(wl)].c = _out_encoder[word_idx];
        )
    )
}

}}