B. SystemC Simulation Example

This SystemC simulation example simulates a system with two TTA cores communicating through two shared registers with memory mapped access. One of the registers (busyReg) is used to signal that the "receiver TTA" is still busy processing the data that the "sender TTA" previously wrote to the other register (dataReg). The receiver uses iprintf to print out the received data.

The C source code for the programs running in the two TTAs is shown below. mem_map.h defines constants for the memory mapped I/O addresses. This file is included by both TTA programs and by the SystemC simulation code presented later.

mmio_recv.c: The C program running in the receiver TTA:


#include <stdio.h>
#include <stdlib.h>

#include "mem_map.h"

/**
 * mmio_recv.c:
 *
 * Receiver side of the shared-register handshake: busy-waits on the
 * memory mapped data register and prints each value that differs from
 * the previously seen one.
 */
int main() {
    int previous = 0;
    char received_count = 0;
    for (;;) {
        int current;
        /* Publish how many values have been consumed so far. */
        *BUSY_ADDR = received_count;
        /* NOTE: a barrier may be needed here so the compiler cannot hoist
           the polling loop above the write; both accesses use constant
           addresses, which makes them trivial to alias analyze. */
        do {
            current = *DATA_ADDR;
        } while (current == previous);
        ++received_count;
        iprintf("mmio_recv got %d\n", current);
        previous = current;
    }
    /* Not reached: the loop above never terminates. */
    return EXIT_SUCCESS;
}

mmio_send.c: The C program running in the sender TTA.

#include <stdio.h>
#include <stdlib.h>

#include "mem_map.h"

/**
 * mmio_send.c:
 *
 * Sender side of the shared-register handshake: writes ten values
 * (1234, 2468, ..., 12340) to a memory mapped shared register. Another
 * TTA (see mmio_recv.c) polls that register and prints it to a console
 * whenever its value changes.
 */
int main() {
    char acked_count = 0;
    char observed_count = 0;
    int i = 1;
    while (i <= 10) {
        /* This value should get printed to the console by the other TTA. */
        *DATA_ADDR = 1234 * i;
        /* The other TTA signals that it has processed the value by
           writing its running count of received values to BUSY_ADDR;
           spin until that count differs from the last one seen. */
        do {
            observed_count = *BUSY_ADDR;
        } while (observed_count == acked_count);
        acked_count = observed_count;
        ++i;
    }
    return EXIT_SUCCESS;
}

mem_map.h: The memory mapped I/O addresses as constants:


#ifndef MEM_MAP_H
#define MEM_MAP_H

/* Memory map shared by the TTA programs and the SystemC LSU model.
   The include guard keeps the header safe to pull in more than once. */

/* Highest address that still belongs to the data memory (32 KiB - 1).
   lsu_model.hh treats every address above this one as an access to a
   shared register. */
#define LAST_DMEM_ADDR (32*1024 - 1)

/* Data register: the first address past the data memory, accessed as a
   volatile int. */
#define DATA_ADDR ((volatile int*)(LAST_DMEM_ADDR + 1))

/* Busy/handshake register: 4 bytes above DATA_ADDR, accessed as a
   volatile char. */
#define BUSY_ADDR ((volatile char*)(LAST_DMEM_ADDR + 1 + 4))

#endif

The simple register is defined in register.hh, and the load-store unit simulation model, which overrides the TTA simulator's default one, is defined in lsu_model.hh.

register.hh: SystemC model for an integer register:


#ifndef SC_REGISTER_HH
#define SC_REGISTER_HH

#include <systemc>

/**
 * Integer register model.
 *
 * Holds one int. Whenever run() is triggered it latches `input` if
 * `updateValue` is set, then drives the held value to `output`.
 */
SC_MODULE(Register) {
    sc_in<int> input;         // value to store when updateValue is set
    sc_out<int> output;       // currently held value
    sc_in<bool> updateValue;  // write enable
    sc_in<bool> clock;

    int value;  // the stored register contents

    /** Latches the input when enabled and refreshes the output. */
    void run() {
        if (updateValue.read()) {
            value = input.read();
        }
        output.write(value);
    }

    SC_CTOR(Register) {
        // The register starts out holding zero.
        value = 0;
        SC_METHOD(run);
        // run() reacts to the rising clock edge and to any change of
        // the data or write-enable inputs.
        sensitive << clock.pos() << input << updateValue;
    }
};

#endif

lsu_model.hh: The load-store unit model for the TTAs:


#ifndef SC_LSU_MODEL_HH
#define SC_LSU_MODEL_HH

#include <systemc>
#include <tce_systemc.hh>
#include "mem_map.h"

/**
 * Load-store unit simulation model that maps every access above the
 * data memory to a shared register reached through SystemC ports.
 */
TCE_SC_OPERATION_SIMULATOR(LSUModel) {
    /* One model class serves both the sender and the receiver TTA:
       the sender writes the data reg and reads the busy reg, the
       receiver does the opposite. Which register is which is decided
       by how the ports are bound. */
    sc_in<int> reg_value_in;
    sc_out<int> reg_value_out;
    sc_out<bool> reg_value_update;

    TCE_SC_OPERATION_SIMULATOR_CTOR(LSUModel) {}

    TCE_SC_SIMULATE_CYCLE_START {
        // Deassert the update signal at the start of every cycle so
        // the register only latches a value when a store raises it.
        reg_value_update.write(false);
    }

    TCE_SC_SIMULATE_STAGE {
        unsigned address = TCE_SC_UINT(1);
        // Only stage 0 of loads and stores that target addresses
        // outside the data memory is overridden here; everything else
        // is declined by returning false.
        if (address <= LAST_DMEM_ADDR) {
            return false;
        }
        if (TCE_SC_OPSTAGE > 0) {
            return false;
        }
        // The exact address is not inspected: any out-of-data-memory
        // access is assumed to refer to the shared register.
        if (!TCE_SC_OPERATION.writesMemory()) {
            // A load: operand 2 is the data output.
            int loaded = reg_value_in.read();
            TCE_SC_OUTPUT(2) = loaded;
            return true;
        }
        // A store: forward operand 2 and request a register update.
        int stored = TCE_SC_INT(2);
        reg_value_out.write(stored);
        reg_value_update.write(true);
        return true;
    }
};

#endif

Finally, the actual main SystemC simulation code is defined as follows. As can be noted, both of the TTA cores use the same architecture loaded from mmio.adf, whose contents are not presented here. To make this example work, the minimal_with_io.adf architecture included with TCE can be used instead.

simulator.cc: The main simulation code:


#include <iostream>

#include "systemc.h"

#include "register.hh"
#include "lsu_model.hh"

/**
 * Builds the two-core system and runs the simulation.
 *
 * Instantiates the shared data and busy registers, the sender and
 * receiver TTA cores (both loaded from mmio.adf), and one LSUModel per
 * core, wires them together with signals, and simulates 0.2 seconds.
 *
 * @param argc Unused.
 * @param argv Unused.
 * @return EXIT_SUCCESS once the simulation run has finished.
 */
int sc_main(int argc, char* argv[]) {

    // 1 MHz clock frequency (1 us clock period)
    sc_clock clk("clock", 1, SC_US);
    
    // Signals connecting the LSU models to the two shared registers.
    // glock is bound to both cores but never written in this function.
    sc_signal<bool> glock;
    sc_signal<int> busyRegDataIn;
    sc_signal<int> dataRegDataIn;
    sc_signal<int> busyRegDataOut;
    sc_signal<int> dataRegDataOut;
    sc_signal<bool> busyRegUpdate;
    sc_signal<bool> dataRegUpdate;

    // the register carrying the payload from the sender to the receiver
    Register dataReg("data_reg");
    dataReg.input(dataRegDataIn);
    dataReg.output(dataRegDataOut);
    dataReg.updateValue(dataRegUpdate);
    dataReg.clock(clk.signal());

    // the register carrying the receiver's progress back to the sender
    Register busyReg("busy_reg");
    busyReg.input(busyRegDataIn);
    busyReg.output(busyRegDataOut);
    busyReg.updateValue(busyRegUpdate);
    busyReg.clock(clk.signal());

    // the sender TTA:

    TTACore sender_tta("sender_tta", "mmio.adf", "mmio_send.tpef");
    sender_tta.clock(clk.signal());
    sender_tta.global_lock(glock);

    // the LSU writes to the data register and reads from the
    // busy reg to synchronize

    LSUModel lsu1("LSU1");
    sender_tta.setOperationSimulator("LSU", lsu1);
    lsu1.reg_value_in(busyRegDataOut);
    lsu1.reg_value_out(dataRegDataIn);
    lsu1.reg_value_update(dataRegUpdate);

    // the receiver TTA:

    TTACore recv_tta("recv_tta", "mmio.adf", "mmio_recv.tpef");
    recv_tta.clock(clk.signal());
    recv_tta.global_lock(glock);

    // the LSU writes to the busy reg to synchronize the execution
    // and reads from the data reg

    LSUModel lsu2("LSU2");
    recv_tta.setOperationSimulator("LSU", lsu2);
    lsu2.reg_value_in(dataRegDataOut);
    lsu2.reg_value_out(busyRegDataIn);
    lsu2.reg_value_update(busyRegUpdate);

    // simulate for 0.2 sec = 200K cycles (at the 1 us clock period)
    sc_time runtime(0.2, SC_SEC);
    sc_start(runtime);

    return EXIT_SUCCESS;
}

The simulator can be compiled with the following command (assuming GCC is used):

g++ `tce-config --includes --libs` simulator.cc -lsystemc -O3 -o simulator

The simulation should produce output similar to the following:

./simulator

             SystemC 2.2.0 --- Aug 30 2010 13:05:02
        Copyright (c) 1996-2006 by all Contributors
                    ALL RIGHTS RESERVED
mmio_recv got 1234
mmio_recv got 3702
mmio_recv got 4936
mmio_recv got 6170
mmio_recv got 7404
mmio_recv got 9872
mmio_recv got 12340

Pekka Jääskeläinen 2018-03-12