| PARAMETER |
VALUE |
DESCRIPTION |
| -proc |
= 1 | # Number of processors |
| -vmmx |
= 1 | # Support for vector MMX |
| -thpp |
= 1 | # Threads per processor |
| -ooo | = 1 | # Define Out of order Architectural paradigm |
| -prog |
= ev5:mp2d_vmmx | # application to simulate |
| -atomprog |
= benchmarks/mpeg_dec/trace/mpeg2decode_vmmx.atom | # Atom instrumented application |
| -atomargs |
= mpeg2decode_vmmx -b mei16v2.m2v -r -f -o0 tmp%d | # Arguments to ATOM application |
| -atombb |
= benchmarks/mpeg_dec/trace/mpeg2decode_vmmx.bb.gz | |
| -mmxbbl |
= benchmarks/mpeg_dec/trace/mpeg2decode.vmmx.tbl | # basic blocks trace |
| -mmxtrace |
= benchmarks/mpeg_dec/trace/mpeg2decode.vmmx.tr.gz | # MMX trace |
| -mmxpcrange |
= mpeg_dec/trace/mpeg2decode.vmmx.pcr | # pc range trace |
| # Define functional units | ||
| -fu |
= ialu1:i:A,LO,SH,CV,M,D,SQ,C,R,CB,UB,CJ,UJ,SY:1 | # Integer functional units |
| -fu |
= ialu2:i:A,LO,SH,CV,M,D,SQ,C,R,CB,UB,CJ,UJ,SY:1 | |
| -fu |
= ialu3:i:A,LO,SH,CV,M,D,SQ,C,R,CB,UB,CJ,UJ,SY:1 | |
| -fu |
= ialu4:i:A,LO,SH,CV,M,D,SQ,C,R,CB,UB,CJ,UJ,SY:1 | |
| -fu |
= falu5:fp:A,LO,SH,CV,M,D,SQ,MAC,CB,UB,CJ,UJ:1 | # Floating Point functional units |
| -fu |
= falu6:fp:A,LO,SH,CV,M,D,SQ,MAC,CB,UB,CJ,UJ:1 | |
| -fu |
= falu7:fp:A,LO,SH,CV,M,D,SQ,MAC,CB,UB,CJ,UJ:1 | |
| -fu |
= falu8:fp:A,LO,SH,CV,M,D,SQ,MAC,CB,UB,CJ,UJ:1 | |
| -fu |
= valu9:v:A,LO,SH,CV,M,D,SQ,MAC:2 | # Vector Functional units |
| -fu |
= valu10:v:A,LO,SH,CV,M,D,SQ,MAC:2 | |
| -fu |
= valu11:v:A,LO,SH,CV,M,D,SQ,MAC:2 | |
| -fu |
= valu12:v:A,LO,SH,CV,M,D,SQ,MAC:2 | |
| -fu |
= dport1:m:LD,ST,GT,SC:1 | # data cache ports |
| -fu |
= dport2:m:LD,ST,GT,SC:1 | |
| -fu |
= iport1:fe:IFETCH:1 | # instruction cache ports |
| # Fetch Stage |
||
| -fetch:smf |
= 1 | # Simultaneous fetch [1..16] |
| -fetch:width | = 4 | # Fetch Width [1..64] |
| -fetch:perfect |
= 1 | # Perfect fetch 0,1 |
| -fetch:perfect |
= 1 | # Collapsing Buffer 0,1 |
| -fetch:ub | = 8 | # Unresolved Branches [0..8] |
| -fetch:iil | = 4 | # Number of Instructions per icache line [1..16] |
| # Decode Stage | ||
| -decode:width | = 4 | # Renaming Issue Width [1..64] |
| -decode:win | = 128 | # Renaming Window slots [1..512] |
| -decode:rlprio | = 0 | # Ready list Priority [0..2] |
| -decode:splitfl | = 0 | # Split Free List 0,1 |
| -decode:kmr | = 0 | # Keep Memory Registers [0..128] |
| -decode:rtail | = 1 | # Release to tail 0,1 |
| # Issue and Execution |
||
| -issue:prio | = 0 | # Priority Policy [0..4] |
| -issue:wdepth | = 4 | # Wakeup depth [0..512] |
| # Integer Execution Pipeline |
||
| -int:width | = 4 | # Integer Issue Width [1..64] |
| -int:win |
= 32 | # Integer issue Queue [1..512] |
| -int:reg | = 0:31:1:10:6:64:0:0 | # Integer Register File |
| # Floating Point Pipeline |
||
| -fp:width | = 4 | # Floating Point Issue Width [1..64] |
| -fp:win | = 16 | # Floating Point Issue Queue [1..512] |
| -fp:reg | = 32:63:1:6:4:96:0:0 | # Floating Point Register File |
| # Vector/u-SIMD Pipeline |
||
| -vec:width | = 2 | # Vector Issue Width [1..64] |
| -vec:win | = 16 | # Vector Issue Queue [1..512] |
| -vec:reg | = 64:79:8:3:2:36:0:0 | # Vector Register File |
| -vec:accreg | = 96:97:1:2:1:8:0:0 | # Accumulator Register File |
| # Memory Unit |
||
| -mem:dep:width | = 4 | # Memory Disambiguation Issue Width [1..64] |
| -mem:dep:win | = 64 | # Memory Disambiguation Queue [1..512] |
| -mem:dep:skipd2 | = 1 | # Skipd2 0,1 |
| -mem:width | = 4 | # Memory Unit Issue Width [1..64] |
| -mem:win | = 64 | # Memory Unit Issue Queue [1..512] |
| -mem:model | = 4 | # Memory OOO Model [0..4] |
| -mem:commit | = 0 | # Memory Commit Style 0,1 |
| -mem:prio | = bls | # Memory Priority Policy |
| -mem:vbypass | = 0 | # Allow vby Bypass 0,1 |
| -mem:sbypass | = 0 | # Allow sby Bypass 0,1 |
| -mem:cachel | = 0 | # Cache Loads 0,1 |
| # Graduation Stage |
||
| -grad:width |
# Number of instructions graduated per cycle [1..64] | |
| -grad:win | # Graduation Window Slots: ROB [1..512] | |
| -grad:fswkup | # Fast Store Wakeup 0,1 | |
| -grad:late | # Commit Model 0,1 | |
| # Debug information | ||
| -bbcycles | = 0 | # Count bb Cycles 0,1 |
| -debug |
= 0 | # Debug trace [0..inf] |
| -dcycle |
= 0 | # Debug Cycle [0..inf] |
| -dpipe | = 0 | # Pipeline Start [0..inf] |
| -dtype | = 0 | # Debug Type [0..inf] |
| -od | = 0 | # Output Directory output_dir |
| # Memory system |
||
| -memsys |
= 2 | # 0: ideal # 1: multiport # 2: vector cache # 3: dual_cache -> NOT VALID # 4: simple_bank |
| -memlat |
=1 |
# Memory Latency |
| -memw |
=1 |
# Memory Width [0..128] (Effective bandwidth) |
| -memlanes |
=0 |
# Memory Lanes [0..128] |
| -scache_perfect |
=0 |
|
| -memprof |
=0 |
# Memory Profiling |
| -emuvcache |
=0 |
# Emulate Vector Cache [0..2] |
| # Main memory |
BUS_F:BUS_BW:UNI/BI:DIM: LAT_UP:LAT_DOWN:LAT_CTR:SMART/DUMMY |
|
| -RDRAM:conf | 200:2:U:8:1:49:49:2:SMART | # RAMbus configuration |
| # Cache Memory |
||
| -cache:fetch_strat |
= 1 | # Data Cache fetch approach 0: Vector Cache 1: Multi address generation 2: Collapsing Buffer 3: Conventional cache |
| -cache:ifetch_strat |
= 3 | # Instruction Cache fetch approach |
| -cache:bypassVEC |
= 0 | # Vector Cache Bypass 0,1 |
| -cache:bypassVECto |
= L1 | # Bypass vector to cache level |
| -cache:default_layer |
= L1 | # Cache Default Layer |
| -cache:sel_inv |
= NSI | # Selective Invalidation |
| -cache:sim_Icache |
= 1 | # Simulate Instruction Cache 0,1 |
| -cache:allow_unalign |
= 0 | # Allow unaligned access 0,1 |
| -cache:allow_compres |
= 0 | # Allow compression 0,1 |
| -cache:sharing_threads |
= 0 | # SMT common space |
| -cache:memlanes | # Mem lanes [0..128] |
|
| # Cache Levels | |
|
| -cache:L1 |
=
L1:8:ENABLED:0:NONE:NONE:NONE: L2:NI:WT:WNA:128:4:1:2:1:8:4:1:8 |
|
| -cache:L2 | =L2:1:ENABLED:1:L1:NONE:NONE: NONE:IN:CB:WNA:4096:16:4:2:10:8:4:1:8:FIXED |
|
| -cache:I1 |
=
I1:4:ENABLED:0:NONE:NONE:NONE: L2:NI:WT:WNA:256:4:4:2:1:2:2:1:4:FIXED |
|
| # Branch Predictor |
||
| -bpred |
= 2lev | # Branch Predictor Type: nottaken, taken, ideal, perfect, bimod, 2lev, comb, none |
| -bpred:bimod |
= 4096 | |
| -bpred:2lev |
= 1:16384:12:1 | |
| -bpred:comb |
= 1024 | |
| -bpred:ras |
= 8 | |
| -bpred:btb |
= 1024:4 | |
| -bpred:update |
=
ct |
# Branch predictor Update: id, wb, ct |