Difference between revisions of "Benchmarking: Coremark"
Jump to navigation
Jump to search
| (48 intermediate revisions by 4 users not shown) | |||
| Line 17: | Line 17: | ||
FLAGS_STR = "$(PORT_CFLAGS) $(XCFLAGS) $(XLFLAGS) $(LFLAGS_END)" | FLAGS_STR = "$(PORT_CFLAGS) $(XCFLAGS) $(XLFLAGS) $(LFLAGS_END)" | ||
CFLAGS = $(PORT_CFLAGS) -I$(PORT_DIR) -I. -DFLAGS_STR=\"$(FLAGS_STR)\" -funroll-all-loops --param max-inline-insns-auto=200 | CFLAGS = $(PORT_CFLAGS) -I$(PORT_DIR) -I. -DFLAGS_STR=\"$(FLAGS_STR)\" -funroll-all-loops --param max-inline-insns-auto=200 | ||
| + | </syntaxhighlight> | ||
| + | |||
| + | * Version for APM XGene | ||
| + | <syntaxhighlight> | ||
| + | #PORT_CFLAGS = -O2 | ||
| + | PORT_CFLAGS = -static -O3 -flto -fwhole-program -funroll-all-loops -mnew-cost-model -mbranch-cost=8 -fipa-pta -DTIME -DHZ=50 | ||
</syntaxhighlight> | </syntaxhighlight> | ||
| Line 26: | Line 32: | ||
root@jph1:~/coremark_v1.0# make | root@jph1:~/coremark_v1.0# make | ||
</syntaxhighlight> | </syntaxhighlight> | ||
| + | |||
| + | === Faster systems === | ||
If you have a fast system (faster than a RaspberryPi), increase the number of iterations to increase the test run to give a more meaningful result: | If you have a fast system (faster than a RaspberryPi), increase the number of iterations to increase the test run to give a more meaningful result: | ||
<syntaxhighlight> | <syntaxhighlight> | ||
root@jph1:~/coremark_v1.0# make ITERATIONS=10000 | root@jph1:~/coremark_v1.0# make ITERATIONS=10000 | ||
| + | </syntaxhighlight> | ||
| + | |||
| + | === 32-bit ARM systems === | ||
| + | Coremark seems to have an issue when not running in a linux64 environment (e.g. 32-bit ARM). Add a flag to the <code>make</code> command: | ||
| + | <syntaxhighlight> | ||
| + | root@jph1:~/coremark_v1.0# make PORT_DIR=simple | ||
</syntaxhighlight> | </syntaxhighlight> | ||
== Run benchmark (multi thread) == | == Run benchmark (multi thread) == | ||
| + | Rebuild from scratch | ||
| + | <syntaxhighlight> | ||
| + | make clean | ||
| + | |||
| + | # NOTE APM ARM 64 flags below (just added -DMULTITHREAD=8 -DUSE_PTHREAD) | ||
| + | # edit the linux64/core_portme.mak file | ||
| + | PORT_CFLAGS = -static -O3 -flto -fwhole-program -funroll-all-loops -fipa-pta -DTIME -DHZ=50 -DMULTITHREAD=8 -DUSE_PTHREAD | ||
| + | .. | ||
| + | LFLAGS_END += -lrt -lpthread | ||
| + | |||
| + | # make | ||
| + | </syntaxhighlight> | ||
| + | |||
| + | Jon on MIPS | ||
<syntaxhighlight> | <syntaxhighlight> | ||
root@jph1:~/coremark_v1.0# make XCFLAGS="-DMULTITHREAD=8 -DUSE_PTHREAD" | root@jph1:~/coremark_v1.0# make XCFLAGS="-DMULTITHREAD=8 -DUSE_PTHREAD" | ||
| Line 38: | Line 66: | ||
== Results == | == Results == | ||
{| class="wikitable" style="text-align:center; width:100%; " | {| class="wikitable" style="text-align:center; width:100%; " | ||
| − | |+ Coremark V1.0 results | + | |+ Coremark V1.0 results with -O3 compile flags |
| + | |- | ||
| + | ! scope="col" | CPU !! scope="col" | Freq !! scope="col" | Cores !! scope="col" | Coremark !! scope="col" | Coremark/<br>MHz !! scope="col" | Coremark/<br>Core !! scope="col" | Coremark/<br>Core/MHz !! scope="col" | Parallel Execution !! scope="col" | Compile Flags | ||
| + | |- | ||
| + | | colspan="9"|''MIPS'' | ||
| + | |- | ||
| + | ! scope="row" | Cavium Octeon II CN6645 || 1.50GHz || 10 || 31172 || 20.781 || 3117 || 2.0781 || 10 Threads || -O3 -funroll-all-loops --param max-inline-insns-auto=200 | ||
| + | |- | ||
| + | | colspan="9"|''ARM'' | ||
| + | |- | ||
| + | ! scope="row" | Calxeda Highbank (Cortex A9) || 1.40GHz || 4 || 12074|| 8.624 || 3018 || 2.1557 || 4 Threads || -O3 | ||
| + | |- | ||
| + | ! scope="row" | Calxeda Midway (Cortex A15) || 1.50GHz || 4 || 22013 || 14.675|| 5503 || 3.6687 || 4 Threads || -O3 | ||
| + | |- | ||
| + | ! scope="row" | APM XGene1 (Cortex A57) || 2.40GHz || 8 || 54408|| 22.670 || 6801 || 2.8338 || 8 Threads || -O3 | ||
| + | |- | ||
| + | ! scope="row" | AMD Seattle (Cortex A57) || 1.50GHz || 6 || 34834 || 23.223 || 5805 || 3.8704 || 6 Threads || -O3 | ||
| + | |- | ||
| + | | colspan="9"|''Intel'' | ||
| + | |- | ||
| + | ! scope="row" | Intel Atom C2550 || 2.4GHz || 4 || 29333 || 12.222|| 7333 || 3.0554 || 4 Threads || -O3 | ||
| + | |- | ||
| + | ! scope="row" | Intel Atom C2750 || 2.4GHz || 8 || 64140 || 26.725 || 8017 || 3.340 || 8 Threads || -O3 Centos 7 | ||
| + | |- | ||
| + | ! scope="row" | Intel Xeon E5-2660 v1 (no HT) || 2.2GHz || 8 || 105540 || 47.973 || 13192 || 5.9966|| 8 Threads || -O3 | ||
| + | |- | ||
| + | ! scope="row" | Intel Xeon E5-2640 v2 (no HT) || 2.0GHz || 8 || 111095 || 55.548 || 13886 || 6.9434 || 8 Threads || -O3 | ||
| + | |- | ||
| + | ! scope="row" | Intel Xeon E5-2650 v2 (no HT) || 2.6GHz || 8 || 120782 || 46.455 || 15097 || 5.8068 || 8 Threads || -O3 | ||
| + | |- | ||
| + | ! scope="row" | Intel Xeon E5-2660 v2 (no HT) || 2.2GHz || 10 || 134147 || 60.976 || 13414 || 6.0976 || 10 Threads || -O3 | ||
| + | |- | ||
| + | ! scope="row" | Intel Xeon D-1540 || 2.00GHz || 8 || 121285 || 60.6425 || 15160.6 || 7.58 || 8 Threads || -O3 | ||
| + | |- | ||
| + | ! scope="row" | Intel Xeon E5-2630 v3 (HT) || 2.4GHz || 8 || 135249 || 56.35 || 16906 || 7.04 || 8 Threads || -O3 | ||
| + | |- | ||
| + | ! scope="row" | Intel Xeon E5-2640 v3 (no HT) || 2.6GHz || 8 || 143377|| 55.15 || 17922 || 6.89 || 8 Threads || -O3 | ||
| + | |- | ||
| + | ! scope="row" | Intel Xeon E5-2660 v3 (no HT) || 2.6GHz || 10 || 169481 || 65.18 || 16948 || 6.51 || 10 Threads || -O3 | ||
| + | |- | ||
| + | ! scope="row" | Intel Xeon E5-2680 v3 (HT) || 2.5GHz || 12 || 193746|| 77.49 || 16145 || 6.45 || 12 Threads || -O3 | ||
| + | |- | ||
| + | ! scope="row" | Intel Xeon E5-2697 v3 (no HT) || 2.6GHz || 14 || 250387|| || || || Threads || -O3 | ||
| + | |- | ||
| + | | colspan="9"|''AMD'' | ||
| + | |- | ||
| + | ! scope="row" | AMD Opteron 6380 || 2.5GHz || 16 || 149107 || 59.643 || 9319 || 3.7277 || 16 Threads || -O3 | ||
| + | |- | ||
| + | |} | ||
| + | |||
| + | {| class="wikitable" style="text-align:center; width:100%; " | ||
| + | |+ Coremark V1.0 results with -O2 compile flags | ||
| + | |- | ||
| + | ! scope="col" | CPU !! scope="col" | Freq !! scope="col" | Cores !! scope="col" | Coremark !! scope="col" | Coremark/<br>MHz !! scope="col" | Coremark/<br>Core !! scope="col" | Coremark/<br>Core/MHz !! scope="col" | Parallel Execution !! scope="col" | Compile Flags | ||
| + | |- | ||
| + | | colspan="9"|''MIPS'' | ||
| + | |- | ||
| + | ! scope="row" | Cavium Octeon II CN6645 || 1.50GHz || 10 || 25671 || 17.114 || 2567 || 1.7114 || 10 Threads || | ||
| + | |- | ||
| + | | colspan="9"|''ARM'' | ||
| + | |- | ||
| + | ! scope="row" | Calxeda Highbank (Cortex A9) || 1.40GHz || 4 || 11764|| 8.403 || 2941 || 2.1007 || 4 Threads || | ||
| + | |- | ||
| + | ! scope="row" | Calxeda Midway (Cortex A15) || 1.50GHz || 4 || 20284 || 13.523 || 5071 || 3.3807 || 4 Threads || | ||
| + | |- | ||
| + | ! scope="row" | APM XGene1 (Cortex A57) || 2.40GHz || 8 || 49304 || 20.543 || 6163 || 2.5679 || 8 Threads || | ||
| + | |- | ||
| + | ! scope="row" | AMD Seattle (Cortex A57) || 1.50GHz || 6 || 27833 || 18.555 || 4638 || 3.0925 || 6 Threads || | ||
| + | |- | ||
| + | | colspan="9"|''Intel'' | ||
| + | |- | ||
| + | ! scope="row" | Intel Atom C2550 || 2.4GHz || 4 || 26044 || 10.852 || 6511 || 2.7129 || 4 Threads || | ||
| + | |- | ||
| + | ! scope="row" | Intel Atom C2750 || 2.4GHz || 8 || 50873 || 21.197 || 6359 || 2.6496 || 8 Threads || | ||
| + | |- | ||
| + | ! scope="row" | Intel Xeon E5-2660 v1 (no HT)|| 2.2GHz || 8 || 91152 || 41.433 || 11394 || 5.1791 || 8 Threads || | ||
|- | |- | ||
| − | ! scope=" | + | ! scope="row" | Intel Xeon E5-2640 v2 (no HT) || 2.0GHz || 8 || 96455 || 48.228 || 12056 || 6.0284 || 8 Threads || |
|- | |- | ||
| − | + | ! scope="row" | Intel Xeon E5-2650 v2 (no HT) || 2.6GHz || 8 || 102373 || 39.374 || 12796 || 4.9218 || 8 Threads || | |
|- | |- | ||
| − | + | | colspan="9"|''AMD'' | |
|- | |- | ||
| − | ! scope="row" | | + | ! scope="row" | AMD Opteron 6380 || 2.5GHz || 16 || 128439 || 51.376 || 8027 || 3.2110 || 16 Threads || |
|- | |- | ||
|} | |} | ||
Latest revision as of 13:46, 19 November 2015
Decompress
root@jph1:~# tar zxvf coremark_v1.0
cd coremark_v1.0

Adjust compilation flags

Seems to be in ./linux64/core_portme.mak
- Change PORT_CFLAGS value to -O3
- Add extra flags to the end of the CFLAGS statement
For example (added -funroll-all-loops --param max-inline-insns-auto=200):
CC = gcc
# Flag: CFLAGS
# Use this flag to define compiler options. Note, you can add compiler options from the command line using XCFLAGS="other flags"
PORT_CFLAGS = -O3
FLAGS_STR = "$(PORT_CFLAGS) $(XCFLAGS) $(XLFLAGS) $(LFLAGS_END)"
CFLAGS = $(PORT_CFLAGS) -I$(PORT_DIR) -I. -DFLAGS_STR=\"$(FLAGS_STR)\" -funroll-all-loops --param max-inline-insns-auto=200

- Version for APM XGene
#PORT_CFLAGS = -O2
PORT_CFLAGS = -static -O3 -flto -fwhole-program -funroll-all-loops -mnew-cost-model -mbranch-cost=8 -fipa-pta -DTIME -DHZ=50

Run benchmark (single thread)
Run benchmark with make:
root@jph1:~/coremark_v1.0# pwd
/root/coremark_v1.0
root@jph1:~/coremark_v1.0# make

Faster systems
If you have a fast system (faster than a RaspberryPi), increase the number of iterations so that the test runs long enough to give a meaningful result:
root@jph1:~/coremark_v1.0# make ITERATIONS=10000

32-bit ARM systems
Coremark seems to have an issue when not running in a linux64 environment (e.g. 32-bit ARM). Add a flag to the make command:
root@jph1:~/coremark_v1.0# make PORT_DIR=simple

Run benchmark (multi thread)
Rebuild from scratch
make clean
# NOTE APM ARM 64 flags below (just added -DMULTITHREAD=8 -DUSE_PTHREAD)
# edit the linux64/core_portme.mak file
PORT_CFLAGS = -static -O3 -flto -fwhole-program -funroll-all-loops -fipa-pta -DTIME -DHZ=50 -DMULTITHREAD=8 -DUSE_PTHREAD
..
LFLAGS_END += -lrt -lpthread
# make

Jon on MIPS
root@jph1:~/coremark_v1.0# make XCFLAGS="-DMULTITHREAD=8 -DUSE_PTHREAD"

Results

Coremark V1.0 results with -O3 compile flags

| CPU | Freq | Cores | Coremark | Coremark/MHz | Coremark/Core | Coremark/Core/MHz | Parallel Execution | Compile Flags |
|---|---|---|---|---|---|---|---|---|
| MIPS | ||||||||
| Cavium Octeon II CN6645 | 1.50GHz | 10 | 31172 | 20.781 | 3117 | 2.0781 | 10 Threads | -O3 -funroll-all-loops --param max-inline-insns-auto=200 |
| ARM | ||||||||
| Calxeda Highbank (Cortex A9) | 1.40GHz | 4 | 12074 | 8.624 | 3018 | 2.1557 | 4 Threads | -O3 |
| Calxeda Midway (Cortex A15) | 1.50GHz | 4 | 22013 | 14.675 | 5503 | 3.6687 | 4 Threads | -O3 |
| APM XGene1 (Cortex A57) | 2.40GHz | 8 | 54408 | 22.670 | 6801 | 2.8338 | 8 Threads | -O3 |
| AMD Seattle (Cortex A57) | 1.50GHz | 6 | 34834 | 23.223 | 5805 | 3.8704 | 6 Threads | -O3 |
| Intel | ||||||||
| Intel Atom C2550 | 2.4GHz | 4 | 29333 | 12.222 | 7333 | 3.0554 | 4 Threads | -O3 |
| Intel Atom C2750 | 2.4GHz | 8 | 64140 | 26.725 | 8017 | 3.340 | 8 Threads | -O3 Centos 7 |
| Intel Xeon E5-2660 v1 (no HT) | 2.2GHz | 8 | 105540 | 47.973 | 13192 | 5.9966 | 8 Threads | -O3 |
| Intel Xeon E5-2640 v2 (no HT) | 2.0GHz | 8 | 111095 | 55.548 | 13886 | 6.9434 | 8 Threads | -O3 |
| Intel Xeon E5-2650 v2 (no HT) | 2.6GHz | 8 | 120782 | 46.455 | 15097 | 5.8068 | 8 Threads | -O3 |
| Intel Xeon E5-2660 v2 (no HT) | 2.2GHz | 10 | 134147 | 60.976 | 13414 | 6.0976 | 10 Threads | -O3 |
| Intel Xeon D-1540 | 2.00GHz | 8 | 121285 | 60.6425 | 15160.6 | 7.58 | 8 Threads | -O3 |
| Intel Xeon E5-2630 v3 (HT) | 2.4GHz | 8 | 135249 | 56.35 | 16906 | 7.04 | 8 Threads | -O3 |
| Intel Xeon E5-2640 v3 (no HT) | 2.6GHz | 8 | 143377 | 55.15 | 17922 | 6.89 | 8 Threads | -O3 |
| Intel Xeon E5-2660 v3 (no HT) | 2.6GHz | 10 | 169481 | 65.18 | 16948 | 6.51 | 10 Threads | -O3 |
| Intel Xeon E5-2680 v3 (HT) | 2.5GHz | 12 | 193746 | 77.49 | 16145 | 6.45 | 12 Threads | -O3 |
| Intel Xeon E5-2697 v3 (no HT) | 2.6GHz | 14 | 250387 | | | | Threads | -O3 |
| AMD | ||||||||
| AMD Opteron 6380 | 2.5GHz | 16 | 149107 | 59.643 | 9319 | 3.7277 | 16 Threads | -O3 |

Coremark V1.0 results with -O2 compile flags

| CPU | Freq | Cores | Coremark | Coremark/MHz | Coremark/Core | Coremark/Core/MHz | Parallel Execution | Compile Flags |
|---|---|---|---|---|---|---|---|---|
| MIPS | ||||||||
| Cavium Octeon II CN6645 | 1.50GHz | 10 | 25671 | 17.114 | 2567 | 1.7114 | 10 Threads | |
| ARM | ||||||||
| Calxeda Highbank (Cortex A9) | 1.40GHz | 4 | 11764 | 8.403 | 2941 | 2.1007 | 4 Threads | |
| Calxeda Midway (Cortex A15) | 1.50GHz | 4 | 20284 | 13.523 | 5071 | 3.3807 | 4 Threads | |
| APM XGene1 (Cortex A57) | 2.40GHz | 8 | 49304 | 20.543 | 6163 | 2.5679 | 8 Threads | |
| AMD Seattle (Cortex A57) | 1.50GHz | 6 | 27833 | 18.555 | 4638 | 3.0925 | 6 Threads | |
| Intel | ||||||||
| Intel Atom C2550 | 2.4GHz | 4 | 26044 | 10.852 | 6511 | 2.7129 | 4 Threads | |
| Intel Atom C2750 | 2.4GHz | 8 | 50873 | 21.197 | 6359 | 2.6496 | 8 Threads | |
| Intel Xeon E5-2660 v1 (no HT) | 2.2GHz | 8 | 91152 | 41.433 | 11394 | 5.1791 | 8 Threads | |
| Intel Xeon E5-2640 v2 (no HT) | 2.0GHz | 8 | 96455 | 48.228 | 12056 | 6.0284 | 8 Threads | |
| Intel Xeon E5-2650 v2 (no HT) | 2.6GHz | 8 | 102373 | 39.374 | 12796 | 4.9218 | 8 Threads | |
| AMD | ||||||||
| AMD Opteron 6380 | 2.5GHz | 16 | 128439 | 51.376 | 8027 | 3.2110 | 16 Threads | |