<?xml version="1.0"?>
<feed xmlns="http://www.w3.org/2005/Atom" xml:lang="en">
	<id>http://wiki.define-technology.com/mediawiki-1.35.0/index.php?action=history&amp;feed=atom&amp;title=Benchmarking%3A_HPL_on_a_GPU_using_CUDA</id>
	<title>Benchmarking: HPL on a GPU using CUDA - Revision history</title>
	<link rel="self" type="application/atom+xml" href="http://wiki.define-technology.com/mediawiki-1.35.0/index.php?action=history&amp;feed=atom&amp;title=Benchmarking%3A_HPL_on_a_GPU_using_CUDA"/>
	<link rel="alternate" type="text/html" href="http://wiki.define-technology.com/mediawiki-1.35.0/index.php?title=Benchmarking:_HPL_on_a_GPU_using_CUDA&amp;action=history"/>
	<updated>2026-05-04T17:20:14Z</updated>
	<subtitle>Revision history for this page on the wiki</subtitle>
	<generator>MediaWiki 1.35.0</generator>
	<entry>
		<id>http://wiki.define-technology.com/mediawiki-1.35.0/index.php?title=Benchmarking:_HPL_on_a_GPU_using_CUDA&amp;diff=1095&amp;oldid=prev</id>
		<title>David: Created page with &quot;Source and Build Instructions PDF are located on PDD: HPC Benchmarking/Applications/hpl-cuda  PDD Link: &lt;file&gt;\\srv-vfs2\PDD_DATA\Product Development\High Performance Computin...&quot;</title>
		<link rel="alternate" type="text/html" href="http://wiki.define-technology.com/mediawiki-1.35.0/index.php?title=Benchmarking:_HPL_on_a_GPU_using_CUDA&amp;diff=1095&amp;oldid=prev"/>
		<updated>2012-10-10T11:57:55Z</updated>

		<summary type="html">&lt;p&gt;Created page with &amp;quot;Source and Build Instructions PDF are located on PDD: HPC Benchmarking/Applications/hpl-cuda  PDD Link: &amp;lt;file&amp;gt;\\srv-vfs2\PDD_DATA\Product Development\High Performance Computin...&amp;quot;&lt;/p&gt;
&lt;p&gt;&lt;b&gt;New page&lt;/b&gt;&lt;/p&gt;&lt;div&gt;Source and Build Instructions PDF are located on PDD: HPC Benchmarking/Applications/hpl-cuda&lt;br /&gt;
&lt;br /&gt;
PDD Link: &amp;lt;file&amp;gt;\\srv-vfs2\PDD_DATA\Product Development\High Performance Computing\HPC Benchmarking\Applications\hpl-cuda&amp;lt;/file&amp;gt;&lt;br /&gt;
&lt;br /&gt;
===== Build Source =====&lt;br /&gt;
* Built using: &lt;br /&gt;
** Platform mpi (/opt/platform_mpi)&lt;br /&gt;
** Intel MKL (/shared/intel/composer-2011, 12.0 compilers)&lt;br /&gt;
** CUDA 4.0 (/usr/local/cuda)&lt;br /&gt;
* Extract the tar.gz archive, cd into the directory and edit the &amp;lt;tt&amp;gt;Make.CUDA&amp;lt;/tt&amp;gt; file&lt;br /&gt;
&lt;br /&gt;
&amp;lt;syntaxhighlight&amp;gt;&lt;br /&gt;
# TOPdir is set around line 103&lt;br /&gt;
ifndef  TOPdir&lt;br /&gt;
TOPdir = /home/david/benchmarking/hpl-2.0_FERMI_v13&lt;br /&gt;
endif&lt;br /&gt;
&lt;br /&gt;
# openmpi section (pointed at Platform MPI here)&lt;br /&gt;
MPdir        = /opt/platform_mpi/&lt;br /&gt;
MPinc        = -I$(MPdir)/include&lt;br /&gt;
MPlib        = $(MPdir)/lib/linux_amd64/libmpi.so&lt;br /&gt;
&lt;br /&gt;
# MKL LAdir/inc/lib&lt;br /&gt;
LAdir        = /shared/intel/composerxe-2011/mkl/lib/intel64/&lt;br /&gt;
LAinc        =&lt;br /&gt;
LAlib        = -L $(TOPdir)/src/cuda  -ldgemm -L/usr/local/cuda/lib64 -lcuda -lcudart -lcublas -L$(LAdir) -lmkl_intel_lp64 -lmkl_intel_thread -lmkl_core -liomp5&lt;br /&gt;
&lt;br /&gt;
# next two lines for Intel Compilers:&lt;br /&gt;
CC      = mpicc&lt;br /&gt;
CCFLAGS = $(HPL_DEFS) -O3 -axS -w -fomit-frame-pointer -funroll-loops -openmp&lt;br /&gt;
&lt;br /&gt;
# rest of the file should be ok straight from unzipping, build using make&lt;br /&gt;
&amp;lt;/syntaxhighlight&amp;gt;&lt;br /&gt;
&lt;br /&gt;
===== Build the binaries =====&lt;br /&gt;
&amp;lt;syntaxhighlight&amp;gt;&lt;br /&gt;
make &lt;br /&gt;
# which will end up producing&lt;br /&gt;
[david@vhpchead hpl-2.0_FERMI_v13]$ find bin/&lt;br /&gt;
bin/&lt;br /&gt;
bin/CUDA&lt;br /&gt;
bin/CUDA/xhpl&lt;br /&gt;
bin/CUDA/HPL.dat&lt;br /&gt;
bin/CUDA/HPL.dat_example&lt;br /&gt;
bin/CUDA/run_linpack&lt;br /&gt;
bin/CUDA/output_example&lt;br /&gt;
bin/CUDA/._HPL.dat&lt;br /&gt;
bin/CUDA/._run_linpack&lt;br /&gt;
&amp;lt;/syntaxhighlight&amp;gt;&lt;br /&gt;
&lt;br /&gt;
===== Edit run_linpack script =====&lt;br /&gt;
* In bin/CUDA/run_linpack, check that the following are set:&lt;br /&gt;
&amp;lt;syntaxhighlight&amp;gt;&lt;br /&gt;
#!/bin/bash&lt;br /&gt;
&lt;br /&gt;
#location of HPL &lt;br /&gt;
HPL_DIR=/home/david/benchmarking/hpl-2.0_FERMI_v13&lt;br /&gt;
&lt;br /&gt;
# Number of CPU cores ( per GPU used = per MPI process )&lt;br /&gt;
CPU_CORES_PER_GPU=4&lt;br /&gt;
&lt;br /&gt;
# FOR MKL&lt;br /&gt;
export MKL_NUM_THREADS=$CPU_CORES_PER_GPU&lt;br /&gt;
# FOR GOTO&lt;br /&gt;
export GOTO_NUM_THREADS=$CPU_CORES_PER_GPU&lt;br /&gt;
# FOR OMP&lt;br /&gt;
export OMP_NUM_THREADS=$CPU_CORES_PER_GPU&lt;br /&gt;
&lt;br /&gt;
export MKL_DYNAMIC=FALSE&lt;br /&gt;
&lt;br /&gt;
# hint: for 2050 or 2070 card&lt;br /&gt;
#       try 350/(350 + MKL_NUM_THREADS*4*cpu frequency in GHz) &lt;br /&gt;
export CUDA_DGEMM_SPLIT=0.80&lt;br /&gt;
&lt;br /&gt;
# hint: try CUDA_DGEMM_SPLIT - 0.10&lt;br /&gt;
export CUDA_DTRSM_SPLIT=0.70&lt;br /&gt;
&lt;br /&gt;
export LD_LIBRARY_PATH=$HPL_DIR/src/cuda:$LD_LIBRARY_PATH&lt;br /&gt;
&lt;br /&gt;
$HPL_DIR/bin/CUDA/xhpl&lt;br /&gt;
&amp;lt;/syntaxhighlight&amp;gt;&lt;br /&gt;
&lt;br /&gt;
===== Run on a Single GPU =====&lt;br /&gt;
&lt;br /&gt;
===== Results =====&lt;br /&gt;
* From an E5620 system with 2x M2075&lt;br /&gt;
&amp;lt;syntaxhighlight&amp;gt;&lt;br /&gt;
# CPU_CORES_PER_GPU=8&lt;br /&gt;
# CUDA_DGEMM_SPLIT=0.80&lt;br /&gt;
# CUDA_DTRSM_SPLIT=0.70&lt;br /&gt;
================================================================================&lt;br /&gt;
T/V                N    NB     P     Q               Time                 Gflops&lt;br /&gt;
--------------------------------------------------------------------------------&lt;br /&gt;
WR10L2L2      108032  1024     1     2            1170.08              7.184e+02&lt;br /&gt;
--------------------------------------------------------------------------------&lt;br /&gt;
||Ax-b||_oo/(eps*(||A||_oo*||x||_oo+||b||_oo)*N)=        0.0041656 ...... PASSED&lt;br /&gt;
================================================================================&lt;br /&gt;
&amp;lt;/syntaxhighlight&amp;gt;&lt;/div&gt;</summary>
		<author><name>David</name></author>
	</entry>
</feed>