Multiple Program Runs in one Slurm Job

Work in progress ...


Starting a program several times with roughly equal runtimes within a single batch job on a multicore node, using Gaussian as an example.


Basic usage

Problem: a program (here Gaussian) does not scale well across all cores of a multicore node. If such compute nodes are not used exclusively, jobs of several users run on the same node at the same time and affect each other's runtime, which makes it difficult to reliably estimate the runtime needed for the wall-clock time limit. One remedy is to start several program runs within a single batch job that uses one node exclusively. Given the usual NUMA architecture of such multicore nodes, it is important to place the individual program runs carefully, e.g. one program run per NUMA node, as done with numactl in the examples below.
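
Before splitting a node, it helps to check its NUMA layout interactively; a minimal sketch using standard Linux tools (not specific to this wiki, run on a node of the target partition):

### number of NUMA nodes and which cores belong to which node
lscpu | grep -i numa
### NUMA nodes with their CPUs, memory sizes and distances
numactl --hardware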

#!/usr/local_rwth/bin/zsh

#SBATCH  --job-name=run2x24  
#SBATCH --output=%j.log
#SBATCH --error=%j.err
#SBATCH --time=00-01:00:00
#SBATCH --mem=180G
### exclusive usage of a single node
#SBATCH --exclusive
### use all cores, one thread per core
#SBATCH --ntasks=1 --nodes=1
#SBATCH --cpus-per-task=48
#SBATCH --threads-per-core=1

### prepare your environment for running gaussian
module load CHEMISTRY gaussian
### make sure this environment variable points to a suitable location
### here the gaussian module allocates the scratch directory
echo $GAUSS_SCRDIR

### adjust working directory and input file names and output directory names
export WDIR=....

export INP1=small1.inp24
export INP2=small2.inp24

export OUT1=run1
export OUT2=run2

### the program will run in $WDIR/$SLURM_JOB_ID/$OUTx
### Scratch files will be put in $GAUSS_SCRDIR/$SLURM_JOB_ID/$OUTx
### Input files are assumed to be in $WDIR/$INPx
mkdir -p $WDIR/$SLURM_JOB_ID/$OUT1
mkdir -p $WDIR/$SLURM_JOB_ID/$OUT2
mkdir -p $GAUSS_SCRDIR/$SLURM_JOB_ID/$OUT1
mkdir -p $GAUSS_SCRDIR/$SLURM_JOB_ID/$OUT2

### log the effective CPU and memory binding of each half of the node
numactl --cpubind=0,1 --membind=0,1 -- numactl --show
numactl --cpubind=2,3 --membind=2,3 -- numactl --show

( cd $WDIR/$SLURM_JOB_ID/$OUT1; \
  export GAUSS_SCRDIR=$GAUSS_SCRDIR/$SLURM_JOB_ID/$OUT1; \
  numactl --cpubind=0,1 --membind=0,1 -- timex g09 < ../../$INP1 > g09.out 2> g09.err ) &
pid1=$!
( cd $WDIR/$SLURM_JOB_ID/$OUT2; \
  export GAUSS_SCRDIR=$GAUSS_SCRDIR/$SLURM_JOB_ID/$OUT2; \
  numactl --cpubind=2,3 --membind=2,3 -- timex g09 < ../../$INP2 > g09.out 2> g09.err ) &
pid2=$!

wait $pid1 $pid2
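
The job script is submitted like any other batch script; the file name run2x24.sh is only an assumed example:

sbatch run2x24.sh
squeue -u $USER     ### check that the job has started

The same pattern extends to four Gaussian runs with 12 cores each, i.e. one run per NUMA node of the 48-core node:
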
#!/usr/local_rwth/bin/zsh
#SBATCH  --job-name=run4x12  
#SBATCH --output=%j.log
#SBATCH --error=%j.err
#SBATCH --time=00-01:00:00
#SBATCH --mem=180G

### exclusive usage of a single node
#SBATCH --exclusive
### use all cores, one thread per core
#SBATCH --ntasks=1 --nodes=1
#SBATCH --cpus-per-task=48
#SBATCH --threads-per-core=1

### prepare your environment for running gaussian
module load CHEMISTRY gaussian
### make sure this environment variable points to a suitable location
### here the gaussian module allocates the scratch directory
echo $GAUSS_SCRDIR

### adjust working directory and input file names and output directory names
export WDIR=....

export INP1=small1.inp12
export INP2=small2.inp12
export INP3=small3.inp12
export INP4=small4.inp12

export OUT1=run1
export OUT2=run2
export OUT3=run3
export OUT4=run4

### the program will run in $WDIR/$SLURM_JOB_ID/$OUTx
### Scratch files will be put in $GAUSS_SCRDIR/$SLURM_JOB_ID/$OUTx
### Input files are assumed to be in $WDIR/$INPx

mkdir -p $WDIR/$SLURM_JOB_ID/$OUT1
mkdir -p $WDIR/$SLURM_JOB_ID/$OUT2
mkdir -p $WDIR/$SLURM_JOB_ID/$OUT3
mkdir -p $WDIR/$SLURM_JOB_ID/$OUT4
mkdir -p $GAUSS_SCRDIR/$SLURM_JOB_ID/$OUT1
mkdir -p $GAUSS_SCRDIR/$SLURM_JOB_ID/$OUT2
mkdir -p $GAUSS_SCRDIR/$SLURM_JOB_ID/$OUT3
mkdir -p $GAUSS_SCRDIR/$SLURM_JOB_ID/$OUT4
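
### (optional addition, not in the original script) log the effective binding
### of each NUMA node before starting the runs, as in the 2x24 example above
numactl --cpubind=0 --membind=0 -- numactl --show
numactl --cpubind=1 --membind=1 -- numactl --show
numactl --cpubind=2 --membind=2 -- numactl --show
numactl --cpubind=3 --membind=3 -- numactl --show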

( cd $WDIR/$SLURM_JOB_ID/$OUT1; \
  export GAUSS_SCRDIR=$GAUSS_SCRDIR/$SLURM_JOB_ID/$OUT1; \
  numactl --cpubind=0 --membind=0 -- timex g09 < ../../$INP1 > g09.out  ) &
pid1=$!
( cd $WDIR/$SLURM_JOB_ID/$OUT2; \
  export GAUSS_SCRDIR=$GAUSS_SCRDIR/$SLURM_JOB_ID/$OUT2; \
  numactl --cpubind=1 --membind=1 -- timex g09 < ../../$INP2 > g09.out  ) &
pid2=$!
( cd $WDIR/$SLURM_JOB_ID/$OUT3; \
  export GAUSS_SCRDIR=$GAUSS_SCRDIR/$SLURM_JOB_ID/$OUT3; \
  numactl --cpubind=2 --membind=2 -- timex g09 < ../../$INP3 > g09.out  ) &
pid3=$!
( cd $WDIR/$SLURM_JOB_ID/$OUT4; \
  export GAUSS_SCRDIR=$GAUSS_SCRDIR/$SLURM_JOB_ID/$OUT4; \
  numactl --cpubind=3 --membind=3 -- timex g09 < ../../$INP4 > g09.out  ) &
pid4=$!

wait $pid1 $pid2 $pid3 $pid4
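
The four launch blocks differ only in their index, so they can also be written as a loop. A minimal sketch in the same zsh syntax, assuming the WDIR, INPx and OUTx variables and the directories created above (an equivalent reformulation, not part of the original script):

for i in 1 2 3 4; do
  eval "INP=\$INP$i; OUT=\$OUT$i"   ### pick up INP1..INP4 and OUT1..OUT4
  node=$(( i - 1 ))                 ### NUMA node 0..3 for run 1..4
  ( cd $WDIR/$SLURM_JOB_ID/$OUT; \
    export GAUSS_SCRDIR=$GAUSS_SCRDIR/$SLURM_JOB_ID/$OUT; \
    numactl --cpubind=$node --membind=$node -- timex g09 < ../../$INP > g09.out ) &
done
### wait for all four background runs
wait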


Links and more Information

t.b.a.