Differences
This shows you the differences between two versions of the page.
Both sides previous revision Previous revision Next revision | Previous revision | ||
habrok:advanced_job_management:many_file_jobs [2023/09/19 18:01] – [Running your computations] camarocico | habrok:advanced_job_management:many_file_jobs [2023/10/02 12:20] (current) – Removed verbose option from extraction command aurel | ||
---|---|---|---|
Line 13: | Line 13: | ||
< | < | ||
mkdir $TMPDIR/ | mkdir $TMPDIR/ | ||
- | tar xvzf / | + | tar xzf / |
</ | </ | ||
Line 28: | Line 28: | ||
module load TensorFlow/ | module load TensorFlow/ | ||
+ | mkdir -p $TMPDIR/ | ||
cp / | cp / | ||
cd $TMPDIR | cd $TMPDIR | ||
- | python train.py | + | python train.py |
</ | </ | ||
+ | |||
+ | The script '' | ||
==== Copying results to shared storage ==== | ==== Copying results to shared storage ==== | ||
Line 38: | Line 41: | ||
< | < | ||
- | tar czvf / | + | mkdir -p / |
+ | tar czvf /scratch/$USER/rice_classifier/job_${SLURM_JOBID}/ | ||
</ | </ | ||
Line 47: | Line 51: | ||
To facilitate jobs using this method, below is an example script; you can simply replace all instances of ' | To facilitate jobs using this method, below is an example script; you can simply replace all instances of ' | ||
< | < | ||
- | #!/usr/bin/env bash | + | # |
+ | #SBATCH --job-name=rice_classifier | ||
+ | #SBATCH --output=rice_classifier.out | ||
#SBATCH --time=00: | #SBATCH --time=00: | ||
#SBATCH --nodes=1 | #SBATCH --nodes=1 | ||
#SBATCH --ntasks=1 | #SBATCH --ntasks=1 | ||
- | #SBATCH --cpus-per-task=1 | + | #SBATCH --cpus-per-task=16 |
- | #SBATCH --mem=2GB | + | #SBATCH --mem=4GB |
- | #SBATCH --partition=gpu | + | #SBATCH --partition=regular |
- | #SBATCH --gres=gpu: | + | |
- | # Change directory to local directory | + | mkdir $TMPDIR/ |
- | cd $TMPDIR | + | mkdir -p $TMPDIR/ |
# Extract tar file (which could be stored on /scratch) to local disk | # Extract tar file (which could be stored on /scratch) to local disk | ||
- | tar xvzf /scratch/$USER/path/to/compressed.tar.gz $TMPDIR | + | tar xzf /scratch/public/hb-courses/basic/inputfiles/ |
+ | cp / | ||
+ | cd $TMPDIR | ||
- | # Your code goes here | + | # Load modules | ||
- | # Load modules | + | module load matplotlib/ |
- | # Run scripts | + | module load TensorFlow/ |
- | # etc. | + | |
+ | # Run the training | ||
+ | python train.py 3 | ||
- | tar czvf / | + | mkdir -p / |
+ | tar czvf /scratch/$USER/rice_classifier/job_${SLURM_JOBID}/ | ||
</ | </ | ||
Line 101: | Line 110: | ||
</ | </ | ||
- | This will create a folder '' | + | This will create a folder '' |
Because of the way the command '' | Because of the way the command '' | ||
< | < | ||
- | python | + | python |
wait | wait | ||
</ | </ | ||
Line 115: | Line 124: | ||
< | < | ||
- | #!/usr/bin/env bash | + | # |
- | + | #SBATCH --job-name=rice_classifier | |
- | #SBATCH --time=00:10:00 | + | #SBATCH --output=rice_classifier.out |
+ | #SBATCH --time=00:30:00 | ||
#SBATCH --nodes=1 | #SBATCH --nodes=1 | ||
#SBATCH --ntasks=1 | #SBATCH --ntasks=1 | ||
- | #SBATCH --cpus-per-task=1 | + | #SBATCH --cpus-per-task=16 |
- | #SBATCH --mem=2GB | + | #SBATCH --mem=4GB |
- | #SBATCH --partition=gpu | + | #SBATCH --partition=regular |
- | #SBATCH --gres=gpu: | + | |
#SBATCH --signal=B: | #SBATCH --signal=B: | ||
- | # Change directory to local directory | + | mkdir $TMPDIR/dataset |
- | cd $TMPDIR | + | |
# Extract tar file (which could be stored on /scratch) to local disk | # Extract tar file (which could be stored on /scratch) to local disk | ||
- | tar xvzf /scratch/$USER/path/to/compressed.tar.gz $TMPDIR | + | tar xzf /scratch/public/hb-courses/basic/inputfiles/ |
+ | cp / | ||
+ | cd $TMPDIR | ||
- | trap 'mkdir / | + | # Compress and save the results if the time limit is close | ||
+ | trap ' | ||
# Load modules | # Load modules | ||
- | module load Python/3.10.8-GCCcore-12.2.0 | + | module load matplotlib/3.5.2-foss-2022a |
+ | module load TensorFlow/2.11.0-foss-2022a-CUDA-11.7.0 | ||
+ | |||
+ | # Create folders for final results | ||
+ | mkdir -p $TMPDIR/ | ||
# Run in the background and wait | # Run in the background and wait | ||
- | python | + | python |
wait | wait | ||
+ | |||
+ | mkdir -p / | ||
+ | tar czvf / | ||
</ | </ | ||