SLURM Job Example

This example shows how to run the SOL pipeline using an HPC SLURM submission script.

The job supports flexible step selection and can run individual pipeline steps or ranges.

`SOL.sh` — example SLURM submission script:
  1#!/bin/bash
  2#SBATCH --job-name=SOL_PIPELINE
  3#SBATCH --output=slurm/%x.out
  4#SBATCH --error=slurm/%x.err
  5#SBATCH --time=2-00:00:00
  6#SBATCH --partition=general
  7#SBATCH -n 1
  8#SBATCH --cpus-per-task=4
  9#SBATCH --mem=10G
 10#SBATCH --mail-type=ALL
 11#SBATCH --mail-user=ztcaterer@colorado.edu
 12
 13# ============================
 14#        USAGE
 15# sbatch sol_pipeline.sbatch        # runs steps 1–5 (default)
 16# sbatch sol_pipeline.sbatch 5      # runs only step 5
 17# sbatch sol_pipeline.sbatch 3-5    # runs steps 3 through 5
 18# ============================
 19
 20# Parse input argument
 21RANGE=${1:-1-9}  # Default: full pipeline if nothing provided
 22
 23# Convert argument to start and end numbers
 24if [[ "$RANGE" =~ ^([0-9]+)-([0-9]+)$ ]]; then
 25    START_STEP=${BASH_REMATCH[1]}
 26    END_STEP=${BASH_REMATCH[2]}
 27elif [[ "$RANGE" =~ ^[0-9]+$ ]]; then
 28    START_STEP=$RANGE
 29    END_STEP=$RANGE
 30elif [[ -z "$RANGE" ]]; then
 31    START_STEP=1
 32    END_STEP=5
 33else
 34    echo "❌ Invalid input: '$RANGE'"
 35    echo "Usage examples:"
 36    echo "  sbatch sol_pipeline.sbatch        # run all steps"
 37    echo "  sbatch sol_pipeline.sbatch 5      # run only step 5"
 38    echo "  sbatch sol_pipeline.sbatch 3-5    # run steps 3–5"
 39    exit 1
 40fi
 41
 42# Helper function to determine if a step should run
 43run_step() {
 44    local step_num=$1
 45    if (( step_num >= START_STEP && step_num <= END_STEP )); then
 46        return 0  # Run
 47    else
 48        return 1  # Skip
 49    fi
 50}
 51
 52echo "-------------------------------------------------"
 53echo " Running SOL pipeline steps $START_STEP through $END_STEP"
 54echo "-------------------------------------------------"
 55
 56# ------------------------------
 57# Environment Setup
 58# ------------------------------
 59module purge
 60module load anaconda
 61source $(conda info --base)/etc/profile.d/conda.sh 
 62eval "$(conda shell.bash hook)" 
 63conda activate lpa_vntr
 64
 65export WORK=/work/users/c/a/catererz
 66export USERS=/users/c/a/catererz
 67export HOME=/nas/longleaf/home/catererz
 68export EPI=/proj/epi
 69export SOL=$EPI/Genetic_Data_Center/SOL
 70export CRAM=$SOL/cram
 71export LPA=$HOME/epi/lpa
 72export GRID=$HOME/epi/GRiD
 73export SOFTWARE=$HOME/epi/software
 74
 75mkdir -p $WORK/grid
 76cd $WORK/grid
 77
 78REGIONS_FILE="$GRID/files/734_possible_coding_vntr_regions.IBD2R_gt_0.25.uniq.txt"
 79LPA_REGION=$(awk '$7=="LPA" {print $1,$2,$3}' "$REGIONS_FILE")
 80CHR=$(echo "$LPA_REGION" | awk '{print $1}')
 81START=$(echo "$LPA_REGION" | awk '{print $2}')
 82END=$(echo "$LPA_REGION" | awk '{print $3}')
 83
 84# ------------------------------
 85# Step 1: Ensure CRAM Indexes
 86# ------------------------------
 87if run_step 1; then
 88    echo "=== Step 1: Ensuring CRAM Indexes ==="
 89    for cram_file in $CRAM/*.cram; do
 90        if [ ! -f "${cram_file}.crai" ]; then
 91            echo "Index file for $cram_file not found. Creating index..." >> ensure_crai.log 2>&1
 92            grid ensure-crai --cram-file "$cram_file" >> ensure_crai.log 2>&1
 93        else
 94            echo "Index file for $cram_file already exists. Skipping..." >> ensure_crai.log 2>&1
 95        fi
 96    done
 97fi
 98
 99# ------------------------------
100# Step 2: Count Reads
101# ------------------------------
102if run_step 2; then
103    echo "=== Step 2: Counting Reads ==="
104    grid count-reads \
105        --cram-dir $CRAM \
106        --output-file count-reads.tsv \
107        --ref-fasta $GRID/files/hg38.fa \
108        --chrom chr$CHR \
109        --start $START \
110        --end $END \
111        --config $GRID/grid/config.yaml \
112        --threads 4 \
113        >> count_reads.log 2>&1
114fi
115
# ------------------------------
# Step 3: Run Mosdepth
# ------------------------------
# Windowed depth (--by 1000) over the LPA region for every CRAM;
# per-sample output lands in $WORK/grid/mosdepth.
# All expansions quoted (SC2086).
if run_step 3; then
    echo "=== Step 3: Running Mosdepth ==="
    module load mosdepth
    grid mosdepth \
        --cram-dir "$CRAM" \
        --output-file mosdepth.tsv \
        --ref-fasta "$GRID/files/hg38.fa" \
        --chrom "chr$CHR" \
        --start "$START" \
        --end "$END" \
        --region-name LPA \
        --work-dir "$WORK/grid/mosdepth" \
        --by 1000 \
        --fast \
        --threads 4 \
        >> mosdepth.log 2>&1
fi
136
# ------------------------------
# Step 4: Normalize Mosdepth
# ------------------------------
# Normalize the step-3 window depths using the repeat mask; produces
# normalized_mosdepth.txt.gz consumed by step 5.
# All expansions quoted (SC2086).
if run_step 4; then
    echo "=== Step 4: Normalizing Mosdepth ==="
    grid normalize-mosdepth \
        --mosdepth-dir "$WORK/grid/mosdepth" \
        --repeat-mask "$GRID/files/repeat_mask_list.hg38.ucsc_bed" \
        --chrom "chr$CHR" \
        --start "$START" \
        --end "$END" \
        --threads 4 \
        --min-depth 20 \
        --max-depth 100 \
        --top-frac 0.1 \
        --output-file normalized_mosdepth.txt.gz \
        >> normalize_mosdepth.log 2>&1
fi
155
# ------------------------------
# Step 5: Find Neighbors
# ------------------------------
# Nearest-neighbor search over the normalized depth matrix from step 4.
# NOTE(review): step 8 reads neighbors.zMax2.0.txt.gz while this step
# passes --output-file neighbors.txt — presumably grid derives the real
# file name from --zmax; confirm, otherwise step 8 will fail to find it.
if run_step 5; then
    echo "=== Step 5: Finding Neighbors ==="
    grid find-neighbors \
        --input-file normalized_mosdepth.txt.gz \
        --output-file neighbors.txt \
        --zmax 2.0 \
        --n-neighbors 500 \
        --sigma2-max 1000.0 \
        >> find_neighbors.log 2>&1
fi
169
# ------------------------------
# Step 6: Extract LPA Reference
# ------------------------------
# Extract the LPA reference sequence (lpa_reference.*) into $WORK/grid;
# step 7 reads lpa_reference.fasta from that directory.
# All expansions quoted (SC2086).
if run_step 6; then
    echo "=== Step 6: Extracting LPA Reference ==="
    grid extract-reference \
        --reference-fa "$GRID/files/hg38.fa" \
        --bed-file "$GRID/files/ref_hg38.bed" \
        --output-dir "$WORK/grid" \
        --output-prefix lpa_reference \
        >> extract_lpa_reference.log 2>&1
fi
182
# ------------------------------
# Step 7: Realign Reads to LPA
# ------------------------------
# Realign region reads against the step-6 LPA reference
# (lpa_reference.fasta in the current work dir); writes
# lpa_realigned.tsv for step 8. All expansions quoted (SC2086).
if run_step 7; then
    echo "=== Step 7: Realigning Reads to LPA ==="
    grid lpa-realign \
        --cram-dir "$CRAM" \
        --output-file lpa_realigned.tsv \
        --ref-fasta "$GRID/files/hg38.fa" \
        --lpa-ref-fasta lpa_reference.fasta \
        --positions-file "$GRID/files/hardcoded_positions.txt" \
        --genome-build hg38 \
        --chrom "chr$CHR" \
        --start "$START" \
        --end "$END" \
        --threads 4 \
        >> lpa_realign.log 2>&1
fi
201
202
# ------------------------------
# Step 8: Estimate LPA DIPCN
# ------------------------------
# Combine step-7 realignment counts with step-5 neighbors to estimate
# diploid copy number; writes lpa_dipcn.* files read by step 9.
# NOTE(review): --neighbor-file expects neighbors.zMax2.0.txt.gz but
# step 5 passed --output-file neighbors.txt — confirm the
# find-neighbors output naming actually matches.
if run_step 8; then
    echo "=== Step 8: Estimating LPA DIP CN ==="
    grid compute-dipcn \
        --count-file lpa_realigned.tsv \
        --neighbor-file neighbors.zMax2.0.txt.gz \
        --output-prefix lpa_dipcn \
        --n-neighbors 500 \
        >> compute_dipcn.log 2>&1
fi
215
# ------------------------------
# Step 9: Compute LPA KIV-2 CN
# ------------------------------
# Combine the step-8 exon 1A/1B diploid-CN outputs into the final
# KIV-2 copy-number table (lpa_kiv2_cn.txt).
if run_step 9; then
    echo "=== Step 9: Computing LPA KIV-2 CN ==="
    grid estimate-kiv \
        --exon1a lpa_dipcn.exon1A.dipCN.txt \
        --exon1b lpa_dipcn.exon1B.dipCN.txt \
        --output lpa_kiv2_cn.txt \
        --format txt \
        >> compute_kiv2_cn.log 2>&1
fi
228
229
# Report which slice of the pipeline ran. The original concatenated the
# two step numbers with no separator ("steps 19." for a default run).
echo "Pipeline complete for steps $START_STEP-$END_STEP."

Usage

sbatch SOL.sh        # run all steps (1–9)
sbatch SOL.sh 5      # run only step 5
sbatch SOL.sh 3-7    # run steps 3 through 7