SLURM Job Example¶
This example shows how to run the SOL pipeline using an HPC SLURM submission script.
The job supports flexible step selection and can run individual pipeline steps or ranges.
“SOL.sh - Example SLURM submission script”¶
#!/bin/bash
#SBATCH --job-name=SOL_PIPELINE
#SBATCH --output=slurm/%x.out
#SBATCH --error=slurm/%x.err
#SBATCH --time=2-00:00:00
#SBATCH --partition=general
#SBATCH -n 1
#SBATCH --cpus-per-task=4
#SBATCH --mem=10G
#SBATCH --mail-type=ALL
#SBATCH --mail-user=ztcaterer@colorado.edu

# ============================
# USAGE
#   sbatch SOL.sh            # runs steps 1-9 (default)
#   sbatch SOL.sh 5          # runs only step 5
#   sbatch SOL.sh 3-5        # runs steps 3 through 5
# ============================

# Parse input argument.
# ${1:-...} substitutes for an unset OR empty $1, so RANGE is never empty below.
RANGE=${1:-1-9}  # Default: full pipeline (steps 1-9) if nothing provided

# Convert the argument to inclusive START_STEP / END_STEP numbers.
if [[ "$RANGE" =~ ^([0-9]+)-([0-9]+)$ ]]; then
    START_STEP=${BASH_REMATCH[1]}
    END_STEP=${BASH_REMATCH[2]}
elif [[ "$RANGE" =~ ^[0-9]+$ ]]; then
    START_STEP=$RANGE
    END_STEP=$RANGE
else
    # Diagnostics go to stderr so they are not mistaken for pipeline output.
    echo "❌ Invalid input: '$RANGE'" >&2
    echo "Usage examples:" >&2
    echo "  sbatch SOL.sh        # run all steps (1-9)" >&2
    echo "  sbatch SOL.sh 5      # run only step 5" >&2
    echo "  sbatch SOL.sh 3-5    # run steps 3 through 5" >&2
    exit 1
fi
41
# Decide whether a pipeline step falls inside the user-selected range.
# Arguments: $1 - step number to test
# Returns:   0 (run) when START_STEP <= $1 <= END_STEP, 1 (skip) otherwise
run_step() {
    local candidate=$1
    (( candidate >= START_STEP )) && (( candidate <= END_STEP ))
}
51
echo "-------------------------------------------------"
echo " Running SOL pipeline steps $START_STEP through $END_STEP"
echo "-------------------------------------------------"

# ------------------------------
# Environment Setup
# ------------------------------
module purge
module load anaconda
source "$(conda info --base)/etc/profile.d/conda.sh"
eval "$(conda shell.bash hook)"
conda activate lpa_vntr

export WORK=/work/users/c/a/catererz
export USERS=/users/c/a/catererz
# NOTE(review): overriding HOME changes where conda/tools look for config — confirm intentional.
export HOME=/nas/longleaf/home/catererz
export EPI=/proj/epi
export SOL=$EPI/Genetic_Data_Center/SOL
export CRAM=$SOL/cram
export LPA=$HOME/epi/lpa
export GRID=$HOME/epi/GRiD
export SOFTWARE=$HOME/epi/software

# All pipeline outputs land in $WORK/grid; abort if we cannot get there.
mkdir -p "$WORK/grid"
cd "$WORK/grid" || { echo "Cannot cd to $WORK/grid" >&2; exit 1; }

# Pull the LPA region (chrom, start, end) from the candidate-VNTR table.
REGIONS_FILE="$GRID/files/734_possible_coding_vntr_regions.IBD2R_gt_0.25.uniq.txt"
[ -f "$REGIONS_FILE" ] || { echo "Missing regions file: $REGIONS_FILE" >&2; exit 1; }
LPA_REGION=$(awk '$7=="LPA" {print $1,$2,$3}' "$REGIONS_FILE")
[ -n "$LPA_REGION" ] || { echo "No LPA row found in $REGIONS_FILE" >&2; exit 1; }
# Single parse instead of three awk sub-shells.
read -r CHR START END <<<"$LPA_REGION"
83
# ------------------------------
# Step 1: Ensure CRAM Indexes
# ------------------------------
# Create a .crai index for any CRAM that lacks one; progress goes to ensure_crai.log.
if run_step 1; then
    echo "=== Step 1: Ensuring CRAM Indexes ==="
    for cram_file in "$CRAM"/*.cram; do
        # Guard against an empty glob (otherwise the literal '*.cram' is processed).
        [ -e "$cram_file" ] || continue
        if [ ! -f "${cram_file}.crai" ]; then
            echo "Index file for $cram_file not found. Creating index..." >> ensure_crai.log 2>&1
            grid ensure-crai --cram-file "$cram_file" >> ensure_crai.log 2>&1
        else
            echo "Index file for $cram_file already exists. Skipping..." >> ensure_crai.log 2>&1
        fi
    done
fi
98
# ------------------------------
# Step 2: Count Reads
# ------------------------------
# Count reads over the LPA region; output count-reads.tsv, log to count_reads.log.
if run_step 2; then
    echo "=== Step 2: Counting Reads ==="
    grid count-reads \
        --cram-dir "$CRAM" \
        --output-file count-reads.tsv \
        --ref-fasta "$GRID/files/hg38.fa" \
        --chrom "chr$CHR" \
        --start "$START" \
        --end "$END" \
        --config "$GRID/grid/config.yaml" \
        --threads 4 \
        >> count_reads.log 2>&1
fi
115
# ------------------------------
# Step 3: Run Mosdepth
# ------------------------------
# Per-1000bp depth over the LPA region via mosdepth; log to mosdepth.log.
if run_step 3; then
    echo "=== Step 3: Running Mosdepth ==="
    module load mosdepth
    grid mosdepth \
        --cram-dir "$CRAM" \
        --output-file mosdepth.tsv \
        --ref-fasta "$GRID/files/hg38.fa" \
        --chrom "chr$CHR" \
        --start "$START" \
        --end "$END" \
        --region-name LPA \
        --work-dir "$WORK/grid/mosdepth" \
        --by 1000 \
        --fast \
        --threads 4 \
        >> mosdepth.log 2>&1
fi
136
# ------------------------------
# Step 4: Normalize Mosdepth
# ------------------------------
# Normalize depth against the repeat mask; output normalized_mosdepth.txt.gz.
if run_step 4; then
    echo "=== Step 4: Normalizing Mosdepth ==="
    grid normalize-mosdepth \
        --mosdepth-dir "$WORK/grid/mosdepth" \
        --repeat-mask "$GRID/files/repeat_mask_list.hg38.ucsc_bed" \
        --chrom "chr$CHR" \
        --start "$START" \
        --end "$END" \
        --threads 4 \
        --min-depth 20 \
        --max-depth 100 \
        --top-frac 0.1 \
        --output-file normalized_mosdepth.txt.gz \
        >> normalize_mosdepth.log 2>&1
fi
155
# ------------------------------
# Step 5: Find Neighbors
# ------------------------------
# Nearest-neighbor search on the normalized depth matrix (zmax 2.0, 500 neighbors);
# log to find_neighbors.log.
if run_step 5; then
    echo "=== Step 5: Finding Neighbors ==="
    grid find-neighbors \
        --input-file normalized_mosdepth.txt.gz \
        --output-file neighbors.txt \
        --zmax 2.0 \
        --n-neighbors 500 \
        --sigma2-max 1000.0 \
        >> find_neighbors.log 2>&1
fi
169
# ------------------------------
# Step 6: Extract LPA Reference
# ------------------------------
# Extract the LPA reference sequence (lpa_reference.*) from hg38 using the BED file.
if run_step 6; then
    echo "=== Step 6: Extracting LPA Reference ==="
    grid extract-reference \
        --reference-fa "$GRID/files/hg38.fa" \
        --bed-file "$GRID/files/ref_hg38.bed" \
        --output-dir "$WORK/grid" \
        --output-prefix lpa_reference \
        >> extract_lpa_reference.log 2>&1
fi
182
# ------------------------------
# Step 7: Realign Reads to LPA
# ------------------------------
# Realign region reads against the extracted LPA reference; output lpa_realigned.tsv.
if run_step 7; then
    echo "=== Step 7: Realigning Reads to LPA ==="
    grid lpa-realign \
        --cram-dir "$CRAM" \
        --output-file lpa_realigned.tsv \
        --ref-fasta "$GRID/files/hg38.fa" \
        --lpa-ref-fasta lpa_reference.fasta \
        --positions-file "$GRID/files/hardcoded_positions.txt" \
        --genome-build hg38 \
        --chrom "chr$CHR" \
        --start "$START" \
        --end "$END" \
        --threads 4 \
        >> lpa_realign.log 2>&1
fi
201
202
# ------------------------------
# Step 8: Estimate LPA DIPCN
# ------------------------------
# Estimate diploid copy number from realigned counts plus the neighbor set.
# NOTE(review): Step 5 passes --output-file neighbors.txt but this reads
# neighbors.zMax2.0.txt.gz — presumably `grid find-neighbors` embeds the zmax
# value and compression in the actual filename; confirm against the tool.
if run_step 8; then
    echo "=== Step 8: Estimating LPA DIP CN ==="
    grid compute-dipcn \
        --count-file lpa_realigned.tsv \
        --neighbor-file neighbors.zMax2.0.txt.gz \
        --output-prefix lpa_dipcn \
        --n-neighbors 500 \
        >> compute_dipcn.log 2>&1
fi
215
# ------------------------------
# Step 9: Compute LPA KIV-2 CN
# ------------------------------
# Combine the exon 1A/1B diploid CN estimates into a KIV-2 copy-number table.
if run_step 9; then
    echo "=== Step 9: Computing LPA KIV-2 CN ==="
    grid estimate-kiv \
        --exon1a lpa_dipcn.exon1A.dipCN.txt \
        --exon1b lpa_dipcn.exon1B.dipCN.txt \
        --output lpa_kiv2_cn.txt \
        --format txt \
        >> compute_kiv2_cn.log 2>&1
fi
228
229
# Final status banner for the requested step range.
printf 'Pipeline complete for steps %s–%s.\n' "$START_STEP" "$END_STEP"
Usage¶
sbatch SOL.sh # run all steps (1–9)
sbatch SOL.sh 5 # run only step 5
sbatch SOL.sh 3-7 # run steps 3 through 7