Skip to content

Instantly share code, notes, and snippets.

@cmdcolin
Last active October 15, 2025 19:38
Show Gist options
  • Save cmdcolin/4f2ccf037b4c3315d6eb36b0a4ec123d to your computer and use it in GitHub Desktop.
Save cmdcolin/4f2ccf037b4c3315d6eb36b0a4ec123d to your computer and use it in GitHub Desktop.
prepare_sv.sh
#!/bin/bash
export OUT=/var/www/html/jbrowse2
sudo apt-get update
sudo apt-get install nodejs wget apache2 tabix samtools minimap2
sudo service apache2 start
## confirm node.js greater than or equal to v18 is installed
node --version
sudo npm install -g @jbrowse/cli
## confirm that the jbrowse CLI is installed
jbrowse --version
## "jbrowse create" downloads and unzips the latest version of JBrowse 2, and then we move it to a web directory
jbrowse create tmpdir
sudo mv tmpdir $OUT
## download and prepare the human genome assembly used by the C-GIAB project
curl https://ftp-trace.ncbi.nlm.nih.gov/ReferenceSamples/giab/release/references/GRCh38/GRCh38_GIABv3_no_alt_analysis_set_maskedGRC_decoys_MAP2K3_KMT2C_KCNJ18.fasta.gz > GRCh38_GIABv3.fa.gz
gunzip GRCh38_GIABv3.fa.gz
samtools faidx GRCh38_GIABv3.fa
jbrowse add-assembly GRCh38_GIABv3.fa --out $OUT --load copy
## load the C-GIAB benchmark SVs in VCF format
jbrowse add-track https://ftp-trace.ncbi.nlm.nih.gov/ReferenceSamples/giab/data_somatic/HG008/Liss_lab/analysis/NIST_HG008-T_somatic-stvar-CNV_DraftBenchmark_V0.4-20250714/GRCh38_HG008-T-V0.4_somatic-stvar_PASS.draftbenchmark.vcf.gz --out $OUT --category "Variant calls"
## load the C-GIAB benchmark CNVs from BED file format
(echo "#chr"$'\t'"start"$'\t'"end"$'\t'"total_copy_number"$'\t'"hap1_copy_number"$'\t'"hap2_copy_number"$'\t'"name" && curl https://ftp-trace.ncbi.nlm.nih.gov/ReferenceSamples/giab/data_somatic/HG008/Liss_lab/analysis/NIST_HG008-T_somatic-stvar-CNV_DraftBenchmark_V0.4-20250714/GRCh38_HG008-T-V0.4_somatic-CNV_PASS.draftbenchmark.calls.bed) > GRCh38_HG008-T-V0.4_somatic-CNV_PASS.draftbenchmark.calls.bed
jbrowse add-track GRCh38_HG008-T-V0.4_somatic-CNV_PASS.draftbenchmark.calls.bed --out $OUT --category "Variant calls" --load move
## convert remote BAM files to local CRAM files for faster processing
## note: these next two steps download over 200 gigabytes of data
samtools view -@8 https://ftp-trace.ncbi.nlm.nih.gov/ReferenceSamples/giab/data_somatic/HG008/Liss_lab/PacBio_Revio_20240125/HG008-N-P_PacBio-HiFi-Revio_20240125_35x_GRCh38-GIABv3.bam --write-index -o HG008-N-P_PacBio-HiFi-Revio_20240125_35x_GRCh38-GIABv3.cram -T GRCh38_GIABv3.fa
samtools view -@8 https://ftp-trace.ncbi.nlm.nih.gov/ReferenceSamples/giab/data_somatic/HG008/Liss_lab/HG008-T_PacBio-HiFi-Revio_20240125_116x_GRCh38-GIABv3.bam --write-index -o HG008-T_PacBio-HiFi-Revio_20240125_116x_GRCh38-GIABv3.cram -T GRCh38_GIABv3.fa
## download megadepth executable
wget https://github.com/ChristopherWilks/megadepth/releases/download/1.2.0/megadepth
chmod +x megadepth
## calculate read coverage for each CRAM file, and then add to jbrowse config
for i in *.cram; do
./megadepth $i --bigwig
jbrowse add-track $i --out $OUT --category "Reads" --load move
jbrowse add-track $i.all.bw --out $OUT --category "Coverage" --load move
done;
## download the "phased assembly" of HG008T (with 2 haplotypes)
curl https://ftp-trace.ncbi.nlm.nih.gov/ReferenceSamples/giab/data_somatic/HG008/Liss_lab/analysis/Verkko_assemblies_05162024/HG008T/HG008T_verkko_v2.2.1_herro_corrected/PBhifi+20kbBCMhifi_UL_ONTq26_herro/assembly.haplotype1.fasta > HG008T.hap1.fa
curl https://ftp-trace.ncbi.nlm.nih.gov/ReferenceSamples/giab/data_somatic/HG008/Liss_lab/analysis/Verkko_assemblies_05162024/HG008T/HG008T_verkko_v2.2.1_herro_corrected/PBhifi+20kbBCMhifi_UL_ONTq26_herro/assembly.haplotype2.fasta > HG008T.hap2.fa
## load the phased assembly of HG008T into JBrowse 2
samtools faidx HG008T.hap1.fa
samtools faidx HG008T.hap2.fa
jbrowse add-assembly HG008T.hap1.fa --load copy --out $OUT
jbrowse add-assembly HG008T.hap2.fa --load copy --out $OUT
## create minimap2 alignment of both HG008T haplotypes against the reference
## note: these commands take approximately 20 minutes each
minimap2 -t8 -cx asm5 GRCh38_GIABv3.fa HG008T.hap1.fa > HG008T.hap1.paf
minimap2 -t8 -cx asm5 GRCh38_GIABv3.fa HG008T.hap2.fa > HG008T.hap2.paf
## add the minimap2 alignment of hap1 and hap 2 vs GRCh38 as "synteny tracks"
jbrowse add-track HG008T.hap1.paf -a HG008T.hap1,GRCh38_GIABv3 --out $OUT --load copy
jbrowse add-track HG008T.hap2.paf -a HG008T.hap2,GRCh38_GIABv3 --out $OUT --load copy
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment