Last active
October 15, 2025 19:38
-
-
Save cmdcolin/4f2ccf037b4c3315d6eb36b0a4ec123d to your computer and use it in GitHub Desktop.
prepare_sv.sh
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/bin/bash | |
| export OUT=/var/www/html/jbrowse2 | |
| sudo apt-get update | |
| sudo apt-get install nodejs wget apache2 tabix samtools minimap2 | |
| sudo service apache2 start | |
| ## confirm node.js greater than or equal to v18 is installed | |
| node --version | |
| sudo npm install -g @jbrowse/cli | |
| ## confirm that the jbrowse CLI is installed | |
| jbrowse --version | |
| ## "jbrowse create" downloads and unzips the latest version of JBrowse 2, and then we move it to a web directory | |
| jbrowse create tmpdir | |
| sudo mv tmpdir $OUT | |
| ## download and prepare the human genome assembly used by the C-GIAB project | |
| curl https://ftp-trace.ncbi.nlm.nih.gov/ReferenceSamples/giab/release/references/GRCh38/GRCh38_GIABv3_no_alt_analysis_set_maskedGRC_decoys_MAP2K3_KMT2C_KCNJ18.fasta.gz > GRCh38_GIABv3.fa.gz | |
| gunzip GRCh38_GIABv3.fa.gz | |
| samtools faidx GRCh38_GIABv3.fa | |
| jbrowse add-assembly GRCh38_GIABv3.fa --out $OUT --load copy | |
| ## load the C-GIAB benchmark SVs in VCF format | |
| jbrowse add-track https://ftp-trace.ncbi.nlm.nih.gov/ReferenceSamples/giab/data_somatic/HG008/Liss_lab/analysis/NIST_HG008-T_somatic-stvar-CNV_DraftBenchmark_V0.4-20250714/GRCh38_HG008-T-V0.4_somatic-stvar_PASS.draftbenchmark.vcf.gz --out $OUT --category "Variant calls" | |
| ## load the C-GIAB benchmark CNVs from BED file format | |
| (echo "#chr"$'\t'"start"$'\t'"end"$'\t'"total_copy_number"$'\t'"hap1_copy_number"$'\t'"hap2_copy_number"$'\t'"name" && curl https://ftp-trace.ncbi.nlm.nih.gov/ReferenceSamples/giab/data_somatic/HG008/Liss_lab/analysis/NIST_HG008-T_somatic-stvar-CNV_DraftBenchmark_V0.4-20250714/GRCh38_HG008-T-V0.4_somatic-CNV_PASS.draftbenchmark.calls.bed) > GRCh38_HG008-T-V0.4_somatic-CNV_PASS.draftbenchmark.calls.bed | |
| jbrowse add-track GRCh38_HG008-T-V0.4_somatic-CNV_PASS.draftbenchmark.calls.bed --out $OUT --category "Variant calls" --load move | |
| ## convert remote BAM files to local CRAM files for faster processing | |
| ## note: these next two steps download over 200 gigabytes of data | |
| samtools view -@8 https://ftp-trace.ncbi.nlm.nih.gov/ReferenceSamples/giab/data_somatic/HG008/Liss_lab/PacBio_Revio_20240125/HG008-N-P_PacBio-HiFi-Revio_20240125_35x_GRCh38-GIABv3.bam --write-index -o HG008-N-P_PacBio-HiFi-Revio_20240125_35x_GRCh38-GIABv3.cram -T GRCh38_GIABv3.fa | |
| samtools view -@8 https://ftp-trace.ncbi.nlm.nih.gov/ReferenceSamples/giab/data_somatic/HG008/Liss_lab/HG008-T_PacBio-HiFi-Revio_20240125_116x_GRCh38-GIABv3.bam --write-index -o HG008-T_PacBio-HiFi-Revio_20240125_116x_GRCh38-GIABv3.cram -T GRCh38_GIABv3.fa | |
| ## download megadepth executable | |
| wget https://github.com/ChristopherWilks/megadepth/releases/download/1.2.0/megadepth | |
| chmod +x megadepth | |
| ## calculate read coverage for each CRAM file, and then add to jbrowse config | |
| for i in *.cram; do | |
| ./megadepth $i --bigwig | |
| jbrowse add-track $i --out $OUT --category "Reads" --load move | |
| jbrowse add-track $i.all.bw --out $OUT --category "Coverage" --load move | |
| done; | |
| ## download the "phased assembly" of HG008T (with 2 haplotypes) | |
| curl https://ftp-trace.ncbi.nlm.nih.gov/ReferenceSamples/giab/data_somatic/HG008/Liss_lab/analysis/Verkko_assemblies_05162024/HG008T/HG008T_verkko_v2.2.1_herro_corrected/PBhifi+20kbBCMhifi_UL_ONTq26_herro/assembly.haplotype1.fasta > HG008T.hap1.fa | |
| curl https://ftp-trace.ncbi.nlm.nih.gov/ReferenceSamples/giab/data_somatic/HG008/Liss_lab/analysis/Verkko_assemblies_05162024/HG008T/HG008T_verkko_v2.2.1_herro_corrected/PBhifi+20kbBCMhifi_UL_ONTq26_herro/assembly.haplotype2.fasta > HG008T.hap2.fa | |
| ## load the phased assembly of HG008T into JBrowse 2 | |
| samtools faidx HG008T.hap1.fa | |
| samtools faidx HG008T.hap2.fa | |
| jbrowse add-assembly HG008T.hap1.fa --load copy --out $OUT | |
| jbrowse add-assembly HG008T.hap2.fa --load copy --out $OUT | |
| ## create minimap2 alignment of both HG008T haplotypes against the reference | |
| ## note: these commands take approximately 20 minutes each | |
| minimap2 -t8 -cx asm5 GRCh38_GIABv3.fa HG008T.hap1.fa > HG008T.hap1.paf | |
| minimap2 -t8 -cx asm5 GRCh38_GIABv3.fa HG008T.hap2.fa > HG008T.hap2.paf | |
| ## add the minimap2 alignment of hap1 and hap 2 vs GRCh38 as "synteny tracks" | |
| jbrowse add-track HG008T.hap1.paf -a HG008T.hap1,GRCh38_GIABv3 --out $OUT --load copy | |
| jbrowse add-track HG008T.hap2.paf -a HG008T.hap2,GRCh38_GIABv3 --out $OUT --load copy | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment