Created
June 10, 2017 04:53
-
-
Save claczny/505e5c78132b29c198b03b784d19e141 to your computer and use it in GitHub Desktop.
Makefile for downloading full-length amplicon sequencing MinION data from Benitez-Paez & Sanz: http://biorxiv.org/content/early/2017/06/08/117143
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Run every recipe line with bash rather than the default /bin/sh.
SHELL := /bin/bash

# If a recipe fails, delete the target it was building so that a truncated
# download or a half-written FASTA file is never mistaken for an up-to-date
# result on the next run.
.DELETE_ON_ERROR:

# Directory that receives the downloaded and unpacked raw data.
DDIR := orig_data
# Directory that receives the derived FASTA files.
RDIR := results

# Command prefix that sources the virtualenv in ./venv and then runs poretools.
# The path is relative, so make must be invoked from the directory that
# contains venv/.
PORETOOLS_BIN := . venv/bin/activate; poretools
# Command prefix that runs `ml vizbins_little_helpers` and then pullseq.
# NOTE(review): `ml` is presumably an environment-module loader provided by the
# host system -- confirm it is available in non-interactive bash.
PULLSEQ_BIN := ml vizbins_little_helpers; pullseq

# Length threshold passed to `pullseq -m`; override with `make MIN_LENGTH=...`.
MIN_LENGTH ?= 1000
##### | |
# BEAUTY TARGETS | |
##### | |
# Convenience aliases for the pipeline stages. None of them names a real file,
# so all of them are declared phony; otherwise a stray file or directory called
# e.g. `download_data` would silently turn the alias into a no-op.
.PHONY: all setup_env download_data unzip_data convert_to_fasta size_select_fasta

# NOTE(review): `all` has no prerequisites, so a bare `make` builds nothing.
# Confirm whether it should depend on one of the stages below.
all:

# Create the virtualenv and install poretools into it.
setup_env: venv/bin/poretools

# Fetch the R9.4 read archive.
download_data: $(DDIR)/reads_R9.4.tar.gz

# Unpack the read archive.
unzip_data: $(DDIR)/reads_R9.4/unzip.done

# Convert the unpacked reads to FASTA.
convert_to_fasta: $(RDIR)/read_R9.4/reads.fasta

# Filter the FASTA file with the MIN_LENGTH threshold.
size_select_fasta: $(RDIR)/read_R9.4/reads.ge_$(MIN_LENGTH)nt.fasta
##### | |
# ACTUAL TARGETS | |
##### | |
# Create a Python 2 virtualenv in ./venv. The activate script it produces is
# the file this rule is keyed on.
venv/bin/activate:
	@echo "##### SET-UP VIRTUALENV #####" && date
	virtualenv -p python2 venv
	@date && echo "##########" && echo
# Install poretools into the virtualenv.
# The activate script is sourced in the same shell invocation as pip, because
# each recipe line runs in its own shell. The two commands are chained with
# `&&` so that pip never runs against the system Python if activation fails.
venv/bin/poretools: venv/bin/activate
	@echo "##### SET-UP PORETOOLS #####"
	@date
	. $< && pip install poretools
	@date
	@echo "##########"
	@echo
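# Disabled: download rule for the other read archive (reads.tar.gz). According
# to the author's note it is left commented out because of the archive's size
# (reportedly about 34 GB), and those reads are based on the older R9 chemistry.
# $(DDIR)/reads.tar.gz:
#	@echo "##### DOWNLOADING RAW DATA #####"
#	@date
#	mkdir -p $(dir $@)
#	wget ftp://ftp.sra.ebi.ac.uk/vol1/ERA695/ERA695631/oxfordnanopore_native/reads.tar.gz -O $@
#	@date
#	@echo "##########"
#	@echo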
# Download the R9.4 read archive from the EBI FTP server.
# wget writes to a temporary `.part` file that is renamed only after a
# successful transfer, so an interrupted or failed download can never leave a
# truncated archive under the real target name.
$(DDIR)/reads_R9.4.tar.gz:
	@echo "##### DOWNLOADING RAW DATA #####"
	@date
	mkdir -p $(@D)
	wget ftp://ftp.sra.ebi.ac.uk/vol1/ERA879/ERA879700/oxfordnanopore_native/reads_R9.4.tar.gz -O $@.part
	mv -f $@.part $@
	@date
	@echo "##########"
	@echo
# Unpack the read archive into its own directory. The empty stamp file
# `unzip.done` is the target and is touched only after tar succeeds.
$(DDIR)/reads_R9.4/unzip.done: $(DDIR)/reads_R9.4.tar.gz
	@echo "##### UNZIPPING RAW DATA #####" && date
	mkdir -p $(dir $@)
	tar -xzf $< -C $(dir $@)
	touch $@
	@date && echo "##########" && echo
# Convert the reads under the unpacked `pass` directory to one FASTA file.
# venv/bin/poretools is an order-only prerequisite: the recipe needs the tool
# installed, but reinstalling it should not force a re-conversion.
# Output goes to a temporary file that is renamed on success, so a failed
# conversion never leaves a partial FASTA under the target name.
$(RDIR)/read_R9.4/reads.fasta: $(DDIR)/reads_R9.4/unzip.done | venv/bin/poretools
	@echo "##### CONVERTING FAST5 TO FASTA #####"
	@date
	mkdir -p $(@D)
	$(PORETOOLS_BIN) fasta $(<D)/pass --type all > $@.tmp
	mv -f $@.tmp $@
	@date
	@echo "##########"
	@echo
# Filter any FASTA file with pullseq, passing MIN_LENGTH as its `-m` threshold;
# the threshold is encoded in the output file name.
# Output goes to a temporary file that is renamed on success, so a failed run
# never leaves a truncated FASTA that looks up to date.
%.ge_$(MIN_LENGTH)nt.fasta: %.fasta
	$(PULLSEQ_BIN) -i $< -m $(MIN_LENGTH) > $@.tmp
	mv -f $@.tmp $@
##### | |
# CLEAN-UP | |
##### | |
# Placeholder clean-up target: it only prints a reminder and removes nothing.
# Declared phony so that a file named `clean` cannot mask it.
.PHONY: clean
clean:
	@echo "TODO: clean"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
reads.tar.gz
is commented out due to the large file size of 34 GB, IIRC. It might be interesting to look into those reads as well, yet they are based on the (older) R9 chemistry, AFAIK.