claczny · September 5, 2016 08:22 · claczny · Sep 5, 2016
diff --git a/compute_coverage.mk b/compute_coverage.mk
 # Recipes are run with bash instead of make's default shell.
 SHELL := /bin/bash

 # RDIR receives every generated file; DDIR holds the input BAM files.
 # Both may be overridden from the command line or the environment.
 RDIR ?= results
 DDIR ?= data

 # Sample to process; override with `make SAMPLE=<name>`.
 SAMPLE ?= <YOUR_SAMPLE>
 # Outputs live in a per-sample sub-directory: $(RDIR)/<sample>/<sample>.<ext>
 DOUBLED_SAMPLE = $(SAMPLE)/$(SAMPLE)

 #####
 # CONVENIENCE TARGETS (short aliases for the real output files)
 #####

 # Short command-line aliases for the generated files. None of them names a
 # real file, so all are declared phony. The statistics helpers and `clean`
 # are listed as well, so a stray file with one of those names cannot mask them.
 .PHONY: all extract_fastq bam2sam sort_bam index_bam genomecov \
         get_unique_seq_count get_mapq_distribution get_cigar_distribution \
         clean

 # Builds the FASTQ extraction plus the per-sequence average coverage.
 all: extract_fastq genomecov

 # One alias per pipeline product; each maps to $(RDIR)/<sample>/<sample>.<ext>.
 extract_fastq: $(RDIR)/$(DOUBLED_SAMPLE).fq
 bam2sam: $(RDIR)/$(DOUBLED_SAMPLE).sam
 sort_bam: $(RDIR)/$(DOUBLED_SAMPLE).srtd.bam
 index_bam: $(RDIR)/$(DOUBLED_SAMPLE).srtd.bai
 genomecov: $(RDIR)/$(DOUBLED_SAMPLE).srtd.cov_avg.txt

 # SOME BASIC STATISTICS
 # Each helper only prints to stdout and creates no file. All of them read the
 # SAM text file $(RDIR)/<sample>/<sample>.sam (the `bam2sam` product).

 # Number of distinct values found in column 1 of the SAM file.
 get_unique_seq_count: $(RDIR)/$(DOUBLED_SAMPLE).sam
	awk '{print $$1}' $< | sort | uniq -c | wc -l

 # Occurrence count of every distinct value in tab-separated column 5.
 # The prerequisite used to read "$(RDIR)/ $(DOUBLED_SAMPLE).sam"; the stray
 # space split it into two bogus prerequisites, so the SAM file was never found.
 get_mapq_distribution: $(RDIR)/$(DOUBLED_SAMPLE).sam
	awk -F"\t" '{print $$5}' $< | sort | uniq -c

 # Occurrence count of every distinct value in tab-separated column 6.
 get_cigar_distribution: $(RDIR)/$(DOUBLED_SAMPLE).sam
	awk -F"\t" '{print $$6}' $< | sort | uniq -c

 #####
 # ACTUAL TARGETS
 #####
 # With no prerequisites, every target is treated as secondary: intermediate
 # files of the chain are kept instead of being deleted automatically.
 .SECONDARY:

 # Remove a target whose recipe failed. Several recipes write through `>`
 # redirection, which would otherwise leave a truncated file that looks
 # up to date on the next run.
 .DELETE_ON_ERROR:

 # Needed for the $$(notdir $$*) prerequisites used by the pattern rules below;
 # it must appear before the first rule that relies on it.
 .SECONDEXPANSION:
 # FASTQ extraction: $(RDIR)/<dir>/<name>.fq is built from $(DDIR)/<name>.bam.
 # Second expansion lets the prerequisite drop the directory part of the stem.
 # The output directory is created first; `date` brackets the timed step.
 $(RDIR)/%.fq: $(DDIR)/$$(notdir $$*).bam
	mkdir -p $(dir $@)
	@date
	time bedtools bamtofastq -i $< -fq $@
	@date

 # SAM text dump: $(RDIR)/<dir>/<name>.sam is built from $(DDIR)/<name>.bam.
 # The output directory is created first (as the .fq rule does), so that
 # `make bam2sam` works on a fresh tree without another rule having run.
 $(RDIR)/%.sam: $(DDIR)/$$(notdir $$*).bam
	mkdir -p $(dir $@)
	@date
	time samtools view $< > $@
	@date

 # Sorted copy of the input BAM (samtools sort with its default order).
 # The output directory is created first (as the .fq rule does), so that
 # `make sort_bam` or a parallel `make -j all` cannot hit a missing directory.
 # `$(@:.bam=)` is the target without its .bam suffix, i.e. an output *prefix*:
 # this is the legacy `samtools sort <in.bam> <out.prefix>` calling form.
 # NOTE(review): newer samtools releases expect `samtools sort -o <out.bam> <in.bam>`;
 # confirm which version this pipeline targets before changing the call.
 $(RDIR)/%.srtd.bam: $(DDIR)/$$(notdir $$*).bam
	mkdir -p $(dir $@)
	@date
	time samtools sort $< $(@:.bam=)
	@date

 # BAM index: <name>.bai is written next to <name>.bam, with the output file
 # name passed explicitly to samtools. `date` brackets the timed step.
 %.bai: %.bam
	@date
	time samtools index $< $@
	@date

 # Coverage histogram of a sorted BAM. The index is listed as a prerequisite
 # so that it gets built, but only the BAM itself is handed to bedtools.
 %.srtd.cov_hist.txt: %.srtd.bam %.srtd.bai
	@date
	bedtools genomecov -ibam $< > $@
	@date

 # Collapse the coverage histogram into one "<name> <sum>" line per value of
 # column 1, where <sum> adds up $2*$5 over all consecutive rows sharing it.
 # Per the bedtools genomecov docs the columns are presumably: $1 = sequence
 # name, $2 = depth, $5 = fraction of the sequence at that depth, which makes
 # the sum the mean depth of that sequence -- TODO confirm against the docs.
 # The first `print` fires before anything was accumulated and emits an empty
 # record; `tail -n +2` drops that line.
 %.cov_avg.txt: %.cov_hist.txt
	@date
	awk -F"\t" 'BEGIN { prev = "" } \
	{ \
		name = $$1; \
		if (name == prev) { \
			sum += $$2 * $$5; \
		} else { \
			print prev, sum; \
			sum = $$2 * $$5; \
			prev = name; \
		} \
	} \
	END { print prev, sum }' $< | tail -n +2 > $@
	@date

 #####
 # CLEAN-UP
 #####
 # Placeholder: nothing is removed yet, the recipe only prints a reminder.
 clean: ; echo "TODO: clean"
	# Recipes are run with bash instead of make's default shell.
	SHELL := /bin/bash

	# RDIR receives every generated file; DDIR holds the input BAM files.
	# Both may be overridden from the command line or the environment.
	RDIR ?= results
	DDIR ?= data

	# Sample to process; override with `make SAMPLE=<name>`.
	SAMPLE ?= <YOUR_SAMPLE>
	# Outputs live in a per-sample sub-directory: $(RDIR)/<sample>/<sample>.<ext>
	DOUBLED_SAMPLE = $(SAMPLE)/$(SAMPLE)

	#####
	# CONVENIENCE TARGETS (short aliases for the real output files)
	#####

	# Short command-line aliases for the generated files. None of them names a
	# real file, so all are declared phony. The statistics helpers and `clean`
	# are listed as well, so a stray file with one of those names cannot mask them.
	.PHONY: all extract_fastq bam2sam sort_bam index_bam genomecov \
	        get_unique_seq_count get_mapq_distribution get_cigar_distribution \
	        clean

	# Builds the FASTQ extraction plus the per-sequence average coverage.
	all: extract_fastq genomecov

	# One alias per pipeline product; each maps to $(RDIR)/<sample>/<sample>.<ext>.
	extract_fastq: $(RDIR)/$(DOUBLED_SAMPLE).fq
	bam2sam: $(RDIR)/$(DOUBLED_SAMPLE).sam
	sort_bam: $(RDIR)/$(DOUBLED_SAMPLE).srtd.bam
	index_bam: $(RDIR)/$(DOUBLED_SAMPLE).srtd.bai
	genomecov: $(RDIR)/$(DOUBLED_SAMPLE).srtd.cov_avg.txt

	# SOME BASIC STATISTICS
	# Each helper only prints to stdout and creates no file. All of them read the
	# SAM text file $(RDIR)/<sample>/<sample>.sam (the `bam2sam` product).
	# Pipes are written as plain `|`: the escaped `\|` form would hand a literal
	# "|" argument to awk instead of building a pipeline.

	# Number of distinct values found in column 1 of the SAM file.
	get_unique_seq_count: $(RDIR)/$(DOUBLED_SAMPLE).sam
		awk '{print $$1}' $< | sort | uniq -c | wc -l

	# Occurrence count of every distinct value in tab-separated column 5.
	# The prerequisite used to read "$(RDIR)/ $(DOUBLED_SAMPLE).sam"; the stray
	# space split it into two bogus prerequisites, so the SAM file was never found.
	get_mapq_distribution: $(RDIR)/$(DOUBLED_SAMPLE).sam
		awk -F"\t" '{print $$5}' $< | sort | uniq -c

	# Occurrence count of every distinct value in tab-separated column 6.
	get_cigar_distribution: $(RDIR)/$(DOUBLED_SAMPLE).sam
		awk -F"\t" '{print $$6}' $< | sort | uniq -c

	#####
	# ACTUAL TARGETS
	#####
	# With no prerequisites, every target is treated as secondary: intermediate
	# files of the chain are kept instead of being deleted automatically.
	.SECONDARY:

	# Remove a target whose recipe failed. Several recipes write through `>`
	# redirection, which would otherwise leave a truncated file that looks
	# up to date on the next run.
	.DELETE_ON_ERROR:

	# Needed for the $$(notdir $$*) prerequisites used by the pattern rules below;
	# it must appear before the first rule that relies on it.
	.SECONDEXPANSION:
	# FASTQ extraction: $(RDIR)/<dir>/<name>.fq is built from $(DDIR)/<name>.bam.
	# Second expansion lets the prerequisite drop the directory part of the stem.
	# The output directory is created first; `date` brackets the timed step.
	# Recipe lines are indented one level deeper than the rule header so the
	# rule structure stays recognisable.
	$(RDIR)/%.fq: $(DDIR)/$$(notdir $$*).bam
		mkdir -p $(dir $@)
		@date
		time bedtools bamtofastq -i $< -fq $@
		@date

	# SAM text dump: $(RDIR)/<dir>/<name>.sam is built from $(DDIR)/<name>.bam.
	# The output directory is created first (as the .fq rule does), so that
	# `make bam2sam` works on a fresh tree without another rule having run.
	# Recipe lines are indented one level deeper than the rule header.
	$(RDIR)/%.sam: $(DDIR)/$$(notdir $$*).bam
		mkdir -p $(dir $@)
		@date
		time samtools view $< > $@
		@date

	# Sorted copy of the input BAM (samtools sort with its default order).
	# The output directory is created first (as the .fq rule does), so that
	# `make sort_bam` or a parallel `make -j all` cannot hit a missing directory.
	# `$(@:.bam=)` is the target without its .bam suffix, i.e. an output *prefix*:
	# this is the legacy `samtools sort <in.bam> <out.prefix>` calling form.
	# NOTE(review): newer samtools releases expect `samtools sort -o <out.bam> <in.bam>`;
	# confirm which version this pipeline targets before changing the call.
	$(RDIR)/%.srtd.bam: $(DDIR)/$$(notdir $$*).bam
		mkdir -p $(dir $@)
		@date
		time samtools sort $< $(@:.bam=)
		@date

	# BAM index: <name>.bai is written next to <name>.bam, with the output file
	# name passed explicitly to samtools. `date` brackets the timed step.
	# Recipe lines are indented one level deeper than the rule header.
	%.bai: %.bam
		@date
		time samtools index $< $@
		@date

	# Coverage histogram of a sorted BAM. The index is listed as a prerequisite
	# so that it gets built, but only the BAM itself is handed to bedtools.
	# Recipe lines are indented one level deeper than the rule header.
	%.srtd.cov_hist.txt: %.srtd.bam %.srtd.bai
		@date
		bedtools genomecov -ibam $< > $@
		@date

	# Collapse the coverage histogram into one "<name> <sum>" line per value of
	# column 1, where <sum> adds up $2*$5 over all consecutive rows sharing it.
	# Per the bedtools genomecov docs the columns are presumably: $1 = sequence
	# name, $2 = depth, $5 = fraction of the sequence at that depth, which makes
	# the sum the mean depth of that sequence -- TODO confirm against the docs.
	# The first `print` fires before anything was accumulated and emits an empty
	# record; `tail -n +2` drops that line. The pipe is a plain `|`: the escaped
	# `\|` form would pass a literal "|" argument to awk instead of piping.
	%.cov_avg.txt: %.cov_hist.txt
		@date
		awk -F"\t" 'BEGIN { prev = "" } \
		{ \
			name = $$1; \
			if (name == prev) { \
				sum += $$2 * $$5; \
			} else { \
				print prev, sum; \
				sum = $$2 * $$5; \
				prev = name; \
			} \
		} \
		END { print prev, sum }' $< | tail -n +2 > $@
		@date

	#####
	# CLEAN-UP
	#####
	# Placeholder: nothing is removed yet, the recipe only prints a reminder.
	clean: ; echo "TODO: clean"