Created
November 21, 2012 14:15
-
-
Save davebridges/4125043 to your computer and use it in GitHub Desktop.
Generate Exon and Transcript Counts Table
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require(GenomicFeatures) | |
require(biomaRt) | |
#set this to where your tophat_out directory is. | |
working_directory = "/some_directory/" | |
setwd(working_directory) | |
#make a database of transcripts from the ensembl assembly | |
#first get the current release 69 from ensembl, change this to your species if not using mice | |
txdb <- makeTranscriptDbFromBiomart(biomart="ensembl",dataset = 'mmusculus_gene_ensembl') | |
#make exon and transcript annotation objects | |
save(txdb, file="txdb.Robject") | |
exons <- exons(txdb, columns=c('gene_id', 'tx_id', 'tx_name', 'tx_chrom', 'tx_strand', 'exon_id', 'exon_name', 'cds_id', 'cds_name', 'cds_chrom', 'cds_strand', 'exon_rank')) | |
save(exons, file="exons.Robject") | |
transcripts <- transcripts(txdb, columns=c('gene_id', 'tx_id', 'tx_name', 'exon_id', 'exon_name', 'exon_chrom', 'exon_strand', 'cds_id', 'cds_name', 'cds_chrom', 'cds_strand', 'exon_rank')) | |
require(GenomicRanges) | |
require(Rsamtools) | |
#set list of sample ids as a vector | |
sample_ids = seq(12849,12858) | |
transcript.countsTable <- data.frame( | |
row.names = as.vector(unlist(elementMetadata(transcripts)['tx_name']))) | |
exon.countsTable <- data.frame( | |
row.names = as.vector(unlist(elementMetadata(exons)['exon_name']))) | |
#this forloop iterates over the sample_ids and generates exon and transcript counts for each sample_id | |
for(sample_id in sample_ids) { | |
#read alignment | |
align <- readBamGappedAlignments(sprintf("tophat_out/Sample_%s/accepted_hits.bam", sample_id)) | |
#count the overlapping reads for the transcripts | |
transcript.counts <- countOverlaps(transcripts, align) | |
#reassign to a specific transcript.counts object. | |
assign(sprintf("transcript.counts.%s", sample_id), transcript.counts) | |
#add this column to the countsTable | |
transcript.countsTable <- cbind(transcript.countsTable,transcript.counts) | |
remove(transcript.counts) | |
#count the overlapping reads for the exons | |
exon.counts <- countOverlaps(exons, align) | |
#reassign to a specific transcript.counts object. | |
assign(sprintf("exon.counts.%s", sample_id), exon.counts) | |
#add this column to the countsTable | |
exon.countsTable <- cbind(exon.countsTable, exon.counts) | |
#remove the align, transcript.counts and exon.counts objects for the next loop | |
remove(align) | |
remove(transcript.counts) | |
remove(exon.counts) | |
} | |
summary(transcriptCountsTable) | |
summary(exonCountsTable) | |
#write these two counts tables to csv files. | |
write.csv(transcriptCountsTable, "transcript_counts_table.csv") | |
write.csv(exonCountsTable, "exon_counts_table.csv") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment