davebridges · November 21, 2012 14:15
diff --git a/counts_table.R b/counts_table.R
 require(GenomicFeatures)
 require(biomaRt)
 #set this to where your tophat_out directory is.
 working_directory = "/some_directory/"
 setwd(working_directory)
 #make a database of transcripts from the ensembl assembly
 #first get the current release 69 from ensembl, change this to your species if not using mice
 txdb <- makeTranscriptDbFromBiomart(biomart="ensembl",dataset = 'mmusculus_gene_ensembl')
 #make exon and transcript annotation objects
 save(txdb, file="txdb.Robject")
 exons <- exons(txdb, columns=c('gene_id', 'tx_id', 'tx_name', 'tx_chrom', 'tx_strand', 'exon_id', 'exon_name', 'cds_id', 'cds_name', 'cds_chrom', 'cds_strand', 'exon_rank'))
 save(exons, file="exons.Robject")
 transcripts <- transcripts(txdb, columns=c('gene_id', 'tx_id', 'tx_name', 'exon_id', 'exon_name', 'exon_chrom', 'exon_strand', 'cds_id', 'cds_name', 'cds_chrom', 'cds_strand', 'exon_rank'))


 require(GenomicRanges)
 require(Rsamtools)

 #set list of sample ids as a vector
 sample_ids = seq(12849,12858)

 transcript.countsTable <- data.frame(
  row.names = as.vector(unlist(elementMetadata(transcripts)['tx_name'])))
 exon.countsTable <- data.frame(
  row.names = as.vector(unlist(elementMetadata(exons)['exon_name'])))

 #this forloop iterates over the sample_ids and generates exon and transcript counts for each sample_id
 for(sample_id in sample_ids) {
  #read alignment
  align <- readBamGappedAlignments(sprintf("tophat_out/Sample_%s/accepted_hits.bam", sample_id))
  #count the overlapping reads for the transcripts
  transcript.counts <- countOverlaps(transcripts, align)
  #reassign to a specific transcript.counts object.
  assign(sprintf("transcript.counts.%s", sample_id), transcript.counts)
  #add this column to the countsTable
  transcript.countsTable <- cbind(transcript.countsTable,transcript.counts)
  remove(transcript.counts)
  #count the overlapping reads for the exons
  exon.counts <- countOverlaps(exons, align)
  #reassign to a specific transcript.counts object.
  assign(sprintf("exon.counts.%s", sample_id), exon.counts)
  #add this column to the countsTable
  exon.countsTable <- cbind(exon.countsTable, exon.counts)  
  #remove the align, transcript.counts and exon.counts objects for the next loop
  remove(align)
  remove(transcript.counts)
  remove(exon.counts)
  }

 summary(transcriptCountsTable)
 summary(exonCountsTable)

 #write these two counts tables to csv files.
 write.csv(transcriptCountsTable, "transcript_counts_table.csv")
 write.csv(exonCountsTable, "exon_counts_table.csv")
	require(GenomicFeatures)
	require(biomaRt)
	#set this to where your tophat_out directory is.
	working_directory = "/some_directory/"
	setwd(working_directory)
	#make a database of transcripts from the ensembl assembly
	#first get the current release 69 from ensembl, change this to your species if not using mice
	txdb <- makeTranscriptDbFromBiomart(biomart="ensembl",dataset = 'mmusculus_gene_ensembl')
	#make exon and transcript annotation objects
	save(txdb, file="txdb.Robject")
	exons <- exons(txdb, columns=c('gene_id', 'tx_id', 'tx_name', 'tx_chrom', 'tx_strand', 'exon_id', 'exon_name', 'cds_id', 'cds_name', 'cds_chrom', 'cds_strand', 'exon_rank'))
	save(exons, file="exons.Robject")
	transcripts <- transcripts(txdb, columns=c('gene_id', 'tx_id', 'tx_name', 'exon_id', 'exon_name', 'exon_chrom', 'exon_strand', 'cds_id', 'cds_name', 'cds_chrom', 'cds_strand', 'exon_rank'))


	require(GenomicRanges)
	require(Rsamtools)

	#set list of sample ids as a vector
	sample_ids = seq(12849,12858)

	transcript.countsTable <- data.frame(
	row.names = as.vector(unlist(elementMetadata(transcripts)['tx_name'])))
	exon.countsTable <- data.frame(
	row.names = as.vector(unlist(elementMetadata(exons)['exon_name'])))

	#this forloop iterates over the sample_ids and generates exon and transcript counts for each sample_id
	for(sample_id in sample_ids) {
	#read alignment
	align <- readBamGappedAlignments(sprintf("tophat_out/Sample_%s/accepted_hits.bam", sample_id))
	#count the overlapping reads for the transcripts
	transcript.counts <- countOverlaps(transcripts, align)
	#reassign to a specific transcript.counts object.
	assign(sprintf("transcript.counts.%s", sample_id), transcript.counts)
	#add this column to the countsTable
	transcript.countsTable <- cbind(transcript.countsTable,transcript.counts)
	remove(transcript.counts)
	#count the overlapping reads for the exons
	exon.counts <- countOverlaps(exons, align)
	#reassign to a specific transcript.counts object.
	assign(sprintf("exon.counts.%s", sample_id), exon.counts)
	#add this column to the countsTable
	exon.countsTable <- cbind(exon.countsTable, exon.counts)
	#remove the align, transcript.counts and exon.counts objects for the next loop
	remove(align)
	remove(transcript.counts)
	remove(exon.counts)
	}

	summary(transcriptCountsTable)
	summary(exonCountsTable)

	#write these two counts tables to csv files.
	write.csv(transcriptCountsTable, "transcript_counts_table.csv")
	write.csv(exonCountsTable, "exon_counts_table.csv")