tgirke · May 28, 2018 21:57
diff --git a/hmmscan.R b/hmmscan.R
 ##############################################
 ## Map Pfam Domains to Proteins with HMMER3 ##
 ##############################################
 ## Author: Thomas Girke
 ## Date: May 11, 2018

 ## Utility: mapping of Pfam domains to protein sequences. 
 ## The module load and Pfam database paths given below are specific to the HPCC/biocluster system.
 ## For details consult the man page for hmmscan from the command-line with 'hmmscan -h'

 library(systemPipeR)
 moduleload("hmmer/3.1b2")
 ## Note: the file myseq.txt contains yours protein sequences and Pfam-A.hmm is the Pfam database available on HPCC cluster
 system("hmmscan -E 0.1 --acc --domtblout output.pfam /srv/projects/db/pfam/2017-06-11-Pfam31.0/Pfam-A.hmm myseq.txt > /dev/null")

 ## Read Pfam mapping results into R
 pfdf <- read.table("output.pfam", comment="#", header=FALSE)
 ## For fast import use fread from data.table package
 library(data.table)
 pfdf <- fread("grep -v '^#' output.pfam") # syntax to ignore lines starting with `#`
 colnames(pfdf) <- c("target name", "accession1", "tlen", "query name", "accession2", "qlen", "E-value", "score1", "bias1", "No",  "of", "c-Evalue", "i-Evalue", "score2", "bias2", "from1", "to`", "from2", "to2", "from3", "to3",  "acc", "description of target")

 ## tibbles from dplyr are a more modern version of data.frames
 library(dplyr, quietly=TRUE)
 pfdf <- as_data_frame(pfdf)
 pfdf
	##############################################
	## Map Pfam Domains to Proteins with HMMER3 ##
	##############################################
	## Author: Thomas Girke
	## Date: May 11, 2018

	## Utility: mapping of Pfam domains to protein sequences.
	## The module load and Pfam database paths given below are specific to the HPCC/biocluster system.
	## For details consult the man page for hmmscan from the command-line with 'hmmscan -h'

	library(systemPipeR)
	moduleload("hmmer/3.1b2")
	## Note: the file myseq.txt contains yours protein sequences and Pfam-A.hmm is the Pfam database available on HPCC cluster
	system("hmmscan -E 0.1 --acc --domtblout output.pfam /srv/projects/db/pfam/2017-06-11-Pfam31.0/Pfam-A.hmm myseq.txt > /dev/null")

	## Read Pfam mapping results into R
	pfdf <- read.table("output.pfam", comment="#", header=FALSE)
	## For fast import use fread from data.table package
	library(data.table)
	pfdf <- fread("grep -v '^#' output.pfam") # syntax to ignore lines starting with `#`
	colnames(pfdf) <- c("target name", "accession1", "tlen", "query name", "accession2", "qlen", "E-value", "score1", "bias1", "No", "of", "c-Evalue", "i-Evalue", "score2", "bias2", "from1", "to`", "from2", "to2", "from3", "to3", "acc", "description of target")

	## tibbles from dplyr are a more modern version of data.frames
	library(dplyr, quietly=TRUE)
	pfdf <- as_data_frame(pfdf)
	pfdf