Created
October 13, 2012 07:14
-
-
Save DisposaBoy/3883645 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import ( | |
"bufio" | |
"bytes" | |
"flag" | |
"fmt" | |
"github.com/drio/drio.go/common/files" | |
"io" | |
"log" | |
"os" | |
"runtime/pprof" | |
) | |
type record struct { | |
name, seq, qual []byte | |
} | |
func iterLines(r *bufio.Reader) func() ([]byte, bool) { | |
return func() ([]byte, bool) { | |
// note: this line simply creates two vars and then throw them away | |
// line, err := "", errors.New("") | |
// as far as i can see, the slice being copied so we can possibly speed | |
// things up even more by reusing the underlying array with r.ReadSlice | |
// line, err := r.ReadSlice('\n') | |
line, err := r.ReadBytes('\n') | |
if err != nil { | |
if err == io.EOF { | |
return line, true | |
} else { | |
fmt.Println(err) | |
os.Exit(1) | |
} | |
} | |
return line, false | |
} | |
} | |
// Original code: https://github.com/lh3/readfq | |
func ReadFq(r *bufio.Reader) func() (record, bool) { | |
var seq record | |
iter := iterLines(r) | |
last, done, finished := []byte{}, true, false | |
return func() (record, bool) { | |
if finished { | |
return seq, done | |
} | |
// Read the seq id (fasta or fastq) | |
if len(last) == 0 { | |
for l, done := iter(); !done; l, done = iter() { | |
if c := l[0]; c == '>' || c == '@' { | |
last = l[0 : len(l)-1] | |
break | |
} | |
} | |
} | |
if len(last) == 0 { | |
finished = true | |
return seq, done | |
} | |
seqName := bytes.Split(last[1:], []byte(" "))[0] | |
seq, last = record{name: seqName}, []byte{} | |
// Now read the sequence | |
for l, done := iter(); !done; l, done = iter() { | |
if c := l[0]; c == '+' || c == '>' || c == '@' { | |
last = l[0 : len(l)-1] | |
break | |
} | |
seq.seq = append(seq.seq, l[0:len(l)-1]...) | |
} | |
// We have a fasta record | |
if len(last) == 0 || last[0] != '+' { | |
return seq, !done | |
if len(last) == 0 { | |
return seq, done | |
} | |
} else { // this is a fastq record | |
leng := 0 | |
for l, done := iter(); !done; l, done = iter() { | |
seq.qual = append(seq.qual, l[0:len(l)-1]...) | |
leng += len(l) | |
if leng >= len(seq.seq) { // we have read enough quality | |
last = []byte{} | |
return seq, done | |
} | |
} | |
} | |
finished = true | |
return seq, !done | |
} | |
} | |
var cpuprofile = flag.String("cpuprofile", "", "write cpu profile to file") | |
func main() { | |
flag.Parse() | |
if *cpuprofile != "" { | |
f, err := os.Create(*cpuprofile) | |
if err != nil { | |
log.Fatal(err) | |
} | |
pprof.StartCPUProfile(f) | |
defer pprof.StopCPUProfile() | |
} | |
// sample file found at https://wiki.cgb.indiana.edu/display/isga/Sample+FastQ+Files | |
fp, r := files.Xopen("sample_1.fq") | |
defer fp.Close() | |
n, sLen, qLen := 0, int64(0), int64(0) | |
iter := ReadFq(r) | |
for r, done := iter(); !done; r, done = iter() { | |
n += 1 | |
sLen += int64(len(r.seq)) | |
qLen += int64(len(r.qual)) | |
} | |
fmt.Println(n, "\t", sLen, "\t", qLen) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment