Last active
April 21, 2016 12:04
-
-
Save zhenjl/7560517 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import (
	"bytes"
	"compress/gzip"
	"compress/lzw"
	"flag"
	"fmt"
	"io"
	"io/ioutil"
	"log"
	"os"
	"path/filepath"
	"runtime"
	"runtime/pprof"
	"strings"
	"time"

	"code.google.com/p/snappy-go/snappy"
	"github.com/cznic/zappy"
	lz4 "github.com/reducedb/go-lz4"
)
type (
	// paramList accumulates the string values of a command-line flag that
	// may be given several times. It is registered with flag.Var in init.
	paramList []string

	// codecFunc is the shape shared by every compress/uncompress adapter in
	// this file. Callers invoke it as codec(in, out): the first slice is the
	// data to process, the second is a scratch buffer for the result.
	codecFunc func([]byte, []byte) ([]byte, error)
)
// Buffer is a fixed-size cursor over a caller-supplied byte slice. It never
// allocates or grows: reads consume the slice from the cursor onward and
// writes fill it from the cursor onward. Read and Write share the same
// cursor, so a given Buffer is meant to be used either as a source or as a
// sink, not both.
type Buffer struct {
	b []byte // backing storage supplied by the caller
	i int    // cursor: offset of the next byte to read or write
}

// NewBuffer returns a Buffer positioned at the start of p. The slice is used
// directly, not copied.
func NewBuffer(p []byte) *Buffer {
	return &Buffer{b: p}
}

// Bytes returns the portion of the backing slice before the cursor, i.e. the
// bytes written (or read) so far. The result aliases the backing slice.
func (buf *Buffer) Bytes() []byte {
	return buf.b[:buf.i]
}

// Read copies up to len(p) bytes from the cursor into p and advances the
// cursor. It returns io.EOF when nothing is left, and also alongside the
// final bytes when fewer than len(p) remain.
func (buf *Buffer) Read(p []byte) (n int, err error) {
	if buf.i == len(buf.b) {
		return 0, io.EOF
	}
	n = copy(p, buf.b[buf.i:])
	buf.i += n
	if n < len(p) {
		// The request could not be filled completely: everything that
		// remained has been handed out, so signal the end right away.
		return n, io.EOF
	}
	return n, nil
}

// Write copies as much of p as fits into the free space after the cursor and
// advances the cursor by the number of bytes actually stored. It returns an
// error when the buffer is already full or when p had to be truncated; the
// cursor never moves past the end of the backing slice.
func (buf *Buffer) Write(p []byte) (n int, err error) {
	if buf.i == len(buf.b) {
		return 0, fmt.Errorf("No more space")
	}
	// copy stops at the shorter of the two slices, so n is exactly the
	// number of bytes that fit.
	n = copy(buf.b[buf.i:], p)
	buf.i += n
	if n < len(p) {
		return n, fmt.Errorf("Not enough space")
	}
	return n, nil
}
var ( | |
filesParam, dirsParam, codecsParam paramList | |
pprofParam bool | |
files []string | |
) | |
func (this *paramList) String() string { | |
return fmt.Sprint(*this) | |
} | |
func (this *paramList) Set(value string) error { | |
for _, f := range strings.Split(value, ",") { | |
*this = append(*this, f) | |
} | |
return nil | |
} | |
func init() { | |
flag.BoolVar(&pprofParam, "pprof", false, "Whether or not to write pprof output.") | |
flag.Var(&filesParam, "file", "Files to process. There can be multiple of this, or comma separated list.") | |
flag.Var(&dirsParam, "dir", "The directory containing a list of files. There can be multiple of this, or comma separated list.") | |
flag.Var(&codecsParam, "codec", "The codec to use: lzw, lz3, zappy, snappy, and gzip. There can be multiple of this, or comma separated list.") | |
} | |
// readFile returns the full contents of the file at path. When the name ends
// in ".gz" the contents are passed through a gzip reader first, so the
// returned bytes are the decompressed data.
func readFile(path string) ([]byte, error) {
	f, err := os.Open(path)
	if err != nil {
		return nil, err
	}
	defer f.Close()

	// Pick the stream to drain: the raw file, or a gzip reader over it.
	var src io.Reader = f
	if strings.HasSuffix(path, ".gz") {
		zr, err := gzip.NewReader(f)
		if err != nil {
			return nil, err
		}
		defer zr.Close()
		src = zr
	}
	return ioutil.ReadAll(src)
}
// getDirOfFiles lists the entries directly inside the directory at path and
// returns their paths, each joined onto path, in the order ioutil.ReadDir
// reports them. Subdirectories are skipped: every returned path is later
// opened and read as a file, which fails for a directory. The listing is not
// recursive.
func getDirOfFiles(path string) ([]string, error) {
	entries, err := ioutil.ReadDir(path)
	if err != nil {
		return nil, err
	}
	filenames := make([]string, 0, len(entries))
	for _, entry := range entries {
		if entry.IsDir() {
			continue
		}
		// filepath.Join uses the OS separator and avoids doubled
		// separators when path already ends in one.
		filenames = append(filenames, filepath.Join(path, entry.Name()))
	}
	return filenames, nil
}
func loadFromFiles(files []string) ([][]byte, int, error) { | |
max := 0 | |
data := make([][]byte, 0, len(files)) | |
for _, f := range files { | |
fmt.Printf("Processing %s...", f) | |
res, err := readFile(f) | |
if err != nil { | |
return nil, 0, err | |
} | |
data = append(data, res) | |
if len(res) > max { | |
max = len(res) | |
} | |
fmt.Println("done.") | |
} | |
return data, max, nil | |
} | |
func getListOfFiles() []string { | |
files := make([]string, 0, 10) | |
for _, d := range dirsParam { | |
res, err := getDirOfFiles(d) | |
if err != nil { | |
log.Fatal(err) | |
} | |
files = append(files, res...) | |
} | |
files = append(files, filesParam...) | |
return files | |
} | |
func compress(codec codecFunc, in, out []byte, prof bool) (duration int, ret []byte, err error) { | |
now := time.Now() | |
if prof { | |
f, e := os.Create("cpu.compress.pprof") | |
if e != nil { | |
log.Fatal(e) | |
} | |
defer f.Close() | |
pprof.StartCPUProfile(f) | |
} | |
ret, err = codec(in, out) | |
since := time.Since(now).Nanoseconds() | |
if prof { | |
pprof.StopCPUProfile() | |
} | |
return int(since), ret, nil | |
} | |
func uncompress(codec codecFunc, in, out []byte, prof bool) (duration int, ret []byte, err error) { | |
now := time.Now() | |
if prof { | |
f, e := os.Create("cpu.uncompress.pprof") | |
if e != nil { | |
log.Fatal(e) | |
} | |
defer f.Close() | |
pprof.StartCPUProfile(f) | |
} | |
ret, err = codec(in, out) | |
since := time.Since(now).Nanoseconds() | |
if prof { | |
pprof.StopCPUProfile() | |
} | |
return int(since), ret, nil | |
} | |
func testCodecs(data [][]byte, max int, output bool) error { | |
compdata := make([]byte, max+max/3) | |
decompdata := make([]byte, max) | |
var ( | |
compOut, decompOut []byte | |
err error | |
compTime, decompTime int | |
) | |
for i, in := range data { | |
for _, codec := range codecsParam { | |
switch codec { | |
case "lzw": | |
compTime, compOut, err = compress(lzwCompress, in, compdata, pprofParam) | |
if err != nil { | |
return err | |
} | |
decompTime, decompOut, err = uncompress(lzwUncompress, compOut, decompdata, pprofParam) | |
if err != nil { | |
return err | |
} | |
case "lz4": | |
compTime, compOut, err = compress(lz4Compress, in, compdata, pprofParam) | |
if err != nil { | |
return err | |
} | |
decompTime, decompOut, err = uncompress(lz4Uncompress, compOut, decompdata, pprofParam) | |
if err != nil { | |
return err | |
} | |
case "zappy": | |
compTime, compOut, err = compress(zappyCompress, in, compdata, pprofParam) | |
if err != nil { | |
return err | |
} | |
decompTime, decompOut, err = uncompress(zappyUncompress, compOut, decompdata, pprofParam) | |
if err != nil { | |
return err | |
} | |
case "snappy": | |
compTime, compOut, err = compress(snappyCompress, in, compdata, pprofParam) | |
if err != nil { | |
return err | |
} | |
decompTime, decompOut, err = uncompress(snappyUncompress, compOut, decompdata, pprofParam) | |
if err != nil { | |
return err | |
} | |
case "gzip": | |
compTime, compOut, err = compress(gzipCompress, in, compdata, pprofParam) | |
if err != nil { | |
return err | |
} | |
decompTime, decompOut, err = uncompress(gzipUncompress, compOut, decompdata, pprofParam) | |
if err != nil { | |
return err | |
} | |
default: | |
return fmt.Errorf("unknown codec %s", codec) | |
} | |
if !bytes.Equal(in, decompOut) { | |
return fmt.Errorf("roundtrip mismatch, size in = %d, size comp = %d, size decomp = %d\n", len(in), len(compOut), len(decompOut)) | |
} | |
if output { | |
fmt.Printf("%-8s\t%-25s\t%2.2f%%\t%10d\t%5.2f\t%10d\t%5.2f\n", codec, files[i], float64(len(compOut))/float64(len(in))*100, compTime, (float64(len(in)) / (float64(compTime) / 1e9) / 1e6), decompTime, (float64(len(in)) / (float64(decompTime) / 1e9) / 1e6)) | |
//fmt.Printf("%-8s\t%-25s\t%2.2f%%\t%10d ns\t%5.2f MB/s\t%10d ns\t%5.2f MB/s\n", codec, files[i], float64(len(compOut))/float64(len(in))*100, compTime, (float64(len(in)) / (float64(compTime) / 1e9) / 1e6), decompTime, (float64(len(in)) / (float64(decompTime) / 1e9) / 1e6)) | |
} | |
runtime.GC() | |
} | |
} | |
return nil | |
} | |
func lz4Compress(in, out []byte) ([]byte, error) { | |
return lz4.Encode(out, in) | |
} | |
func lz4Uncompress(in, out []byte) ([]byte, error) { | |
return lz4.Decode(out, in) | |
} | |
func zappyCompress(in, out []byte) ([]byte, error) { | |
return zappy.Encode(out, in) | |
} | |
func zappyUncompress(in, out []byte) ([]byte, error) { | |
return zappy.Decode(out, in) | |
} | |
func snappyCompress(in, out []byte) ([]byte, error) { | |
return snappy.Encode(out, in) | |
} | |
func snappyUncompress(in, out []byte) ([]byte, error) { | |
return snappy.Decode(out, in) | |
} | |
func gzipCompress(in, out []byte) ([]byte, error) { | |
b := NewBuffer(out) | |
w := gzip.NewWriter(b) | |
_, err := w.Write(in) | |
w.Close() | |
return b.Bytes(), err | |
} | |
func gzipUncompress(in, out []byte) ([]byte, error) { | |
b := NewBuffer(in) | |
r, e := gzip.NewReader(b) | |
if e != nil { | |
return nil, e | |
} | |
defer r.Close() | |
recovered := out[0:] | |
total := 0 | |
n := 100 | |
var err error = nil | |
for err != io.EOF && n != 0 { | |
n, err = r.Read(recovered[total:]) | |
total += n | |
} | |
if err != io.EOF && err != nil { | |
return nil, err | |
} | |
return recovered[:total], nil | |
} | |
func lzwCompress(in, out []byte) ([]byte, error) { | |
b := NewBuffer(out) | |
w := lzw.NewWriter(b, lzw.MSB, 8) | |
_, err := w.Write(in) | |
w.Close() | |
return b.Bytes(), err | |
} | |
func lzwUncompress(in, out []byte) ([]byte, error) { | |
b := NewBuffer(in) | |
r := lzw.NewReader(b, lzw.MSB, 8) | |
defer r.Close() | |
recovered := out[0:] | |
total := 0 | |
n := 100 | |
var err error = nil | |
for err != io.EOF && n != 0 { | |
n, err = r.Read(recovered[total:]) | |
total += n | |
} | |
if err != io.EOF && err != nil { | |
return nil, err | |
} | |
return recovered[:total], nil | |
} | |
func main() { | |
flag.Parse() | |
files = getListOfFiles() | |
data, max, err := loadFromFiles(files) | |
if err != nil { | |
log.Fatal(err) | |
} | |
if err := testCodecs(data, max, false); err != nil { | |
log.Fatal(err) | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment