Created
January 13, 2020 22:09
-
-
Save ostcar/2261240f8c26378a7ad40cd3de80fb84 to your computer and use it in GitHub Desktop.
Test decoding of big list of ints
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import ( | |
"bytes" | |
"encoding/binary" | |
"encoding/json" | |
"fmt" | |
"io" | |
"strconv" | |
"time" | |
) | |
const idsCount = 4_000_000 | |
func main() { | |
ids := make([]int, idsCount) | |
for i := 0; i < idsCount; i++ { | |
ids[i] = i | |
} | |
start := time.Now() | |
jsonIDs, _ := json.Marshal(ids) | |
end := time.Since(start) | |
fmt.Printf("encoded is %d bytes, took %d ms\n", len(jsonIDs), end/time.Millisecond) | |
var newIDs []int | |
start = time.Now() | |
json.Unmarshal(jsonIDs, &newIDs) | |
end = time.Since(start) | |
fmt.Printf("decoding, took %d ms\n", end/time.Millisecond) | |
if !testEq(ids, newIDs) { | |
panic("new IDs not equal") | |
} | |
start = time.Now() | |
jsonIDs = encodeJSON(ids) | |
end = time.Since(start) | |
fmt.Printf("encoded is %d bytes, took %d ms\n", len(jsonIDs), end/time.Millisecond) | |
start = time.Now() | |
newIDs = decodeJSON(jsonIDs) | |
end = time.Since(start) | |
fmt.Printf("decoding, took %d ms\n", end/time.Millisecond) | |
if !testEq(ids, newIDs) { | |
panic("new IDs not equal") | |
} | |
start = time.Now() | |
jsonIDs = encodeBytes(ids) | |
end = time.Since(start) | |
fmt.Printf("encoded is %d bytes, took %d ms\n", len(jsonIDs), end/time.Millisecond) | |
start = time.Now() | |
newIDs = decodeBytes(jsonIDs) | |
end = time.Since(start) | |
fmt.Printf("decoding, took %d ms\n", end/time.Millisecond) | |
if !testEq(ids, newIDs) { | |
panic("new IDs not equal") | |
} | |
} | |
// encodeJSON serializes ids as a compact JSON array of decimal integers
// (for example [1,2,3]) without going through encoding/json.
//
// A nil or empty slice encodes as "[]".
func encodeJSON(ids []int) []byte {
	if len(ids) == 0 {
		// Without this guard the bracket fix-up at the end would overwrite
		// the opening '[' and produce the invalid document "]".
		return []byte("[]")
	}

	// Every id contributes at least one digit plus one separator, so this
	// capacity is a lower bound that avoids the first rounds of regrowth.
	buf := make([]byte, 1, 2*len(ids)+1)
	buf[0] = '['
	for _, id := range ids {
		buf = strconv.AppendInt(buf, int64(id), 10)
		buf = append(buf, ',')
	}

	// The loop leaves a trailing comma; turn it into the closing bracket.
	buf[len(buf)-1] = ']'
	return buf
}
// decodeJSON parses a compact JSON array of decimal integers, as produced by
// encodeJSON (no whitespace, no nesting), into a slice of ints.
//
// "[]" decodes to an empty, non-nil slice. Input that is not wrapped in
// square brackets, or that contains an element strconv.Atoi rejects, yields
// nil; the signature has no error result, so nil is the failure signal.
func decodeJSON(buf []byte) []int {
	if len(buf) < 2 || buf[0] != '[' || buf[len(buf)-1] != ']' {
		return nil
	}

	body := buf[1 : len(buf)-1]
	if len(body) == 0 {
		// Deliberately non-nil: an empty array is a valid, successful decode
		// and must stay distinguishable from the nil failure result.
		return []int{}
	}

	// One element per comma, plus the final one.
	out := make([]int, 0, bytes.Count(body, []byte{','})+1)
	for {
		idx := bytes.IndexByte(body, ',')
		last := idx == -1
		if last {
			idx = len(body)
		}

		id, err := strconv.Atoi(string(body[:idx]))
		if err != nil {
			return nil
		}
		out = append(out, id)

		if last {
			return out
		}
		body = body[idx+1:]
	}
}
// encodeBytes packs ids into a byte slice, four bytes per id, each stored as
// a little-endian int32.
//
// Values outside the int32 range are truncated to their low 32 bits by the
// int32 conversion; the signature offers no way to report that.
func encodeBytes(ids []int) []byte {
	// The output size is known up front, so write straight into a pre-sized
	// slice instead of calling binary.Write once per element (which boxes
	// the value into an interface and returns an error that was ignored).
	buf := make([]byte, 4*len(ids))
	for i, id := range ids {
		binary.LittleEndian.PutUint32(buf[4*i:], uint32(int32(id)))
	}
	return buf
}
// decodeBytes is the inverse of encodeBytes: it reads b as a sequence of
// little-endian int32 values and returns them as ints.
//
// If len(b) is not a multiple of four, the incomplete trailing record is
// dropped. The result is an empty, non-nil slice when b holds no complete
// record.
func decodeBytes(b []byte) []int {
	r := bytes.NewReader(b)
	out := make([]int, 0, len(b)/4)

	var id int32
	for {
		err := binary.Read(r, binary.LittleEndian, &id)
		if err == io.EOF {
			// Clean end of input: every record was consumed.
			break
		}
		if err != nil {
			// A partial trailing record surfaces as io.ErrUnexpectedEOF.
			// Stop here rather than appending the stale value left in id
			// from the previous iteration.
			break
		}
		out = append(out, int(id))
	}
	return out
}
// testEq reports whether a and b hold the same ints in the same order.
// A nil slice is only considered equal to another nil slice, so nil and an
// empty non-nil slice compare as different.
func testEq(a, b []int) bool {
	aIsNil, bIsNil := a == nil, b == nil
	switch {
	case aIsNil != bIsNil:
		// Exactly one side is nil.
		return false
	case len(a) != len(b):
		return false
	}

	for idx, want := range a {
		if b[idx] != want {
			return false
		}
	}
	return true
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json | |
import time | |
from typing import List | |
# Benchmark input: the integers 0 .. 3_999_999.
ids = list(range(4_000_000))

# Encode ids to json with the standard library.
# perf_counter is used for the interval because, unlike time.time, it is
# monotonic and meant for measuring short durations.
start = time.perf_counter()
json_ids = json.dumps(ids, separators=(",", ":"))
end = int(1000 * (time.perf_counter() - start))
print(f"encoding is {len(json_ids)} bytes, took {end} ms")

# Decode ids from json with the standard library.
start = time.perf_counter()
new_ids = json.loads(json_ids)
end = int(1000 * (time.perf_counter() - start))
print(f"decoding, took {end} ms")

# The round trip must reproduce the input exactly.
if new_ids != ids:
    raise RuntimeError("json.loads(json.dumps(ids)) did not reproduce ids")
def encode_json(ids: List[int]) -> str:
    """Serialize ``ids`` as a compact JSON array without using ``json``.

    Args:
        ids: The integers to encode.

    Returns:
        A string such as ``"[1,2,3]"`` with no whitespace. An empty list
        encodes as ``"[]"``.
    """
    # Joining handles the empty case naturally; the previous approach of
    # overwriting the last byte with "]" clobbered the opening "[" when
    # there were no elements.
    return "[" + ",".join(map(str, ids)) + "]"
def decode_json(s: str) -> List[int]:
    """Parse a flat JSON array of integers without using ``json``.

    The first and last characters of ``s`` are taken to be the enclosing
    brackets and are discarded without being checked; everything in between
    is split on commas.

    Args:
        s: A string such as ``"[1,2,3]"``.

    Returns:
        The decoded integers; ``"[]"`` decodes to an empty list.

    Raises:
        ValueError: If an element is not a valid integer literal.
    """
    body = s[1:-1]
    if not body:
        # An empty array has no elements; int("") would raise otherwise.
        return []
    return [int(part) for part in body.split(",")]
# Encode ids to json with the hand-written encoder.
# perf_counter is used for the interval because, unlike time.time, it is
# monotonic and meant for measuring short durations.
start = time.perf_counter()
json_ids = encode_json(ids)
end = int(1000 * (time.perf_counter() - start))
print(f"encoding is {len(json_ids)} bytes, took {end} ms")

# Decode ids from json with the hand-written decoder.
start = time.perf_counter()
new_ids = decode_json(json_ids)
end = int(1000 * (time.perf_counter() - start))
print(f"decoding, took {end} ms")

# The round trip must reproduce the input exactly.
if new_ids != ids:
    raise RuntimeError("decode_json(encode_json(ids)) did not reproduce ids")
def int_to_bytes(ids: List[int]) -> bytes:
    """Pack ``ids`` into bytes, four big-endian unsigned bytes per value.

    Args:
        ids: Integers in the range ``0 .. 2**32 - 1``.

    Returns:
        A ``bytes`` object of length ``4 * len(ids)``.

    Raises:
        OverflowError: If a value is negative or does not fit in four bytes
            (raised by ``int.to_bytes``).
    """
    # Build the payload from the encoded values only. Starting from
    # bytearray(len(ids) * 4) and then extending it left a block of zero
    # bytes in front of the data and doubled the output size.
    return b"".join(value.to_bytes(4, "big") for value in ids)
def bytes_to_int(b: bytes) -> List[int]:
    """Unpack consecutive four-byte big-endian unsigned integers from ``b``.

    Args:
        b: The packed bytes. If its length is not a multiple of four, the
            shorter final chunk is decoded as a big-endian integer too.

    Returns:
        The decoded integers, in order; empty input gives an empty list.
    """
    # Step through the buffer by offset. Re-slicing with b = b[4:] copied the
    # whole remainder on every iteration, which is quadratic in len(b).
    return [
        int.from_bytes(b[offset:offset + 4], "big")
        for offset in range(0, len(b), 4)
    ]
# Encode ids to bytes.
# perf_counter is used for the interval because, unlike time.time, it is
# monotonic and meant for measuring short durations.
start = time.perf_counter()
bytes_ids = int_to_bytes(ids)
end = int(1000 * (time.perf_counter() - start))
# Report the size of the binary payload itself; this line used to print the
# length of json_ids, the JSON string left over from the previous section.
print(f"encoding is {len(bytes_ids)} bytes, took {end} ms")

# NOTE(review): the decode round trip is deliberately switched off here,
# presumably because it was too slow on 4M ids - confirm before enabling.
if False:
    # Decode ids from bytes
    start = time.perf_counter()
    new_ids = bytes_to_int(bytes_ids)
    end = int(1000 * (time.perf_counter() - start))
    if new_ids != ids:
        raise RuntimeError()
    print(f"decoding, took {end} ms")
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
$ python bigjson.py | |
encoding is 30888891 bytes, took 404 ms | |
decoding, took 349 ms | |
encoding is 30888891 bytes, took 1440 ms | |
decoding, took 1790 ms | |
encoding is 30888891 bytes, took 622 ms | |
$ go run bigjson.go | |
encoded is 30888891 bytes, took 174 ms | |
decoding, took 893 ms | |
encoded is 30888891 bytes, took 111 ms | |
decoding, took 153 ms | |
encoded is 16000000 bytes, took 180 ms | |
decoding, took 175 ms |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment