Created
November 28, 2023 21:47
-
-
Save jcrist/35ddeced755402894c4436168ebe2314 to your computer and use it in GitHub Desktop.
A quick benchmark of msgspec vs mashumaro to clear up misconceptions in a flyte issue
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sys | |
import importlib.metadata | |
import timeit | |
from dataclasses import dataclass | |
import msgspec | |
import orjson | |
from mashumaro.codecs.json import JSONEncoder, JSONDecoder | |
from mashumaro.codecs.orjson import ORJSONEncoder, ORJSONDecoder | |
# Dataclass type definitions, copied from
# https://flyte.org/blog/flyte-1-10-monorepo-new-agents-eager-workflows-and-more#mashumaro-to-serializedeserialize-dataclasses
@dataclass
class CurrencyPosition:
    """Plain dataclass pairing a currency label with a float balance."""

    currency: str  # e.g. "USD" in the sample message
    balance: float  # e.g. 238.67 in the sample message
@dataclass
class StockPosition:
    """Plain dataclass for one stock holding: ticker, name and an integer balance."""

    ticker: str  # e.g. "AAPL" in the sample message
    name: str  # e.g. "Apple" in the sample message
    balance: int  # e.g. 10 in the sample message
@dataclass
class OP:
    """Top-level message: a list of currency positions and a list of stock positions."""

    currencies: list[CurrencyPosition]
    stocks: list[StockPosition]
# Identical types to those above, but this time defined as msgspec structs
class CurrencyPositionStruct(msgspec.Struct):
    """msgspec.Struct with the same fields as the CurrencyPosition dataclass."""

    currency: str
    balance: float
class StockPositionStruct(msgspec.Struct):
    """msgspec.Struct with the same fields as the StockPosition dataclass."""

    ticker: str
    name: str
    balance: int
class OPStruct(msgspec.Struct):
    """msgspec.Struct with the same fields as the OP dataclass, holding struct positions."""

    currencies: list[CurrencyPositionStruct]
    stocks: list[StockPositionStruct]
# The in-memory message structure, also copied from that flyte blogpost above
dataclass_obj = OP(
    currencies=[
        CurrencyPosition("USD", 238.67),
        CurrencyPosition("EUR", 361.84),
    ],
    stocks=[
        StockPosition("AAPL", "Apple", 10),
        StockPosition("AMZN", "Amazon", 10),
    ],
)

# Make a copy of the in-memory message, but converted to our struct types
struct_obj = msgspec.convert(dataclass_obj, OPStruct, from_attributes=True)

# The message serialized as json; this is the input for every decode case
json = msgspec.json.encode(struct_obj)

# Here we define the encoder/decoder objects used by the benchmark test cases.
# They are built once up front so that only encode()/decode() calls are timed.
mashumaro_json_encoder = JSONEncoder(OP)
mashumaro_json_decoder = JSONDecoder(OP)
mashumaro_orjson_encoder = ORJSONEncoder(OP)
mashumaro_orjson_decoder = ORJSONDecoder(OP)
msgspec_dataclass_decoder = msgspec.json.Decoder(OP)
msgspec_struct_decoder = msgspec.json.Decoder(OPStruct)
# One msgspec encoder is shared by the dataclass and the struct encode cases
msgspec_json_encoder = msgspec.json.Encoder()

# Each case is a (label, zero-argument callable) pair; the callable is what gets timed
encode_cases = [
    ("mashumaro & json", lambda: mashumaro_json_encoder.encode(dataclass_obj)),
    ("mashumaro & orjson", lambda: mashumaro_orjson_encoder.encode(dataclass_obj)),
    ("msgspec & dataclasses", lambda: msgspec_json_encoder.encode(dataclass_obj)),
    ("msgspec & structs", lambda: msgspec_json_encoder.encode(struct_obj)),
]

decode_cases = [
    ("mashumaro & json", lambda: mashumaro_json_decoder.decode(json)),
    ("mashumaro & orjson", lambda: mashumaro_orjson_decoder.decode(json)),
    ("msgspec & dataclasses", lambda: msgspec_dataclass_decoder.decode(json)),
    ("msgspec & structs", lambda: msgspec_struct_decoder.decode(json)),
]
def run_benchmarks_and_print_results():
    """Time every encode/decode case and print ranked results plus library versions.

    For each group ("Encoding:", "Decoding:") every case is called once to warm
    up, then timed. Results are printed fastest-first, as microseconds per call
    together with the slowdown relative to the fastest case in that group.
    After both groups, the versions of Python, msgspec, mashumaro and orjson
    are printed.
    """
    for title, cases in [("Encoding:", encode_cases), ("Decoding:", decode_cases)]:
        print(title)
        results = []
        for case, func in cases:
            func()  # Call once to warmup
            timer = timeit.Timer("func()", globals={"func": func})
            # Let autorange pick a loop count, then keep the fastest of several
            # repeats: slower repeats usually reflect interference from other
            # processes rather than the code under test.
            loops, _ = timer.autorange()
            fastest = min(timer.repeat(repeat=5, number=loops))
            results.append((case, fastest / loops))

        # Format a nice results table
        results.sort(key=lambda x: x[1])
        best = results[0][1]
        for case, per_call in results:
            print(f"- {case + ':':22} {1_000_000 * per_call:.1f} µs ({per_call / best:.1f}x)")
        print("")

    print("Library versions:")
    # Only major.minor.micro; the full tuple would append ".final.0"
    print(f"- Python: {'.'.join(map(str, sys.version_info[:3]))}")
    print(f"- msgspec: {msgspec.__version__}")
    print(f"- mashumaro: {importlib.metadata.version('mashumaro')}")
    print(f"- orjson: {orjson.__version__}")
# Only run the benchmark when executed as a script, not when imported
if __name__ == "__main__":
    run_benchmarks_and_print_results()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This was a quick benchmark written up to clear up a performance misconception raised in this flyte issue.
Results: