Skip to content

Instantly share code, notes, and snippets.

@jannismain
Last active April 23, 2025 15:08
Show Gist options
  • Save jannismain/e96666ca4f059c3e5bc28abb711b5c92 to your computer and use it in GitHub Desktop.
Save jannismain/e96666ca4f059c3e5bc28abb711b5c92 to your computer and use it in GitHub Desktop.
A JSON Encoder in Python, that puts small lists on single lines.
#!/usr/bin/env python3
from __future__ import annotations
import json
class CompactJSONEncoder(json.JSONEncoder):
"""A JSON Encoder that puts small containers on single lines."""
CONTAINER_TYPES = (list, tuple, dict)
"""Container datatypes include primitives or other containers."""
MAX_WIDTH = 70
"""Maximum width of a container that might be put on a single line."""
MAX_ITEMS = 10
"""Maximum number of items in container that might be put on single line."""
def __init__(self, *args, **kwargs):
# using this class without indentation is pointless
if kwargs.get("indent") is None:
kwargs["indent"] = 4
super().__init__(*args, **kwargs)
self.indentation_level = 0
def encode(self, o):
"""Encode JSON object *o* with respect to single line lists."""
if isinstance(o, (list, tuple)):
return self._encode_list(o)
if isinstance(o, dict):
return self._encode_object(o)
return json.dumps(
o,
skipkeys=self.skipkeys,
ensure_ascii=self.ensure_ascii,
check_circular=self.check_circular,
allow_nan=self.allow_nan,
sort_keys=self.sort_keys,
indent=self.indent,
separators=(self.item_separator, self.key_separator),
default=self.default if hasattr(self, "default") else None,
)
def _encode_list(self, o):
if self._put_on_single_line(o):
return "[" + ", ".join(self.encode(el) for el in o) + "]"
self.indentation_level += 1
output = [self.indent_str + self.encode(el) for el in o]
self.indentation_level -= 1
return "[\n" + ",\n".join(output) + "\n" + self.indent_str + "]"
def _encode_object(self, o):
if not o:
return "{}"
# ensure keys are converted to strings
o = {str(k) if k is not None else "null": v for k, v in o.items()}
if self.sort_keys:
o = dict(sorted(o.items(), key=lambda x: x[0]))
if self._put_on_single_line(o):
return (
"{ "
+ ", ".join(
f"{self.encode(k)}: {self.encode(el)}" for k, el in o.items()
)
+ " }"
)
self.indentation_level += 1
output = [
f"{self.indent_str}{self.encode(k)}: {self.encode(v)}" for k, v in o.items()
]
self.indentation_level -= 1
return "{\n" + ",\n".join(output) + "\n" + self.indent_str + "}"
def iterencode(self, o, **kwargs):
"""Required to also work with `json.dump`."""
return self.encode(o)
def _put_on_single_line(self, o):
return (
self._primitives_only(o)
and len(o) <= self.MAX_ITEMS
and len(str(o)) - 2 <= self.MAX_WIDTH
)
def _primitives_only(self, o: list | tuple | dict):
if isinstance(o, (list, tuple)):
return not any(isinstance(el, self.CONTAINER_TYPES) for el in o)
elif isinstance(o, dict):
return not any(isinstance(el, self.CONTAINER_TYPES) for el in o.values())
@property
def indent_str(self) -> str:
if isinstance(self.indent, int):
return " " * (self.indentation_level * self.indent)
elif isinstance(self.indent, str):
return self.indentation_level * self.indent
else:
raise ValueError(
f"indent must either be of type int or str (is: {type(self.indent)})"
)
if __name__ == "__main__":
    # Command-line use: pretty-print JSON read from stdin, or print a built-in
    # sample document when ``--example`` is passed.
    import sys

    if "--example" in sys.argv:
        data = {
            "compact_object": {"first": "element", "second": 2},
            "compact_list": ["first", "second"],
            "long_list": [
                "this",
                "is",
                "a",
                "rather",
                "long\nlist",
                "and should be broken up because of its width",
            ],
            "non_ascii_ๆฑ‰": "ๆฑ‰่ฏญ",
            1: 2,
        }
        json.dump(data, sys.stdout, cls=CompactJSONEncoder, ensure_ascii=False)
    else:
        # An explicit else-branch replaces the former ``exit()`` call: that
        # helper is injected by the ``site`` module and is not available when
        # Python runs with ``-S`` or in some embedded/frozen setups.
        json.dump(json.load(sys.stdin), sys.stdout, cls=CompactJSONEncoder)
@Royal724
Copy link

Royal724 commented Jan 11, 2024

@jannismain
Thank you for your response. I downloaded your module and copied it to the same folder where my script is, then made the changes I wrote below, but it didn't work. My JSON file stays as it is, with no compact format. I'm sorry, I'm making a mistake somewhere, but I can't figure out where, as I don't have much experience.
Also, a new folder named __pycache__ was created in my folder, and in it is the file CompactJSONEncoder.cpython-311.pyc

from CompactJSONEncoder import CompactJSONEncoder

json.dump(clubs, jsonFile, cls=CompactJSONEncoder)

@PenutChen
Copy link

@Royal724 Maybe the containers in your JSON file are too wide or have too many items; try setting MAX_ITEMS or MAX_WIDTH to a larger number.

@Royal724
Copy link

@PenutChen
Yes, that was the point, thank you.

@jannismain
Excellent work! Thank you so much!

@olin256
Copy link

olin256 commented Jan 25, 2024

There's a small bug: If the keys of a dict are integers or floats, they don't get converted to strings. Perhaps adding an inner str() might be a solution…

@jannismain
Copy link
Author

There's a small bug: If the keys of a dict are integers or floats, they don't get converted to strings. Perhaps adding an inner str() might be a solution…

You are right, I didn’t even think to treat the keys in any way... I will push a revision soon to address this! 👍

@jannismain
Copy link
Author

jannismain commented Jan 30, 2024

@olin256 I'm now converting keys to string to ensure the output produced is valid JSON.

@jannismain
Copy link
Author

@oesteban @PenutChen sort_keys=True is now supported 👍

@Xonxt
Copy link

Xonxt commented Sep 25, 2024

Little suggestion to also correctly process Numpy types:

    def encode(self, o):
        """Encode JSON object *o* with respect to single line lists.

        Variant that additionally converts NumPy scalars and arrays to
        built-in Python values before encoding them.
        """
        # NOTE(review): relies on a module-level ``np`` name (presumably
        # ``import numpy as np``) that is not shown in this snippet - confirm.
        if isinstance(o, (list, tuple)):
            return self._encode_list(o)
        elif isinstance(o, dict):
            return self._encode_object(o)
        # NOTE(review): the "g" presentation type keeps 6 significant digits
        # by default and may switch to exponent notation, so float precision
        # can be lost in the output.
        if isinstance(o, float):  # Use scientific notation for floats
            return format(o, "g") 
        elif isinstance(o, np.integer):  # process numpy integers
            return self.encode(int(o))
        elif isinstance(o, np.floating): # process numpy floats
            return self.encode(float(o))
        elif isinstance(o, np.ndarray): # flatten numpy arrays as lists
            return self._encode_list(o.tolist())

        # Everything else is delegated to json.dumps with this encoder's
        # own settings.
        return json.dumps(
            o,
            skipkeys=self.skipkeys,
            ensure_ascii=self.ensure_ascii,
            check_circular=self.check_circular,
            allow_nan=self.allow_nan,
            sort_keys=self.sort_keys,
            indent=self.indent,
            separators=(self.item_separator, self.key_separator),
            default=self.default if hasattr(self, "default") else None,
        )

Also, personally, I would remove the format(o, "g") part, as it sometimes results in converting floats into exponential notation.

@ed2050
Copy link

ed2050 commented Jan 25, 2025

Awesome. I was going to implement something like this myself, but thought I'd check for an existing implementation first. This saved me so much time, and it's quite a thorough job. Well done. 👍

@GrHalbgott
Copy link

@jannismain This saved me so much time, thanks a lot for the excellent work! 👍 And thanks to all you other guys for optimizing it ^^

@chrismaes87
Copy link

Another minor bugfix:

f"{self.encode(k)}: {self.encode(el)}" for k, el in o.items() otherwise the key encoding is not correct (I had a non-ascii character in there)

@senyai
Copy link

senyai commented Apr 23, 2025

Most json.dumps calls are missing at least ensure_ascii=self.ensure_ascii

@senyai
Copy link

senyai commented Apr 23, 2025

This JSON data won't be formatted consistently:

{
  "data1": [
    27.135222728033753,
    114.02096846633076,
    26.074562556253937,
    24.57196064623251
  ],
  "data2": [
    27.135222,
    114.02094,
    26.074565,
    24.571964
  ]
}

@nikita-k0v
Copy link

@senyai
Hi, to keep the number of digits after the decimal point, remove lines 32-33:

        if isinstance(o, float):  # Use scientific notation for floats
            return format(o, "g")

and to have both lists be printed on just one line, increase the value of MAX_WIDTH.

In this case, you will have the output:

{
  "data1": [27.135222728033753, 114.02096846633076, 26.074562556253937, 24.57196064623251],
  "data2": [27.135222, 114.02094, 26.074565, 24.571964]
}

@jannismain
Copy link
Author

Thanks @senyai @nikita-k0v and @chrismaes87, I have incorporated your suggestions.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment