Skip to content

Instantly share code, notes, and snippets.

@hughdbrown
Created August 10, 2025 19:18
Show Gist options
  • Select an option

  • Save hughdbrown/3b952006ae434964e007c1a30ff0031e to your computer and use it in GitHub Desktop.

Select an option

Save hughdbrown/3b952006ae434964e007c1a30ff0031e to your computer and use it in GitHub Desktop.
Slide deck in Markdown about JSON in Python
theme: default
title: JSON Mastery for Python Developers
class: text-center
highlighter: shiki
drawings:
  enabled: true
transition: slide-left
mdc: true

JSON Mastery for Python Developers

Advanced Techniques, Performance, and Best Practices

Press Space for next page

layout: center

What is JSON?

JavaScript Object Notation - A lightweight, text-based data interchange format

# Serializing an ordinary Python dict with the standard-library json module
import json

data = dict(
    name="Alice",
    age=30,
    skills=["Python", "JavaScript", "SQL"],
    active=True,
    profile=None,
)

# Python's True / None are written as JSON true / null
json_string = json.dumps(data)
print(json_string)
# {"name": "Alice", "age": 30, "skills": ["Python", "JavaScript", "SQL"], "active": true, "profile": null}
JSON bridges the gap between Python objects and web APIs, configuration files, and data storage

JSON ↔ Python Data Types

Understanding the type mapping is crucial for intermediate developers

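| JSON Type  | Python Type    | Notes                                               |
|------------|----------------|-----------------------------------------------------|
| object     | dict           | Key-value pairs                                     |
| array      | list           | Ordered sequences                                   |
| string     | str            | Unicode text (JSON files are normally UTF-8 encoded) |
| number     | int or float   | Automatic detection                                 |
| true/false | bool           | Boolean values                                      |
| null       | None           | Null/empty value                                    |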
# Decoding: JSON text -> Python objects
json_data = '{"count": 42, "items": ["a", "b"], "valid": true, "meta": null}'
python_obj = json.loads(json_data)
# python_obj == {'count': 42, 'items': ['a', 'b'], 'valid': True, 'meta': None}

# Encoding: Python objects -> JSON text
python_data = dict(temperature=23.5, readings=[1, 2, 3])
json_string = json.dumps(python_data)
# json_string == '{"temperature": 23.5, "readings": [1, 2, 3]}'

# Gotcha: JSON has no tuple type, so a tuple is written out as an array
json.dumps(dict(coords=(10, 20)))  # {"coords": [10, 20]}

Core JSON Operations

The four essential functions every Python developer should master

import json

# loads(): JSON text in, Python object out
json_str = '{"name": "Bob", "score": 95}'
data = json.loads(json_str)
print(data["name"])  # Bob

# dumps(): Python object in, JSON text out
python_obj = dict(users=["Alice", "Bob"], count=2)
json_output = json.dumps(python_obj, indent=2)
# indent=2 spreads the output over several lines, two spaces per nesting level
print(json_output)

# load() - Read JSON from file.
# The encoding is stated explicitly: without it open() falls back to the
# platform default, which is not UTF-8 on every system and would corrupt
# non-ASCII text in the file.
with open("config.json", "r", encoding="utf-8") as file:
    config = json.load(file)

# Use the loaded configuration (.get() yields None when the key is absent)
database_url = config.get("database_url")

# dump() - Write JSON to file, again explicitly as UTF-8
data = {"timestamp": "2024-01-15", "processed": True}
with open("result.json", "w", encoding="utf-8") as file:
    json.dump(data, file, indent=2)

Custom JSON Encoding

Making non-serializable objects JSON-friendly

import json
from datetime import datetime
from decimal import Decimal

# Problem: These objects aren't JSON serializable by default
# (the sample below mixes a datetime and a Decimal to demonstrate it;
# the two solutions that follow convert exactly these two types)
data = {
    "timestamp": datetime.now(),
    "amount": Decimal("19.99")
}

# Solution 1: Custom JSONEncoder class
class CustomEncoder(json.JSONEncoder):
    """JSONEncoder subclass that additionally accepts datetime and Decimal values."""

    def default(self, obj):
        """Return a JSON-ready stand-in for *obj*.

        Decimal values become floats and datetime values become ISO 8601
        strings; every other type is handed to the base class.
        """
        if isinstance(obj, Decimal):
            return float(obj)
        if isinstance(obj, datetime):
            return obj.isoformat()
        # Unknown type: defer to JSONEncoder.default()
        return super().default(obj)

# Solution 2: Using default parameter
def json_serializer(obj):
    """Fallback for ``json.dumps(..., default=json_serializer)``.

    Args:
        obj: A value the standard encoder could not serialize.

    Returns:
        An ISO 8601 string for a datetime, or a float for a Decimal.

    Raises:
        TypeError: For any other type. The message uses the bare type name
            (``set``, not ``<class 'set'>``) so it reads the same as the
            error the standard library encoder raises.
    """
    if isinstance(obj, datetime):
        return obj.isoformat()
    if isinstance(obj, Decimal):
        return float(obj)
    raise TypeError(f"Object of type {type(obj).__name__} is not JSON serializable")

# Usage: the same dict is serialized twice, once per approach.
# cls= plugs in the encoder subclass; default= plugs in the fallback function.
# The returned strings are discarded here.
json.dumps(data, cls=CustomEncoder)
json.dumps(data, default=json_serializer)

Custom JSON Decoding

Converting JSON back to complex Python objects

import json
from datetime import datetime
from dataclasses import dataclass

@dataclass
class User:
    """A user record rebuilt from decoded JSON."""

    name: str
    email: str
    created_at: datetime

    def __repr__(self):
        # Short form: only name and email are shown
        return f"User({self.name}, {self.email})"


# Custom object hook for decoding
def user_decoder(dct):
    """Turn a decoded JSON object into a User when it has the right shape.

    Intended for ``json.loads(..., object_hook=user_decoder)``, which calls
    the hook for every JSON object in the document, nested ones included.

    Args:
        dct: One decoded JSON object.

    Returns:
        A User when ``name``, ``email`` and ``created_at`` are all present;
        otherwise *dct* unchanged.
    """
    # All three keys are required. Checking only email/created_at would make
    # an unrelated object that lacks "name" abort the whole parse with KeyError.
    if not {'name', 'email', 'created_at'} <= dct.keys():
        return dct
    return User(
        name=dct['name'],
        email=dct['email'],
        created_at=datetime.fromisoformat(dct['created_at']),
    )

# JSON string representing a user
# (created_at is an ISO 8601 timestamp, the format datetime.fromisoformat reads)
user_json = '''
{
    "name": "Alice Johnson", 
    "email": "alice@example.com",
    "created_at": "2024-01-15T10:30:00"
}
'''

# object_hook passes each decoded JSON object through user_decoder
user_obj = json.loads(user_json, object_hook=user_decoder)
print(user_obj)  # User(Alice Johnson, alice@example.com)

Working with Complex Objects

Dataclasses, UUIDs, and datetime objects

import json
from dataclasses import dataclass, asdict
from datetime import datetime
from uuid import UUID, uuid4

@dataclass
class Product:
    """Catalog item that converts to and from JSON-friendly data."""

    id: UUID
    name: str
    price: float
    created_at: datetime

    def to_json(self):
        """Return this product as a dict whose id and created_at are strings.

        The result is a dict ready for ``json.dumps``, not JSON text.
        """
        payload = asdict(self)
        # Swap the two values json cannot encode for their string forms
        payload.update(
            id=str(payload['id']),
            created_at=payload['created_at'].isoformat(),
        )
        return payload

    @classmethod
    def from_json(cls, json_str):
        """Build a Product from JSON text shaped like ``to_json()`` output."""
        fields = json.loads(json_str)
        product_id = UUID(fields['id'])
        name, price = fields['name'], fields['price']
        created = datetime.fromisoformat(fields['created_at'])
        return cls(id=product_id, name=name, price=price, created_at=created)

# Usage: build a product with a random id and the current time
product = Product(
    id=uuid4(),
    name="Python Book",
    price=29.99,
    created_at=datetime.now()
)

# to_json() returns a dict, so json.dumps() is still needed to get JSON text;
# from_json() takes that text and rebuilds a Product from it
json_data = json.dumps(product.to_json(), indent=2)
restored_product = Product.from_json(json_data)

layout: center class: text-center

Performance Matters

Standard json vs High-Performance Alternatives

When processing large datasets or building high-throughput APIs,
JSON performance can become a bottleneck.

Let's explore faster alternatives!


Performance Comparison

Benchmarking popular JSON libraries for Python

# Performance results from comprehensive benchmarks
# Processing a 2.2MB JSON file (Canada GeoJSON coordinates)

# Maps library name -> its measured time, its speed relative to the
# standard json module (the "1x" row), and a short note.
libraries = {
    "json":       {"time": "26.06ms", "speed": "1x",    "notes": "Python standard library"},
    "ujson":      {"time": "16.55ms", "speed": "1.6x",  "notes": "Ultra fast, C-based"},
    "rapidjson":  {"time": "29.26ms", "speed": "0.9x",  "notes": "Slower than standard!"},
    "orjson":     {"time": "9.69ms",  "speed": "2.7x",  "notes": "Rust-powered champion"}
}
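| Library   | Load Time | Speed vs json   | Installation                 |
|-----------|-----------|-----------------|------------------------------|
| json      | 26.06ms   | 1.0x (baseline) | Built-in                     |
| ujson     | 16.55ms   | 1.6x faster     | pip install ujson            |
| rapidjson | 29.26ms   | 0.9x (slower!)  | pip install python-rapidjson |
| orjson    | 9.69ms    | 2.7x faster     | pip install orjson           |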
Benchmarks based on real-world datasets. Results may vary by data structure and size.

Meet orjson

The fastest JSON library for Python, written in Rust

import orjson
from datetime import datetime
import uuid

# orjson advantages:
# ✅ 2-6x faster than standard json
# ✅ Built-in support for datetime, UUID, dataclasses
# ✅ Always returns bytes (explicit encoding)

# Basic usage
data = {"name": "Alice", "timestamp": datetime.now()}

# Encoding (note: returns bytes!)
json_bytes = orjson.dumps(data)
json_string = json_bytes.decode('utf-8')  # decode only where a str is needed

# Decoding: loads() is given the bytes here...
parsed_data = orjson.loads(json_bytes)
# ...or the decoded str here
parsed_data = orjson.loads(json_string)

# Advanced features - automatic handling of complex types
complex_data = {
    "id": uuid.uuid4(),
    "created": datetime.now(),
    "coordinates": (40.7128, -74.0060),  # Tuple is written out as a JSON array
    "active": True
}

# option=OPT_INDENT_2 asks for indented output (2 spaces, going by the option name);
# the result is bytes, hence the decode() before printing
result = orjson.dumps(complex_data, option=orjson.OPT_INDENT_2)
print(result.decode())

Error Handling & Validation

Robust JSON processing for production code

import json
from typing import Dict, Any, Optional

def safe_json_loads(json_str: str) -> Optional[Dict[Any, Any]]:
    """Parse *json_str* and return the decoded JSON object, or None on failure.

    Failures are reported with ``print`` instead of being raised, so callers
    only need to test the result against None.

    Args:
        json_str: JSON text to decode.

    Returns:
        The decoded dict, or None when the text is malformed, cannot be
        processed at all, or decodes to something other than a JSON object
        (a list, number, string, ...).
    """
    try:
        parsed = json.loads(json_str)
    except json.JSONDecodeError as e:
        print(f"JSON parsing error at line {e.lineno}, column {e.colno}: {e.msg}")
        return None
    except Exception as e:
        # Deliberate best-effort boundary: anything else (e.g. a non-string
        # argument) is reported the same way rather than propagated.
        print(f"Unexpected error: {e}")
        return None

    # Valid JSON is not necessarily an object; honour the declared return
    # type so dict-assuming callers never receive a list or scalar.
    if not isinstance(parsed, dict):
        print(f"Expected a JSON object at the top level, got {type(parsed).__name__}")
        return None
    return parsed

# Common validation patterns
def validate_user_data(data: dict) -> bool:
    """Check that *data* has a usable name and email.

    Args:
        data: Decoded user record.

    Returns:
        True when ``name`` and ``email`` are both present and truthy and the
        email is a string containing ``@``; otherwise False, after printing
        the reason.
    """
    required_fields = ['name', 'email']

    for field in required_fields:
        # .get() covers both a missing key and an empty/None value
        if not data.get(field):
            print(f"Missing required field: {field}")
            return False

    # Email validation (basic). The type check comes first: JSON can deliver
    # a number or list here, where the "in" test would raise TypeError (int)
    # or wrongly pass (a list containing "@").
    email = data['email']
    if not isinstance(email, str) or '@' not in email:
        print("Invalid email format")
        return False

    return True

# Usage example
user_json = '{"name": "Bob", "email": "bob@example.com", "age": 25}'
user_data = safe_json_loads(user_json)

# The truthiness test short-circuits: validation only runs when parsing
# produced a non-empty result (None and {} both take the else branch)
if user_data and validate_user_data(user_data):
    print("Valid user data received")
    # Process the user data
else:
    print("Invalid or malformed user data")
    # Handle error appropriately

Best Practices & Security

Production-ready JSON handling guidelines

🔒 Security

import json

# ❌ Never do this - security risk!
# eval(json_string)

# ✅ Validate input size BEFORE parsing, so an oversized payload is rejected
# without the cost of decoding it first.
# NOTE: for a str, len() counts characters rather than encoded bytes.
MAX_JSON_SIZE = 1024 * 1024  # 1MB
if len(json_string) > MAX_JSON_SIZE:
    raise ValueError("JSON too large")

# ✅ Always use json.loads()
# ✅ Strict parsing: strict=True is already the default and rejects raw
# control characters inside strings; it is spelled out here for emphasis.
data = json.loads(json_string, strict=True)

🎯 Performance Tips

# ✅ Reuse encoder instances
# One JSONEncoder is configured once (compact separators: no space after
# ',' or ':') and its encode() method can then be called repeatedly.
encoder = json.JSONEncoder(separators=(',', ':'))
result = encoder.encode(data)

# ✅ Use appropriate alternatives
# (both are third-party packages and must be installed separately)
import orjson  # For speed
import ujson   # For compatibility + speed

# ✅ Minimize indent in production
json.dumps(data)  # No indent = smaller size

📏 Data Handling

# ✅ Handle large datasets efficiently
def process_large_json(file_path):
    """Lazily yield the records of a JSON Lines file, one per line.

    The file is read line by line, so only the current line is held in
    memory. Each non-blank line must be a complete JSON document; a single
    multi-line JSON file is not supported by this approach.

    Args:
        file_path: Path of the UTF-8 encoded JSON Lines file.

    Yields:
        The decoded value of each well-formed line, in file order. Blank
        lines and lines that are not valid JSON are skipped silently.
    """
    # Explicit encoding: the platform default is not UTF-8 on every system.
    with open(file_path, 'r', encoding='utf-8') as f:
        for line in f:
            if not line.strip():  # Skip empty lines
                continue
            try:
                record = json.loads(line)
            except json.JSONDecodeError:
                continue  # Skip malformed lines (deliberate best-effort)
            # Yield outside the try so only the parse itself is guarded
            yield record

# ✅ Use appropriate data types
# Decimal is converted to float explicitly before it reaches json.dumps()
from decimal import Decimal
json.dumps({"price": float(Decimal("19.99"))})

# ✅ Handle encoding properly
# ensure_ascii=False keeps non-ASCII characters as-is instead of \uXXXX escapes
json.dumps(data, ensure_ascii=False)  # For Unicode

🛠 Migration Strategy

# Gradual migration to orjson: expose one dumps()/loads() pair whose dumps()
# returns str whichever backend is installed.
try:
    import orjson
except ImportError:
    # orjson is not installed: fall back to the standard library
    import json

    dumps = json.dumps
    loads = json.loads
else:
    def dumps(x):
        """Serialize *x* to a JSON str (orjson.dumps itself returns bytes)."""
        return orjson.dumps(x).decode()

    loads = orjson.loads

layout: center class: text-center

Practical Example

Building a Configuration Manager with JSON

import json
import orjson
from pathlib import Path
from datetime import datetime
from typing import Dict, Any, Optional

class ConfigManager:
    """Key/value settings store persisted to a JSON file.

    The file is read once on construction and rewritten after every
    ``set()``. Read, write and parse failures are reported with ``print``
    and never propagate to the caller.
    """

    def __init__(self, config_file: str = "config.json", use_orjson: bool = True):
        """Load the configuration, creating the file when it does not exist.

        Args:
            config_file: Path of the JSON file backing this manager.
            use_orjson: Serialize with orjson when True, otherwise with the
                standard json module.
        """
        self.config_file = Path(config_file)
        self.use_orjson = use_orjson
        self._config: Dict[str, Any] = {}
        self.load_config()

    def _get_default_config(self) -> Dict[str, Any]:
        """Return the configuration used when no usable file exists.

        A fresh dict is returned on every call so it can be mutated safely.
        """
        return {}

    def load_config(self) -> None:
        """Load configuration from JSON file.

        A missing file is created from the defaults. An unreadable file, or
        one that does not hold a JSON object, is reported and replaced in
        memory by the defaults; the file itself is left untouched.
        """
        if not self.config_file.exists():
            self._config = self._get_default_config()
            self.save_config()
            return

        try:
            if self.use_orjson:
                loaded = orjson.loads(self.config_file.read_bytes())
            else:
                with open(self.config_file, 'r', encoding='utf-8') as f:
                    loaded = json.load(f)
        except Exception as e:
            # Best-effort boundary: a bad file must not stop start-up
            print(f"Error loading config: {e}")
            self._config = self._get_default_config()
            return

        # get()/set() need a mapping; a top-level list or scalar is unusable
        if isinstance(loaded, dict):
            self._config = loaded
        else:
            print(f"Error loading config: expected a JSON object, got {type(loaded).__name__}")
            self._config = self._get_default_config()

    def save_config(self) -> None:
        """Save current configuration to file.

        The data is serialized completely before the file is opened, so a
        value that cannot be serialized does not truncate the existing file.
        """
        try:
            if self.use_orjson:
                payload = orjson.dumps(self._config, option=orjson.OPT_INDENT_2)
                self.config_file.write_bytes(payload)
            else:
                text = json.dumps(self._config, indent=2)
                self.config_file.write_text(text, encoding='utf-8')
        except Exception as e:
            print(f"Error saving config: {e}")

    def get(self, key: str, default: Any = None) -> Any:
        """Return the value stored under *key*, or *default* when absent."""
        return self._config.get(key, default)

    def set(self, key: str, value: Any) -> None:
        """Store *value* under *key*, stamp ``last_modified`` and save."""
        self._config[key] = value
        self._config['last_modified'] = datetime.now().isoformat()
        self.save_config()

# Usage: with no arguments the manager works on "config.json" and uses orjson
config = ConfigManager()
# set() stores the value, stamps last_modified and saves the file
config.set('database_url', 'postgresql://localhost:5432/mydb')
db_url = config.get('database_url')

layout: end class: text-center

Thank You!

JSON Mastery Achieved 🎉

Key Takeaways

  • Use orjson for performance-critical applications
  • Always handle JSON errors gracefully
  • Validate input data consistently
  • Consider memory usage with large datasets

Quick Reference

  • json.loads() / json.dumps()
  • Custom encoders for complex objects
  • orjson for 2-6x speed improvement
  • Security: never use eval()

Next Steps

  • Profile your JSON usage
  • Migrate high-traffic endpoints to orjson
  • Implement robust error handling
  • Consider schema validation libraries
Questions? Let's discuss JSON optimization strategies!
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment