@eddyg · Last active February 9, 2025 16:34
Simple Python script to analyze AdGuard Home's `querylog.json` file(s)
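
Usage: save the script (the name `analyze_querylog.py` below is just an example), make it executable, and run it from the directory containing the query log files (AdGuard Home's working/data directory; the exact location depends on your install):

    chmod +x analyze_querylog.py
    ./analyze_querylog.py

The `env -S` shebang hands the script to `uv`, so `uv run analyze_querylog.py` works too, without the `chmod`.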
#!/usr/bin/env -S uv --quiet run --script
# /// script
# requires-python = ">=3.8"
# dependencies = ["pandas"]
# ///
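# The block above is PEP 723 inline script metadata: when launched via the
# `uv run --script` shebang, uv reads it and provides pandas automatically.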
import pandas as pd
import json
from pathlib import Path

def process_log(filename):
    data = []
    count = 0
    try:
        with open(filename, 'r') as f:
            for line in f:
                count += 1
                if count % 100000 == 0:
                    print(f"\rProcessing queries in {filename}: {count:,}", end='', flush=True)
                try:
                    entry = json.loads(line)
                    # Keep only uncached, unfiltered queries: an empty Result means no
                    # filtering rule matched, and a missing Cached flag means the answer
                    # came from an upstream rather than AdGuard Home's cache.
                    if entry.get('Result') == {} and entry.get('Cached') is None:
                        if (elapsed := entry.get('Elapsed')) is not None:
                            data.append({
                                'elapsed_ms': elapsed / 1_000_000,  # Elapsed is logged in nanoseconds
                                'upstream': entry.get('Upstream'),
                                'query_type': entry.get('QT')
                            })
                except json.JSONDecodeError:
                    continue
        print(f"\rQueries processed in {filename}: {count:,}")
        print(f"\rUncached, unfiltered queries included from {filename}: {len(data):,}")
        return pd.DataFrame(data)
    except FileNotFoundError:
        print(f"File not found: {filename}")
        return pd.DataFrame()

# querylog.json.1 is the rotated (older) log, so read it first
log_files = ['querylog.json.1', 'querylog.json']
dataframes = []
for log_file in log_files:
    if Path(log_file).exists():
        df = process_log(log_file)
        if not df.empty:
            dataframes.append(df)

if dataframes:
    df = pd.concat(dataframes, ignore_index=True)
else:
    print("No valid log files found to analyze")
    exit(1)

response_times = df['elapsed_ms']
stats = {
    'mean': response_times.mean(),
    'median': response_times.median(),
    'std_dev': response_times.std(),
    'p95': response_times.quantile(0.95),
    'p99': response_times.quantile(0.99),
    'max': response_times.max(),
    'min': response_times.min()
}

# Find outliers using Tukey's 1.5×IQR method
Q1 = response_times.quantile(0.25)
Q3 = response_times.quantile(0.75)
IQR = Q3 - Q1
outliers = response_times[(response_times < (Q1 - 1.5 * IQR)) | (response_times > (Q3 + 1.5 * IQR))]
print("\nDNS Query Response Time Statistics (in milliseconds):")
for stat, value in stats.items():
print(f"{stat.replace('_', ' ').title():15}: {value:10.2f} ms")
print(f"\nNumber of outliers detected: {len(outliers)}")
percentiles = [25, 50, 75, 90, 95, 99]
print("\nOutlier percentiles:")
for p in percentiles:
print(f"P{p:2}: {outliers.quantile(p/100):10.2f} ms")

ranges = [(0, 100), (100, 500), (500, 1000), (1000, 5000), (5000, float('inf'))]
print("\nResponse time ranges:")
for start, end in ranges:
    count = len(response_times[(response_times >= start) & (response_times < end)])
    percentage = (count / len(response_times)) * 100
    print(f"{start:5.0f} - {end:5.0f} ms: {count:10,d} queries ({percentage:5.1f}%)")
print("\nResponse Times by Upstream Server:")
upstream_stats = df.groupby('upstream')['elapsed_ms'].agg([
'count',
'mean',
'median',
'std',
lambda x: x.quantile(0.95),
'max'
]).round(2)
upstream_stats.columns = ['Count', 'Mean (ms)', 'Median (ms)', 'Std (ms)', 'P95 (ms)', 'Max (ms)']
print(upstream_stats.sort_values('Count', ascending=False))
print("\nResponse Times by Query Type:")
qt_stats = df.groupby('query_type')['elapsed_ms'].agg([
'count',
'mean',
'median',
'std',
lambda x: x.quantile(0.95),
'max'
]).round(2)
qt_stats.columns = ['Count', 'Mean (ms)', 'Median (ms)', 'Std (ms)', 'P95 (ms)', 'Max (ms)']
print(qt_stats.sort_values('Count', ascending=False))
print("\nSlowest Upstreams (by median response time):")
print(upstream_stats.sort_values('Median (ms)', ascending=False).head())
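
If your logs live outside the current directory, one small variation (a sketch: the command-line handling below is an addition, not part of the original script) is to accept paths as arguments instead of hard-coding `log_files`:

    import sys

    # Use any paths given on the command line, falling back to the defaults
    log_files = sys.argv[1:] or ['querylog.json.1', 'querylog.json']

Invoked as, e.g., `./analyze_querylog.py /path/to/querylog.json.1 /path/to/querylog.json`.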