Skip to content

Instantly share code, notes, and snippets.

@WalBeh
Last active April 1, 2025 12:53
Show Gist options
  • Select an option

  • Save WalBeh/6f8029e772d7656bf03a3d800c40e2e9 to your computer and use it in GitHub Desktop.

Select an option

Save WalBeh/6f8029e772d7656bf03a3d800c40e2e9 to your computer and use it in GitHub Desktop.
import json
import re
# Load the JSON data
with open('cratedb_settings.json', 'r') as f:
settings = json.load(f)
# Print header
print(f"SQL Statements for Runtime Configurable CrateDB Settings")
print("=" * 60)
# Count statements
statement_count = 0
# Print all statements with comments and semicolons
for setting_key, setting_info in settings.items():
if "stmt" in setting_info:
# Get the basic statement and change SET CLUSTER to SET GLOBAL PERSISTENT
stmt = setting_info['stmt'].replace("SET CLUSTER", "SET GLOBAL PERSISTENT")
# Add semicolon
if not stmt.endswith(';'):
stmt += ';'
# Try to find configuration options for commenting
config_options = ""
# Check constraints field
if "constraints" in setting_info and setting_info["constraints"]:
config_options = setting_info["constraints"]
# If no explicit constraints, check raw_description for allowed values
elif "raw_description" in setting_info:
# Look for patterns like "Allowed values: option1 | option2 | option3"
allowed_match = re.search(r'allowed values:?\s*([a-zA-Z0-9_\-]+((\s*\|\s*)[a-zA-Z0-9_\-]+)+)',
setting_info["raw_description"],
re.IGNORECASE)
if allowed_match:
options_list = allowed_match.group(1).strip()
config_options = f"Allowed values: {options_list}"
# Add the comment if we found config options
if config_options:
stmt = f"{stmt} -- {config_options}"
print(stmt)
statement_count += 1
print(f"\nTotal statements: {statement_count}")
❯ uv run ./print_sql.py
SQL Statements for Runtime Configurable CrateDB Settings
============================================================
SET GLOBAL PERSISTENT "stats.enabled" = 'true';
SET GLOBAL PERSISTENT "stats.jobs_log_size" = 10000; -- maximum number of job records kept to be kept in the sys.jobs_log table on each node.
SET GLOBAL PERSISTENT "stats.jobs_log_expiration" = '0s (disabled)';
SET GLOBAL PERSISTENT "stats.jobs_log_filter" = 'true (Include everything)'; -- mine if a job should be
SET GLOBAL PERSISTENT "stats.jobs_log_persistent_filter" = 'false (Include nothing)'; -- mine if a job should also be recorded to the regular
SET GLOBAL PERSISTENT "stats.operations_log_size" = 10000; -- maximum number of operations records to be kept in the
SET GLOBAL PERSISTENT "stats.operations_log_expiration" = '0s (disabled)';
SET GLOBAL PERSISTENT "stats.service.interval" = '24h';
SET GLOBAL PERSISTENT "stats.service.max_bytes_per_sec" = '40mb'; -- maximum number of bytes per second that can be read on data
SET GLOBAL PERSISTENT "cluster.max_shards_per_node" = 1000; -- maximum number of open primary and replica shards per node. This setting
SET GLOBAL PERSISTENT "cluster.graceful_stop.min_availability" = 'primaries'; -- minimum data availability is required. The node may shut down
SET GLOBAL PERSISTENT "cluster.graceful_stop.timeout" = '2h'; -- maximum waiting time in milliseconds for the reallocation process to finish. The force setting will
SET GLOBAL PERSISTENT "cluster.graceful_stop.force" = 'false';
SET GLOBAL PERSISTENT "bulk.request_timeout" = '1m';
SET GLOBAL PERSISTENT "cluster.routing.allocation.enable" = 'all'; -- Allowed values: all | none | primaries | new_primaries
SET GLOBAL PERSISTENT "cluster.routing.rebalance.enable" = 'all'; -- Allowed values: all | none | primaries | replicas
SET GLOBAL PERSISTENT "cluster.routing.allocation.allow_rebalance" = 'indices_all_active'; -- Allowed values: always | indices_primary_active | indices_all_active
SET GLOBAL PERSISTENT "cluster.routing.allocation.cluster_concurrent_rebalance" = 2;
SET GLOBAL PERSISTENT "cluster.routing.allocation.node_initial_primaries_recoveries" = 4;
SET GLOBAL PERSISTENT "cluster.routing.allocation.node_concurrent_recoveries" = 2;
SET GLOBAL PERSISTENT "cluster.routing.allocation.balance.shard" = 0;
SET GLOBAL PERSISTENT "cluster.routing.allocation.balance.index" = 0;
SET GLOBAL PERSISTENT "cluster.routing.allocation.balance.threshold" = 1; -- Minimal optimization value of operations that should be performed (non
SET GLOBAL PERSISTENT "cluster.routing.allocation.include.*" = '';
SET GLOBAL PERSISTENT "cluster.routing.allocation.exclude.*" = '';
SET GLOBAL PERSISTENT "cluster.routing.allocation.require.*" = '';
SET GLOBAL PERSISTENT "cluster.routing.allocation.disk.threshold_enabled" = 'true';
SET GLOBAL PERSISTENT "cluster.routing.allocation.disk.watermark.low" = '85%';
SET GLOBAL PERSISTENT "cluster.routing.allocation.disk.watermark.high" = '90%';
SET GLOBAL PERSISTENT "cluster.routing.allocation.disk.watermark.flood_stage" = '95%';
SET GLOBAL PERSISTENT "cluster.routing.allocation.total_shards_per_node" = -1;
SET GLOBAL PERSISTENT "indices.recovery.max_bytes_per_sec" = '40mb'; -- maximum number of bytes that can be transferred during
SET GLOBAL PERSISTENT "indices.recovery.retry_delay_state_sync" = '500ms';
SET GLOBAL PERSISTENT "indices.recovery.retry_delay_network" = '5s';
SET GLOBAL PERSISTENT "indices.recovery.internal_action_timeout" = '15m';
SET GLOBAL PERSISTENT "indices.recovery.internal_action_long_timeout" = '30m';
SET GLOBAL PERSISTENT "indices.recovery.recovery_activity_timeout" = '30m';
SET GLOBAL PERSISTENT "indices.recovery.max_concurrent_file_chunks" = 2; -- max_bytes_per_sec, but is CPU-bound instead, typically
SET GLOBAL PERSISTENT "memory.allocation.type" = 'on-heap';
SET GLOBAL PERSISTENT "memory.operation_limit" = 0;
SET GLOBAL PERSISTENT "indices.breaker.query.limit" = '60%';
SET GLOBAL PERSISTENT "indices.breaker.request.limit" = '60%';
SET GLOBAL PERSISTENT "indices.breaker.accounting.limit" = '100%';
SET GLOBAL PERSISTENT "stats.breaker.log.jobs.limit" = '5%'; -- maximum memory that can be used from CRATE_HEAP_SIZE for the sys.jobs_log table on each
SET GLOBAL PERSISTENT "stats.breaker.log.operations.limit" = '5%'; -- maximum memory that can be used from CRATE_HEAP_SIZE for the sys.operations_log table on
SET GLOBAL PERSISTENT "indices.breaker.total.limit" = '95%'; -- maximum memory that can be used by all aforementioned circuit breakers
SET GLOBAL PERSISTENT "overload_protection.dml.initial_concurrency" = 5;
SET GLOBAL PERSISTENT "overload_protection.dml.min_concurrency" = 1;
SET GLOBAL PERSISTENT "overload_protection.dml.max_concurrency" = 100;
SET GLOBAL PERSISTENT "overload_protection.dml.queue_size" = 25;
SET GLOBAL PERSISTENT "cluster.info.update.interval" = '30s';
SET GLOBAL PERSISTENT "replication.logical.ops_batch_size" = 50000; -- Min value: 16
SET GLOBAL PERSISTENT "replication.logical.reads_poll_duration" = 50;
SET GLOBAL PERSISTENT "replication.logical.recovery.chunk_size" = '1MB'; -- Min value: 1KB
SET GLOBAL PERSISTENT "replication.logical.recovery.max_concurrent_file_chunks" = 2; -- Min value: 1
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment