Skip to content

Instantly share code, notes, and snippets.

@camsaul
Last active July 2, 2025 18:04
Show Gist options
  • Save camsaul/38b11fc04d72e023fcf95646221d5caa to your computer and use it in GitHub Desktop.
Save camsaul/38b11fc04d72e023fcf95646221d5caa to your computer and use it in GitHub Desktop.
Map normalization profiling
(ns metabase.lib.schema.profile
(:require
[clojure.pprint]
[clojure.set]
[criterium.core]
[metabase.util :as u]
[metabase.util.malli.registry :as mr]
[metabase.util.memoize :as u.memo]))
(defn normalize-map-no-kebab-case
  "Part of [[normalize-map]]: keywordizes the keys of `m` and a string `:lib/type` value, but DOES NOT convert keys
  to `kebab-case`. Input that needs neither change is returned as-is."
  [m]
  ;; [[update-keys]] always builds a brand-new map, even when nothing changed, so only call it when at least one key
  ;; is actually a string. Already-normalized maps then pass straight through without creating garbage.
  (let [has-string-key? (and (map? m)
                             (some string? (keys m)))
        keywordized     (if has-string-key?
                          (update-keys m keyword)
                          m)]
    (if (string? (:lib/type keywordized))
      (update keywordized :lib/type keyword)
      keywordized)))
(def ^:private ^{:arglists '([k])} memoized-kebab-key
  "Memoized function that turns `k` into a keyword and then kebab-cases it with [[u/->kebab-case-en]].

  Computing the kebab-case version of a key on every call is pretty slow (even with the LRU caching
  [[u/->kebab-case-en]] has); since the keys seen here are static and finite we can simply memoize them forever for
  a nice performance boost."
  (u.memo/fast-memo (fn [k]
                      (u/->kebab-case-en (keyword k)))))
(defn kebab-cased-key?
  "True when `k` is a keyword that is already kebab-cased, i.e. kebab-casing it yields the same keyword (as opposed
  to a snake_cased or camelCased one). Anything that is not a keyword returns false."
  [k]
  (if (keyword? k)
    (= k (memoized-kebab-key k))
    false))
(defn kebab-cased-map?
  "True when `m` is a map and every one of its keys satisfies [[kebab-cased-key?]]; false for non-maps."
  [m]
  (if-not (map? m)
    false
    (every? kebab-cased-key? (keys m))))
;; Schema for a map whose keys are all kebab-cased keywords (see `kebab-cased-map?`). When the offending value is a
;; map, the error message lists the keys that are not kebab-cased.
(mr/def ::kebab-cased-map
  [:fn
   {:error/message "map with all kebab-cased keys"
    :error/fn      (fn [{bad-value :value} _]
                     (if (map? bad-value)
                       (let [offending-keys (remove kebab-cased-key? (keys bad-value))]
                         (str "map with all kebab-cased keys, got: " (pr-str offending-keys)))
                       "map with all kebab-cased keys"))}
   kebab-cased-map?])
(defn map->kebab-case
  "For use with `:decode/normalize`: returns `m` with every key run through `memoized-kebab-key`. Returns `nil` when
  `m` is not a map."
  [m]
  (if-not (map? m)
    nil
    (update-keys m memoized-kebab-key)))
(defn normalize-map
  "Base normalization behavior for a pMBQL map: first keywordize the keys and a string `:lib/type` value
  ([[normalize-map-no-kebab-case]]), then convert the keys to kebab-case ([[map->kebab-case]])."
  [m]
  (map->kebab-case (normalize-map-no-kebab-case m)))
(defn into-empty
  "Benchmark candidate: rebuilds `m` from scratch by pouring its entries into `(empty m)` through a `map` transducer
  that passes each key through `memoized-kebab-key`. Values are left alone."
  [m]
  (let [rekey-entry (fn [[old-key value]]
                      [(memoized-kebab-key old-key) value])]
    (into (empty m) (map rekey-entry) m)))
(defn into-empty-with-kebab-check
  "Benchmark candidate: like [[into-empty]], but returns `m` itself when [[kebab-cased-map?]] says every key is
  already kebab-cased, so nothing is rebuilt in that case."
  [m]
  (cond-> m
    (not (kebab-cased-map? m)) into-empty))
(defn normalize-map-with-kebab-check
  "Benchmark candidate: like [[normalize-map]], but returns `m` itself when [[kebab-cased-map?]] says every key is
  already kebab-cased. Note that in that case `m` is returned completely untouched, including its `:lib/type`
  value."
  [m]
  (if-not (kebab-cased-map? m)
    (normalize-map m)
    m))
(defn transduce-transient
  "Benchmark candidate: kebab-cases the keys of `m` by editing a transient version of it. Walks the keys of the
  original map and, only for keys whose kebab-cased form differs, removes the old key and stores its value under the
  new key. Returns the persistent result."
  [m]
  (let [rekey! (fn
                 ;; completion arity: freeze the transient back into a persistent map
                 ([acc]
                  (persistent! acc))
                 ;; step arity: rename one key if (and only if) it is not already kebab-cased
                 ([acc old-key]
                  (let [new-key (memoized-kebab-key old-key)]
                    (if (not= old-key new-key)
                      (let [value (get acc old-key)]
                        (assoc! (dissoc! acc old-key) new-key value))
                      acc))))]
    (transduce identity rekey! (transient m) (keys m))))
(defn transduce-transient-with-kebab-check
  "Benchmark candidate: like [[transduce-transient]], but returns `m` itself when [[kebab-cased-map?]] says every
  key is already kebab-cased, so no transient is created in that case."
  [m]
  (cond-> m
    (not (kebab-cased-map? m)) transduce-transient))
(defn rename-keys-optimized
  "Benchmark candidate: builds a map of only those keys whose kebab-cased form differs from the key itself and hands
  it to [[clojure.set/rename-keys]]. When no key needs renaming, `m` is returned as-is."
  [m]
  (let [renames (into {}
                      (keep (fn [old-key]
                              (let [new-key (memoized-kebab-key old-key)]
                                (when (not= old-key new-key)
                                  [old-key new-key]))))
                      (keys m))]
    (if (seq renames)
      (clojure.set/rename-keys m renames)
      m)))
;; Benchmark inputs, keyed by a descriptive name. Each candidate function in `fns` is timed against every one of
;; these maps.
(def map-name->m
;; 3 entries: one string key ("b") and one snake_case keyword
{:small-mixed-keys {:a 1, "b" 2, :snake_case 3}
;; 3 entries: every key is already a kebab-case keyword
:small-kebab-case {:a 1, :b 2, :snake-case 3}
;; 21 entries: two string keys ("b", "R") and four snake_case keywords
:large-mixed-keys {:a 1, "b" 2, :snake_case 3, :d 4, :e 5, :f 6, :g 7, :h 8, :i 9, :j 10, :k 11, :l 12, :m 13, :n 14, :o 15, :p 16, :q 17, "R" 18, :s_snake 19, :t_snake 20, :u_snake 21}
;; 21 entries: every key is already a kebab-case keyword
:large-kebab-case {:a 1, :b 2, :kebab-case 3, :d 4, :e 5, :f 6, :g 7, :h 8, :i 9, :j 10, :k 11, :l 12, :m 13, :n 14, :o 15, :p 16, :q 17, :r 18, :s-kebab 19, :t-kebab 20, :u-kebab 21}
;; 21 entries: all keyword keys, exactly one of which (:snake_case) is not kebab-case
:large-one-snake-key {:a 1, :b 2, :snake_case 3, :d 4, :e 5, :f 6, :g 7, :h 8, :i 9, :j 10, :k 11, :l 12, :m 13, :n 14, :o 15, :p 16, :q 17, :r 18, :s-kebab 19, :t-kebab 20, :u-kebab 21}})
;; Names of the benchmark inputs, as returned by `keys` on `map-name->m`. `mark-min-max` iterates over these and
;; `print-results-table` uses them as the table columns.
(def map-names (keys map-name->m))
;; The candidate implementations to benchmark. These are vars rather than plain functions because `x` prints each
;; var and derives the row label in the `:f` column from the var's name. The `fast-normalize-map*` functions defined
;; further down are not part of this list.
(def fns
[#'normalize-map
#'into-empty
#'into-empty-with-kebab-check
#'normalize-map-with-kebab-check
#'transduce-transient
#'transduce-transient-with-kebab-check
#'rename-keys-optimized])
(defn- mark-min-max
  "For each benchmark input named in `map-names`, tag the smallest and largest value in that column of `results`.

  `results` is a sequence of row maps that each contain a numeric value under every key in `map-names`; other keys
  (e.g. `:f`) are left alone. Within each column the smallest value `v` is replaced with `[:min v]` and the largest
  with `[:max v]`; all other values are untouched. If a column's smallest and largest values are equal, they are all
  tagged `:min`.

  Returns a vector of rows, or `results` itself when it is empty -- there is nothing to compare, and `min`/`max`
  cannot be called with zero arguments."
  [results]
  (if (empty? results)
    results
    (reduce
     (fn [rows k]
       (let [column    (map k rows)
             min-value (reduce min column)
             max-value (reduce max column)
             ;; `:min` is checked first, so it wins when min and max are the same value
             tag       (fn [v]
                         (condp = v
                           min-value [:min v]
                           max-value [:max v]
                           v))]
         (mapv (fn [row]
                 (update row k tag))
               rows)))
     results
     map-names)))
(defn- colorize-min-max
  "Turns the values of each row in `results` into colorized strings for printing. `[:min v]` values become green,
  `[:max v]` values red, and plain numbers white; numbers are rendered with `u/format-seconds` in all three cases.
  Any other value (e.g. the function name under `:f`) is passed through unchanged."
  [results]
  (let [colorize (fn [v]
                   (cond
                     (vector? v)
                     (let [[tag seconds] v
                           color         (case tag
                                           :min :green
                                           :max :red)]
                       (u/format-color color (u/format-seconds seconds)))

                     (number? v)
                     (u/format-color :white (u/format-seconds v))

                     :else
                     v))]
    (for [row results]
      (update-vals row colorize))))
(defmacro quickbench-mean-seconds
  "Benchmarks `expr` with `criterium.core/quick-benchmark` (passing `nil` as its options) and expands to the first
  element of the `:mean` entry of the benchmark result."
  [expr]
  `(-> (criterium.core/quick-benchmark ~expr nil)
       :mean
       first))
(defn print-results-table
  "Prints `results` (a sequence of row maps) as a table with an `:f` column followed by one column per entry in
  `map-names`."
  [results]
  ;; The map-name keywords are colorized in both the header and the row keys so that the colorized values line up:
  ;; `print-table` doesn't know about ANSI escape codes and treats the strings as longer than they end up printing as.
  (let [cyan         (fn [k]
                       (u/format-color :cyan k))
        column-names (cons :f (map cyan map-names))
        renames      (into {} (map (juxt identity cyan)) map-names)
        rows         (for [row results]
                       (clojure.set/rename-keys row renames))]
    (clojure.pprint/print-table column-names rows)))
(defn x
  "Runs the whole benchmark: for every var in `fns` (printed as it is started), times the function against every map
  in `map-name->m` with [[quickbench-mean-seconds]], then marks the min/max per input map, colorizes the values, and
  prints the results as a table."
  []
  (let [benchmark-var (fn [varr]
                        (println varr)
                        (let [f       (var-get varr)
                              timings (update-vals map-name->m
                                                   (fn [m]
                                                     (quickbench-mean-seconds (f m))))]
                          (merge {:f (name (symbol varr))}
                                 timings)))
        results       (mapv benchmark-var fns)]
    (-> results
        mark-min-max
        colorize-min-max
        print-results-table)))
(defn fast-normalize-map
  "Base normalization behavior for a pMBQL map for use with `:decode/normalize`: keywordize keys and keywordize
  `:lib/type`; convert map to kebab-case. Returns `nil` when `m` is not a map.

  A string `:lib/type` value is keywordized whether or not its key needed renaming, so `{:lib/type \"mbql/query\"}`
  and `{\"lib/type\" \"mbql/query\"}` both normalize to `{:lib/type :mbql/query}`.

  Works on a transient version of `m` and only touches the entries that actually need to change. We COULD use
  [[clojure.core/update-keys]], but that rebuilds the entire map -- see the profiling of different implementations in
  https://gist.github.com/camsaul/38b11fc04d72e023fcf95646221d5caa"
  [m]
  (when (map? m)
    (transduce
     identity
     (fn
       ;; completion arity: freeze the transient back into a persistent map
       ([acc]
        (persistent! acc))
       ([acc k]
        (let [kebab-key (memoized-kebab-key k)
              rename?   (not= k kebab-key)
              lib-type? (= kebab-key :lib/type)]
          ;; only look the value up when this entry might need to change
          (if-not (or rename? lib-type?)
            acc
            (let [v     (get acc k)
                  new-v (cond-> v
                          (and lib-type? (string? v)) keyword)]
              (cond
                rename?                    (-> acc
                                               (dissoc! k)
                                               (assoc! kebab-key new-v))
                ;; key is already `:lib/type` but its value was a string
                (not (identical? v new-v)) (assoc! acc kebab-key new-v)
                :else                      acc))))))
     (transient m)
     (keys m))))
(defn fast-normalize-map-2
  "Variant of [[fast-normalize-map]] that reduces over the entries of `m` rather than over its keys, so the value
  comes from destructuring the entry instead of a lookup in the transient.

  Base normalization behavior for a pMBQL map for use with `:decode/normalize`: keywordize keys and keywordize
  `:lib/type`; convert map to kebab-case. Returns `nil` when `m` is not a map.

  A string `:lib/type` value is keywordized whether or not its key needed renaming, so `{:lib/type \"mbql/query\"}`
  and `{\"lib/type\" \"mbql/query\"}` both normalize to `{:lib/type :mbql/query}`.

  See the profiling of different implementations in
  https://gist.github.com/camsaul/38b11fc04d72e023fcf95646221d5caa"
  [m]
  (when (map? m)
    (transduce
     identity
     (fn
       ;; completion arity: freeze the transient back into a persistent map
       ([acc]
        (persistent! acc))
       ([acc [k v]]
        (let [kebab-key (memoized-kebab-key k)
              new-v     (cond-> v
                          (and (= kebab-key :lib/type)
                               (string? v))
                          keyword)]
          (cond
            (not= k kebab-key)         (-> acc
                                           (dissoc! k)
                                           (assoc! kebab-key new-v))
            ;; key is already `:lib/type` but its value was a string
            (not (identical? v new-v)) (assoc! acc kebab-key new-v)
            :else                      acc))))
     (transient m)
     m)))
(defn fast-normalize-map-3
  "Base normalization behavior for a pMBQL map for use with `:decode/normalize`: keywordize keys and keywordize
  `:lib/type`; convert map to kebab-case. Returns `nil` when `m` is not a map.

  This variant reduces over the entries of `m`, renaming keys on a transient version of the map, and handles
  `:lib/type` once at the very end (in the completion step) rather than checking for it on every entry. See the
  profiling of different implementations in
  https://gist.github.com/camsaul/38b11fc04d72e023fcf95646221d5caa"
  [m]
  (when (map? m)
    (let [rf (fn
               ;; completion arity: keywordize a string `:lib/type`, then freeze the transient
               ([acc]
                (let [lib-type (:lib/type acc)]
                  (persistent!
                   (if (string? lib-type)
                     (assoc! acc :lib/type (keyword lib-type))
                     acc))))
               ;; step arity: rename one key if (and only if) it is not already kebab-cased
               ([acc [old-key value]]
                (let [new-key (memoized-kebab-key old-key)]
                  (if (not= old-key new-key)
                    (assoc! (dissoc! acc old-key) new-key value)
                    acc))))]
      (transduce identity rf (transient m) m))))
@camsaul
Copy link
Author

camsaul commented Jul 2, 2025

image

@camsaul
Copy link
Author

camsaul commented Jul 2, 2025

image

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment