Last active
July 2, 2025 18:04
-
-
Save camsaul/38b11fc04d72e023fcf95646221d5caa to your computer and use it in GitHub Desktop.
Map normalization profiling
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(ns metabase.lib.schema.profile | |
(:require | |
[clojure.pprint] | |
[clojure.set] | |
[criterium.core] | |
[metabase.util :as u] | |
[metabase.util.malli.registry :as mr] | |
[metabase.util.memoize :as u.memo])) | |
(defn normalize-map-no-kebab-case
  "Part of [[normalize-map]]: turns string keys into keywords and turns a string `:lib/type` value into a keyword.
  Keys are NOT converted to `kebab-case` here. Anything that needs no change is returned as-is."
  [m]
  ;; [[update-keys]] always builds a brand new map, so only call it when at least one key is actually a string;
  ;; already-normalized maps then pass straight through without generating garbage.
  (let [has-string-keys? (and (map? m)
                              (some string? (keys m)))
        keywordized      (if has-string-keys?
                           (update-keys m keyword)
                           m)
        lib-type         (:lib/type keywordized)]
    (if (string? lib-type)
      (update keywordized :lib/type keyword)
      keywordized)))
(def ^{:private true, :arglists '([k])} memoized-kebab-key
  "Memoized function of one argument `k`: coerces `k` to a keyword and then kebab-cases it with
  [[u/->kebab-case-en]].

  Recomputing the kebab-case form of a key on every call is slow (even allowing for the LRU cache inside
  [[u/->kebab-case-en]]). The set of keys we see is static and finite, so caching the results forever is an easy
  performance win."
  (u.memo/fast-memo (comp u/->kebab-case-en keyword)))
(defn kebab-cased-key?
  "True when `k` is a keyword that is already kebab-cased, i.e. kebab-casing it yields `k` itself (so not
  snake_cased or camelCased). Non-keywords are never considered kebab-cased."
  [k]
  (if (keyword? k)
    (= k (memoized-kebab-key k))
    false))
(defn kebab-cased-map?
  "True when `m` is a map and every one of its keys satisfies [[kebab-cased-key?]]."
  [m]
  (if (map? m)
    (every? kebab-cased-key? (keys m))
    false))
;; Schema for a map whose keys are all kebab-cased keywords. The custom error function lists the offending keys
;; when the value is a map, and falls back to the generic message for non-map values.
(mr/def ::kebab-cased-map
  [:fn
   {:error/message "map with all kebab-cased keys"
    :error/fn      (fn [{:keys [value]} _]
                     (if (map? value)
                       (let [offending-keys (remove kebab-cased-key? (keys value))]
                         (str "map with all kebab-cased keys, got: " (pr-str offending-keys)))
                       "map with all kebab-cased keys"))}
   kebab-cased-map?])
(defn map->kebab-case
  "Rewrite every key of map `m` with [[memoized-kebab-key]]; intended for use with `:decode/normalize`.
  Returns `nil` when `m` is not a map."
  [m]
  (if (map? m)
    (update-keys m memoized-kebab-key)
    nil))
(defn normalize-map
  "Baseline normalization for a pMBQL map: first keywordize the keys and the `:lib/type` value
  (see [[normalize-map-no-kebab-case]]), then convert all keys to kebab-case (see [[map->kebab-case]])."
  [m]
  (map->kebab-case (normalize-map-no-kebab-case m)))
(defn into-empty
  "Benchmark candidate: rebuild `m` from scratch by pouring its entries, with each key passed through
  [[memoized-kebab-key]], into an empty collection of the same type as `m`."
  [m]
  (let [kebab-entry (fn [[k v]]
                      [(memoized-kebab-key k) v])]
    (into (empty m) (map kebab-entry) m)))
(defn into-empty-with-kebab-check
  "Benchmark candidate: like [[into-empty]], but returns `m` untouched when it already satisfies
  [[kebab-cased-map?]]."
  [m]
  (cond-> m
    (not (kebab-cased-map? m)) into-empty))
(defn normalize-map-with-kebab-check
  "Benchmark candidate: like [[normalize-map]], but returns `m` untouched when it already satisfies
  [[kebab-cased-map?]]."
  [m]
  (cond-> m
    (not (kebab-cased-map? m)) normalize-map))
(defn transduce-transient
  "Benchmark candidate: walk the keys of `m` once and rename, on a transient copy of `m`, only those keys whose
  [[memoized-kebab-key]] form differs from the key itself. Keys that are already kebab-cased are left alone."
  [m]
  (letfn [(rename-key [acc k]
            (let [kebab-key (memoized-kebab-key k)]
              (if (not= k kebab-key)
                ;; move the value from the old key to the kebab-cased one
                (let [v (get acc k)]
                  (assoc! (dissoc! acc k) kebab-key v))
                acc)))]
    (persistent! (reduce rename-key (transient m) (keys m)))))
(defn transduce-transient-with-kebab-check
  "Benchmark candidate: like [[transduce-transient]], but returns `m` untouched when it already satisfies
  [[kebab-cased-map?]]."
  [m]
  (cond-> m
    (not (kebab-cased-map? m)) transduce-transient))
(defn rename-keys-optimized
  "Benchmark candidate: collect an old-key -> kebab-key map for just the keys of `m` that need to change and apply
  it with [[clojure.set/rename-keys]]. When no key needs renaming, `m` itself is returned."
  [m]
  (let [renames (into {}
                      (for [k     (keys m)
                            :let  [kebab-key (memoized-kebab-key k)]
                            :when (not= k kebab-key)]
                        [k kebab-key]))]
    (if (seq renames)
      (clojure.set/rename-keys m renames)
      m)))
(def map-name->m
  "Benchmark inputs, keyed by a descriptive name: small and large maps whose keys are either all kebab-cased
  keywords or a mix of keywords, strings, and snake_cased keywords."
  {:small-mixed-keys
   {:a 1, "b" 2, :snake_case 3}

   :small-kebab-case
   {:a 1, :b 2, :snake-case 3}

   :large-mixed-keys
   {:a 1, "b" 2, :snake_case 3, :d 4, :e 5, :f 6, :g 7,
    :h 8, :i 9, :j 10, :k 11, :l 12, :m 13, :n 14,
    :o 15, :p 16, :q 17, "R" 18, :s_snake 19, :t_snake 20, :u_snake 21}

   :large-kebab-case
   {:a 1, :b 2, :kebab-case 3, :d 4, :e 5, :f 6, :g 7,
    :h 8, :i 9, :j 10, :k 11, :l 12, :m 13, :n 14,
    :o 15, :p 16, :q 17, :r 18, :s-kebab 19, :t-kebab 20, :u-kebab 21}

   :large-one-snake-key
   {:a 1, :b 2, :snake_case 3, :d 4, :e 5, :f 6, :g 7,
    :h 8, :i 9, :j 10, :k 11, :l 12, :m 13, :n 14,
    :o 15, :p 16, :q 17, :r 18, :s-kebab 19, :t-kebab 20, :u-kebab 21}})
(def map-names
  "The names of the benchmark inputs, i.e. the keys of [[map-name->m]]; used as the result-table columns."
  (keys map-name->m))
(def fns
  "Vars of the candidate implementations to benchmark, in the order their rows appear in the results table."
  [#'normalize-map
   #'into-empty
   #'into-empty-with-kebab-check
   #'normalize-map-with-kebab-check
   #'transduce-transient
   #'transduce-transient-with-kebab-check
   #'rename-keys-optimized])
(defn- mark-min-max
  "For each column named in [[map-names]], wrap the smallest value across all rows of `results` as `[:min v]` and
  the largest as `[:max v]`; every other value is left as a bare number. Returns a vector of row maps."
  [results]
  (letfn [(mark-column [rows column]
            (let [column-values (map column rows)
                  lowest        (apply min column-values)
                  highest       (apply max column-values)
                  tag           (fn [v]
                                  ;; the minimum check comes first, so a value that is both min and max is
                                  ;; tagged `:min`
                                  (cond
                                    (= lowest v)  [:min v]
                                    (= highest v) [:max v]
                                    :else         v))]
              (mapv #(update % column tag) rows)))]
    (reduce mark-column results map-names)))
(defn- colorize-min-max
  "Turn every timing in `results` into a colorized, human-readable duration string: values tagged `[:min v]` are
  green, values tagged `[:max v]` are red, plain numbers are white. Any other value is passed through unchanged."
  [results]
  (letfn [(colorize [v]
            (cond
              (vector? v)
              (let [[tag seconds] v
                    color         (case tag
                                    :min :green
                                    :max :red)]
                (u/format-color color (u/format-seconds seconds)))

              (number? v)
              (u/format-color :white (u/format-seconds v))

              :else
              v))]
    (map #(update-vals % colorize) results)))
(defmacro quickbench-mean-seconds
  "Benchmark `expr` with [[criterium.core/quick-benchmark]] (passing `nil` as its options) and return the first
  element of the `:mean` entry of the result."
  [expr]
  `(-> (criterium.core/quick-benchmark ~expr nil)
       :mean
       first))
(defn print-results-table
  "Print `results` as a table with [[clojure.pprint/print-table]]: an `:f` column followed by one column per name
  in [[map-names]]."
  [results]
  ;; `print-table` knows nothing about ANSI escape codes and measures colorized strings as wider than they print.
  ;; Colorizing the column names as well keeps the colorized values lined up under their headers.
  (let [colorized-name (into {}
                             (map (fn [k]
                                    [k (u/format-color :cyan k)]))
                             map-names)
        columns        (cons :f (map colorized-name map-names))
        rows           (map #(clojure.set/rename-keys % colorized-name) results)]
    (clojure.pprint/print-table columns rows)))
(defn x
  "Run the whole benchmark: time every implementation in [[fns]] against every input in [[map-name->m]] and print
  a table of mean times in which the fastest entry per input is green and the slowest is red."
  []
  (letfn [(benchmark-var [varr]
            ;; print the var first so there is some progress output while the benchmark runs
            (println varr)
            (let [f       (var-get varr)
                  timings (update-vals map-name->m
                                       (fn [m]
                                         (quickbench-mean-seconds (f m))))]
              (conj {:f (name (symbol varr))} timings)))]
    (-> (mapv benchmark-var fns)
        mark-min-max
        colorize-min-max
        print-results-table)))
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(defn fast-normalize-map
  "Base normalization behavior for a pMBQL map for use with `:decode/normalize`: convert keys to kebab-case
  keywords and keywordize a string `:lib/type` value. Returns `nil` when `m` is not a map.

  Walks the keys of `m` once and edits a transient copy in place, touching only the entries that need to change,
  instead of rebuilding the whole map the way [[clojure.core/update-keys]] does. See the profiling of the
  different implementations in https://gist.github.com/camsaul/38b11fc04d72e023fcf95646221d5caa"
  [m]
  (when (map? m)
    (transduce
     identity
     (fn
       ;; completion step: freeze the transient back into a persistent map
       ([m]
        (persistent! m))
       ;; reducing step: `m` is the transient being edited, `k` a key of the original map
       ([m k]
        (let [kebab-key (memoized-kebab-key k)]
          (cond
            ;; the key has to be renamed: move the value over, keywordizing it when it lands under `:lib/type`
            (not= k kebab-key)
            (let [v (get m k)
                  v (cond-> v
                      (and (= kebab-key :lib/type)
                           (string? v))
                      keyword)]
              (-> m
                  (dissoc! k)
                  (assoc! kebab-key v)))

            ;; the key is already `:lib/type`, but its value may still be a string that needs keywordizing.
            ;; Previously this case fell through untouched, so `{:lib/type \"mbql/query\"}` kept its string.
            (= k :lib/type)
            (let [v (get m k)]
              (cond-> m
                (string? v) (assoc! k (keyword v))))

            ;; nothing to do for this entry
            :else
            m))))
     (transient m)
     (keys m))))
(defn fast-normalize-map-2
  "Base normalization behavior for a pMBQL map for use with `:decode/normalize`: convert keys to kebab-case
  keywords and keywordize a string `:lib/type` value. Returns `nil` when `m` is not a map.

  Variant of [[fast-normalize-map]] that reduces over the entries of `m` (destructuring key and value) rather
  than over its keys, so the value never has to be looked up in the transient. See the profiling of the
  different implementations in https://gist.github.com/camsaul/38b11fc04d72e023fcf95646221d5caa"
  [m]
  (when (map? m)
    (transduce
     identity
     (fn
       ;; completion step: freeze the transient back into a persistent map
       ([m]
        (persistent! m))
       ;; reducing step: `m` is the transient being edited, `[k v]` an entry of the original map
       ([m [k v]]
        (let [kebab-key (memoized-kebab-key k)]
          (cond
            ;; the key has to be renamed: move the value over, keywordizing it when it lands under `:lib/type`
            (not= k kebab-key)
            (let [v (cond-> v
                      (and (= kebab-key :lib/type)
                           (string? v))
                      keyword)]
              (-> m
                  (dissoc! k)
                  (assoc! kebab-key v)))

            ;; the key is already `:lib/type`, but its value may still be a string that needs keywordizing.
            ;; Previously this case fell through untouched, so `{:lib/type \"mbql/query\"}` kept its string.
            (and (= k :lib/type)
                 (string? v))
            (assoc! m k (keyword v))

            ;; nothing to do for this entry
            :else
            m))))
     (transient m)
     m)))
(defn fast-normalize-map-3
  "Base normalization behavior for a pMBQL map for use with `:decode/normalize`: convert keys to kebab-case
  keywords and keywordize a string `:lib/type` value. Returns `nil` when `m` is not a map.

  Variant that first renames keys on a transient copy of `m` while reducing over its entries, then fixes up
  `:lib/type` once at the end, just before the transient is made persistent again. See the profiling of the
  different implementations in https://gist.github.com/camsaul/38b11fc04d72e023fcf95646221d5caa"
  [m]
  (when (map? m)
    (letfn [(rename-entry [acc [k v]]
              (let [kebab-key (memoized-kebab-key k)]
                (if (not= k kebab-key)
                  (assoc! (dissoc! acc k) kebab-key v)
                  acc)))]
      (let [renamed  (reduce rename-entry (transient m) m)
            ;; look `:lib/type` up only after renaming, so a value that arrived under a non-kebab key is seen too
            lib-type (:lib/type renamed)]
        (persistent!
         (if (string? lib-type)
           (assoc! renamed :lib/type (keyword lib-type))
           renamed))))))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment