Last active
October 14, 2024 07:28
-
-
Save jamesonknutson/94bd077996c528500ae5cd39f7790650 to your computer and use it in GitHub Desktop.
groups.nu
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
use std | |
# Reduces the output of `describe --detailed` to a single type name.
#
# Input: either a description record carrying a `type` field, or any other
# value, which is passed through untouched.
#
# For a stream description the result is `stream` when `--keep-streams` is set;
# otherwise the stream's `subtype` is resolved recursively. A stream that has
# no `subtype` raises an error in that case.
def nested-type [
    --keep-streams(-k) # Report `stream` for stream input instead of resolving the stream's subtype.
] {
    let described = $in
    match $described {
        { type: 'stream', subtype: $inner } => {
            if $keep_streams {
                'stream'
            } else {
                # The flag is false on this path, so the recursive call uses its default.
                $inner | nested-type
            }
        }
        { type: 'stream' } => {
            if $keep_streams {
                'stream'
            } else {
                error make { msg: $'No subtype for this stream?' }
            }
        }
        { type: $name } => $name
        $passthrough => $passthrough
    }
}
# Reports the type name of the pipeline input.
#
# Runs `describe --detailed` on the input and hands the description to
# `nested-type`, which reduces it to a single type name.
def type [
    --keep-streams(-k) # Report `stream` for stream input instead of the type of the stream's subtype.
    --no-collect(-n) # Forwarded to `describe --no-collect`, so the input is not collected.
] {
    # Deliberately a bare pipeline (no `$in`): the input reaches `describe` as-is.
    describe --detailed --no-collect=($no_collect)
    | nested-type --keep-streams=($keep_streams)
}
# Creates a grouper from the pipeline input.
# Returns a closure that takes one value and yields the term to group it by.
#
# The kind of grouper depends on the type of the input:
# - closure:   wrapped; it is run with the value as both pipeline input and argument.
# - cell-path: the value is looked up with `get`.
# - int, string, record<value, optional?>, or a list of int / string / record /
#   cell-path elements: normalised into `{ value, optional }` path members,
#   turned into a cell-path with `into cell-path`, and then handled as above.
#
# Strings are split on '.'; a trailing '?' on a segment marks that segment optional.
#
# Raises an error when the input (or a list element) has any other type.
def create-grouper [
    --ignore-errors(-i) # Forwarded to `do` (closure groupers) and `get` (cell-path groupers).
    --default(-d): any = 'null' # Replacement for null results; pass `null` to leave results untouched.
] {
    let input = $in
    let type = $input | type

    # Splits a dotted path string into `{ value, optional }` members.
    # Inside `each`, `$in` is the segment with its trailing '?' removed and `$s`
    # is the raw segment, so the two differ exactly when the '?' was present.
    let parse_members = {|path: string|
        $path
        | split row '.'
        | each {|s| str replace -r '\?$' '' | { value: $in, optional: ($in != $s) } }
    }

    # Closures and cell-paths produce the final grouper directly. Every other
    # accepted type is converted step by step (int/string/record -> list ->
    # cell-path) by feeding the converted value back into create-grouper.
    match $type {
        'closure' => {
            {|value|
                $value
                | do --ignore-errors=($ignore_errors) $input $value
                | if ($default != null) { $in | default $default } else { $in }
            }
        }
        'cell-path' => {
            {|value|
                $value
                | get --ignore-errors=($ignore_errors) $input
                | if ($default != null) { $in | default $default } else { $in }
            }
        }
        'int' => {
            [ { value: $input, optional: false } ]
            | create-grouper --ignore-errors=($ignore_errors) --default=($default)
        }
        'string' => {
            do $parse_members $input
            | create-grouper --ignore-errors=($ignore_errors) --default=($default)
        }
        'record' => {
            # Pass the value back into create-grouper, wrapped in a list.
            [ { value: $input.value, optional: ($input.optional? == true) } ]
            | create-grouper --ignore-errors=($ignore_errors) --default=($default)
        }
        'list' => {
            # Convert every element into a list of path-member records, then
            # flatten them into one cell-path.
            $input
            | each {|element|
                match ($element | type) {
                    'record' => {
                        [ { value: $element.value, optional: ($element.optional? == true) } ]
                    }
                    'int' => {
                        [ { value: $element, optional: false } ]
                    }
                    'string' => {
                        do $parse_members $element
                    }
                    'cell-path' => {
                        do $parse_members ($element | to text)
                    }
                    $other => {
                        error make {
                            # Report the offending element's type, not its value.
                            msg: $'Cannot create a grouper out of an element with type: ($other)',
                            label: {
                                text: $'This should be of type: record, int, string, or cell-path.'
                                span: (metadata $element).span
                            }
                        }
                    }
                }
            }
            | flatten
            | into cell-path
            | create-grouper --ignore-errors=($ignore_errors) --default=($default)
        }
        $other => {
            error make {
                msg: $'Cannot create a grouper using a value with type: ($other)',
                label: {
                    text: $'Expected type: closure, cell-path, int, string, record, or list',
                    span: (metadata $input).span
                }
            }
        }
    }
}
# Recursively groups some values, using the specified grouping conditions to do so.
# Grouping conditions, if not a closure, are coerced into a cell-path. So, the `groupers`
# provided can be either a closure (where the closure is expected to return a value that can
# be used as the "grouping" term for that input), or anything that can be coerced into
# a cell-path (meaning cell-path, string, int, record<value: any, optional?: boolean>, or a list
# of any thereof).
#
# The result is a list of `{ key, value, count }` records sorted by `count`, descending:
# - key:   the grouping term shared by the rows of the group
# - value: the rows of the group, or, when further groupers remain, the same
#          structure produced by grouping those rows with the next grouper
# - count: the number of input rows that fell into the group
# With no groupers the input is returned unchanged.
#
# Examples:
# ```nushell
# # Group the files in the current directory first by their type, then by their extension.
# > ls | merge ($in.name | path parse) | groups type extension
# ```
export def main [
    --ignore-errors(-i) # Whether or not to ignore errors
    --default(-d): any = 'null' # The default value to use, in the event that a grouper returns an error, or null.
    ...groupers: any
    # - cell-path
    # - int
    # - string
    # - record<value: any, optional: bool>
    # - list<int | string | cell-path | record<value: any, optional: bool>>
    # - closure
]: [
    list -> any
    table -> any
] {
    let input = $in
    let groupers = $groupers | each { create-grouper --ignore-errors=($ignore_errors) --default=($default) }

    # Groups the input rows by the term `fn` yields for each row.
    # `--ignore-errors` is passed in explicitly so this helper does not depend
    # on variables of the enclosing command.
    def group-once [ fn: closure, --ignore-errors ] {
        reduce --fold {} {|row, acc|
            let key = $row | do --ignore-errors=($ignore_errors) $fn $row
            # Keys may be arbitrary values, so buckets are addressed by a hash
            # of the serialised key rather than by the key itself.
            let hash = $key | to msgpack | to text | hash md5
            $acc | upsert $hash { default { key: $key, value: [] } | upsert value { default [] | append [ $row ] } }
        } | values
        | upsert count {|row| $row.value | length }
        | sort-by count -r
    }

    # Groups the input rows by the first closure in `fns`, then regroups the
    # rows of every resulting group with the remaining closures. Each group is
    # processed on its own, so sibling groups never share nested results.
    def group-nested [ fns: list, --ignore-errors ] {
        let rows = $in
        if ($fns | is-empty) {
            $rows
        } else {
            let remaining = $fns | skip 1
            $rows
            | group-once ($fns | first) --ignore-errors=($ignore_errors)
            | each {|group|
                # `count` was computed from the raw rows above and is kept as-is.
                let nested = $group.value | group-nested $remaining --ignore-errors=($ignore_errors)
                $group | update value $nested
            }
        }
    }

    $input | group-nested $groupers --ignore-errors=($ignore_errors)
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment