Skip to content

Instantly share code, notes, and snippets.

@jamesonknutson
Last active October 14, 2024 07:28
Show Gist options
  • Save jamesonknutson/94bd077996c528500ae5cd39f7790650 to your computer and use it in GitHub Desktop.
Save jamesonknutson/94bd077996c528500ae5cd39f7790650 to your computer and use it in GitHub Desktop.
groups.nu
use std
# Reduces a type description (as produced by `describe --detailed`) to a bare type name.
#
# Input is either a record carrying a `type` field, or any other value, which is
# handed back unchanged. A stream description is unwrapped to the type of its
# `subtype` unless `--keep-streams` is given, in which case 'stream' is returned.
#
# Raises an error for a stream description without a `subtype` when
# `--keep-streams` is not set.
def nested-type [
    --keep-streams(-k) # If the input is a stream, should `stream` be returned, or should `.subtype.type` be returned?
] {
    let description = $in
    match $description {
        { type: 'stream', subtype: $inner } => {
            if $keep_streams {
                'stream'
            } else {
                # --keep-streams is known to be off on this path, so the
                # recursive call simply leaves it unset.
                $inner | nested-type
            }
        }
        { type: 'stream' } => {
            if $keep_streams {
                'stream'
            } else {
                error make { msg: $'No subtype for this stream?' }
            }
        }
        # Any other record with a `type` field: that field is the answer.
        { type: $name } => $name
        # Anything else is passed through as-is.
        $passthrough => $passthrough
    }
}
# Returns the type name of the pipeline input.
#
# The input is described with `describe --detailed` and the resulting
# description is reduced to a plain type name by `nested-type`.
def type [
    --keep-streams(-k) # If the input is a stream, should `stream` be returned, or should `.subtype.type` be returned?
    --no-collect(-n) # Do not collect the input
] {
    # Kept as one pipeline so the command's input flows straight into `describe`.
    describe --no-collect=($no_collect) --detailed
    | nested-type --keep-streams=($keep_streams)
}
# Creates a grouper from the given input.
# Returns a closure.
#
# How the grouper is built depends on the type of the pipeline input:
# - closure:   wrapped; it is run with the value as both pipeline input and argument.
# - cell-path: the grouper runs `get` with that path on the value.
# - int, string, record: normalised into a list of `{ value, optional }` records
#   and passed back into `create-grouper`.
# - list:      every element (record, int, string or cell-path) is normalised into
#   such records, the result is converted into a single cell-path, and that
#   cell-path is passed back into `create-grouper`.
# Any other input type raises an error.
#
# In dotted strings (and in cell-paths found inside a list) a trailing `?` on a
# segment marks that segment as optional.
def create-grouper [
    --ignore-errors(-i) # Forwarded to `do` / `get` inside the returned closure
    --default(-d): any = 'null' # Replaces a null grouper result; pass null to leave nulls untouched
] {
    let input = $in
    let type = $input | type

    # Splits a dotted path string into `{ value, optional }` records. A trailing
    # `?` on a segment is stripped and recorded as `optional: true`.
    def path-members [] {
        split row '.'
        | each {|segment|
            let name = $segment | str replace -r '\?$' ''
            { value: $name, optional: ($name != $segment) }
        }
    }

    match $type {
        'closure' => {
            {|value|
                $value
                | do --ignore-errors=($ignore_errors) $input $value
                | if ($default != null) { $in | default $default } else { $in }
            }
        }
        'cell-path' => {
            {|value|
                $value
                | get --ignore-errors=($ignore_errors) $input
                | if ($default != null) { $in | default $default } else { $in }
            }
        }
        'int' => {
            [ { value: $input, optional: false } ]
            | create-grouper --ignore-errors=($ignore_errors) --default=($default)
        }
        'string' => {
            $input
            | path-members
            | create-grouper --ignore-errors=($ignore_errors) --default=($default)
        }
        'record' => {
            # Pass the value back into create-grouper, wrapped in a list.
            [ { value: $input.value, optional: ($input.optional? == true) } ]
            | create-grouper --ignore-errors=($ignore_errors) --default=($default)
        }
        'list' => {
            # Convert every element into a list of path-member records, then
            # flatten everything into one cell-path.
            $input
            | each {|element|
                match ($element | type) {
                    'record' => {
                        [ { value: $element.value, optional: ($element.optional? == true) } ]
                    }
                    'int' => {
                        [ { value: $element, optional: false } ]
                    }
                    'string' => {
                        $element
                        | path-members
                    }
                    'cell-path' => {
                        $element
                        | to text
                        | path-members
                    }
                    $other => {
                        # Report the offending *type* (the original message
                        # interpolated the element value itself).
                        error make {
                            msg: $'Cannot create a grouper out of an element with type: ($other)',
                            label: {
                                text: $'This should be of type: record, int, string, or cell-path.',
                                span: (metadata $element).span
                            }
                        }
                    }
                }
            }
            | flatten
            | into cell-path
            | create-grouper --ignore-errors=($ignore_errors) --default=($default)
        }
        $other => {
            error make {
                msg: $'Cannot create a grouper using a value with type: ($other)',
                label: {
                    text: $'Expected type: closure, cell-path, int, string, record, or list',
                    span: (metadata $input).span
                }
            }
        }
    }
}
# Recursively groups some values, using the specified grouping conditions to do so.
# Grouping conditions, if not a closure, are coerced into a cell-path. So, the `groupers`
# provided can be either a closure (where the closure is expected to return a value that can
# be used as the "grouping" term for that input), or anything that can be coerced into
# a cell-path (meaning cell-path, string, int, record<value: any, optional?: boolean>, or a list
# of any thereof).
#
# Each grouping level produces a list of `{ key, value, count }` records, sorted by
# `count` in descending order, where `value` holds the rows that share `key`.
# The first grouper is applied to the input itself; every further grouper is applied
# to the `value` field one level deeper than the previous one.
# With no groupers at all, the input is returned unchanged.
#
# Examples:
# ```nushell
# # Group the files in the current directory first by their type, then by their extension.
# > ls | merge ($in.name | path parse) | groups type extension
# ```
export def main [
    --ignore-errors(-i) # Whether or not to ignore errors
    --default(-d): any = 'null' # The default value to use, in the event that a grouper returns an error, or null.
    ...groupers: any
    # - cell-path
    # - int
    # - string
    # - record<value: any, optional: bool>
    # - list<int | string | cell-path | record<value: any, optional: bool>>
    # - closure
    # - list<closure>
]: [
    list -> any
    table -> any
] {
    let input = $in
    let groupers = $groupers | each { create-grouper --ignore-errors=($ignore_errors) --default=($default) }

    # Buckets the rows of the pipeline input by the key that `fn` yields for each row.
    # `--ignore-errors` is taken as an explicit flag instead of being read from the
    # enclosing command, so this helper does not depend on capturing outer variables.
    def apply-closure [
        fn: closure
        --ignore-errors(-i)
    ] {
        reduce --fold {} {|row, acc|
            let key = $row | do --ignore-errors=($ignore_errors) $fn $row
            # Keys may be arbitrary values, so a hash of the serialised key is
            # used as the record field name for the bucket.
            let hash = $key | to msgpack | to text | hash md5
            $acc
            | upsert $hash {
                default { key: $key, value: [] }
                | upsert value { default [] | append [ $row ] }
            }
        }
        | values
        | upsert count {|row| $row.value | length }
        | sort-by count -r
    }

    $groupers
    | enumerate
    | reduce --fold $input {|idx_row, acc|
        if ($idx_row.index == 0) {
            $acc | apply-closure --ignore-errors=($ignore_errors) $idx_row.item
        } else {
            # The n-th grouper (0-based) regroups the rows found n `value` levels deep.
            $acc | upsert ('value' | std repeat $idx_row.index | into cell-path) {
                apply-closure --ignore-errors=($ignore_errors) $idx_row.item
            }
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment