A minimally opinionated argument parser
/**
 * A minimally opinionated argument parser.
 *
 * Author: James Abney
 * License: MIT
 */
import { Tokenizer, TokenSpec } from './tokenizer'

const tokens: TokenSpec<TokenType>[] = [
  [null, /^\s+/], // whitespace (null name: matched and discarded)
  ['long', /^--[A-Za-z][\w-]+/], // --flag
  ['short', /^-[A-Za-z]+/], // -f or -abc
  ['var', /^[A-Za-z]\w*=[^\s=]*/], // name=value
  ['stop', /^--(?=\s|$)/], // bare -- terminator
  ['arg', /^\S+/], // positional argument
]
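// Matching is first-match-wins in array order (see Tokenizer._consume below),
// so a bare "--" falls through 'long' (which requires letters after the
// dashes) and 'short' to 'stop', while "name=value" is claimed by 'var'
// before the catch-all 'arg'.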
type TokenType = 'long' | 'short' | 'var' | 'stop' | 'arg'
type KeyVal<T = string> = { [key: string]: T }
/**
 * @throws {SyntaxError}
 */
export function parseArgs(source: string) {
  const t = new Tokenizer<TokenType>(source, tokens)

  const arg: string[] = []
  const long: KeyVal<string[]>[] = []
  const short: KeyVal<string[]>[] = []
  const stop: string[] = []
  const _var: KeyVal[] = []

  while (t.lookahead != null) {
    const token = t.consumeN()
    switch (token.type) {
      case 'arg':
        arg.push(token.value)
        break
      case 'long':
        // Strip the leading "--"; any plain args that follow belong to this flag.
        long.push({ [token.value.slice(2)]: [...consumeArgs(t)] })
        break
      case 'short':
        // Strip the leading "-"; any plain args that follow belong to this flag.
        short.push({ [token.value.slice(1)]: [...consumeArgs(t)] })
        break
      case 'var':
        // "name=value" becomes { name: 'value' }.
        _var.push(Object.fromEntries([token.value.split('=')]))
        break
      case 'stop':
        // Collect the plain args that follow a bare "--".
        stop.push(...consumeArgs(t))
        break
      default:
        throw new SyntaxError(`unrecognized token type "${token.type}"`)
    }
  }

  return { arg, long, short, var: _var, stop }
}
/** Yields consecutive 'arg' tokens until a token of any other type appears. */
function* consumeArgs(t: Tokenizer<TokenType>): IterableIterator<string> {
  while (t.lookahead?.type === 'arg') {
    const token = t.consumeN()
    yield token.value
  }
}
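For reference, a usage sketch (not part of the gist) tracing what parseArgs returns for a sample command line:

// Hypothetical input; the shapes below follow from the token specs above.
const result = parseArgs('-v --out file.txt name=value -- rest of args')
// result.short → [{ v: [] }]             ('-v' has no trailing plain args)
// result.long  → [{ out: ['file.txt'] }]
// result.var   → [{ name: 'value' }]
// result.stop  → ['rest', 'of', 'args']  (plain args after the bare '--')
// result.arg   → []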
// tokenizer.ts (the module imported by the parser above)

/**
 * A single token lookahead tokenizer.
 *
 * Author: James Abney
 * License: MIT
 */
export type Token<T extends string = string> = { readonly type: T; readonly value: string }

// Specs with a null (or empty) name are matched and then thrown away.
export type TokenSpec<T extends string = string> = readonly [name: T | null, match: RegExp]
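A spec list pairs a token name with a regular expression anchored at the start of the remaining input; a sketch of a standalone spec list, assuming hypothetical 'num' and 'word' token types:

const spec: TokenSpec<'num' | 'word'>[] = [
  [null, /^\s+/], // null name: whitespace is matched and discarded
  ['num', /^\d+/],
  ['word', /^[A-Za-z]+/],
]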
export class Tokenizer<T extends string = string> {
  private readonly tokens: readonly TokenSpec<T>[]
  private readonly _source: string
  private _cursor = 0
  private _lookahead: Token<T> | null = null

  constructor(source: string, tokens: TokenSpec<T>[]) {
    this._source = source
    this.tokens = tokens.slice()
    this._lookahead = this._consume()
  }

  get source(): string {
    return this._source
  }

  get cursor(): number {
    return this._cursor
  }

  get eof(): boolean {
    return this._cursor >= this._source.length
  }

  get lookahead(): Token<T> | null {
    return this._lookahead
  }
  /**
   * Consumes a token and returns the previous lookahead. If a token
   * type is specified, it must match the lookahead token's type
   * (a null lookahead returns null rather than throwing).
   *
   * @throws {SyntaxError}
   */
  consume(): Token<T> | null
  consume(type: T): Token<T> | null
  consume(type?: T): Token<T> | null {
    const token = this._lookahead
    this._lookahead = this._consume()
    if (type == null) {
      return token
    }
    if (token == null) {
      return null
    }
    if (token.type === type) {
      return token
    }
    throw new SyntaxError(`expected token "${type}", received "${token.type}"`)
  }
  /**
   * Consumes a token like consume(), but asserts that the lookahead
   * is not null.
   *
   * @throws {SyntaxError}
   */
  consumeN(): Token<T> {
    const token = this._lookahead
    this._lookahead = this._consume()
    if (token == null) {
      throw new SyntaxError(`token is null`)
    }
    return token
  }
  private _consume(): Token<T> | null {
    if (this._cursor > this._source.length) {
      throw new SyntaxError(`tokenizer cursor overrun`)
    }
    if (this.eof) return null
    const str = this._source.slice(this._cursor)
    for (const [name, matcher] of this.tokens) {
      const match = matcher.exec(str)?.[0]
      if (match != null) {
        this._cursor += match.length
        // A null or empty name marks a skip token (e.g. whitespace):
        // advance past it and consume the next token instead.
        if (name == null || name.length === 0) {
          return this._consume()
        }
        return { type: name as T, value: match }
      }
    }
    throw new SyntaxError(`tokenizer unexpected token "${this._source[this._cursor]}"`)
  }
} |
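A minimal sketch (not part of the gist) of driving the Tokenizer directly, reusing the tokens list and TokenType union from the parser file above:

const t = new Tokenizer<TokenType>('--out file.txt', tokens)

t.lookahead        // { type: 'long', value: '--out' }
t.consume('long')  // returns the '--out' token and advances the lookahead
t.consumeN().value // 'file.txt'
t.lookahead        // null: the source is exhausted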