A minimally opinionated argument parser
/**
 * A minimally opinionated argument parser.
 *
 * Author: James Abney
 * License: MIT
 */
import { Tokenizer, TokenSpec } from './tokenizer'

type TokenType = 'long' | 'short' | 'var' | 'stop' | 'arg'
type KeyVal<T = string> = { [key: string]: T }

// Order matters: each spec is tried in turn at the current position,
// so the catch-all 'arg' pattern must come last.
const tokens: TokenSpec<TokenType>[] = [
  [null, /^\s+/], // whitespace (null name: discarded)
  ['long', /^--[A-Za-z][\w-]+/], // e.g. --verbose
  ['short', /^-[A-Za-z]+/], // e.g. -xz
  ['var', /^[A-Za-z]\w*=[^\s=]*/], // e.g. name=value
  ['stop', /^--(?=\s|$)/], // bare --: everything after is passed through
  ['arg', /^\S+/], // any other non-whitespace run
]
/**
 * Parses a command-line style string into positional args, long and
 * short options, key=value variables, and post-`--` pass-through args.
 *
 * @throws {SyntaxError}
 */
export function parseArgs(source: string) {
  const t = new Tokenizer<TokenType>(source, tokens)

  const arg: string[] = []
  const long: KeyVal<string[]>[] = []
  const short: KeyVal<string[]>[] = []
  const stop: string[] = []
  const _var: KeyVal[] = []

  while (t.lookahead != null) {
    const token = t.consumeN()

    switch (token.type) {
      case 'arg':
        arg.push(token.value)
        break
      case 'long':
        // Strip the leading '--' and attach any trailing plain args.
        long.push({ [token.value.slice(2)]: [...consumeArgs(t)] })
        break
      case 'short':
        // Strip the leading '-' and attach any trailing plain args.
        short.push({ [token.value.slice(1)]: [...consumeArgs(t)] })
        break
      case 'var':
        // 'name=value' splits into exactly two parts: '=' cannot occur in the value.
        _var.push(Object.fromEntries([token.value.split('=')]))
        break
      case 'stop':
        // Everything after a bare '--' is collected verbatim.
        stop.push(...consumeArgs(t))
        break
      default:
        throw new SyntaxError(`unrecognized token type "${token.type}"`)
    }
  }

  return { arg, long, short, var: _var, stop }
}
/**
 * Yields the values of consecutive 'arg' tokens following the current token.
 */
function* consumeArgs(t: Tokenizer<TokenType>): IterableIterator<string> {
  while (t.lookahead?.type === 'arg') {
    const token = t.consumeN()
    yield token.value
  }
}
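A quick usage sketch (the input string is hypothetical, for illustration only): plain arguments following an option attach to that option, and everything after a bare `--` is passed through verbatim.

// Usage sketch (hypothetical input): illustrates the returned shape.
const result = parseArgs('input.txt --level 3 -xz name=value -- raw1 raw2')
// result == {
//   arg: ['input.txt'],
//   long: [{ level: ['3'] }],
//   short: [{ xz: [] }],
//   var: [{ name: 'value' }],
//   stop: ['raw1', 'raw2'],
// }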
/**
 * A single token lookahead tokenizer.
 *
 * Author: James Abney
 * License: MIT
 */
export type Token<T extends string = string> = { readonly type: T; readonly value: string }

// Specs with a null (or empty) name match and are then discarded, e.g. whitespace.
export type TokenSpec<T extends string = string> = readonly [name: T | null, match: RegExp]
export class Tokenizer<T extends string = string> {
  private readonly tokens: readonly TokenSpec<T>[]
  private readonly _source: string
  private _cursor = 0
  private _lookahead: Token<T> | null = null

  constructor(source: string, tokens: TokenSpec<T>[]) {
    this._source = source
    this.tokens = tokens.slice()
    // Prime the single-token lookahead.
    this._lookahead = this._consume()
  }
  get source(): string {
    return this._source
  }

  get cursor(): number {
    return this._cursor
  }

  get eof(): boolean {
    return this._cursor >= this._source.length
  }

  get lookahead(): Token<T> | null {
    return this._lookahead
  }
  /**
   * Consumes a token and returns the previous lookahead, or null if
   * the input is exhausted. If a token type is specified, it must
   * match the lookahead token's type.
   *
   * @throws {SyntaxError}
   */
  consume(): Token<T> | null
  consume(type: T): Token<T> | null
  consume(type?: T): Token<T> | null {
    const token = this._lookahead
    this._lookahead = this._consume()
    if (type == null) {
      return token
    }
    if (token == null) {
      return null
    }
    if (token.type === type) {
      return token
    }
    throw new SyntaxError(`expected token "${type}", received "${token.type}"`)
  }
  /**
   * Like consume, but throws if the lookahead token is null.
   *
   * @throws {SyntaxError}
   */
  consumeN(): Token<T> {
    const token = this._lookahead
    this._lookahead = this._consume()
    if (token == null) {
      throw new SyntaxError(`token is null`)
    }
    return token
  }
  private _consume(): Token<T> | null {
    if (this._cursor > this._source.length) {
      throw new SyntaxError(`tokenizer cursor overrun`)
    }
    if (this.eof) return null

    const str = this._source.slice(this._cursor)

    for (const [name, matcher] of this.tokens) {
      const match = matcher.exec(str)?.[0]
      const skip = name == null || name.length === 0

      if (match != null) {
        this._cursor += match.length
        if (skip) {
          // Discard unnamed tokens (e.g. whitespace) and try again.
          return this._consume()
        }
        return { type: name as T, value: match }
      }
    }

    throw new SyntaxError(`tokenizer unexpected token "${this._source[this._cursor]}"`)
  }
}
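A minimal sketch of driving the tokenizer directly (the 'word'/'num' specs and input are hypothetical, for illustration only):

// Usage sketch (hypothetical specs): walk a source string with the
// single-token lookahead API.
const specs: TokenSpec[] = [
  [null, /^\s+/], // unnamed: whitespace is discarded
  ['word', /^[A-Za-z]+/],
  ['num', /^\d+/],
]
const t = new Tokenizer('abc 123 def', specs)
while (t.lookahead != null) {
  const { type, value } = t.consumeN()
  console.log(type, value) // -> word abc, num 123, word def
}
// t.consume('num') would throw a SyntaxError if the lookahead were a 'word' token.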