arg-parser: A stateless, minimally opinionated argument parser
arg-parser.ts
/**
 * A minimally opinionated argument parser with a stateless implementation.
 *
 * Author: James Abney
 * License: MIT
 */
import { Tokenizer, TokenSpec } from './tokenizer'
/** A bare positional argument. */
export interface IArg {
  type: 'arg'
  value: string
}

/** A long option (--name) and the arguments that follow it. */
export interface ILong {
  type: 'long'
  name: string
  args: string[]
}

/** A short option (-n, or a cluster like -abc) and the arguments that follow it. */
export interface IShort {
  type: 'short'
  name: string
  args: string[]
}

/** A key=value assignment. */
export interface IVar {
  type: 'var'
  var: [string, string]
}

/** The stop marker (--) and every argument after it. */
export interface IStop {
  type: 'stop'
  args: string[]
}

export type ArgType = IArg | ILong | IShort | IVar | IStop

/** The union of 'type' discriminants: 'arg' | 'long' | 'short' | 'var' | 'stop'. */
type TokenType = ArgType['type']
const tokens: TokenSpec<TokenType>[] = [
  [null, /^\s+/], // whitespace (discarded)
  ['long', /^--[A-Za-z][\w-]+/], // --name
  ['short', /^-[A-Za-z]+/], // -n or a cluster like -abc
  ['var', /^[A-Za-z]\w*=[^\s=]*/], // key=value
  ['stop', /^--(?=\s|$)/], // bare --
  ['arg', /^\S+/], // anything else
]
/**
 * Parses a command-line style string. With a callback, each parsed item is
 * passed to it eagerly; without one, a lazy iterator is returned.
 */
export function parseArgs(source: string): IterableIterator<ArgType>
export function parseArgs(source: string, cb: (item: ArgType) => void): void
export function parseArgs(source: string, cb?: (item: ArgType) => void): IterableIterator<ArgType> | void {
  if (typeof cb === 'function') {
    return parseArgsCallback(source, cb)
  }
  return parseArgsGenerator(source)
}
function parseArgsCallback(source: string, cb: (item: ArgType) => void): void {
  // Delegate to the generator so the parsing logic lives in one place.
  for (const item of parseArgsGenerator(source)) {
    cb(item)
  }
}
function* parseArgsGenerator(source: string): IterableIterator<ArgType> {
  const t = new Tokenizer<TokenType>(source, tokens)
  while (t.lookahead != null) {
    const token = t.consumeN()
    switch (token.type) {
      case 'arg':
        yield { type: 'arg', value: token.value }
        break
      case 'long':
        // Strip the leading "--" and gather any trailing arguments.
        yield { type: 'long', name: token.value.slice(2), args: [...consumeArgs(t)] }
        break
      case 'short':
        // Strip the leading "-" and gather any trailing arguments.
        yield { type: 'short', name: token.value.slice(1), args: [...consumeArgs(t)] }
        break
      case 'var':
        // The var regex forbids "=" in the value, so the split yields two parts.
        yield { type: 'var', var: token.value.split('=') as [string, string] }
        break
      case 'stop':
        yield { type: 'stop', args: [...consumeArgs(t)] }
        break
      default:
        throw new SyntaxError(`unrecognized token type "${token.type}"`)
    }
  }
}
/** Consumes consecutive 'arg' tokens, e.g. the values that follow an option. */
function* consumeArgs(t: Tokenizer<TokenType>): IterableIterator<string> {
  while (t.lookahead?.type === 'arg') {
    yield t.consumeN().value
  }
}
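
A quick usage sketch (not part of the gist): the input string and log statements below are illustrative, and assume both files sit side by side so that './arg-parser' resolves.

import { parseArgs } from './arg-parser'

// Lazy form: iterate parsed items as they are produced.
for (const item of parseArgs('build src -v --out dir1 dir2 NODE_ENV=production -- rest')) {
  switch (item.type) {
    case 'arg':
      console.log('arg:', item.value) // "build", then "src"
      break
    case 'short':
      console.log('short:', item.name, item.args) // "v", []
      break
    case 'long':
      console.log('long:', item.name, item.args) // "out", ["dir1", "dir2"]
      break
    case 'var':
      console.log('var:', item.var) // ["NODE_ENV", "production"]
      break
    case 'stop':
      console.log('stop:', item.args) // ["rest"]
      break
  }
}

// Eager form: the callback is invoked once per parsed item.
parseArgs('--out dir', item => console.log(item))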
tokenizer.ts
/**
 * A single token lookahead tokenizer.
 *
 * Author: James Abney
 * License: MIT
 */
export type Token<T extends string = string> = { readonly type: T; readonly value: string }

// Tokens with a null (or empty) name are matched but discarded, e.g. whitespace.
export type TokenSpec<T extends string = string> = readonly [name: T | null, match: RegExp]
export class Tokenizer<T extends string = string> {
  private readonly tokens: readonly TokenSpec<T>[]
  private readonly _source: string
  private _cursor = 0
  private _lookahead: Token<T> | null = null

  constructor(source: string, tokens: TokenSpec<T>[]) {
    this._source = source
    this.tokens = tokens.slice() // defensive copy
    this._lookahead = this._consume() // prime the lookahead
  }
  get source(): string {
    return this._source
  }

  get cursor(): number {
    return this._cursor
  }

  get eof(): boolean {
    return this._cursor >= this._source.length
  }

  get lookahead(): Token<T> | null {
    return this._lookahead
  }
  /**
   * Consumes the lookahead token and advances. If a token type is specified,
   * it must match the consumed token's type; at end of input, returns null.
   *
   * @throws {SyntaxError} if the consumed token's type does not match `type`.
   */
  consume(): Token<T> | null
  consume(type: T): Token<T> | null
  consume(type?: T): Token<T> | null {
    const token = this._lookahead
    this._lookahead = this._consume()
    if (type == null) {
      return token
    }
    if (token == null) {
      return null
    }
    if (token.type === type) {
      return token
    }
    throw new SyntaxError(`expected token "${type}", received "${token.type}"`)
  }
  /**
   * Consumes the lookahead token like consume(), but throws instead of
   * returning null at end of input.
   *
   * @throws {SyntaxError}
   */
  consumeN(): Token<T> {
    const token = this._lookahead
    this._lookahead = this._consume()
    if (token == null) {
      throw new SyntaxError(`token is null`)
    }
    return token
  }
  private _consume(): Token<T> | null {
    if (this._cursor > this._source.length) {
      throw new SyntaxError(`tokenizer cursor overrun`)
    }
    if (this.eof) return null
    // The remaining input is the same for every spec, so slice it once.
    const str = this._source.slice(this._cursor)
    for (const [name, matcher] of this.tokens) {
      const match = matcher.exec(str)?.[0]
      const skip = name == null || name.length === 0
      if (match != null) {
        this._cursor += match.length
        if (skip) {
          // Discard nameless tokens (e.g. whitespace) and keep scanning.
          return this._consume()
        }
        return { type: name as T, value: match }
      }
    }
    throw new SyntaxError(`tokenizer unexpected token "${this._source[this._cursor]}"`)
  }
}
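
The tokenizer also stands on its own; here is a minimal sketch with a made-up token spec (the 'num'/'word' names and regexes are assumptions for illustration, not part of the gist).

import { Tokenizer, TokenSpec } from './tokenizer'

type Kind = 'num' | 'word'

// Every regex is anchored with ^ so matching starts at the cursor.
const spec: TokenSpec<Kind>[] = [
  [null, /^\s+/], // whitespace: matched but discarded
  ['num', /^\d+/],
  ['word', /^[A-Za-z]+/],
]

const t = new Tokenizer<Kind>('abc 123', spec)
while (t.lookahead != null) {
  const { type, value } = t.consumeN()
  console.log(type, value) // "word abc", then "num 123"
}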