A minimally opinionated argument parser
/**
 * A minimally opinionated argument parser.
 *
 * Author: James Abney
 * License: MIT
 */
import { Tokenizer, TokenSpec } from './tokenizer'

type TokenType = 'long' | 'short' | 'var' | 'stop' | 'arg'
type KeyVal<T = string> = { [key: string]: T }

// Order matters: each spec is tried in turn at the current position,
// so the catch-all 'arg' pattern must come last.
const tokens: TokenSpec<TokenType>[] = [
  [null, /^\s+/], // whitespace (null name: discarded)
  ['long', /^--[A-Za-z][\w-]+/], // e.g. --verbose
  ['short', /^-[A-Za-z]+/], // e.g. -xz
  ['var', /^[A-Za-z]\w*=[^\s=]*/], // e.g. name=value
  ['stop', /^--(?=\s|$)/], // bare --: everything after is passed through
  ['arg', /^\S+/], // any other non-whitespace run
]
/**
 * Parses a command-line style string into positional args, long and
 * short options, key=value variables, and post-`--` pass-through args.
 *
 * @throws {SyntaxError}
 */
export function parseArgs(source: string) {
  const t = new Tokenizer<TokenType>(source, tokens)

  const arg: string[] = []
  const long: KeyVal<string[]>[] = []
  const short: KeyVal<string[]>[] = []
  const stop: string[] = []
  const _var: KeyVal[] = []

  while (t.lookahead != null) {
    const token = t.consumeN()

    switch (token.type) {
      case 'arg':
        arg.push(token.value)
        break
      case 'long':
        // Strip the leading '--' and attach any trailing plain args.
        long.push({ [token.value.slice(2)]: [...consumeArgs(t)] })
        break
      case 'short':
        // Strip the leading '-' and attach any trailing plain args.
        short.push({ [token.value.slice(1)]: [...consumeArgs(t)] })
        break
      case 'var':
        // 'name=value' splits into exactly two parts: '=' cannot occur in the value.
        _var.push(Object.fromEntries([token.value.split('=')]))
        break
      case 'stop':
        // Everything after a bare '--' is collected verbatim.
        stop.push(...consumeArgs(t))
        break
      default:
        throw new SyntaxError(`unrecognized token type "${token.type}"`)
    }
  }

  return { arg, long, short, var: _var, stop }
}
/**
 * Yields the values of consecutive 'arg' tokens following the current token.
 */
function* consumeArgs(t: Tokenizer<TokenType>): IterableIterator<string> {
  while (t.lookahead?.type === 'arg') {
    const token = t.consumeN()
    yield token.value
  }
}
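A quick usage sketch (the input string is hypothetical, for illustration only): plain arguments following an option attach to that option, and everything after a bare `--` is passed through verbatim.

// Usage sketch (hypothetical input): illustrates the returned shape.
const result = parseArgs('input.txt --level 3 -xz name=value -- raw1 raw2')
// result == {
//   arg: ['input.txt'],
//   long: [{ level: ['3'] }],
//   short: [{ xz: [] }],
//   var: [{ name: 'value' }],
//   stop: ['raw1', 'raw2'],
// }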
/**
 * A single token lookahead tokenizer.
 *
 * Author: James Abney
 * License: MIT
 */
export type Token<T extends string = string> = { readonly type: T; readonly value: string }

// Specs with a null (or empty) name match and are then discarded, e.g. whitespace.
export type TokenSpec<T extends string = string> = readonly [name: T | null, match: RegExp]
export class Tokenizer<T extends string = string> {
  private readonly tokens: readonly TokenSpec<T>[]
  private readonly _source: string
  private _cursor = 0
  private _lookahead: Token<T> | null = null

  constructor(source: string, tokens: TokenSpec<T>[]) {
    this._source = source
    this.tokens = tokens.slice()
    // Prime the single-token lookahead.
    this._lookahead = this._consume()
  }
  get source(): string {
    return this._source
  }

  get cursor(): number {
    return this._cursor
  }

  get eof(): boolean {
    return this._cursor >= this._source.length
  }

  get lookahead(): Token<T> | null {
    return this._lookahead
  }
  /**
   * Consumes a token and returns the previous lookahead, or null if
   * the input is exhausted. If a token type is specified, it must
   * match the lookahead token's type.
   *
   * @throws {SyntaxError}
   */
  consume(): Token<T> | null
  consume(type: T): Token<T> | null
  consume(type?: T): Token<T> | null {
    const token = this._lookahead
    this._lookahead = this._consume()
    if (type == null) {
      return token
    }
    if (token == null) {
      return null
    }
    if (token.type === type) {
      return token
    }
    throw new SyntaxError(`expected token "${type}", received "${token.type}"`)
  }
  /**
   * Like consume, but throws if the lookahead token is null.
   *
   * @throws {SyntaxError}
   */
  consumeN(): Token<T> {
    const token = this._lookahead
    this._lookahead = this._consume()
    if (token == null) {
      throw new SyntaxError(`token is null`)
    }
    return token
  }
  private _consume(): Token<T> | null {
    if (this._cursor > this._source.length) {
      throw new SyntaxError(`tokenizer cursor overrun`)
    }
    if (this.eof) return null

    const str = this._source.slice(this._cursor)

    for (const [name, matcher] of this.tokens) {
      const match = matcher.exec(str)?.[0]
      const skip = name == null || name.length === 0

      if (match != null) {
        this._cursor += match.length
        if (skip) {
          // Discard unnamed tokens (e.g. whitespace) and try again.
          return this._consume()
        }
        return { type: name as T, value: match }
      }
    }

    throw new SyntaxError(`tokenizer unexpected token "${this._source[this._cursor]}"`)
  }
}
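A minimal sketch of driving the tokenizer directly (the 'word'/'num' specs and input are hypothetical, for illustration only):

// Usage sketch (hypothetical specs): walk a source string with the
// single-token lookahead API.
const specs: TokenSpec[] = [
  [null, /^\s+/], // unnamed: whitespace is discarded
  ['word', /^[A-Za-z]+/],
  ['num', /^\d+/],
]
const t = new Tokenizer('abc 123 def', specs)
while (t.lookahead != null) {
  const { type, value } = t.consumeN()
  console.log(type, value) // -> word abc, num 123, word def
}
// t.consume('num') would throw a SyntaxError if the lookahead were a 'word' token.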