nopeless · November 30, 2021 22:22
diff --git a/arithmetic tokenizer.js b/arithmetic tokenizer.js
 /* eslint-disable consistent-return */


 class TokenType {
  constructor(type, regex) {
    this.type = type;
    this.regex = regex;
  }

  match(s) {
    const isMatch = this.regex.exec(s);
    console.log(isMatch);
    if (isMatch) {
      return {
        name: `Token`,
        type: this,
        content: isMatch[0],
        length: isMatch[0].length,
      };
    }
    return;
  }
 }


 // Apparently there isn't a way to make it anchor start and end by default (didn't know that)
 const INT = new TokenType(`int`, /^\d+/);
 const MUL = new TokenType(`mul`, /^\*/);
 const ADD = new TokenType(`add`, /^\+/);
 const OPEN_PAREN = new TokenType(`(`, /^\(/);
 const CLOSE_PAREN = new TokenType(`)`, /^\)/);

 const TOKEN_LIST = [INT, MUL, ADD, OPEN_PAREN, CLOSE_PAREN];

 /**
 * Splits by whitespace
 */
 function splitter(s) {
  return s.split(/\s/g);
 }

 // I forgot what this was called, but ill just call it context for now
 class Context {
  constructor(name, count) {
    this.name = name;
    this.count = count;
  }
 }

 class Grammar {
  /**
   * Executor should return self
   */
  constructor(executor) {
    this.executor = executor;
  }
  /**
   * Some other implementations write this as "start" and "end"
   */
  exec(tokens, entry) {
    return this.executor(tokens, entry);
  }
 }

 class IExpression {
  value() {
    throw new Error(`Not implemented`);
  }
  length() {
    throw new Error(`Not implemented`);
  }
 }

 class IntegerExpression extends IExpression {
  constructor(val) {
    super();
    this.val = val;
  }
  value() {
    return this.val;
  }
  length() {
    return 1;
  }
 }

 class AddExpression extends IExpression {
  constructor(left, right) {
    super();
    this.left = left;
    this.right = right;
  }
  value() {
    return this.left.value() + this.right.value();
  }
  length() {
    return 1 + this.left.length() + this.right.length();
  }
 }

 class MultiplyExpression extends IExpression {
  constructor(left, right) {
    super();
    this.left = left;
    this.right = right;
  }
  value() {
    return this.left.value() * this.right.value();
  }
  length() {
    return 1 + this.left.length() + this.right.length();
  }
 }

 class ParenthesizedExpression extends IExpression {
  constructor(expression) {
    super();
    this.expression = expression;
  }
  value() {
    return this.expression.value();
  }
  length() {
    return 2 + this.expression.length();
  }
 }

 class TermExpression extends IExpression {
  constructor(expression) {
    super();
    this.expression = expression;
  }
  value() {
    return this.expression.value();
  }

  length() {
    return this.expression.length();
  }
 }

 class ExpressionExpression extends IExpression {
  constructor(expression) {
    super();
    this.expression = expression;
  }
  value() {
    return this.expression.value();
  }
  length() {
    return this.expression.length();
  }
 }

 // Keep it simple just keep track of a global



 // entry point is Exp
 const Exp = new Grammar(function(tokens, entry) {
  const res = (() => {
    const term = Grammar.Term.exec(tokens);
    if (!term) throw new Error(`cannot parse`);
    tokens = tokens.slice(term.length());
    if (term instanceof TermExpression) {
      if (tokens.length) {
        // attempt to parse +
        if (tokens[0].type.type === `add`) {
          tokens = tokens.slice(1);
          const right = Grammar.Term.exec(tokens);
          if (!right) throw new Error(`cannot parse`);
          tokens = tokens.slice(right.length());
          return new AddExpression(term, right);
        }
        // attempt to parse *
        if (tokens[0].type.type === `mul`) {
          tokens = tokens.slice(1);
          const right = Grammar.Term.exec(tokens);
          if (!right) throw new Error(`cannot parse`);
          tokens = tokens.slice(term.length());
          return new MultiplyExpression(term, right);
        }
        throw new Error(`cannot parse anything other than add`);
      }
    }
    return new ExpressionExpression(term);
  })();

  if (entry && tokens.length) {
    throw new Error(`EXPECTED EOF, got ${tokens[0].content}`);
  }

  return res;
 });

 const Term = new Grammar(function(tokens, entry) {
  const res = (() => {
    if (tokens[0] && tokens[0].type.type === `(`) {
      tokens = tokens.slice(1);
      const exp = Grammar.Exp.exec(tokens);
      tokens = tokens.slice(exp.length());
      if (tokens[0].type.type !== `)`) {
        throw new Error(`expected closing paren`);
      }
      tokens = tokens.slice(1);
      return new ParenthesizedExpression(exp);
    }
    if (tokens[0] && tokens[0].type.type === `int`) {
      const num = Number(tokens[0].content);
      tokens = tokens.slice(1);
      if (tokens.length && tokens[0].type.type === `mul`) {
        tokens = tokens.slice(1);
        const right = Grammar.Term.exec(tokens);
        if (!right) {
          throw new Error(`cannot parse right side`);
        }
        return new TermExpression(new MultiplyExpression(new IntegerExpression(num), right));
      }
      return new TermExpression(new IntegerExpression(num));
    }
  })();
  if (entry && tokens.length) {
    throw new Error(`EXPECTED EOF, got ${tokens[0].content}`);
  }
  return res;
 });

 Grammar.Exp = Exp;
 Grammar.Term = Term;

 function tokenize(s) {
  s = splitter(s);
  const res = [];
  for (const t of s) {
    let pointer = 0;
    for (;;) {
      let hasMatch = false;
      const target = t.substring(pointer);
      if (!target) break;
      for (const token of TOKEN_LIST) {
        const match = token.match(target);
        if (match) {
          pointer += match.length;
          res.push(match);
          hasMatch = true;
          break;
        }
      }
      if (!hasMatch) {
        throw new Error(`Cannot parse '${target}'`);
      }
    }
    if (pointer < t.length) {
      throw new Error(`Expected EOF, got '${t.substring(pointer)}'`);
    }
  }
  return res;
 }

 const tokenized = tokenize(`1 + 2`);

 console.log(`tokenized`, tokenized);

 const res = Grammar.Exp.exec(tokenized, true);

 console.log(res);
 console.log(res.value());
	/* eslint-disable consistent-return */


	class TokenType {
	constructor(type, regex) {
	this.type = type;
	this.regex = regex;
	}

	match(s) {
	const isMatch = this.regex.exec(s);
	console.log(isMatch);
	if (isMatch) {
	return {
	name: `Token`,
	type: this,
	content: isMatch[0],
	length: isMatch[0].length,
	};
	}
	return;
	}
	}


	// Apparently there isn't a way to make it anchor start and end by default (didn't know that)
	const INT = new TokenType(`int`, /^\d+/);
	const MUL = new TokenType(`mul`, /^\*/);
	const ADD = new TokenType(`add`, /^\+/);
	const OPEN_PAREN = new TokenType(`(`, /^\(/);
	const CLOSE_PAREN = new TokenType(`)`, /^\)/);

	const TOKEN_LIST = [INT, MUL, ADD, OPEN_PAREN, CLOSE_PAREN];

	/**
	* Splits by whitespace
	*/
	function splitter(s) {
	return s.split(/\s/g);
	}

	// I forgot what this was called, but ill just call it context for now
	class Context {
	constructor(name, count) {
	this.name = name;
	this.count = count;
	}
	}

	class Grammar {
	/**
	* Executor should return self
	*/
	constructor(executor) {
	this.executor = executor;
	}
	/**
	* Some other implementations write this as "start" and "end"
	*/
	exec(tokens, entry) {
	return this.executor(tokens, entry);
	}
	}

	class IExpression {
	value() {
	throw new Error(`Not implemented`);
	}
	length() {
	throw new Error(`Not implemented`);
	}
	}

	class IntegerExpression extends IExpression {
	constructor(val) {
	super();
	this.val = val;
	}
	value() {
	return this.val;
	}
	length() {
	return 1;
	}
	}

	class AddExpression extends IExpression {
	constructor(left, right) {
	super();
	this.left = left;
	this.right = right;
	}
	value() {
	return this.left.value() + this.right.value();
	}
	length() {
	return 1 + this.left.length() + this.right.length();
	}
	}

	class MultiplyExpression extends IExpression {
	constructor(left, right) {
	super();
	this.left = left;
	this.right = right;
	}
	value() {
	return this.left.value() * this.right.value();
	}
	length() {
	return 1 + this.left.length() + this.right.length();
	}
	}

	class ParenthesizedExpression extends IExpression {
	constructor(expression) {
	super();
	this.expression = expression;
	}
	value() {
	return this.expression.value();
	}
	length() {
	return 2 + this.expression.length();
	}
	}

	class TermExpression extends IExpression {
	constructor(expression) {
	super();
	this.expression = expression;
	}
	value() {
	return this.expression.value();
	}

	length() {
	return this.expression.length();
	}
	}

	class ExpressionExpression extends IExpression {
	constructor(expression) {
	super();
	this.expression = expression;
	}
	value() {
	return this.expression.value();
	}
	length() {
	return this.expression.length();
	}
	}

	// Keep it simple just keep track of a global



	// entry point is Exp
	const Exp = new Grammar(function(tokens, entry) {
	const res = (() => {
	const term = Grammar.Term.exec(tokens);
	if (!term) throw new Error(`cannot parse`);
	tokens = tokens.slice(term.length());
	if (term instanceof TermExpression) {
	if (tokens.length) {
	// attempt to parse +
	if (tokens[0].type.type === `add`) {
	tokens = tokens.slice(1);
	const right = Grammar.Term.exec(tokens);
	if (!right) throw new Error(`cannot parse`);
	tokens = tokens.slice(right.length());
	return new AddExpression(term, right);
	}
	// attempt to parse *
	if (tokens[0].type.type === `mul`) {
	tokens = tokens.slice(1);
	const right = Grammar.Term.exec(tokens);
	if (!right) throw new Error(`cannot parse`);
	tokens = tokens.slice(term.length());
	return new MultiplyExpression(term, right);
	}
	throw new Error(`cannot parse anything other than add`);
	}
	}
	return new ExpressionExpression(term);
	})();

	if (entry && tokens.length) {
	throw new Error(`EXPECTED EOF, got ${tokens[0].content}`);
	}

	return res;
	});

	const Term = new Grammar(function(tokens, entry) {
	const res = (() => {
	if (tokens[0] && tokens[0].type.type === `(`) {
	tokens = tokens.slice(1);
	const exp = Grammar.Exp.exec(tokens);
	tokens = tokens.slice(exp.length());
	if (tokens[0].type.type !== `)`) {
	throw new Error(`expected closing paren`);
	}
	tokens = tokens.slice(1);
	return new ParenthesizedExpression(exp);
	}
	if (tokens[0] && tokens[0].type.type === `int`) {
	const num = Number(tokens[0].content);
	tokens = tokens.slice(1);
	if (tokens.length && tokens[0].type.type === `mul`) {
	tokens = tokens.slice(1);
	const right = Grammar.Term.exec(tokens);
	if (!right) {
	throw new Error(`cannot parse right side`);
	}
	return new TermExpression(new MultiplyExpression(new IntegerExpression(num), right));
	}
	return new TermExpression(new IntegerExpression(num));
	}
	})();
	if (entry && tokens.length) {
	throw new Error(`EXPECTED EOF, got ${tokens[0].content}`);
	}
	return res;
	});

	Grammar.Exp = Exp;
	Grammar.Term = Term;

	function tokenize(s) {
	s = splitter(s);
	const res = [];
	for (const t of s) {
	let pointer = 0;
	for (;;) {
	let hasMatch = false;
	const target = t.substring(pointer);
	if (!target) break;
	for (const token of TOKEN_LIST) {
	const match = token.match(target);
	if (match) {
	pointer += match.length;
	res.push(match);
	hasMatch = true;
	break;
	}
	}
	if (!hasMatch) {
	throw new Error(`Cannot parse '${target}'`);
	}
	}
	if (pointer < t.length) {
	throw new Error(`Expected EOF, got '${t.substring(pointer)}'`);
	}
	}
	return res;
	}

	const tokenized = tokenize(`1 + 2`);

	console.log(`tokenized`, tokenized);

	const res = Grammar.Exp.exec(tokenized, true);

	console.log(res);
	console.log(res.value());