Last active
March 21, 2023 15:07
-
-
Save marihachi/9a2123aa8770ea05242b298033518cd6 to your computer and use it in GitHub Desktop.
パーサーの例
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const T_EOF = 'EOF'; // end of input | |
const T_NUM = 'NUM'; // [0-9]+ | |
const T_ADD = 'ADD'; // "+" | |
const T_SUB = 'SUB'; // "-" | |
const T_MUL = 'MUL'; // "*" | |
const T_DIV = 'DIV'; // "/" | |
/** | |
* @param {string} input | |
*/ | |
function tokenize(input) { | |
let pos = 0; | |
const tokens = []; | |
while (pos < input.length) { | |
if (input[pos] == '+') { | |
tokens.push([T_ADD]); | |
pos += 1; | |
continue; | |
} | |
if (input[pos] == '-') { | |
tokens.push([T_SUB]); | |
pos += 1; | |
continue; | |
} | |
if (input[pos] == '*') { | |
tokens.push([T_MUL]); | |
pos += 1; | |
continue; | |
} | |
if (input[pos] == '/') { | |
tokens.push([T_DIV]); | |
pos += 1; | |
continue; | |
} | |
const match = /^([0-9]+)/.exec(input.slice(pos)); | |
if (match != null) { | |
const value = match[1]; | |
tokens.push([T_NUM, value]); | |
pos += value.length; | |
continue; | |
} | |
throw new Error('invalid input'); | |
} | |
tokens.push([T_EOF]); | |
return tokens; | |
} | |
function test() { | |
let tokens; | |
tokens = tokenize('1+2'); | |
console.log(tokens); | |
// => [ [ 'NUM', '1' ], [ 'ADD' ], [ 'NUM', '2' ], ['EOF'] ] | |
tokens = tokenize('2-1'); | |
console.log(tokens); | |
// => [ [ 'NUM', '2' ], [ 'SUB' ], [ 'NUM', '1' ], ['EOF'] ] | |
tokens = tokenize('2+2*3*2-10'); | |
console.log(tokens); | |
/* => [ | |
[ 'NUM', '2' ], | |
[ 'ADD' ], | |
[ 'NUM', '2' ], | |
[ 'MUL' ], | |
[ 'NUM', '3' ], | |
[ 'MUL' ], | |
[ 'NUM', '2' ], | |
[ 'SUB' ], | |
[ 'NUM', '10' ], | |
['EOF'] | |
] */ | |
} | |
test(); |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// tokens | |
const T_EOF = 'EOF'; // end of input | |
const T_EQ = 'EQ'; // "=" | |
const T_SEMI = 'SEMI'; // ";" | |
const T_NUM = 'NUM'; // [0-9]+ | |
const T_NAME = 'NAME'; // [A-Za-z0-9_]+ | |
/** | |
* @param {string} input | |
* @returns {[string, string | undefined]} | |
*/ | |
function tokenize(input) { | |
let pos = 0; | |
const tokens = []; | |
let match; | |
while (pos < input.length) { | |
// skip spaces | |
match = /^([ \r\n\t]+)/.exec(input.slice(pos)); | |
if (match != null) { | |
pos += match[1].length; | |
continue; | |
} | |
// EQ | |
if (input[pos] == '=') { | |
tokens.push([T_EQ]); | |
pos += 1; | |
continue; | |
} | |
// SEMI | |
if (input[pos] == ';') { | |
tokens.push([T_SEMI]); | |
pos += 1; | |
continue; | |
} | |
// NUM | |
match = /^([0-9]+)/.exec(input.slice(pos)); | |
if (match != null) { | |
const value = match[1]; | |
tokens.push([T_NUM, value]); | |
pos += value.length; | |
continue; | |
} | |
// NAME | |
match = /^([A-Za-z0-9_]+)/.exec(input.slice(pos)); | |
if (match != null) { | |
const value = match[1]; | |
tokens.push([T_NAME, value]); | |
pos += value.length; | |
continue; | |
} | |
throw new Error('invalid input'); | |
} | |
tokens.push([T_EOF]); | |
return tokens; | |
} | |
function testTokenize() { | |
let tokens; | |
tokens = tokenize('abc = 123;'); | |
console.log(tokens); | |
// => [ [ 'NAME', 'abc' ], [ 'EQ' ], [ 'NUM', '123' ], [ 'SEMI' ], [ 'EOF' ] ] | |
} | |
// parse | |
class ParseContext { | |
/** @param {[string, string | undefined][]} tokens */ | |
constructor(tokens) { | |
this.tokens = tokens; | |
this.pos = 0; | |
} | |
getToken() { | |
return this.tokens[this.pos]; | |
} | |
/** @param {string} tokenKind */ | |
expect(tokenKind) { | |
const token = this.getToken(); | |
if (token[0] != tokenKind) { | |
throw new Error('unexpected token'); | |
} | |
return token[1]; | |
} | |
next() { | |
if (this.getToken()[0] != T_EOF) { | |
this.pos += 1; | |
} | |
} | |
} | |
/** | |
* @param {ParseContext} p | |
*/ | |
function parse(p) { | |
const name = parseName(p); | |
// "=" token | |
p.expect(T_EQ); | |
p.next(); | |
const value = parseNum(p); | |
// ";" token | |
p.expect(T_SEMI); | |
p.next(); | |
return { kind: 'assign', name, value }; | |
} | |
/** | |
* @param {ParseContext} p | |
* @return {string} | |
*/ | |
function parseName(p) { | |
// name token | |
const value = p.expect(T_NAME); | |
p.next(); | |
return value; | |
} | |
/** | |
* @param {ParseContext} p | |
* @return {number} | |
*/ | |
function parseNum(p) { | |
// number token | |
const value = p.expect(T_NUM); | |
p.next(); | |
return Number(value); | |
} | |
function testParse() { | |
const tokens = tokenize('abc = 123;'); | |
const ast = parse(new ParseContext(tokens)); | |
console.log(ast); | |
// => { kind: 'assign', name: 'abc', value: 123 } | |
} | |
testParse(); |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// * add block statement | |
// * change expression | |
// tokens | |
const T_EOF = 'EOF'; // end of input | |
const T_EQ = 'EQ'; // "=" | |
const T_SEMI = 'SEMI'; // ";" | |
const T_BEGIN_BLOCK = 'BEGIN_BLOCK'; // "{" | |
const T_END_BLOCK = 'END_BLOCK'; // "}" | |
const T_NUM = 'NUM'; // [0-9]+ | |
const T_NAME = 'NAME'; // [A-Za-z0-9_]+ | |
/** | |
* @param {string} input | |
* @returns {[string, string | undefined]} | |
*/ | |
function tokenize(input) { | |
let pos = 0; | |
const tokens = []; | |
let match; | |
while (pos < input.length) { | |
// skip spaces | |
match = /^([ \r\n\t]+)/.exec(input.slice(pos)); | |
if (match != null) { | |
pos += match[1].length; | |
continue; | |
} | |
// EQ | |
if (input[pos] == '=') { | |
tokens.push([T_EQ]); | |
pos += 1; | |
continue; | |
} | |
// SEMI | |
if (input[pos] == ';') { | |
tokens.push([T_SEMI]); | |
pos += 1; | |
continue; | |
} | |
// BEGIN_BLOCK | |
if (input[pos] == '{') { | |
tokens.push([T_BEGIN_BLOCK]); | |
pos += 1; | |
continue; | |
} | |
// END_BLOCK | |
if (input[pos] == '}') { | |
tokens.push([T_END_BLOCK]); | |
pos += 1; | |
continue; | |
} | |
// NUM | |
match = /^([0-9]+)/.exec(input.slice(pos)); | |
if (match != null) { | |
const value = match[1]; | |
tokens.push([T_NUM, value]); | |
pos += value.length; | |
continue; | |
} | |
// NAME | |
match = /^([A-Za-z0-9_]+)/.exec(input.slice(pos)); | |
if (match != null) { | |
const value = match[1]; | |
tokens.push([T_NAME, value]); | |
pos += value.length; | |
continue; | |
} | |
throw new Error('invalid input'); | |
} | |
tokens.push([T_EOF]); | |
return tokens; | |
} | |
function testTokenize() { | |
let tokens; | |
tokens = tokenize('abc = 123;'); | |
console.log(tokens); | |
// => [ [ 'NAME', 'abc' ], [ 'EQ' ], [ 'NUM', '123' ], [ 'SEMI' ], ['EOF'] ] | |
} | |
// parse | |
class ParseContext { | |
/** @param {[string, string | undefined][]} tokens */ | |
constructor(tokens) { | |
this.tokens = tokens; | |
this.pos = 0; | |
} | |
getToken() { | |
return this.tokens[this.pos]; | |
} | |
/** @param {string} tokenKind */ | |
expect(tokenKind) { | |
const token = this.getToken(); | |
if (token[0] != tokenKind) { | |
throw new Error('unexpected token'); | |
} | |
return token[1]; | |
} | |
next() { | |
if (this.getToken()[0] != T_EOF) { | |
this.pos += 1; | |
} | |
} | |
} | |
/** | |
* @param {ParseContext} p | |
* @return {ReturnType<parseStatement>[]} | |
*/ | |
function parse(p) { | |
// statement list | |
const body = []; | |
while (p.getToken()[0] != T_EOF) { | |
body.push(parseStatement(p)); | |
} | |
return body; | |
} | |
/** | |
* @param {ParseContext} p | |
* @return {ReturnType<parseAssign> | ReturnType<parseBlock>} | |
*/ | |
function parseStatement(p) { | |
switch (p.getToken()[0]) { | |
case T_NAME: { | |
return parseAssign(p); | |
} | |
case T_BEGIN_BLOCK: { | |
return parseBlock(p); | |
} | |
} | |
throw new Error('unexpected token'); | |
} | |
/** | |
* @param {ParseContext} p | |
* @return {{ kind: 'assign', name: ReturnType<parseName>, value: ReturnType<parseExpr> }} | |
*/ | |
function parseAssign(p) { | |
const name = parseName(p); | |
// "=" token | |
p.expect(T_EQ); | |
p.next(); | |
const value = parseExpr(p); | |
// ";" token | |
p.expect(T_SEMI); | |
p.next(); | |
return { kind: 'assign', name, value }; | |
} | |
/** | |
* @param {ParseContext} p | |
* @return {{ kind: 'block', body: ReturnType<parseStatement>[] }} | |
*/ | |
function parseBlock(p) { | |
// "{" token | |
p.expect(T_BEGIN_BLOCK); | |
p.next(); | |
// statement list | |
const body = []; | |
while (p.getToken()[0] != T_END_BLOCK) { | |
body.push(parseStatement(p)); | |
} | |
// "}" token | |
p.expect(T_END_BLOCK); | |
p.next(); | |
return { kind: 'block', body }; | |
} | |
/** | |
* @param {ParseContext} p | |
* @return {ReturnType<parseName> | ReturnType<parseNum>} | |
*/ | |
function parseExpr(p) { | |
switch (p.getToken()[0]) { | |
case T_NAME: { | |
return parseName(p); | |
} | |
case T_NUM: { | |
return parseNum(p); | |
} | |
} | |
throw new Error('unexpected token'); | |
} | |
/** | |
* @param {ParseContext} p | |
* @return {{ kind: 'name', value: string }} | |
*/ | |
function parseName(p) { | |
// name token | |
const value = p.expect(T_NAME); | |
p.next(); | |
return { kind: 'name', value }; | |
} | |
/** | |
* @param {ParseContext} p | |
* @return {{ kind: 'num', value: number }} | |
*/ | |
function parseNum(p) { | |
// number token | |
const value = p.expect(T_NUM); | |
p.next(); | |
return { kind: 'num', value: Number(value) }; | |
} | |
function testParse_1() { | |
const tokens = tokenize('abc = 123;'); | |
const ast = parse(new ParseContext(tokens)); | |
console.log(JSON.stringify(ast)); | |
// => [{ "kind": "assign", "name": { "kind": "name", "value": "abc" }, "value": { "kind": "num", "value": 123 }}] | |
} | |
testParse_1(); | |
function testParse_2() { | |
const tokens = tokenize('{ abc = 123; xyz = 456; aaa = xyz; }'); | |
const ast = parse(new ParseContext(tokens)); | |
console.log(JSON.stringify(ast)); | |
/* => [{ "kind": "block", "body": [ | |
{ | |
"kind": "assign", | |
"name": { "kind": "name", "value": "abc" }, | |
"value": { "kind": "num", "value": 123 } | |
}, | |
{ | |
"kind": "assign", | |
"name": { "kind": "name", "value": "xyz" }, | |
"value": { "kind": "num", "value": 456 } | |
}, | |
{ | |
"kind": "assign", | |
"name": { "kind": "name", "value": "aaa" }, | |
"value": { "kind": "name", "value": "xyz" } | |
} | |
]}] */ | |
} | |
testParse_2(); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment