parser未完全实现

This commit is contained in:
Gary Gan 2025-06-05 21:29:46 +08:00
parent 61f5dde8c9
commit 0a57b6e727
6 changed files with 435 additions and 5 deletions

119
include/ast.hpp Normal file
View File

@ -0,0 +1,119 @@
#pragma once
#include <map>
#include <string>
#include <unordered_map>
// Discriminator for AST nodes. Expression kinds come first, followed by
// statement kinds. Values start at 1 and follow declaration order, so do not
// reorder or insert entries in the middle without checking every user.
enum ast_type_t {
AST_EXPR_LITERAL = 1, // constant value => expected to be stored in the data segment
AST_EXPR_BINARY, // binary expression
AST_EXPR_UNARY, // unary expression
AST_EXPR_IDENT, // identifier expression
AST_EXPR_MAP_ACCESS,
AST_EXPR_VEC_ACCESS,
AST_EXPR_ARRAY_ACCESS,
AST_EXPR_TUPLE_ACCESS,
AST_EXPR_STRUCT_SELECT,
AST_EXPR_VEC_NEW, // [1, 2, 3]
AST_EXPR_ARRAY_NEW, // [1, 2, 3]
AST_EXPR_VEC_REPEAT_NEW, // [0;12]
AST_EXPR_ARRAY_REPEAT_NEW, // [0;12]
AST_EXPR_EMPTY_CURLY_NEW, // {}
AST_EXPR_MAP_NEW, // {"a": 1, "b": 2}
AST_EXPR_TUPLE_NEW, // (1, 1.1, true)
AST_EXPR_TUPLE_DESTR, // (var_a, var_b, (var_c, var_d))
AST_EXPR_STRUCT_NEW, // person {a = 1; b = 2}
AST_EXPR_BLOCK,
// statements
AST_STMT_EXPR_FAKE,
AST_STMT_BREAK,
AST_STMT_CONTINUE,
AST_STMT_IMPORT,
AST_STMT_VARDEF,
AST_STMT_VAR_TUPLE_DESTR,
AST_STMT_ASSIGN,
AST_STMT_GLOBAL_ASSIGN,
AST_STMT_RETURN,
AST_STMT_IF,
AST_STMT_FOR_ITERATOR,
AST_STMT_FOR_COND,
AST_STMT_FOR_TRADITION,
AST_CALL,
AST_FNDEF, // fn definition (includes its body)
AST_STMT_ENV_CLOSURE, // closure definition
};
// Operators that can appear in unary and binary expressions.
// Values are implicit (starting at 0) and follow declaration order.
enum ast_expr_op_t {
// arithmetic operators
AST_OP_ADD, // +
AST_OP_SUB, // -
AST_OP_MUL, // *
AST_OP_DIV, // /
AST_OP_REM, // %
// unary operators
AST_OP_NOT, // unary logical not: !right, right must be bool
AST_OP_NEG, // unary numeric negation: -right
AST_OP_BNOT, // unary bitwise not: ~right, right must be int
// bitwise operators
AST_OP_AND,
AST_OP_OR,
AST_OP_XOR,
AST_OP_LSHIFT,
AST_OP_RSHIFT,
// comparison operators
AST_OP_LT, // <
AST_OP_LE, // <=
AST_OP_GT, // >
AST_OP_GE, // >=
AST_OP_EE, // ==
AST_OP_NE, // !=
// logical operators
AST_OP_AND_AND, // &&
AST_OP_OR_OR, // ||
};
// Maps each ast_expr_op_t operator to its source-text spelling.
// Despite the name, the keys are operators (ast_expr_op_t), not ast_type_t.
// Note that AST_OP_SUB and AST_OP_NEG both map to "-".
inline std::unordered_map<ast_expr_op_t, std::string> ast_type_map = {
// arithmetic operators
{AST_OP_ADD, "+"},
{AST_OP_SUB, "-"},
{AST_OP_MUL, "*"},
{AST_OP_DIV, "/"},
{AST_OP_REM, "%"},
// bitwise operators
{AST_OP_AND, "&"},
{AST_OP_OR, "|"},
{AST_OP_XOR, "^"},
{AST_OP_BNOT, "~"},
{AST_OP_LSHIFT, "<<"},
{AST_OP_RSHIFT, ">>"},
// comparison operators
{AST_OP_LT, "<"},
{AST_OP_LE, "<="},
{AST_OP_GT, ">"},
{AST_OP_GE, ">="},
{AST_OP_EE, "=="},
{AST_OP_NE, "!="},
// logical operators
{AST_OP_OR_OR, "||"},
{AST_OP_AND_AND, "&&"},
{AST_OP_NOT, "!"}, // unary operator: logical not
{AST_OP_NEG, "-"}, // unary operator: negation
};
// One parsed statement. `assert_type` tells which kind of node `value`
// points to. Every member has a default so that a freshly declared
// ast_stmt_t never carries indeterminate values.
struct ast_stmt_t {
    int line = 0;               // source line of the statement
    int column = 0;             // source column of the statement
    bool error = false;         // set when the statement could not be parsed
    ast_type_t assert_type{};   // kind of statement; value-initialized (0) means "not set"
    void *value = nullptr;      // type-erased payload; NOTE(review): ownership is not visible here
};

249
include/syntax/Parser.hpp Normal file
View File

@ -0,0 +1,249 @@
#pragma once
#include "../ast.hpp"
#include "../types.hpp"
#include "token.h"
class Parser {
public:
Parser(module_t module)
: m(module) {};
private:
module_t m;
// 定义优先级的枚举类型
enum precedence {
PRECEDENCE_NULL,// 最低优先级
PRECEDENCE_ASSIGN,
PRECEDENCE_CATCH,
PRECEDENCE_OR_OR, // ||
PRECEDENCE_AND_AND, // &&
PRECEDENCE_OR, // |
PRECEDENCE_XOR, // ^
PRECEDENCE_AND, // %
PRECEDENCE_CMP_EQUAL,// == !=
PRECEDENCE_COMPARE, // > < >= <=
PRECEDENCE_SHIFT, // << >>
PRECEDENCE_TERM, // + -
PRECEDENCE_FACTOR, // * / %
PRECEDENCE_UNARY, // - / ! / ~ / * / &
PRECEDENCE_CALL, // foo.bar foo["bar"] foo() foo().foo.bar 这几个表达式都是同一优先级,应该从左往右依次运算
PRECEDENCE_PRIMARY, // 最高优先级
};
inline std::unordered_map<token_type_t, ast_expr_op_t> token_to_ast_op = {
// 算术运算符
{TOKEN_PLUS, AST_OP_ADD}, // +
{TOKEN_MINUS, AST_OP_SUB}, // -
{TOKEN_STAR, AST_OP_MUL}, // *
{TOKEN_SLASH, AST_OP_DIV}, // /
{TOKEN_PERCENT, AST_OP_REM}, // %
// 比较运算符
{TOKEN_EQUAL_EQUAL, AST_OP_EE}, // ==
{TOKEN_NOT_EQUAL, AST_OP_NE}, // !=
{TOKEN_GREATER_EQUAL, AST_OP_GE},// >=
{TOKEN_RIGHT_ANGLE, AST_OP_GT}, // >
{TOKEN_LESS_EQUAL, AST_OP_LE}, // <=
{TOKEN_LESS_THAN, AST_OP_LT}, // <
// 逻辑运算符
{TOKEN_AND_AND, AST_OP_AND_AND}, // &&
{TOKEN_OR_OR, AST_OP_OR_OR}, // ||
// 位运算符
{TOKEN_TILDE, AST_OP_BNOT}, // ~
{TOKEN_AND, AST_OP_AND}, // &
{TOKEN_OR, AST_OP_OR}, // |
{TOKEN_XOR, AST_OP_XOR}, // ^
{TOKEN_LEFT_SHIFT, AST_OP_LSHIFT}, // <<
{TOKEN_RIGHT_SHIFT, AST_OP_RSHIFT}, // >>
// 复合赋值运算符(拆解为基本运算)
{TOKEN_PERCENT_EQUAL, AST_OP_REM}, // %=
{TOKEN_MINUS_EQUAL, AST_OP_SUB}, // -=
{TOKEN_PLUS_EQUAL, AST_OP_ADD}, // +=
{TOKEN_SLASH_EQUAL, AST_OP_DIV}, // /=
{TOKEN_STAR_EQUAL, AST_OP_MUL}, // *=
{TOKEN_OR_EQUAL, AST_OP_OR}, // |=
{TOKEN_AND_EQUAL, AST_OP_AND}, // &=
{TOKEN_XOR_EQUAL, AST_OP_XOR}, // ^=
{TOKEN_LEFT_SHIFT_EQUAL, AST_OP_LSHIFT}, // <<=
{TOKEN_RIGHT_SHIFT_EQUAL, AST_OP_RSHIFT}, // >>=
};
inline std::unordered_map<token_type_t, type_kind> token_to_type = {
// 字面量类型
{TOKEN_TRUE, TYPE_BOOL}, // true
{TOKEN_FALSE, TYPE_BOOL}, // false
{TOKEN_VOID, TYPE_VOID}, // void
{TOKEN_LITERAL_FLOAT, TYPE_FLOAT64},// 浮点字面量
{TOKEN_LITERAL_INT, TYPE_INT32}, // 整数字面量
{TOKEN_LITERAL_STRING, TYPE_STRING}, // 字符串字面量
// 基本数据类型
{TOKEN_BOOL, TYPE_BOOL}, // bool
{TOKEN_STRING, TYPE_STRING}, // string
// 定长整数类型
{TOKEN_I8, TYPE_INT8}, // int8
{TOKEN_I16, TYPE_INT16}, // int16
{TOKEN_I32, TYPE_INT32}, // int32
{TOKEN_I64, TYPE_INT64}, // int64
{TOKEN_U8, TYPE_UINT8}, // uint8
{TOKEN_U16, TYPE_UINT16}, // uint16
{TOKEN_U32, TYPE_UINT32}, // uint32
{TOKEN_U64, TYPE_UINT64}, // uint64
{TOKEN_F32, TYPE_FLOAT32}, // float32
{TOKEN_F64, TYPE_FLOAT64}, // float64
// 特殊类型
{TOKEN_VAR, TYPE_UNKNOWN}, // var (未推导类型)
// 复合数据类型
{TOKEN_VEC, TYPE_VEC}, // vector
{TOKEN_MAP, TYPE_MAP}, // map
};
// 返回当前tokentoken指针向前进一位
inline token_t advance() {
if (m.parser_cursor.current + 1 >= m.token_list.size()) {
std::cerr << "[ERROR] line: " << m.token_list[m.parser_cursor.current].line << ". column: "
<< m.token_list[m.parser_cursor.current].column <<". next token is null";
}
token_t token = m.token_list[m.parser_cursor.current];
m.parser_cursor.current++;
return token;
}
inline token_t retreate() {
if (m.parser_cursor.current - 1 < 0) {
std::cerr << "[ERROR] line: " << m.token_list[m.parser_cursor.current].line << ". column: "
<< m.token_list[m.parser_cursor.current].column <<". prev token is null";
}
token_t token = m.token_list[m.parser_cursor.current];
m.parser_cursor.current--;
return token;
}
inline token_t peak() {
return m.token_list[m.parser_cursor.current];
}
inline token_t prev() {
if (m.parser_cursor.current - 1 < 0) {
std::cerr << "[ERROR] line: " << m.token_list[m.parser_cursor.current].line << ". column: "
<< m.token_list[m.parser_cursor.current].column <<". prev token is null";
}
token_t token = m.token_list[m.parser_cursor.current - 1];
return token;
}
inline bool is(token_type_t expect) {
return m.token_list[m.parser_cursor.current].type == expect;
}
inline bool ident_is(std::string expect) {
return m.token_list[m.parser_cursor.current].literal == expect;
}
inline bool is_literal() {
return is(TOKEN_LITERAL_FLOAT) ||
is(TOKEN_LITERAL_INT) ||
is(TOKEN_LITERAL_STRING);
}
inline token_t consume(token_type_t expect) {
token_t t = m.token_list[m.parser_cursor.current];
if (t.type == expect) {
advance();
return true;
}
return false;
}
inline token_t must(token_type_t expect) {
token_t t = m.token_list[m.parser_cursor.current];
if (t.type != expect) {
std::cerr << "[ERROR] line: " << m.token_list[m.parser_cursor.current].line << ". column: "
<< m.token_list[m.parser_cursor.current].column <<". expected " << token_str[expect]
<< " but got " << t.literal;
}
advance();
return t;
}
inline token_t next(int step) {
if (m.parser_cursor.current + step >= m.token_list.size()) {
return nullptr;
}
return m.token_list[m.parser_cursor.current + step];
}
inline bool next_is(int step, token_type_t expect) {
if (m.parser_cursor.current + step >= m.token_list.size()) {
return false;
}
return m.token_list[m.parser_cursor.current + step].type == expect;
}
inline bool parser_must_stmt_end() {
if (is(TOKEN_EOF) || is(TOKEN_RIGHT_CURLY)) {
return true;
}
token_t t = prev();
std::cerr << "[ERROR] line: " << t.line << ". column: "
<< t.column<<". excepted ';' or '}' at end of statement";
return false;
}
inline bool is_basic_type() {
if (is(TOKEN_VAR) || is(TOKEN_VOID) ||
is(TOKEN_I8) || is(TOKEN_I16) ||
is(TOKEN_I32) || is(TOKEN_I64) ||
is(TOKEN_U8) || is(TOKEN_U16) || is(TOKEN_U32) ||
is(TOKEN_U64)|| is(TOKEN_F32) || is(TOKEN_F64) ||
is(TOKEN_BOOL) || is(TOKEN_STRING)) {
return true;
}
return false;
}
/**
* var a = xxx
* int a = xxx
* bool a = xxx
* string a = xxx
* @return
*/
inline bool is_type_begin_stmt() {
// var/any/int/float/bool/string
if (is_basic_type()) {
return true;
}
return true;
}
inline ast_stmt_t global_stmt() {
if (is_type_begin_stmt()) {
}
}
inline std::vector<ast_stmt_t> parser() {
// parser_cursor初始化已经在Scanner中初始化了
std::vector<ast_stmt_t> ret;
while (!is(TOKEN_EOF)) {
ret.push_back(parser_global_stmt());
parser_must_stmt_end();
}
return ret;
};
};

View File

@ -27,6 +27,10 @@ public:
} }
tokens.push_back(token_t(TOKEN_EOF, "EOF", module.s_cursor.line, module.s_cursor.column)); tokens.push_back(token_t(TOKEN_EOF, "EOF", module.s_cursor.line, module.s_cursor.column));
// 词法分析后实例化语法分析相关变量
module.parser_cursor.tokens = tokens;
module.parser_cursor.current = 0;
return tokens; return tokens;
} }
@ -171,7 +175,7 @@ private:
case '?': case '?':
return TOKEN_QUESTION; return TOKEN_QUESTION;
case '%': case '%':
return match('=') ? TOKEN_PERSON_EQUAL : TOKEN_PERSON; return match('=') ? TOKEN_PERCENT_EQUAL : TOKEN_PERCENT;
case '-': case '-':
if (match('=')) { if (match('=')) {
return TOKEN_MINUS_EQUAL; return TOKEN_MINUS_EQUAL;
@ -528,6 +532,10 @@ private:
if (word.substr(2, 1) == "c" && word.size() == 3) { if (word.substr(2, 1) == "c" && word.size() == 3) {
return TOKEN_VEC; return TOKEN_VEC;
} }
case 'o': // void
if (word.substr(2,2) == "id" && word.size() == 4) {
return TOKEN_VOID;
}
} }
} }
case 'u': { case 'u': {

View File

@ -25,7 +25,7 @@ enum token_type_t {
TOKEN_SEMICOLON, // ; TOKEN_SEMICOLON, // ;
TOKEN_SLASH, // / TOKEN_SLASH, // /
TOKEN_STAR, // a * b, *a TOKEN_STAR, // a * b, *a
TOKEN_PERSON, // % TOKEN_PERCENT, // %
TOKEN_QUESTION, // ? TOKEN_QUESTION, // ?
TOKEN_RIGHT_ARROW,// -> TOKEN_RIGHT_ARROW,// ->
@ -42,7 +42,7 @@ enum token_type_t {
TOKEN_MINUS_EQUAL, // -= TOKEN_MINUS_EQUAL, // -=
TOKEN_STAR_EQUAL, // *= TOKEN_STAR_EQUAL, // *=
TOKEN_SLASH_EQUAL, // /= TOKEN_SLASH_EQUAL, // /=
TOKEN_PERSON_EQUAL, // %= TOKEN_PERCENT_EQUAL, // %=
TOKEN_AND_EQUAL, // &= TOKEN_AND_EQUAL, // &=
TOKEN_OR_EQUAL, // |= TOKEN_OR_EQUAL, // |=
TOKEN_XOR_EQUAL, // ^= TOKEN_XOR_EQUAL, // ^=
@ -88,6 +88,8 @@ enum token_type_t {
TOKEN_TRUE, TOKEN_TRUE,
TOKEN_FALSE, TOKEN_FALSE,
TOKEN_TYPE, TOKEN_TYPE,
// 要加上void函数没有返回时要用到
TOKEN_VOID,
TOKEN_STRUCT, TOKEN_STRUCT,
TOKEN_CONTINUE, TOKEN_CONTINUE,
TOKEN_BREAK, TOKEN_BREAK,
@ -120,7 +122,7 @@ inline static std::unordered_map<token_type_t, std::string> token_str = {
{TOKEN_SEMICOLON, ";"}, {TOKEN_SEMICOLON, ";"},
{TOKEN_SLASH, "/"}, {TOKEN_SLASH, "/"},
{TOKEN_STAR, "*"}, {TOKEN_STAR, "*"},
{TOKEN_PERSON, "%"}, {TOKEN_PERCENT, "%"},
{TOKEN_QUESTION, "?"}, {TOKEN_QUESTION, "?"},
{TOKEN_RIGHT_ARROW, "->"}, {TOKEN_RIGHT_ARROW, "->"},
{TOKEN_NOT, "!"}, {TOKEN_NOT, "!"},
@ -135,7 +137,7 @@ inline static std::unordered_map<token_type_t, std::string> token_str = {
{TOKEN_MINUS_EQUAL, "-="}, {TOKEN_MINUS_EQUAL, "-="},
{TOKEN_STAR_EQUAL, "*="}, {TOKEN_STAR_EQUAL, "*="},
{TOKEN_SLASH_EQUAL, "/="}, {TOKEN_SLASH_EQUAL, "/="},
{TOKEN_PERSON_EQUAL, "%="}, {TOKEN_PERCENT_EQUAL, "%="},
{TOKEN_AND_EQUAL, "&="}, {TOKEN_AND_EQUAL, "&="},
{TOKEN_OR_EQUAL, "|="}, {TOKEN_OR_EQUAL, "|="},
{TOKEN_XOR_EQUAL, "^="}, {TOKEN_XOR_EQUAL, "^="},
@ -176,6 +178,7 @@ inline static std::unordered_map<token_type_t, std::string> token_str = {
{TOKEN_BREAK, "break"}, {TOKEN_BREAK, "break"},
{TOKEN_FOR, "for"}, {TOKEN_FOR, "for"},
{TOKEN_IN, "in"}, {TOKEN_IN, "in"},
{TOKEN_VOID, "void"},
{TOKEN_IF, "if"}, {TOKEN_IF, "if"},
{TOKEN_ELSE, "else"}, {TOKEN_ELSE, "else"},
{TOKEN_ELSE_IF, "else if"}, {TOKEN_ELSE_IF, "else if"},

View File

@ -3,6 +3,46 @@
#include <vector> #include <vector>
#include "syntax/token.h" #include "syntax/token.h"
// Kind tag for every type known to the compiler. Values start at 1 and
// follow declaration order.
enum type_kind {
TYPE_BOOL = 1,
TYPE_INT8,
TYPE_UINT8, // the order of uint8 ~ int must not change; it is used for implicit type conversion
TYPE_INT16,
TYPE_UINT16,
TYPE_INT32,
TYPE_UINT32, // value = 7
TYPE_INT64,
TYPE_UINT64,
TYPE_FLOAT32,
TYPE_FLOAT64, // value = 11
// compound types
TYPE_STRING,
TYPE_VEC,
TYPE_MAP, // value = 14
TYPE_TUPLE,
TYPE_STRUCT,
TYPE_FN, // concrete fn type
TYPE_FN_T, // underlying type
TYPE_INTEGER_T, // underlying type
TYPE_FLOATER_T, // underlying type
TYPE_ALL_T, // wildcard matching every type
TYPE_VOID, // the function has no return value
TYPE_UNKNOWN, // var a = 1: the type of a is unknown
TYPE_RAW_STRING, // C-style string, currently mainly used for string immediates in LIR
// TYPE_ALIAS, // the kind registered when a new type is declared
// TYPE_PARAM, // type formal parameter: in `type foo<f1, f2> = f1|f2`, f1 is a param
TYPE_IDENT,
};
// Cursor state of the scanner (lexical analysis)
struct scanner_cursor_t { struct scanner_cursor_t {
std::string source; std::string source;
std::string::size_type current; std::string::size_type current;
@ -16,12 +56,21 @@ struct scanner_cursor_t {
char space_next; char space_next;
}; };
struct parser_cursor_t {
std::vector<token_t> tokens;
std::string::size_type current;
};
struct module_t { struct module_t {
std::string source; std::string source;
// 在语法分析中需要用到的变量
scanner_cursor_t s_cursor; scanner_cursor_t s_cursor;
std::vector<token_t> token_list; std::vector<token_t> token_list;
// 在语义分析中需要用到的变量
parser_cursor_t parser_cursor;
module_t(std::string source) module_t(std::string source)
: source(source) { : source(source) {
s_cursor.source = source; s_cursor.source = source;

2
src/ast.cpp Normal file
View File

@ -0,0 +1,2 @@
#include "ast.hpp"