249 lines
8.1 KiB
C++
249 lines
8.1 KiB
C++
#pragma once
|
||
|
||
#include "../ast.hpp"
|
||
#include "../types.hpp"
|
||
#include "token.h"
|
||
|
||
class Parser {
|
||
public:
|
||
Parser(module_t module)
|
||
: m(module) {};
|
||
|
||
private:
|
||
module_t m;
|
||
|
||
// 定义优先级的枚举类型
|
||
enum precedence {
|
||
PRECEDENCE_NULL,// 最低优先级
|
||
PRECEDENCE_ASSIGN,
|
||
PRECEDENCE_CATCH,
|
||
PRECEDENCE_OR_OR, // ||
|
||
PRECEDENCE_AND_AND, // &&
|
||
PRECEDENCE_OR, // |
|
||
PRECEDENCE_XOR, // ^
|
||
PRECEDENCE_AND, // %
|
||
PRECEDENCE_CMP_EQUAL,// == !=
|
||
PRECEDENCE_COMPARE, // > < >= <=
|
||
PRECEDENCE_SHIFT, // << >>
|
||
PRECEDENCE_TERM, // + -
|
||
PRECEDENCE_FACTOR, // * / %
|
||
PRECEDENCE_UNARY, // - / ! / ~ / * / &
|
||
PRECEDENCE_CALL, // foo.bar foo["bar"] foo() foo().foo.bar 这几个表达式都是同一优先级,应该从左往右依次运算
|
||
PRECEDENCE_PRIMARY, // 最高优先级
|
||
};
|
||
|
||
inline std::unordered_map<token_type_t, ast_expr_op_t> token_to_ast_op = {
|
||
// 算术运算符
|
||
{TOKEN_PLUS, AST_OP_ADD}, // +
|
||
{TOKEN_MINUS, AST_OP_SUB}, // -
|
||
{TOKEN_STAR, AST_OP_MUL}, // *
|
||
{TOKEN_SLASH, AST_OP_DIV}, // /
|
||
{TOKEN_PERCENT, AST_OP_REM}, // %
|
||
|
||
// 比较运算符
|
||
{TOKEN_EQUAL_EQUAL, AST_OP_EE}, // ==
|
||
{TOKEN_NOT_EQUAL, AST_OP_NE}, // !=
|
||
{TOKEN_GREATER_EQUAL, AST_OP_GE},// >=
|
||
{TOKEN_RIGHT_ANGLE, AST_OP_GT}, // >
|
||
{TOKEN_LESS_EQUAL, AST_OP_LE}, // <=
|
||
{TOKEN_LESS_THAN, AST_OP_LT}, // <
|
||
|
||
// 逻辑运算符
|
||
{TOKEN_AND_AND, AST_OP_AND_AND}, // &&
|
||
{TOKEN_OR_OR, AST_OP_OR_OR}, // ||
|
||
|
||
// 位运算符
|
||
{TOKEN_TILDE, AST_OP_BNOT}, // ~
|
||
{TOKEN_AND, AST_OP_AND}, // &
|
||
{TOKEN_OR, AST_OP_OR}, // |
|
||
{TOKEN_XOR, AST_OP_XOR}, // ^
|
||
{TOKEN_LEFT_SHIFT, AST_OP_LSHIFT}, // <<
|
||
{TOKEN_RIGHT_SHIFT, AST_OP_RSHIFT}, // >>
|
||
|
||
// 复合赋值运算符(拆解为基本运算)
|
||
{TOKEN_PERCENT_EQUAL, AST_OP_REM}, // %=
|
||
{TOKEN_MINUS_EQUAL, AST_OP_SUB}, // -=
|
||
{TOKEN_PLUS_EQUAL, AST_OP_ADD}, // +=
|
||
{TOKEN_SLASH_EQUAL, AST_OP_DIV}, // /=
|
||
{TOKEN_STAR_EQUAL, AST_OP_MUL}, // *=
|
||
{TOKEN_OR_EQUAL, AST_OP_OR}, // |=
|
||
{TOKEN_AND_EQUAL, AST_OP_AND}, // &=
|
||
{TOKEN_XOR_EQUAL, AST_OP_XOR}, // ^=
|
||
{TOKEN_LEFT_SHIFT_EQUAL, AST_OP_LSHIFT}, // <<=
|
||
{TOKEN_RIGHT_SHIFT_EQUAL, AST_OP_RSHIFT}, // >>=
|
||
};
|
||
|
||
inline std::unordered_map<token_type_t, type_kind> token_to_type = {
|
||
// 字面量类型
|
||
{TOKEN_TRUE, TYPE_BOOL}, // true
|
||
{TOKEN_FALSE, TYPE_BOOL}, // false
|
||
{TOKEN_VOID, TYPE_VOID}, // void
|
||
{TOKEN_LITERAL_FLOAT, TYPE_FLOAT64},// 浮点字面量
|
||
{TOKEN_LITERAL_INT, TYPE_INT32}, // 整数字面量
|
||
{TOKEN_LITERAL_STRING, TYPE_STRING}, // 字符串字面量
|
||
|
||
// 基本数据类型
|
||
{TOKEN_BOOL, TYPE_BOOL}, // bool
|
||
{TOKEN_STRING, TYPE_STRING}, // string
|
||
|
||
// 定长整数类型
|
||
{TOKEN_I8, TYPE_INT8}, // int8
|
||
{TOKEN_I16, TYPE_INT16}, // int16
|
||
{TOKEN_I32, TYPE_INT32}, // int32
|
||
{TOKEN_I64, TYPE_INT64}, // int64
|
||
{TOKEN_U8, TYPE_UINT8}, // uint8
|
||
{TOKEN_U16, TYPE_UINT16}, // uint16
|
||
{TOKEN_U32, TYPE_UINT32}, // uint32
|
||
{TOKEN_U64, TYPE_UINT64}, // uint64
|
||
{TOKEN_F32, TYPE_FLOAT32}, // float32
|
||
{TOKEN_F64, TYPE_FLOAT64}, // float64
|
||
|
||
// 特殊类型
|
||
{TOKEN_VAR, TYPE_UNKNOWN}, // var (未推导类型)
|
||
|
||
// 复合数据类型
|
||
{TOKEN_VEC, TYPE_VEC}, // vector
|
||
{TOKEN_MAP, TYPE_MAP}, // map
|
||
};
|
||
|
||
// 返回当前token,token指针向前进一位
|
||
inline token_t advance() {
|
||
if (m.parser_cursor.current + 1 >= m.token_list.size()) {
|
||
std::cerr << "[ERROR] line: " << m.token_list[m.parser_cursor.current].line << ". column: "
|
||
<< m.token_list[m.parser_cursor.current].column <<". next token is null";
|
||
}
|
||
token_t token = m.token_list[m.parser_cursor.current];
|
||
m.parser_cursor.current++;
|
||
return token;
|
||
}
|
||
|
||
inline token_t retreate() {
|
||
if (m.parser_cursor.current - 1 < 0) {
|
||
std::cerr << "[ERROR] line: " << m.token_list[m.parser_cursor.current].line << ". column: "
|
||
<< m.token_list[m.parser_cursor.current].column <<". prev token is null";
|
||
}
|
||
token_t token = m.token_list[m.parser_cursor.current];
|
||
m.parser_cursor.current--;
|
||
return token;
|
||
}
|
||
|
||
inline token_t peak() {
|
||
return m.token_list[m.parser_cursor.current];
|
||
}
|
||
|
||
inline token_t prev() {
|
||
if (m.parser_cursor.current - 1 < 0) {
|
||
std::cerr << "[ERROR] line: " << m.token_list[m.parser_cursor.current].line << ". column: "
|
||
<< m.token_list[m.parser_cursor.current].column <<". prev token is null";
|
||
}
|
||
token_t token = m.token_list[m.parser_cursor.current - 1];
|
||
return token;
|
||
}
|
||
|
||
inline bool is(token_type_t expect) {
|
||
return m.token_list[m.parser_cursor.current].type == expect;
|
||
}
|
||
|
||
inline bool ident_is(std::string expect) {
|
||
return m.token_list[m.parser_cursor.current].literal == expect;
|
||
}
|
||
|
||
inline bool is_literal() {
|
||
return is(TOKEN_LITERAL_FLOAT) ||
|
||
is(TOKEN_LITERAL_INT) ||
|
||
is(TOKEN_LITERAL_STRING);
|
||
}
|
||
|
||
inline token_t consume(token_type_t expect) {
|
||
token_t t = m.token_list[m.parser_cursor.current];
|
||
if (t.type == expect) {
|
||
advance();
|
||
return true;
|
||
}
|
||
return false;
|
||
}
|
||
|
||
inline token_t must(token_type_t expect) {
|
||
token_t t = m.token_list[m.parser_cursor.current];
|
||
if (t.type != expect) {
|
||
std::cerr << "[ERROR] line: " << m.token_list[m.parser_cursor.current].line << ". column: "
|
||
<< m.token_list[m.parser_cursor.current].column <<". expected " << token_str[expect]
|
||
<< " but got " << t.literal;
|
||
}
|
||
|
||
advance();
|
||
return t;
|
||
}
|
||
|
||
inline token_t next(int step) {
|
||
if (m.parser_cursor.current + step >= m.token_list.size()) {
|
||
return nullptr;
|
||
}
|
||
return m.token_list[m.parser_cursor.current + step];
|
||
}
|
||
|
||
inline bool next_is(int step, token_type_t expect) {
|
||
if (m.parser_cursor.current + step >= m.token_list.size()) {
|
||
return false;
|
||
}
|
||
return m.token_list[m.parser_cursor.current + step].type == expect;
|
||
}
|
||
|
||
inline bool parser_must_stmt_end() {
|
||
if (is(TOKEN_EOF) || is(TOKEN_RIGHT_CURLY)) {
|
||
return true;
|
||
}
|
||
|
||
token_t t = prev();
|
||
std::cerr << "[ERROR] line: " << t.line << ". column: "
|
||
<< t.column<<". excepted ';' or '}' at end of statement";
|
||
return false;
|
||
}
|
||
|
||
inline bool is_basic_type() {
|
||
if (is(TOKEN_VAR) || is(TOKEN_VOID) ||
|
||
is(TOKEN_I8) || is(TOKEN_I16) ||
|
||
is(TOKEN_I32) || is(TOKEN_I64) ||
|
||
is(TOKEN_U8) || is(TOKEN_U16) || is(TOKEN_U32) ||
|
||
is(TOKEN_U64)|| is(TOKEN_F32) || is(TOKEN_F64) ||
|
||
is(TOKEN_BOOL) || is(TOKEN_STRING)) {
|
||
return true;
|
||
}
|
||
return false;
|
||
}
|
||
|
||
/**
|
||
* var a = xxx
|
||
* int a = xxx
|
||
* bool a = xxx
|
||
* string a = xxx
|
||
* @return
|
||
*/
|
||
inline bool is_type_begin_stmt() {
|
||
// var/any/int/float/bool/string
|
||
if (is_basic_type()) {
|
||
return true;
|
||
}
|
||
return true;
|
||
}
|
||
|
||
inline ast_stmt_t global_stmt() {
|
||
if (is_type_begin_stmt()) {
|
||
|
||
}
|
||
}
|
||
|
||
inline std::vector<ast_stmt_t> parser() {
|
||
// parser_cursor初始化已经在Scanner中初始化了
|
||
std::vector<ast_stmt_t> ret;
|
||
|
||
while (!is(TOKEN_EOF)) {
|
||
ret.push_back(parser_global_stmt());
|
||
parser_must_stmt_end();
|
||
}
|
||
|
||
return ret;
|
||
};
|
||
|
||
|
||
}; |