Hydrogen/include/syntax/Parser.hpp
2025-06-05 21:29:46 +08:00

249 lines
8.1 KiB
C++
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#pragma once
#include <iostream>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

#include "../ast.hpp"
#include "../types.hpp"
#include "token.h"
class Parser {
public:
Parser(module_t module)
: m(module) {};
private:
module_t m;
// 定义优先级的枚举类型
enum precedence {
PRECEDENCE_NULL,// 最低优先级
PRECEDENCE_ASSIGN,
PRECEDENCE_CATCH,
PRECEDENCE_OR_OR, // ||
PRECEDENCE_AND_AND, // &&
PRECEDENCE_OR, // |
PRECEDENCE_XOR, // ^
PRECEDENCE_AND, // %
PRECEDENCE_CMP_EQUAL,// == !=
PRECEDENCE_COMPARE, // > < >= <=
PRECEDENCE_SHIFT, // << >>
PRECEDENCE_TERM, // + -
PRECEDENCE_FACTOR, // * / %
PRECEDENCE_UNARY, // - / ! / ~ / * / &
PRECEDENCE_CALL, // foo.bar foo["bar"] foo() foo().foo.bar 这几个表达式都是同一优先级,应该从左往右依次运算
PRECEDENCE_PRIMARY, // 最高优先级
};
inline std::unordered_map<token_type_t, ast_expr_op_t> token_to_ast_op = {
// 算术运算符
{TOKEN_PLUS, AST_OP_ADD}, // +
{TOKEN_MINUS, AST_OP_SUB}, // -
{TOKEN_STAR, AST_OP_MUL}, // *
{TOKEN_SLASH, AST_OP_DIV}, // /
{TOKEN_PERCENT, AST_OP_REM}, // %
// 比较运算符
{TOKEN_EQUAL_EQUAL, AST_OP_EE}, // ==
{TOKEN_NOT_EQUAL, AST_OP_NE}, // !=
{TOKEN_GREATER_EQUAL, AST_OP_GE},// >=
{TOKEN_RIGHT_ANGLE, AST_OP_GT}, // >
{TOKEN_LESS_EQUAL, AST_OP_LE}, // <=
{TOKEN_LESS_THAN, AST_OP_LT}, // <
// 逻辑运算符
{TOKEN_AND_AND, AST_OP_AND_AND}, // &&
{TOKEN_OR_OR, AST_OP_OR_OR}, // ||
// 位运算符
{TOKEN_TILDE, AST_OP_BNOT}, // ~
{TOKEN_AND, AST_OP_AND}, // &
{TOKEN_OR, AST_OP_OR}, // |
{TOKEN_XOR, AST_OP_XOR}, // ^
{TOKEN_LEFT_SHIFT, AST_OP_LSHIFT}, // <<
{TOKEN_RIGHT_SHIFT, AST_OP_RSHIFT}, // >>
// 复合赋值运算符(拆解为基本运算)
{TOKEN_PERCENT_EQUAL, AST_OP_REM}, // %=
{TOKEN_MINUS_EQUAL, AST_OP_SUB}, // -=
{TOKEN_PLUS_EQUAL, AST_OP_ADD}, // +=
{TOKEN_SLASH_EQUAL, AST_OP_DIV}, // /=
{TOKEN_STAR_EQUAL, AST_OP_MUL}, // *=
{TOKEN_OR_EQUAL, AST_OP_OR}, // |=
{TOKEN_AND_EQUAL, AST_OP_AND}, // &=
{TOKEN_XOR_EQUAL, AST_OP_XOR}, // ^=
{TOKEN_LEFT_SHIFT_EQUAL, AST_OP_LSHIFT}, // <<=
{TOKEN_RIGHT_SHIFT_EQUAL, AST_OP_RSHIFT}, // >>=
};
inline std::unordered_map<token_type_t, type_kind> token_to_type = {
// 字面量类型
{TOKEN_TRUE, TYPE_BOOL}, // true
{TOKEN_FALSE, TYPE_BOOL}, // false
{TOKEN_VOID, TYPE_VOID}, // void
{TOKEN_LITERAL_FLOAT, TYPE_FLOAT64},// 浮点字面量
{TOKEN_LITERAL_INT, TYPE_INT32}, // 整数字面量
{TOKEN_LITERAL_STRING, TYPE_STRING}, // 字符串字面量
// 基本数据类型
{TOKEN_BOOL, TYPE_BOOL}, // bool
{TOKEN_STRING, TYPE_STRING}, // string
// 定长整数类型
{TOKEN_I8, TYPE_INT8}, // int8
{TOKEN_I16, TYPE_INT16}, // int16
{TOKEN_I32, TYPE_INT32}, // int32
{TOKEN_I64, TYPE_INT64}, // int64
{TOKEN_U8, TYPE_UINT8}, // uint8
{TOKEN_U16, TYPE_UINT16}, // uint16
{TOKEN_U32, TYPE_UINT32}, // uint32
{TOKEN_U64, TYPE_UINT64}, // uint64
{TOKEN_F32, TYPE_FLOAT32}, // float32
{TOKEN_F64, TYPE_FLOAT64}, // float64
// 特殊类型
{TOKEN_VAR, TYPE_UNKNOWN}, // var (未推导类型)
// 复合数据类型
{TOKEN_VEC, TYPE_VEC}, // vector
{TOKEN_MAP, TYPE_MAP}, // map
};
// 返回当前tokentoken指针向前进一位
inline token_t advance() {
if (m.parser_cursor.current + 1 >= m.token_list.size()) {
std::cerr << "[ERROR] line: " << m.token_list[m.parser_cursor.current].line << ". column: "
<< m.token_list[m.parser_cursor.current].column <<". next token is null";
}
token_t token = m.token_list[m.parser_cursor.current];
m.parser_cursor.current++;
return token;
}
inline token_t retreate() {
if (m.parser_cursor.current - 1 < 0) {
std::cerr << "[ERROR] line: " << m.token_list[m.parser_cursor.current].line << ". column: "
<< m.token_list[m.parser_cursor.current].column <<". prev token is null";
}
token_t token = m.token_list[m.parser_cursor.current];
m.parser_cursor.current--;
return token;
}
inline token_t peak() {
return m.token_list[m.parser_cursor.current];
}
inline token_t prev() {
if (m.parser_cursor.current - 1 < 0) {
std::cerr << "[ERROR] line: " << m.token_list[m.parser_cursor.current].line << ". column: "
<< m.token_list[m.parser_cursor.current].column <<". prev token is null";
}
token_t token = m.token_list[m.parser_cursor.current - 1];
return token;
}
inline bool is(token_type_t expect) {
return m.token_list[m.parser_cursor.current].type == expect;
}
inline bool ident_is(std::string expect) {
return m.token_list[m.parser_cursor.current].literal == expect;
}
inline bool is_literal() {
return is(TOKEN_LITERAL_FLOAT) ||
is(TOKEN_LITERAL_INT) ||
is(TOKEN_LITERAL_STRING);
}
inline token_t consume(token_type_t expect) {
token_t t = m.token_list[m.parser_cursor.current];
if (t.type == expect) {
advance();
return true;
}
return false;
}
inline token_t must(token_type_t expect) {
token_t t = m.token_list[m.parser_cursor.current];
if (t.type != expect) {
std::cerr << "[ERROR] line: " << m.token_list[m.parser_cursor.current].line << ". column: "
<< m.token_list[m.parser_cursor.current].column <<". expected " << token_str[expect]
<< " but got " << t.literal;
}
advance();
return t;
}
inline token_t next(int step) {
if (m.parser_cursor.current + step >= m.token_list.size()) {
return nullptr;
}
return m.token_list[m.parser_cursor.current + step];
}
inline bool next_is(int step, token_type_t expect) {
if (m.parser_cursor.current + step >= m.token_list.size()) {
return false;
}
return m.token_list[m.parser_cursor.current + step].type == expect;
}
inline bool parser_must_stmt_end() {
if (is(TOKEN_EOF) || is(TOKEN_RIGHT_CURLY)) {
return true;
}
token_t t = prev();
std::cerr << "[ERROR] line: " << t.line << ". column: "
<< t.column<<". excepted ';' or '}' at end of statement";
return false;
}
inline bool is_basic_type() {
if (is(TOKEN_VAR) || is(TOKEN_VOID) ||
is(TOKEN_I8) || is(TOKEN_I16) ||
is(TOKEN_I32) || is(TOKEN_I64) ||
is(TOKEN_U8) || is(TOKEN_U16) || is(TOKEN_U32) ||
is(TOKEN_U64)|| is(TOKEN_F32) || is(TOKEN_F64) ||
is(TOKEN_BOOL) || is(TOKEN_STRING)) {
return true;
}
return false;
}
/**
* var a = xxx
* int a = xxx
* bool a = xxx
* string a = xxx
* @return
*/
inline bool is_type_begin_stmt() {
// var/any/int/float/bool/string
if (is_basic_type()) {
return true;
}
return true;
}
inline ast_stmt_t global_stmt() {
if (is_type_begin_stmt()) {
}
}
inline std::vector<ast_stmt_t> parser() {
// parser_cursor初始化已经在Scanner中初始化了
std::vector<ast_stmt_t> ret;
while (!is(TOKEN_EOF)) {
ret.push_back(parser_global_stmt());
parser_must_stmt_end();
}
return ret;
};
};