Hydrogen/include/syntax/token.h
2025-06-05 21:29:46 +08:00

204 lines
4.9 KiB
C++
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#pragma once
#include <iostream>
#include <string>
#include <unordered_map>
#include <utility>
#define DEBUG_SCANNER
// Every kind of token known to the scanner.
//
// Enumerators take consecutive values starting at 0, so their order is
// significant: do not reorder them or insert new ones in the middle without
// checking every place that relies on the numeric value.
enum token_type_t {
    TOKEN_NOT_IN_THIS_TYPE = 0,

    // Brackets
    TOKEN_LEFT_PAREN,        // (
    TOKEN_RIGHT_PAREN,       // )
    TOKEN_LEFT_SQUARE,       // [
    TOKEN_RIGHT_SQUARE,      // ]
    TOKEN_LEFT_CURLY,        // {
    TOKEN_RIGHT_CURLY,       // }
    TOKEN_LEFT_ANGLE,        // <  (same spelling as TOKEN_LESS_THAN)
    TOKEN_LESS_THAN,         // <
    TOKEN_RIGHT_ANGLE,       // >

    // Punctuation and single-character operators
    TOKEN_COMMA,             // ,
    TOKEN_DOT,               // .
    TOKEN_MINUS,             // -
    TOKEN_PLUS,              // +
    TOKEN_COLON,             // :
    TOKEN_SEMICOLON,         // ;
    TOKEN_SLASH,             // /
    TOKEN_STAR,              // a * b, *a
    TOKEN_PERCENT,           // %
    TOKEN_QUESTION,          // ?
    TOKEN_RIGHT_ARROW,       // ->

    // Logical, comparison and assignment operators
    TOKEN_NOT,               // !
    TOKEN_NOT_EQUAL,         // !=
    TOKEN_EQUAL,             // =
    TOKEN_EQUAL_EQUAL,       // ==
    TOKEN_GREATER_EQUAL,     // >=
    TOKEN_LESS_EQUAL,        // <=
    TOKEN_AND_AND,           // &&
    TOKEN_OR_OR,             // ||
    TOKEN_PLUS_EQUAL,        // +=
    TOKEN_MINUS_EQUAL,       // -=
    TOKEN_STAR_EQUAL,        // *=
    TOKEN_SLASH_EQUAL,       // /=
    TOKEN_PERCENT_EQUAL,     // %=
    TOKEN_AND_EQUAL,         // &=
    TOKEN_OR_EQUAL,          // |=
    TOKEN_XOR_EQUAL,         // ^=
    TOKEN_LEFT_SHIFT_EQUAL,  // <<=
    TOKEN_RIGHT_SHIFT_EQUAL, // >>=

    // Bitwise operators
    TOKEN_TILDE,             // ~
    TOKEN_AND,               // &
    TOKEN_OR,                // |
    TOKEN_XOR,               // ^
    TOKEN_LEFT_SHIFT,        // <<
    TOKEN_RIGHT_SHIFT,       // >>

    // Literals
    TOKEN_IDENT,             // identifier
    TOKEN_LITERAL_STRING,
    TOKEN_LITERAL_FLOAT,
    TOKEN_LITERAL_INT,

    // Types
    TOKEN_STRING,
    TOKEN_BOOL,
    TOKEN_U8,
    TOKEN_U16,
    TOKEN_U32,
    TOKEN_U64,
    TOKEN_I8,
    TOKEN_I16,
    TOKEN_I32,
    TOKEN_I64,
    TOKEN_F32,
    TOKEN_F64,

    // Built-in composite types
    TOKEN_ARR,
    TOKEN_VEC,
    TOKEN_MAP,
    TOKEN_TUP,

    // Keywords
    TOKEN_VAR,
    TOKEN_TRUE,
    TOKEN_FALSE,
    TOKEN_TYPE,
    // `void` is required: it is used when a function returns nothing.
    TOKEN_VOID,
    TOKEN_STRUCT,
    TOKEN_CONTINUE,
    TOKEN_BREAK,
    TOKEN_FOR,
    TOKEN_IN,
    TOKEN_IF,
    TOKEN_ELSE,
    TOKEN_ELSE_IF,
    TOKEN_FN,
    TOKEN_RETURN,
    TOKEN_STMT_EOF,          // ;

    // TOKEN_EOF must remain the last enumerator, otherwise an index overflow
    // occurs (the indexing code is outside this header).
    TOKEN_EOF,
};
// Textual spelling of each token type.
//
// Notes:
//  * `static` gives this variable internal linkage, so every translation unit
//    that includes this header gets its own copy of the table.
//  * TOKEN_NOT_IN_THIS_TYPE has no entry. operator[] on a missing key inserts
//    an empty string, so use find() when the table must not be modified.
//  * TOKEN_LEFT_ANGLE / TOKEN_LESS_THAN both map to "<", and
//    TOKEN_SEMICOLON / TOKEN_STMT_EOF both map to ";".
//  * The TOKEN_EOF value is written as "\0"; constructing a std::string from
//    that C string stops at the first NUL, so the stored text is empty.
inline static std::unordered_map<token_type_t, std::string> token_str = {
    // Brackets
    {TOKEN_LEFT_PAREN,        "("},
    {TOKEN_RIGHT_PAREN,       ")"},
    {TOKEN_LEFT_SQUARE,       "["},
    {TOKEN_RIGHT_SQUARE,      "]"},
    {TOKEN_LEFT_CURLY,        "{"},
    {TOKEN_RIGHT_CURLY,       "}"},
    {TOKEN_LEFT_ANGLE,        "<"},
    {TOKEN_LESS_THAN,         "<"},
    {TOKEN_RIGHT_ANGLE,       ">"},

    // Punctuation and single-character operators
    {TOKEN_COMMA,             ","},
    {TOKEN_DOT,               "."},
    {TOKEN_MINUS,             "-"},
    {TOKEN_PLUS,              "+"},
    {TOKEN_COLON,             ":"},
    {TOKEN_SEMICOLON,         ";"},
    {TOKEN_SLASH,             "/"},
    {TOKEN_STAR,              "*"},
    {TOKEN_PERCENT,           "%"},
    {TOKEN_QUESTION,          "?"},
    {TOKEN_RIGHT_ARROW,       "->"},

    // Logical, comparison and assignment operators
    {TOKEN_NOT,               "!"},
    {TOKEN_NOT_EQUAL,         "!="},
    {TOKEN_EQUAL,             "="},
    {TOKEN_EQUAL_EQUAL,       "=="},
    {TOKEN_GREATER_EQUAL,     ">="},
    {TOKEN_LESS_EQUAL,        "<="},
    {TOKEN_AND_AND,           "&&"},
    {TOKEN_OR_OR,             "||"},
    {TOKEN_PLUS_EQUAL,        "+="},
    {TOKEN_MINUS_EQUAL,       "-="},
    {TOKEN_STAR_EQUAL,        "*="},
    {TOKEN_SLASH_EQUAL,       "/="},
    {TOKEN_PERCENT_EQUAL,     "%="},
    {TOKEN_AND_EQUAL,         "&="},
    {TOKEN_OR_EQUAL,          "|="},
    {TOKEN_XOR_EQUAL,         "^="},
    {TOKEN_LEFT_SHIFT_EQUAL,  "<<="},
    {TOKEN_RIGHT_SHIFT_EQUAL, ">>="},

    // Bitwise operators
    {TOKEN_TILDE,             "~"},
    {TOKEN_AND,               "&"},
    {TOKEN_OR,                "|"},
    {TOKEN_XOR,               "^"},
    {TOKEN_LEFT_SHIFT,        "<<"},
    {TOKEN_RIGHT_SHIFT,       ">>"},

    // Literals (placeholder names, not source spellings)
    {TOKEN_IDENT,             "ident_literal"},
    {TOKEN_LITERAL_STRING,    "string_literal"},
    {TOKEN_LITERAL_FLOAT,     "float_literal"},
    {TOKEN_LITERAL_INT,       "int_literal"},

    // Types
    {TOKEN_STRING,            "string"},
    {TOKEN_BOOL,              "bool"},
    {TOKEN_U8,                "u8"},
    {TOKEN_U16,               "u16"},
    {TOKEN_U32,               "u32"},
    {TOKEN_U64,               "u64"},
    {TOKEN_I8,                "i8"},
    {TOKEN_I16,               "i16"},
    {TOKEN_I32,               "i32"},
    {TOKEN_I64,               "i64"},
    {TOKEN_F32,               "f32"},
    {TOKEN_F64,               "f64"},

    // Built-in composite types
    {TOKEN_ARR,               "arr"},
    {TOKEN_VEC,               "vec"},
    {TOKEN_MAP,               "map"},
    {TOKEN_TUP,               "tup"},

    // Keywords
    {TOKEN_VAR,               "var"},
    {TOKEN_TRUE,              "true"},
    {TOKEN_FALSE,             "false"},
    {TOKEN_TYPE,              "type"},
    {TOKEN_STRUCT,            "struct"},
    {TOKEN_CONTINUE,          "continue"},
    {TOKEN_BREAK,             "break"},
    {TOKEN_FOR,               "for"},
    {TOKEN_IN,                "in"},
    {TOKEN_VOID,              "void"},
    {TOKEN_IF,                "if"},
    {TOKEN_ELSE,              "else"},
    {TOKEN_ELSE_IF,           "else if"},
    {TOKEN_FN,                "fn"},
    {TOKEN_RETURN,            "return"},

    // Terminators
    {TOKEN_STMT_EOF,          ";"},
    {TOKEN_EOF,               "\0"}
};
// One token: its type, the text it was built from, and where that text starts.
struct token_t {
    token_type_t type;    // kind of token
    std::string literal;  // text supplied for this token
    int line;             // line value supplied by the caller
    int column;           // column value supplied by the caller
    int length;           // number of chars in `literal` at construction time

    // Builds a token from its type, text and position.
    //
    // token_type: kind of token being created.
    // literal:    token text; taken by value and moved into the member.
    // line:       line value to record.
    // column:     column value to record.
    //
    // When DEBUG_SCANNER is defined, a one-line trace is written to std::cout.
    token_t(token_type_t token_type, std::string literal, int line, int column)
        : type(token_type),
          literal(std::move(literal)),
          line(line),
          column(column),
          // Members are initialized in declaration order, so this->literal is
          // already set here. The parameter `literal` has been moved from and
          // must not be used for the size.
          length(static_cast<int>(this->literal.size())) {
#ifdef DEBUG_SCANNER
        // Look the name up with find() rather than operator[]: operator[]
        // inserts an empty entry for any type missing from token_str (for
        // example TOKEN_NOT_IN_THIS_TYPE), silently growing the shared table.
        // An unmapped type still prints as empty text, as it did before.
        const auto name_entry = token_str.find(token_type);
        std::cout << "[DEBUG] SCANNER line: " << line << ", type: ";
        if (name_entry != token_str.end()) {
            std::cout << name_entry->second;
        }
        // std::endl is kept so each trace line is flushed immediately.
        std::cout << ", literal: " << this->literal << std::endl;
#endif
    }
};