// Scanner token definitions: the token kind enum, a kind-to-text table, and the token record.
#pragma once
|
|
#include <iostream>
#include <string>
#include <unordered_map>
#include <utility>
|
|
|
|
#define DEBUG_SCANNER
|
|
|
|
/// Every kind of token the scanner can produce.
///
/// Enumerators are numbered implicitly and contiguously from 0, so their
/// order is part of the contract: append new kinds before TOKEN_EOF and never
/// reorder existing ones.
enum token_type_t {
    TOKEN_NOT_IN_THIS_TYPE = 0, // sentinel value 0; has no entry in the text table

    // Brackets
    TOKEN_LEFT_PAREN,   // (
    TOKEN_RIGHT_PAREN,  // )
    TOKEN_LEFT_SQUARE,  // [
    TOKEN_RIGHT_SQUARE, // ]
    TOKEN_LEFT_CURLY,   // {
    TOKEN_RIGHT_CURLY,  // }
    TOKEN_LEFT_ANGLE,   // <  NOTE(review): presumably the bracket use of '<' — confirm
    TOKEN_LESS_THAN,    // <  NOTE(review): presumably the comparison use of '<' — confirm
    TOKEN_RIGHT_ANGLE,  // >  (there is no separate "greater than" kind)

    // Punctuation and arithmetic operators
    TOKEN_COMMA,       // ,
    TOKEN_DOT,         // .
    TOKEN_MINUS,       // -
    TOKEN_PLUS,        // +
    TOKEN_COLON,       // :
    TOKEN_SEMICOLON,   // ;
    TOKEN_SLASH,       // /
    TOKEN_STAR,        // a * b, *a
    TOKEN_PERSON,      // %  (name looks like a misspelling of "PERCENT"; kept for compatibility)
    TOKEN_QUESTION,    // ?
    TOKEN_RIGHT_ARROW, // ->

    // Logical, assignment and comparison operators
    TOKEN_NOT,           // !
    TOKEN_NOT_EQUAL,     // !=
    TOKEN_EQUAL,         // =
    TOKEN_EQUAL_EQUAL,   // ==
    TOKEN_GREATER_EQUAL, // >=
    TOKEN_LESS_EQUAL,    // <=
    TOKEN_AND_AND,       // &&
    TOKEN_OR_OR,         // ||

    // Compound assignment
    TOKEN_PLUS_EQUAL,        // +=
    TOKEN_MINUS_EQUAL,       // -=
    TOKEN_STAR_EQUAL,        // *=
    TOKEN_SLASH_EQUAL,       // /=
    TOKEN_PERSON_EQUAL,      // %=
    TOKEN_AND_EQUAL,         // &=
    TOKEN_OR_EQUAL,          // |=
    TOKEN_XOR_EQUAL,         // ^=
    TOKEN_LEFT_SHIFT_EQUAL,  // <<=
    TOKEN_RIGHT_SHIFT_EQUAL, // >>=

    // Bitwise operations
    TOKEN_TILDE,       // ~
    TOKEN_AND,         // &
    TOKEN_OR,          // |
    TOKEN_XOR,         // ^
    TOKEN_LEFT_SHIFT,  // <<
    TOKEN_RIGHT_SHIFT, // >>

    // Literals
    TOKEN_IDENT, // identifier
    TOKEN_LITERAL_STRING,
    TOKEN_LITERAL_FLOAT,
    TOKEN_LITERAL_INT,

    // Types
    TOKEN_STRING,
    TOKEN_BOOL,
    TOKEN_U8,
    TOKEN_U16,
    TOKEN_U32,
    TOKEN_U64,
    TOKEN_I8,
    TOKEN_I16,
    TOKEN_I32,
    TOKEN_I64,
    TOKEN_F32,
    TOKEN_F64,

    // Built-in compound types
    TOKEN_ARR,
    TOKEN_VEC,
    TOKEN_MAP,
    TOKEN_TUP,

    // Keywords
    TOKEN_VAR,
    TOKEN_TRUE,
    TOKEN_FALSE,
    TOKEN_TYPE,
    TOKEN_STRUCT,
    TOKEN_CONTINUE,
    TOKEN_BREAK,
    TOKEN_FOR,
    TOKEN_IN,
    TOKEN_IF,
    TOKEN_ELSE,
    TOKEN_ELSE_IF,
    TOKEN_FN,
    TOKEN_RETURN,
    TOKEN_STMT_EOF, // ;
    TOKEN_EOF,      // TOKEN_EOF must stay the last enumerator, otherwise indexing will overflow
};
|
|
|
|
inline static std::unordered_map<token_type_t, std::string> token_str = {
|
|
{TOKEN_LEFT_PAREN, "("},
|
|
{TOKEN_RIGHT_PAREN, ")"},
|
|
{TOKEN_LEFT_SQUARE, "["},
|
|
{TOKEN_RIGHT_SQUARE, "]"},
|
|
{TOKEN_LEFT_CURLY, "{"},
|
|
{TOKEN_RIGHT_CURLY, "}"},
|
|
{TOKEN_LEFT_ANGLE, "<"},
|
|
{TOKEN_LESS_THAN, "<"},
|
|
{TOKEN_RIGHT_ANGLE, ">"},
|
|
{TOKEN_COMMA, ","},
|
|
{TOKEN_DOT, "."},
|
|
{TOKEN_MINUS, "-"},
|
|
{TOKEN_PLUS, "+"},
|
|
{TOKEN_COLON, ":"},
|
|
{TOKEN_SEMICOLON, ";"},
|
|
{TOKEN_SLASH, "/"},
|
|
{TOKEN_STAR, "*"},
|
|
{TOKEN_PERSON, "%"},
|
|
{TOKEN_QUESTION, "?"},
|
|
{TOKEN_RIGHT_ARROW, "->"},
|
|
{TOKEN_NOT, "!"},
|
|
{TOKEN_NOT_EQUAL, "!="},
|
|
{TOKEN_EQUAL, "="},
|
|
{TOKEN_EQUAL_EQUAL, "=="},
|
|
{TOKEN_GREATER_EQUAL, ">="},
|
|
{TOKEN_LESS_EQUAL, "<="},
|
|
{TOKEN_AND_AND, "&&"},
|
|
{TOKEN_OR_OR, "||"},
|
|
{TOKEN_PLUS_EQUAL, "+="},
|
|
{TOKEN_MINUS_EQUAL, "-="},
|
|
{TOKEN_STAR_EQUAL, "*="},
|
|
{TOKEN_SLASH_EQUAL, "/="},
|
|
{TOKEN_PERSON_EQUAL, "%="},
|
|
{TOKEN_AND_EQUAL, "&="},
|
|
{TOKEN_OR_EQUAL, "|="},
|
|
{TOKEN_XOR_EQUAL, "^="},
|
|
{TOKEN_LEFT_SHIFT_EQUAL, "<<="},
|
|
{TOKEN_RIGHT_SHIFT_EQUAL, ">>="},
|
|
{TOKEN_TILDE, "~"},
|
|
{TOKEN_AND, "&"},
|
|
{TOKEN_OR, "|"},
|
|
{TOKEN_XOR, "^"},
|
|
{TOKEN_LEFT_SHIFT, "<<"},
|
|
{TOKEN_RIGHT_SHIFT, ">>"},
|
|
{TOKEN_IDENT, "ident_literal"},
|
|
{TOKEN_LITERAL_STRING, "string_literal"},
|
|
{TOKEN_LITERAL_FLOAT, "float_literal"},
|
|
{TOKEN_LITERAL_INT, "int_literal"},
|
|
{TOKEN_STRING, "string"},
|
|
{TOKEN_BOOL, "bool"},
|
|
{TOKEN_U8, "u8"},
|
|
{TOKEN_U16, "u16"},
|
|
{TOKEN_U32, "u32"},
|
|
{TOKEN_U64, "u64"},
|
|
{TOKEN_I8, "i8"},
|
|
{TOKEN_I16, "i16"},
|
|
{TOKEN_I32, "i32"},
|
|
{TOKEN_I64, "i64"},
|
|
{TOKEN_F32, "f32"},
|
|
{TOKEN_F64, "f64"},
|
|
{TOKEN_ARR, "arr"},
|
|
{TOKEN_VEC, "vec"},
|
|
{TOKEN_MAP, "map"},
|
|
{TOKEN_TUP, "tup"},
|
|
{TOKEN_VAR, "var"},
|
|
{TOKEN_TRUE, "true"},
|
|
{TOKEN_FALSE, "false"},
|
|
{TOKEN_TYPE, "type"},
|
|
{TOKEN_STRUCT, "struct"},
|
|
{TOKEN_CONTINUE, "continue"},
|
|
{TOKEN_BREAK, "break"},
|
|
{TOKEN_FOR, "for"},
|
|
{TOKEN_IN, "in"},
|
|
{TOKEN_IF, "if"},
|
|
{TOKEN_ELSE, "else"},
|
|
{TOKEN_ELSE_IF, "else if"},
|
|
{TOKEN_FN, "fn"},
|
|
{TOKEN_RETURN, "return"},
|
|
{TOKEN_STMT_EOF, ";"},
|
|
{TOKEN_EOF, "\0"}
|
|
};
|
|
|
|
struct token_t {
|
|
token_type_t type;
|
|
std::string literal;
|
|
int line;
|
|
int column;
|
|
int length;
|
|
|
|
token_t(token_type_t token_type, std::string literal, int line, int column)
|
|
: type(token_type), literal(literal), line(line), column(column), length(literal.size()) {
|
|
#ifdef DEBUG_SCANNER
|
|
std::cout << "[DEBUG] SCANNER line: " << line << ", type: " << token_str[token_type] << ", literal: " << literal << std::endl;
|
|
#endif
|
|
}
|
|
}; |