尝试重构一下代码

This commit is contained in:
Gary Gan 2025-06-04 18:14:24 +08:00
parent dbb1abdcd5
commit 25236a4901
9 changed files with 786 additions and 255 deletions

View File

@ -11,9 +11,7 @@ include_directories(${CMAKE_SOURCE_DIR}/include)
file(GLOB SOURCES ${CMAKE_SOURCE_DIR}/src/*.cpp) file(GLOB SOURCES ${CMAKE_SOURCE_DIR}/src/*.cpp)
add_executable(Hydrogen ${SOURCES} add_executable(Hydrogen ${SOURCES})
src/scanner.cpp
src/Tbs.cpp)
file(GLOB TEST_SOURCES ${CMAKE_SOURCE_DIR}/unit/*.cpp) file(GLOB TEST_SOURCES ${CMAKE_SOURCE_DIR}/unit/*.cpp)

View File

@ -1,58 +0,0 @@
# pragma once
#include "stdc++.h"
#include "Token.h"
#include "Tbs.h"
#include <cstdlib>
#include <vector>
#include <iostream>
// Table-driven tokenizer.
//
// Walks the source text once and, at every offset, asks each table handler
// (constants, identifiers, keywords, punctuation) in turn whether a token
// starts there. A handler returns the number of characters it consumed, or 0
// when it does not recognise anything at that offset.
class Scanner {
public:
    // source_code: program text to tokenize.
    // tables:      lookup tables consulted (and filled) by the handlers.
    Scanner(std::string source_code, Tbs tables)
        : m_source_code(source_code), m_tables(tables) {}

    // Tokenizes the whole source. Whitespace between tokens is skipped; any
    // other unrecognised character aborts the process with a failure status.
    void scan() {
        const int source_size = static_cast<int>(m_source_code.size());
        for (int i = 0; i < source_size; i++) {
            // Handlers are tried in priority order; the first non-zero
            // length wins and the remaining handlers are not called.
            int len = process_const_table(i);
            if (len == 0) {
                len = process_identifier_table(i);
            }
            if (len == 0) {
                len = process_key_table(i);
            }
            if (len == 0) {
                len = process_punct_table(i);
            }
            if (len != 0) {
                // The loop header adds the final +1.
                i += len - 1;
                continue;
            }

            const char c = m_source_code[i];
            // '\r' is accepted as well so CRLF sources do not fail.
            if (c == ' ' || c == '\t' || c == '\n' || c == '\r') {
                continue;
            }

            std::cerr << "Error: Tokenize" << std::endl;
            // A tokenize error must not report success to the caller.
            std::exit(EXIT_FAILURE);
        }
    }

    // Returns a copy of the tokens collected so far.
    inline std::vector<Token> get_token_list() {
        return m_token_list;
    }

    // Table handlers (defined out of line). Each takes the offset to inspect
    // and returns the number of characters consumed, 0 for "no match".
    int process_const_table(int i);
    int process_identifier_table(int i);
    int process_key_table(int i);
    int process_punct_table(int i);

private:
    std::string m_source_code;
    std::vector<Token> m_token_list;
    Tbs m_tables;
    int index = 0;
    // Next free key in the identifier table.
    int identifier_index = 0;
};

View File

@ -1,91 +0,0 @@
# pragma once
#include "stdc++.h"
#include <unordered_map>
using std::unordered_map,std::string;
// Symbol tables used by the scanner. Every table maps a numeric id to the
// spelling it stands for.
class Tbs {
public:
    // Filled while scanning.
    std::unordered_map<int, std::string> ConstTable;
    std::unordered_map<int, std::string> IdTable;

    // Reserved words.
    std::unordered_map<int, std::string> KeyTable = {
        {1, "var"},
        // integer types
        {2, "i8"}, {3, "i16"}, {4, "i32"}, {5, "i64"},
        {6, "u8"}, {7, "u16"}, {8, "u32"}, {9, "u64"},
        // other scalar types
        {10, "float32"}, {11, "float64"}, {12, "char"},
        // control flow
        {13, "for"}, {14, "if"}, {15, "else"},
        // remaining built-in types
        {16, "bool"}, {17, "string"}, {18, "vector"}, {19, "array"},
        {20, "struct"}, {21, "tuple"},
        // output
        {22, "print"}, {23, "println"}
    };

    // Operators and punctuation. Some spellings ("-", "<", ">") appear under
    // two ids, so the entry order below is kept exactly as it was.
    std::unordered_map<int, std::string> PunctTable = {
        {1, "-"}, {2, "!"}, {3, "~"},
        {4, "/"}, {5, "*"}, {6, "%"},
        {7, "+"}, {8, "-"},
        {9, "<<"}, {10, ">>"},
        {11, ">"}, {12, ">="}, {13, "<"}, {14, "<="},
        {15, "=="}, {16, "!="},
        {17, "&"}, {18, "^"}, {19, "|"},
        {20, "&&"}, {21, "||"},
        {22, "="},
        {23, "%="}, {24, "*="}, {25, "/="}, {26, "+="}, {27, "-="},
        {28, "|="}, {29, "&="}, {30, "^="},
        {31, "<<="}, {32, ">>="},
        {33, "("}, {34, ")"},
        {35, "<"}, {36, ">"},
        {37, ","}, {38, "."},
        {39, "["}, {40, "]"},
        {41, "?"}, {42, ":"},
        {43, "->"},
        {44, ";"}
    };
};
// Identifies which table a token id refers to.
enum Table_Type {
    CONST_TABLE = 0,  // constants
    ID_TABLE = 1,     // identifiers
    KEY_TABLE = 2,    // keywords
    PUNCT_TABLE = 3   // operators / punctuation
};

View File

@ -1,8 +0,0 @@
#pragma once
#include "stdc++.h"
#include "Tbs.h"
// A scanned token: a table id paired with the table it belongs to.
struct Token{
int id; // key of the entry inside the table named by `type`
Table_Type type; // which table `id` indexes
};

553
include/syntax/Scanner.h Normal file
View File

@ -0,0 +1,553 @@
#pragma once
#include "doctest.h"
#include "token.h"
#include "../types.hpp"
#include <cctype>
#include <iostream>
#include <sstream>
#include <string>
class Scanner {
public:
// Binds the scanner to `module`; the module is held by reference, not copied.
Scanner(module_t& module)
: module(module) {}
private:
module_t& module;
// Returns the text of the word being scanned: `length` characters of the
// source starting at `current`.
inline std::string gen_word() {
    const auto& cursor = module.s_cursor;
    const auto word_begin = cursor.current;
    return cursor.source.substr(word_begin, cursor.length);
}
// True for the four whitespace characters the scanner skips:
// newline, tab, carriage return and space.
inline bool is_space(char c) {
    switch (c) {
        case '\n':
        case '\t':
        case '\r':
        case ' ':
            return true;
        default:
            return false;
    }
}
// True when `s` is a double quote.
inline bool is_string(char s) {
    const char double_quote = '"';
    return double_quote == s;
}
// Decides whether a scanned numeric word is a floating-point literal.
//
// Returns true when the word contains an exponent marker ('e' / 'E') or
// exactly one '.'. Returns false for an empty word, for a word without a '.',
// and (after printing a diagnostic) for a word that ends with '.' or contains
// more than one '.'.
// NOTE(review): any 'e'/'E' counts as an exponent marker, so a hex literal
// containing 'e' would be classified as float if it is passed here — confirm
// callers only pass decimal words.
inline bool is_float(std::string word) {
    // Nothing to classify; also keeps word.back() below well defined.
    if (word.empty()) {
        return false;
    }

    int dot_count = 0;
    bool has_e = false;
    for (const char ch : word) {
        if (ch == '.')
            dot_count++;
        else if (ch == 'e' || ch == 'E')
            has_e = true;
    }

    // A literal must not end with '.'. (The previous word[-1] indexed out of
    // range instead of reading the last character.)
    if (word.back() == '.') {
        std::cerr << "[ERROR] line: " << module.s_cursor.line << ". column: "
                  << module.s_cursor.column <<". floating-point numbers cannot end with '.'";
        return false;
    }

    // An exponent marker makes the word a float.
    if (has_e) {
        return true;
    }
    if (dot_count == 0) {
        return false;
    }
    if (dot_count > 1) {
        std::cerr << "[ERROR] line: " << module.s_cursor.line << ". column: "
                  << module.s_cursor.column <<". floating-point numbers have multiple '.'";
        return false;
    }
    return true;
}
// True when `c` is alphabetic according to std::isalpha.
// The argument is converted to unsigned char first: passing a negative plain
// char (e.g. a byte of a multi-byte UTF-8 sequence) to <cctype> functions is
// undefined behaviour.
inline bool is_alpha(char c) {
    return std::isalpha(static_cast<unsigned char>(c)) != 0;
}
// True when `c` is a decimal digit according to std::isdigit.
// The argument is converted to unsigned char first: passing a negative plain
// char to <cctype> functions is undefined behaviour.
inline bool is_number(char c) {
    return std::isdigit(static_cast<unsigned char>(c)) != 0;
}
// True when `c` is a hexadecimal digit: 0-9, a-f or A-F.
inline bool is_hex_number(char c) {
    const bool decimal_digit = '0' <= c && c <= '9';
    const bool lower_letter = 'a' <= c && c <= 'f';
    const bool upper_letter = 'A' <= c && c <= 'F';
    return decimal_digit || lower_letter || upper_letter;
}
// True when `c` is an octal digit (0-7).
inline bool is_oct_number(char c) {
    if (c < '0') {
        return false;
    }
    return c <= '7';
}
// True when `c` is a binary digit ('0' or '1').
inline bool is_bin_number(char c) {
    switch (c) {
        case '0':
        case '1':
            return true;
        default:
            return false;
    }
}
// True once the look-ahead position has reached the end of the source.
// `guard` is an index into the source, not a character, so it is compared
// with the source length; comparing it with '\0' was only ever true at
// offset 0, i.e. at the very start of the input.
inline bool at_eof() {
    return module.s_cursor.guard >= module.s_cursor.source.size();
}
// Consumes one character at the look-ahead position and returns it.
//
// Updates `guard`, the current word `length` and the column; when the
// consumed character is a newline the line counter is bumped and the column
// reset. At the end of the source nothing is consumed and '\0' is returned.
//
// The previous version read the character *after* incrementing `guard`, so
// callers that switch on the result (scanner_special_char) saw the following
// character rather than the one just consumed, and line counting reacted one
// character early.
inline char guard_advance() {
    auto& cursor = module.s_cursor;
    if (cursor.guard >= cursor.source.size()) {
        return '\0';
    }

    const char consumed = cursor.source[cursor.guard];
    cursor.guard++;
    cursor.length++;
    cursor.column++;
    if (consumed == '\n') {
        cursor.line++;
        cursor.column = 0;
    }
    return consumed;
}
// Consumes the character at the look-ahead position if it equals `expected`.
// Returns true when a character was consumed, false otherwise (including at
// the end of the source).
inline bool match(char expected) {
    if (at_eof())
        return false;

    // Read from the same buffer the cursor indexes (s_cursor.source) like
    // every other helper, instead of the module's separate copy, and never
    // index past its end.
    const auto& cursor = module.s_cursor;
    if (cursor.guard >= cursor.source.size())
        return false;
    if (cursor.source[cursor.guard] != expected)
        return false;

    guard_advance();
    return true;
}
// Consumes a run of letters and digits starting at the look-ahead position
// and returns the word scanned so far.
inline std::string ident_advance() {
    for (;;) {
        const char next = module.s_cursor.source[module.s_cursor.guard];
        const bool ident_char = is_alpha(next) || is_number(next);
        if (!ident_char || at_eof()) {
            break;
        }
        guard_advance();
    }
    return gen_word();
}
// Scans one operator / punctuation token starting at the look-ahead position.
//
// Consumes the first character via guard_advance() and, for multi-character
// operators, the following characters via match(). Returns
// TOKEN_NOT_IN_THIS_TYPE when the character does not start a known operator.
// The parameter `m` is unused; it is kept so existing callers still compile.
//
// Fixes over the previous version:
//  * '>' followed by '>' but not by '=' no longer swallows the second '>'
//    (match('>') && match('=') consumed it before failing).
//  * "&=" and "|=" now produce TOKEN_AND_EQUAL / TOKEN_OR_EQUAL.
inline token_type_t scanner_special_char(module_t *m) {
    (void)m;
    const char c = guard_advance();
    switch (c) {
        case '(':
            return TOKEN_LEFT_PAREN;
        case ')':
            return TOKEN_RIGHT_PAREN;
        case '[':
            return TOKEN_LEFT_SQUARE;
        case ']':
            return TOKEN_RIGHT_SQUARE;
        case '{':
            return TOKEN_LEFT_CURLY;
        case '}':
            return TOKEN_RIGHT_CURLY;
        case ':':
            return TOKEN_COLON;
        case ';':
            return TOKEN_STMT_EOF;
        case ',':
            return TOKEN_COMMA;
        case '?':
            return TOKEN_QUESTION;
        case '%':
            return match('=') ? TOKEN_PERSON_EQUAL : TOKEN_PERSON;
        case '-':
            if (match('=')) {
                return TOKEN_MINUS_EQUAL;
            }
            if (match('>')) {
                return TOKEN_RIGHT_ARROW;
            }
            return TOKEN_MINUS;
        case '+':
            return match('=') ? TOKEN_PLUS_EQUAL : TOKEN_PLUS;
        case '/':
            return match('=') ? TOKEN_SLASH_EQUAL : TOKEN_SLASH;
        case '*':
            return match('=') ? TOKEN_STAR_EQUAL : TOKEN_STAR;
        case '.':
            return TOKEN_DOT;
        case '!':
            return match('=') ? TOKEN_NOT_EQUAL : TOKEN_NOT;
        case '=':
            return match('=') ? TOKEN_EQUAL_EQUAL : TOKEN_EQUAL;
        case '<':
            if (match('<')) {
                // "<<=" or "<<"
                return match('=') ? TOKEN_LEFT_SHIFT_EQUAL : TOKEN_LEFT_SHIFT;
            }
            if (match('=')) {
                return TOKEN_LESS_EQUAL;
            }
            return TOKEN_LEFT_ANGLE;
        case '>': {
            if (match('=')) {
                // ">="
                return TOKEN_GREATER_EQUAL;
            }
            // ">>=": peek at both characters before consuming anything, so a
            // plain ">>" leaves the second '>' in the input.
            // NOTE(review): as before, ">>" is not folded into
            // TOKEN_RIGHT_SHIFT here; it is scanned as two '>' tokens.
            // Confirm that this is intended (e.g. for nested generics).
            const auto& cursor = module.s_cursor;
            const bool shift_assign = cursor.guard + 1 < cursor.source.size() &&
                                      cursor.source[cursor.guard] == '>' &&
                                      cursor.source[cursor.guard + 1] == '=';
            if (shift_assign) {
                guard_advance();
                guard_advance();
                return TOKEN_RIGHT_SHIFT_EQUAL;
            }
            return TOKEN_RIGHT_ANGLE;  // ">"
        }
        case '&':
            if (match('&')) {
                return TOKEN_AND_AND;
            }
            if (match('=')) {
                return TOKEN_AND_EQUAL;
            }
            return TOKEN_AND;
        case '|':
            if (match('|')) {
                return TOKEN_OR_OR;
            }
            if (match('=')) {
                return TOKEN_OR_EQUAL;
            }
            return TOKEN_OR;
        case '~':
            return TOKEN_TILDE;
        case '^':
            return match('=') ? TOKEN_XOR_EQUAL : TOKEN_XOR;
        default:
            return token_type_t::TOKEN_NOT_IN_THIS_TYPE;
    }
}
// Scans a double-quoted string literal and returns its contents with escape
// sequences decoded.
//
// Expects the look-ahead position to be on the opening quote. The quotes and
// the backslash of each escape are stepped over directly (they do not count
// towards the word length); every content character is consumed through
// guard_advance(). Diagnostics are printed for a raw newline inside the
// string, an unknown escape, and a string that is never closed.
//
// The loop is bounded by the source length, so an unterminated string or a
// trailing backslash can no longer walk the cursor past the end of the buffer.
inline std::string string_advance() {
    auto& cursor = module.s_cursor;
    const std::string::size_type source_end = cursor.source.size();

    // Skip the opening quote.
    cursor.guard++;
    const char escape_char = '\\';
    std::stringstream buf;
    while (cursor.guard < source_end && cursor.source[cursor.guard] != '\"') {
        char guard = cursor.source[cursor.guard];
        if (guard == '\n') {
            std::cerr << "[ERROR] line: " << module.s_cursor.line << ". column: "
                      << module.s_cursor.column <<". string cannot newline.";
        }
        // Decode an escape sequence.
        if (guard == escape_char) {
            if (cursor.guard + 1 >= source_end) {
                // Backslash is the last character: nothing to escape.
                // Reported below as an unterminated string.
                break;
            }
            // Step over the backslash itself.
            cursor.guard++;
            guard = cursor.source[cursor.guard];
            switch (guard) {
                case 'n':
                    guard = '\n';
                    break;
                case 't':
                    guard = '\t';
                    break;
                case 'r':
                    guard = '\r';
                    break;
                case 'b':
                    guard = '\b';
                    break;
                case 'f':
                    guard = '\f';
                    break;
                case 'a':
                    guard = '\a';
                    break;
                case 'v':
                    guard = '\v';
                    break;
                case '0':
                    guard = '\0';
                    break;
                case '\\':
                case '\'':
                case '\"':
                    // The escaped character stands for itself.
                    break;
                default:
                    std::cerr << "[ERROR] line: " << module.s_cursor.line << ". column: "
                              << module.s_cursor.column <<". unknown escape char " << guard;
            }
        }
        buf << guard;
        guard_advance();
    }

    if (cursor.guard < source_end && cursor.source[cursor.guard] == '\"') {
        // Skip the closing quote.
        cursor.guard++;
    } else {
        std::cerr << "[ERROR] line: " << module.s_cursor.line << ". column: "
                  << module.s_cursor.column << ". unterminated string literal." << std::endl;
        // Park the cursor at the end so scanning stops cleanly.
        cursor.guard = source_end;
    }
    return buf.str();
}
// Converts `word` to a long using the given numeric base.
// On a malformed or out-of-range number a diagnostic is printed and 0 is
// returned.
inline long number_convert(std::string word, int base) {
    long value = 0;
    try {
        value = std::stol(word, nullptr, base);
    } catch (const std::invalid_argument&) {
        std::cerr << "[ERROR] line: " << module.s_cursor.line << ". column: "
                  << module.s_cursor.column <<". Invalid number: " << word << std::endl;
        return 0;
    } catch (const std::out_of_range&) {
        std::cerr << "[ERROR] line: " << module.s_cursor.line << ". column: "
                  << module.s_cursor.column <<". Number out of range: " << word << std::endl;
        return 0;
    }
    return value;
}
// Scans the digits of a hexadecimal literal and returns them without the
// "0x" prefix.
//
// Expects the look-ahead position to be on the '0' of "0x". The prefix is
// stepped over directly, as before, so it is not counted in the word length.
// Because of that, gen_word() (which slices `length` characters starting at
// `current`) used to return the prefix plus a truncated digit run; the digits
// are now sliced from where they actually start.
// NOTE(review): the column counter still does not account for the two prefix
// characters — confirm whether it should.
inline std::string hex_number_advance() {
    auto& cursor = module.s_cursor;
    cursor.guard += 2;  // skip "0x"
    if (cursor.guard > cursor.source.size()) {
        // No room for a prefix: nothing to scan.
        return std::string();
    }

    const std::string::size_type digits_begin = cursor.guard;
    while (is_hex_number(cursor.source[cursor.guard]) && !at_eof()) {
        guard_advance();
    }
    return cursor.source.substr(digits_begin, cursor.guard - digits_begin);
}
// Scans the digits of an octal literal and returns them without the "0o"
// prefix.
//
// Expects the look-ahead position to be on the '0' of "0o". The prefix is
// stepped over directly, as before, so it is not counted in the word length.
// Because of that, gen_word() (which slices `length` characters starting at
// `current`) used to return the prefix plus a truncated digit run; the digits
// are now sliced from where they actually start.
// NOTE(review): the column counter still does not account for the two prefix
// characters — confirm whether it should.
inline std::string oct_number_advance() {
    auto& cursor = module.s_cursor;
    cursor.guard += 2;  // skip "0o"
    if (cursor.guard > cursor.source.size()) {
        // No room for a prefix: nothing to scan.
        return std::string();
    }

    const std::string::size_type digits_begin = cursor.guard;
    while (is_oct_number(cursor.source[cursor.guard]) && !at_eof()) {
        guard_advance();
    }
    return cursor.source.substr(digits_begin, cursor.guard - digits_begin);
}
// Scans the digits of a binary literal and returns them without the "0b"
// prefix.
//
// Expects the look-ahead position to be on the '0' of "0b". The prefix is
// stepped over directly, as before, so it is not counted in the word length.
// Because of that, gen_word() (which slices `length` characters starting at
// `current`) used to return the prefix plus a truncated digit run; the digits
// are now sliced from where they actually start.
// NOTE(review): the column counter still does not account for the two prefix
// characters — confirm whether it should.
inline std::string bin_number_advance() {
    auto& cursor = module.s_cursor;
    cursor.guard += 2;  // skip "0b"
    if (cursor.guard > cursor.source.size()) {
        // No room for a prefix: nothing to scan.
        return std::string();
    }

    const std::string::size_type digits_begin = cursor.guard;
    while (is_bin_number(cursor.source[cursor.guard]) && !at_eof()) {
        guard_advance();
    }
    return cursor.source.substr(digits_begin, cursor.guard - digits_begin);
}
inline std::string number_advance() {
while(is_number(module.s_cursor.source[module.s_cursor.guard]) && !at_eof()) {
guard_advance();
}
// 处理小数部分
if (module.s_cursor.source[module.s_cursor.guard] == '.' && is_number(module.s_cursor.source[module.s_cursor.guard + 1])) {
guard_advance(); // 跳过小数点
while(is_number(module.s_cursor.source[module.s_cursor.guard]) && !at_eof()) {
guard_advance();
}
}
// 处理科学计数法
if ((module.s_cursor.source[module.s_cursor.guard] == 'e' || module.s_cursor.source[module.s_cursor.guard] == 'E')
&& (is_number(module.s_cursor.source[module.s_cursor.guard + 1]) ||
module.s_cursor.source[module.s_cursor.guard + 1] == '+' ||
module.s_cursor.source[module.s_cursor.guard + 1] == '-')) {
guard_advance(); // 跳过 e 或 E
if (module.s_cursor.source[module.s_cursor.guard] == '+' || module.s_cursor.source[module.s_cursor.guard] == '-') {
guard_advance(); // 跳过符号
}
while(is_number(module.s_cursor.source[module.s_cursor.guard]) && !at_eof()) {
guard_advance();
}
}
return gen_word();
}
// Classifies a scanned word as a keyword / built-in type token, or as
// TOKEN_IDENT when it is neither.
//
// word:   the scanned word.
// length: number of characters in `word`.
//
// The word is dispatched on its leading characters; the remainder is checked
// by scanner_rest(), which is defined outside this block.
// NOTE(review): scanner_rest(word, length, start, rest_len, rest, type) is
// presumed to return `type` only when the word consists of exactly the prefix
// plus `rest`, and TOKEN_IDENT otherwise — verify against its definition.
//
// Fixes over the previous version:
//  * case 'b' had unbalanced braces and did not compile.
//  * case 'v' fell through into case 'u' and case 'm' into case 'r', so a
//    word starting with 'v' or 'm' could be matched against the 'u' / 'r'
//    keywords.
//  * "array", "bool", "break", "continue" and "else" were matched as prefixes
//    (any longer word starting with them was treated as the keyword); they
//    now require an exact length.
//  * the "else if" check read past the compared range and could never
//    succeed; a single scanned word cannot contain the space of "else if",
//    so the check was removed.
static token_type_t scanner_ident(std::string word, int length) {
    switch (word[0]) {
        case 'a': {
            // array
            if (word[1] == 'r' && length == 5 && word.substr(2, 3) == "ray") {
                return TOKEN_ARR;
            }
            break;
        }
        case 'b': {
            switch (word[1]) {
                case 'o':  // bool
                    if (length == 4 && word.substr(2, 2) == "ol") {
                        return TOKEN_BOOL;
                    }
                    break;
                case 'r':  // break
                    if (length == 5 && word.substr(2, 3) == "eak") {
                        return TOKEN_BREAK;
                    }
                    break;
            }
            break;
        }
        case 'c': {
            // continue
            if (word[1] == 'o' && length == 8 && word.substr(2, 6) == "ntinue") {
                return TOKEN_CONTINUE;
            }
            break;
        }
        case 'e':
            return scanner_rest(word, length, 1, 3, "lse", TOKEN_ELSE);
        case 'f': {
            switch (word[1]) {
                case 'n':
                    return scanner_rest(word, length, 2, 0, "", TOKEN_FN);
                case 'a':
                    return scanner_rest(word, length, 2, 3, "lse", TOKEN_FALSE);
                case 'l':
                    return scanner_rest(word, length, 2, 3, "oat", TOKEN_FLOAT);
                case '3':
                    return scanner_rest(word, length, 2, 1, "2", TOKEN_F32);
                case '6':
                    return scanner_rest(word, length, 2, 1, "4", TOKEN_F64);
                case 'o':
                    return scanner_rest(word, length, 2, 1, "r", TOKEN_FOR);
            }
            break;
        }
        case 'g':
            return scanner_rest(word, length, 1, 1, "o", TOKEN_GO);
        case 'i': {
            if (length == 2 && word[1] == 'n') {
                return TOKEN_IN;
            } else if (length == 2 && word[1] == 's') {
                return TOKEN_IS;
            } else if (length == 3 && word[1] == 'n' && word[2] == 't') {
                return TOKEN_INT;
            }
            switch (word[1]) {
                case 'm':
                    return scanner_rest(word, length, 2, 4, "port", TOKEN_IMPORT);
                case 'f':
                    return scanner_rest(word, length, 2, 0, "", TOKEN_IF);
                case 'n':
                    return scanner_rest(word, length, 2, 7, "terface", TOKEN_INTERFACE);
                case '8':
                    return scanner_rest(word, length, 2, 0, "", TOKEN_I8);
                case '1':
                    return scanner_rest(word, length, 2, 1, "6", TOKEN_I16);
                case '3':
                    return scanner_rest(word, length, 2, 1, "2", TOKEN_I32);
                case '6':
                    return scanner_rest(word, length, 2, 1, "4", TOKEN_I64);
            }
            break;
        }
        case 'l':
            return scanner_rest(word, length, 1, 2, "et", TOKEN_LET);
        case 'n': {
            switch (word[1]) {
                case 'u':  // null
                    return scanner_rest(word, length, 2, 2, "ll", TOKEN_NULL);
                // "new" is scanned as an identifier; the parser recognises it
                // only inside its fixed syntactic form.
                // case 'e':
                //     return scanner_rest(word, length, 2, 1, "w", TOKEN_NEW);
            }
            break;
        }
        case 'p':
            return scanner_rest(word, length, 1, 2, "tr", TOKEN_PTR);
        case 's': {
            // set, select, string, struct
            switch (word[1]) {
                case 'e': {
                    switch (word[2]) {
                        case 't':
                            return scanner_rest(word, length, 3, 0, "", TOKEN_SET);
                        case 'l':  // select
                            return scanner_rest(word, length, 3, 3, "ect", TOKEN_SELECT);
                    }
                    break;
                }
            }
            if (length == 6 && word[1] == 't' && word[2] == 'r') {
                switch (word[3]) {
                    case 'i':
                        return scanner_rest(word, length, 4, 2, "ng", TOKEN_STRING);
                    case 'u':
                        return scanner_rest(word, length, 4, 2, "ct", TOKEN_STRUCT);
                }
            }
            break;
        }
        case 't': {
            // throw, type, tup, try, true
            switch (word[1]) {
                case 'h':
                    return scanner_rest(word, length, 2, 3, "row", TOKEN_THROW);
                case 'y':  // type
                    return scanner_rest(word, length, 2, 2, "pe", TOKEN_TYPE);
                case 'u':  // tup
                    return scanner_rest(word, length, 2, 1, "p", TOKEN_TUP);
                case 'r': {
                    switch (word[2]) {
                        case 'y':
                            return scanner_rest(word, length, 3, 0, "", TOKEN_TRY);
                        case 'u':
                            return scanner_rest(word, length, 3, 1, "e", TOKEN_TRUE);
                    }
                    break;
                }
            }
            break;
        }
        case 'v': {
            switch (word[1]) {
                case 'a':
                    return scanner_rest(word, length, 2, 1, "r", TOKEN_VAR);
                case 'e':  // vec
                    return scanner_rest(word, length, 2, 1, "c", TOKEN_VEC);
                case 'o':  // void
                    return scanner_rest(word, length, 2, 2, "id", TOKEN_VOID);
            }
            // Must not fall through into the 'u' keywords.
            break;
        }
        case 'u': {
            switch (word[1]) {
                case 'i':
                    return scanner_rest(word, length, 2, 2, "nt", TOKEN_UINT);
                case '8':
                    return scanner_rest(word, length, 2, 0, "", TOKEN_U8);
                case '1':
                    return scanner_rest(word, length, 2, 1, "6", TOKEN_U16);
                case '3':
                    return scanner_rest(word, length, 2, 1, "2", TOKEN_U32);
                case '6':
                    return scanner_rest(word, length, 2, 1, "4", TOKEN_U64);
            }
            break;
        }
        case 'm': {
            // map, match
            switch (word[1]) {
                case 'a': {
                    switch (word[2]) {
                        case 'p':
                            return scanner_rest(word, length, 3, 0, "", TOKEN_MAP);
                        case 't':
                            return scanner_rest(word, length, 3, 2, "ch", TOKEN_MATCH);
                    }
                    break;
                }
            }
            // Must not fall through into "return".
            break;
        }
        case 'r':
            return scanner_rest(word, length, 1, 5, "eturn", TOKEN_RETURN);
    }
    return TOKEN_IDENT;
}
// Scans the next token starting at the cursor's `current` position.
//
// Only identifiers / keywords are dispatched so far. The previous body did
// not compile (missing ';', undefined name `ident`) and had no return on the
// non-identifier path.
inline token_t item() {
    auto& cursor = module.s_cursor;
    cursor.length = 0;              // start a new word
    cursor.guard = cursor.current;  // rewind the look-ahead to the word start

    // Remember where the token starts; the advance helpers move the counters.
    const int start_line = cursor.line;
    const int start_column = cursor.column;

    if (is_alpha(cursor.source[cursor.guard])) {
        const std::string word = ident_advance();
        const token_type_t type = scanner_ident(word, static_cast<int>(word.size()));
        return token_t(type, word, start_line, start_column);
    }

    // TODO: dispatch numbers, strings and operators here. Until then anything
    // that is not an identifier is reported as an unclassified token so that
    // every path returns a value.
    return token_t(TOKEN_NOT_IN_THIS_TYPE, std::string(), start_line, start_column);
}
};

201
include/syntax/token.h Normal file
View File

@ -0,0 +1,201 @@
#pragma once
#include <iostream>
#include <string>
#include <unordered_map>
#define DEBUG_SCANNER
// Every kind of token the scanner can produce. Enumerator values are
// implicit, so the declaration order defines them.
enum token_type_t {
TOKEN_NOT_IN_THIS_TYPE = 0,
TOKEN_LEFT_PAREN,
TOKEN_RIGHT_PAREN,// ()
TOKEN_LEFT_SQUARE,
TOKEN_RIGHT_SQUARE,// []
TOKEN_LEFT_CURLY,
TOKEN_RIGHT_CURLY,// {}
TOKEN_LEFT_ANGLE, // <
TOKEN_LESS_THAN, // <
TOKEN_RIGHT_ANGLE,// >
TOKEN_COMMA, // ,
TOKEN_DOT, // .
TOKEN_MINUS, // -
TOKEN_PLUS, // +
TOKEN_COLON, // :
TOKEN_SEMICOLON, // ;
TOKEN_SLASH, // /
TOKEN_STAR, // a * b, *a
TOKEN_PERSON, // %
TOKEN_QUESTION, // ?
TOKEN_RIGHT_ARROW,// ->
TOKEN_NOT,// !
TOKEN_NOT_EQUAL,
TOKEN_EQUAL,
TOKEN_EQUAL_EQUAL,
TOKEN_GREATER_EQUAL,// >=
TOKEN_LESS_EQUAL, // <=
TOKEN_AND_AND, // &&
TOKEN_OR_OR, // ||
TOKEN_PLUS_EQUAL, // +=
TOKEN_MINUS_EQUAL, // -=
TOKEN_STAR_EQUAL, // *=
TOKEN_SLASH_EQUAL, // /=
TOKEN_PERSON_EQUAL, // %=
TOKEN_AND_EQUAL, // &=
TOKEN_OR_EQUAL, // |=
TOKEN_XOR_EQUAL, // ^=
TOKEN_LEFT_SHIFT_EQUAL, // <<=
TOKEN_RIGHT_SHIFT_EQUAL,// >>=
// Bitwise operators
TOKEN_TILDE, // ~
TOKEN_AND, // &
TOKEN_OR, // |
TOKEN_XOR, // ^
TOKEN_LEFT_SHIFT, // <<
TOKEN_RIGHT_SHIFT,// >>
// Literals
TOKEN_IDENT, // identifier
TOKEN_LITERAL_STRING,
TOKEN_LITERAL_FLOAT,
TOKEN_LITERAL_INT,
// Types
TOKEN_STRING,
TOKEN_BOOL,
TOKEN_U8,
TOKEN_U16,
TOKEN_U32,
TOKEN_U64,
TOKEN_I8,
TOKEN_I16,
TOKEN_I32,
TOKEN_I64,
TOKEN_F32,
TOKEN_F64,
// Built-in composite types
TOKEN_ARR,
TOKEN_VEC,
TOKEN_MAP,
TOKEN_TUP,
// Keywords
TOKEN_VAR,
TOKEN_TRUE,
TOKEN_FALSE,
TOKEN_TYPE,
TOKEN_STRUCT,
TOKEN_CONTINUE,
TOKEN_BREAK,
TOKEN_FOR,
TOKEN_IN,
TOKEN_IF,
TOKEN_ELSE,
TOKEN_ELSE_IF,
TOKEN_FN,
TOKEN_RETURN,
TOKEN_STMT_EOF, // ;
TOKEN_EOF,// TOKEN_EOF must remain the last enumerator, otherwise indexing will overflow
};
// Printable spelling of each token type, used for diagnostics.
inline static std::unordered_map<token_type_t, std::string> token_str = {
    // brackets
    {TOKEN_LEFT_PAREN, "("}, {TOKEN_RIGHT_PAREN, ")"},
    {TOKEN_LEFT_SQUARE, "["}, {TOKEN_RIGHT_SQUARE, "]"},
    {TOKEN_LEFT_CURLY, "{"}, {TOKEN_RIGHT_CURLY, "}"},
    {TOKEN_LEFT_ANGLE, "<"}, {TOKEN_LESS_THAN, "<"}, {TOKEN_RIGHT_ANGLE, ">"},
    // single-character punctuation and arithmetic
    {TOKEN_COMMA, ","}, {TOKEN_DOT, "."},
    {TOKEN_MINUS, "-"}, {TOKEN_PLUS, "+"},
    {TOKEN_COLON, ":"}, {TOKEN_SEMICOLON, ";"},
    {TOKEN_SLASH, "/"}, {TOKEN_STAR, "*"}, {TOKEN_PERSON, "%"},
    {TOKEN_QUESTION, "?"}, {TOKEN_RIGHT_ARROW, "->"},
    // comparison and logic
    {TOKEN_NOT, "!"}, {TOKEN_NOT_EQUAL, "!="},
    {TOKEN_EQUAL, "="}, {TOKEN_EQUAL_EQUAL, "=="},
    {TOKEN_GREATER_EQUAL, ">="}, {TOKEN_LESS_EQUAL, "<="},
    {TOKEN_AND_AND, "&&"}, {TOKEN_OR_OR, "||"},
    // compound assignment
    {TOKEN_PLUS_EQUAL, "+="}, {TOKEN_MINUS_EQUAL, "-="},
    {TOKEN_STAR_EQUAL, "*="}, {TOKEN_SLASH_EQUAL, "/="},
    {TOKEN_PERSON_EQUAL, "%="},
    {TOKEN_AND_EQUAL, "&="}, {TOKEN_OR_EQUAL, "|="}, {TOKEN_XOR_EQUAL, "^="},
    {TOKEN_LEFT_SHIFT_EQUAL, "<<="}, {TOKEN_RIGHT_SHIFT_EQUAL, ">>="},
    // bitwise operators
    {TOKEN_TILDE, "~"}, {TOKEN_AND, "&"}, {TOKEN_OR, "|"}, {TOKEN_XOR, "^"},
    {TOKEN_LEFT_SHIFT, "<<"}, {TOKEN_RIGHT_SHIFT, ">>"},
    // literals
    {TOKEN_IDENT, "ident_literal"},
    {TOKEN_LITERAL_STRING, "string_literal"},
    {TOKEN_LITERAL_FLOAT, "float_literal"},
    {TOKEN_LITERAL_INT, "int_literal"},
    // scalar types
    {TOKEN_STRING, "string"}, {TOKEN_BOOL, "bool"},
    {TOKEN_U8, "u8"}, {TOKEN_U16, "u16"}, {TOKEN_U32, "u32"}, {TOKEN_U64, "u64"},
    {TOKEN_I8, "i8"}, {TOKEN_I16, "i16"}, {TOKEN_I32, "i32"}, {TOKEN_I64, "i64"},
    {TOKEN_F32, "f32"}, {TOKEN_F64, "f64"},
    // built-in composite types
    {TOKEN_ARR, "arr"}, {TOKEN_VEC, "vec"}, {TOKEN_MAP, "map"}, {TOKEN_TUP, "tup"},
    // keywords
    {TOKEN_VAR, "var"}, {TOKEN_TRUE, "true"}, {TOKEN_FALSE, "false"},
    {TOKEN_TYPE, "type"}, {TOKEN_STRUCT, "struct"},
    {TOKEN_CONTINUE, "continue"}, {TOKEN_BREAK, "break"},
    {TOKEN_FOR, "for"}, {TOKEN_IN, "in"},
    {TOKEN_IF, "if"}, {TOKEN_ELSE, "else"}, {TOKEN_ELSE_IF, "else if"},
    {TOKEN_FN, "fn"}, {TOKEN_RETURN, "return"},
    // terminators
    {TOKEN_STMT_EOF, ";"},
    {TOKEN_EOF, "\0"}
};
// A scanned token together with its source position.
struct token_t {
    token_type_t type;    // kind of token
    std::string literal;  // text of the token
    int line;             // line passed in by the scanner
    int column;           // column passed in by the scanner
    int length;           // number of characters in `literal`

    // Builds a token; `length` is derived from `literal`.
    // With DEBUG_SCANNER defined, every constructed token is logged to stdout.
    token_t(token_type_t token_type, std::string literal, int line, int column)
        : type(token_type), literal(literal), line(line), column(column),
          length(static_cast<int>(literal.size())) {
#ifdef DEBUG_SCANNER
        // Look the name up without operator[]: that would insert an empty
        // entry into the global table for every unmapped token type.
        const auto name = token_str.find(token_type);
        std::cout << "[DEBUG] SCANNER line: " << line << ", type: "
                  << (name != token_str.end() ? name->second : std::string())
                  << ", literal: " << literal << std::endl;
#endif
    }
};

31
include/types.hpp Normal file
View File

@ -0,0 +1,31 @@
#pragma once
#include <string>
#include <vector>
#include "syntax/token.h"
// Position state of the scanner inside one source buffer.
// All scalar members are zero-initialised so a default-constructed cursor
// never holds indeterminate values.
struct scanner_cursor_t {
    std::string source;                  // text being scanned
    std::string::size_type current = 0;  // offset where the current word starts
    std::string::size_type guard = 0;    // look-ahead offset
    int length = 0;                      // characters consumed for the current word
    int line = 0;                        // line the scanner is currently on
    int column = 0;                      // column the scanner is currently on
};
struct module_t {
std::string source;
scanner_cursor_t s_cursor;
std::vector<token_t> token_list;
module_t(std::string source)
: source(source) {
s_cursor.source = source;
s_cursor.line = 1;
s_cursor.column = 1;
s_cursor.length = 0;
s_cursor.current = 0;
s_cursor.guard = 0;
}
};

View File

@ -1,94 +0,0 @@
#include "Scanner.h"
#include <sstream>
// Constant handler. Not implemented yet: always returns 0, which scan()
// treats as "no token recognised at this offset".
int Scanner::process_const_table(int index) {
return 0;
}
int Scanner::process_identifier_table(int index) {
std::stringstream buffer;
int old_index = index;
if (std::isalpha(m_source_code[index])) {
buffer << m_source_code[index];
index += 1;
while(std::isalnum(m_source_code[index])) {
buffer << m_source_code[index];
index += 1;
}
std::string identifier = buffer.str();
for (const auto& key : m_tables.KeyTable) {
if (identifier == key.second) {
return 0;
}
}
m_tables.IdTable.insert({identifier_index, identifier});
m_token_list.push_back(Token{identifier_index, ID_TABLE});
identifier_index++;
return index - old_index;
} else {
return 0;
}
}
// Keyword handler. Not implemented yet: always returns 0, which scan()
// treats as "no token recognised at this offset".
int Scanner::process_key_table(int index) {
return 0;
}
int Scanner::process_punct_table(int index) {
//identify the Punct in map
string s;
int n = this->m_source_code.size();
char c1= this->m_source_code[index];
char c2 = '@';
char c3 = '@';
if (index + 1 < n) c2 = this->m_source_code[index + 1];
if (index + 2 < n) c3 = this->m_source_code[index + 2];
if (((c1 == c2 && c2 == '<') || (c1 == c2&& c2 == '>'))&& c3 =='=') {
this->m_token_list.push_back({c1=='<'?31:32,PUNCT_TABLE});
return 3;
}
if (c1 == c2 ) {
if (c1 =='=') {
this->m_token_list.push_back({15,PUNCT_TABLE});
return 2;
}
if (c1 =='|') {
this->m_token_list.push_back({21,PUNCT_TABLE});
return 2;
}
if (c1=='&') {
this->m_token_list.push_back({20,PUNCT_TABLE});
return 2;
}
if (c1=='<') {
this->m_token_list.push_back({9,PUNCT_TABLE});
return 2;
}
if (c1=='>') {
this->m_token_list.push_back({10,PUNCT_TABLE});
return 2;
}
}
string t;
t.push_back(c1);
t.push_back(c2);
for (auto e : this->m_tables.PunctTable) {
if (e.second == t) {
this->m_token_list.push_back({e.first,PUNCT_TABLE});
return 2;
}
}
t.pop_back();
for (auto e : this->m_tables.PunctTable) {
if (e.second == t) {
this->m_token_list.push_back({e.first,PUNCT_TABLE});
return 1;
}
}
return 0;
}

View File

@ -1 +0,0 @@
#include "Tbs.h"