Merge branch 'gary'

# Conflicts: # src/Scanner.cpp # unit/scanner_test.cpp
2025-06-04 21:52:50 +08:00 · 2025-06-04 21:52:50 +08:00 · b6d6df0894
commit b6d6df0894
parent 0bcec890a0 3ea3dfde27
14 changed files with 1156 additions and 222 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -11,9 +11,7 @@ include_directories(${CMAKE_SOURCE_DIR}/include)
 file(GLOB SOURCES ${CMAKE_SOURCE_DIR}/src/*.cpp)
-add_executable(Hydrogen ${SOURCES}
+add_executable(Hydrogen ${SOURCES})
        src/scanner.cpp
        src/Tbs.cpp)
 file(GLOB TEST_SOURCES ${CMAKE_SOURCE_DIR}/unit/*.cpp)
--- a/include/Scanner.h
+++ b/include/Scanner.h
@ -1,58 +0,0 @@
 # pragma once
 #include "stdc++.h"
 #include "Token.h"
 #include "Tbs.h"
 #include <cstdlib>
 #include <vector>
 #include <iostream>
 // Table-driven tokenizer. scan() walks the source text and, at every offset,
 // asks the process_* matchers in a fixed order whether a lexeme starts there.
 class Scanner {
 public:
    // Stores its own copies of the source text and of the lookup tables.
    Scanner(std::string source_code, Tbs tables)
        : m_source_code(source_code), m_tables(tables) {}
    // Tokenizes the whole source. A matcher's non-zero return value is used as
    // the number of characters it consumed. Whitespace is skipped; any other
    // unmatched character aborts the process with a failure exit status.
    void scan() {
        for (int i = 0; i < static_cast<int>(m_source_code.size()); i++) {
            // The first matcher that reports a non-zero length wins.
            int len = process_const_table(i);
            if (len == 0) len = process_identifier_table(i);
            if (len == 0) len = process_key_table(i);
            if (len == 0) len = process_punct_table(i);
            if (len != 0) {
                // The loop header adds the final +1.
                i += len - 1;
                continue;
            }
            const char c = m_source_code[i];
            if (c == ' ' || c == '\t' || c == '\n' || c == '\r') {
                continue;
            }
            std::cerr << "Error: Tokenize" << std::endl;
            // A tokenize error must not look like a successful run.
            exit(EXIT_FAILURE);
        }
    }
    // Returns a copy of the tokens collected so far.
    inline std::vector<Token> get_token_list() {
        return m_token_list;
    }
    // Matchers, defined out of line. scan() treats 0 as "no match at i".
    int process_const_table(int i);
    int process_identifier_table(int i);
    int process_key_table(int i);
    int process_punct_table(int i);
 private:
    std::string m_source_code;
    std::vector<Token> m_token_list;
    Tbs m_tables;
    // Not used inside this header; initialized so it never holds garbage.
    int index = 0;
    // Index into the identifier table.
    int identifier_index = 0;
 };
--- a/include/Tbs.h
+++ b/include/Tbs.h
@ -1,91 +0,0 @@
 # pragma once
 #include "stdc++.h"
 #include <unordered_map>
 using std::unordered_map,std::string;
 // Lookup tables of the table-driven scanner, each mapping a numeric id to its
 // spelling. ConstTable and IdTable start empty; KeyTable and PunctTable are
 // pre-filled with the fixed spellings below.
 class Tbs {
 public:
    std::unordered_map<int, std::string> ConstTable;
    std::unordered_map<int, std::string> IdTable;
    // Keyword and built-in type spellings, ids 1..23.
    std::unordered_map<int, std::string> KeyTable = {
        {1, "var"},
        {2, "i8"},       {3, "i16"},      {4, "i32"},   {5, "i64"},
        {6, "u8"},       {7, "u16"},      {8, "u32"},   {9, "u64"},
        {10, "float32"}, {11, "float64"}, {12, "char"},
        {13, "for"},     {14, "if"},      {15, "else"},
        {16, "bool"},    {17, "string"},  {18, "vector"}, {19, "array"},
        {20, "struct"},  {21, "tuple"},
        {22, "print"},   {23, "println"}
    };
    // Operator and punctuation spellings, ids 1..44.
    // NOTE(review): "-" (1 and 8), "<" (13 and 35) and ">" (11 and 36) each
    // appear twice — presumably unary/binary and comparison/bracket roles; confirm.
    std::unordered_map<int, std::string> PunctTable = {
        {1, "-"},    {2, "!"},    {3, "~"},
        {4, "/"},    {5, "*"},    {6, "%"},    {7, "+"},   {8, "-"},
        {9, "<<"},   {10, ">>"},
        {11, ">"},   {12, ">="},  {13, "<"},   {14, "<="},
        {15, "=="},  {16, "!="},
        {17, "&"},   {18, "^"},   {19, "|"},
        {20, "&&"},  {21, "||"},
        {22, "="},   {23, "%="},  {24, "*="},  {25, "/="}, {26, "+="}, {27, "-="},
        {28, "|="},  {29, "&="},  {30, "^="},  {31, "<<="}, {32, ">>="},
        {33, "("},   {34, ")"},   {35, "<"},   {36, ">"},
        {37, ","},   {38, "."},   {39, "["},   {40, "]"},
        {41, "?"},   {42, ":"},   {43, "->"},  {44, ";"}
    };
 };
 // Names the four lookup tables; presumably tells which table a token id
 // refers to (see how callers pair it with an id).
 enum Table_Type { CONST_TABLE = 0, ID_TABLE = 1, KEY_TABLE = 2, PUNCT_TABLE = 3 };
--- a/include/Token.h
+++ b/include/Token.h
@ -1,8 +0,0 @@
 #pragma once
 #include "stdc++.h"
 #include "Tbs.h"
 // A scanned token: a numeric id paired with the table kind it belongs to.
 struct Token {
    int id;           // numeric id; presumably the key inside the table named by `type`
    Table_Type type;  // which table the id refers to
 };
--- a/include/input.txt
+++ b/include/input.txt
@ -0,0 +1,52 @@
 //Merge Sort
 struct Point {
  x:i8;
  y:i8;
 }
 //  Struct
 /*
 ababa
 */
 [Point:105] tmp;
 fn MergeSort([Point:20] v,i8 l,i8 r) -> {
   if l>r {
      return ;   
   }
   var mid = l + r >>1;
   MergeSort(v,l,mid);
   MergeSort(v,mid+1,r);
   i8 i=l,j=mid+1,k=l;
   for ;i <= mid && j <= r;k+=1 {
      if v[l] < v[r]
      {
        tmp[k] = v[l];
        l +=1;
      }
      else{
        tmp[k] = v[r];
        r += 1;
      }
   }
   for ; i<=mid; {
    tmp[k] = tmp[i];
    k += 1,i+=1;
   }
   for ; j <=r ; {
    tmp[k] = tmp[j];
    k +=1 , j += 1;
   }
 }
 fn main()->i8{
   [Point:20] d;
   [Point] d;
   for i8 i = 0;i< 20; i++ {
      d[i] = {x:i * i,y:i};
   }
   MergeSort(d,d+20);
  0 
 }
--- a/include/syntax/Scanner.h
+++ b/include/syntax/Scanner.h
@ -0,0 +1,696 @@
 #pragma once 
 #include "doctest.h"
 #include "token.h"
 #include "../types.hpp"
 #include <cctype>
 #include <iostream>
 #include <sstream>
 #include <string>
 class Scanner {
 public:
    // Binds the scanner to `module`; only a reference is kept, so the module
    // must outlive the scanner.
    Scanner(module_t& module) : module(module) {}
    inline std::vector<token_t> scan() {
        std::vector<token_t> tokens;
        while (!at_eof()) {
            if (skip_space()) {
                // 如果是空格或换行，则跳过
                continue;
            }
            token_t token = item();
            tokens.push_back(token);
        }
        tokens.push_back(token_t(TOKEN_EOF, "EOF", module.s_cursor.line, module.s_cursor.column));
        return tokens;
    }
 private:
    module_t& module;
    // Returns the current lexeme: `length` characters of the source starting
    // at `current`.
    inline std::string gen_word() {
        const auto& cursor = module.s_cursor;
        return cursor.source.substr(cursor.current, cursor.length);
    }
    // True for the four blank characters: newline, tab, carriage return, space.
    inline bool is_space(char c) {
        switch (c) {
            case '\n':
            case '\t':
            case '\r':
            case ' ':
                return true;
            default:
                return false;
        }
    }
    // True when `s` is a double quote, i.e. the character that opens a string literal.
    inline bool is_string(char s) {
        const bool opens_string = ('"' == s);
        return opens_string;
    }
    // Decides whether a numeric lexeme is a floating-point literal.
    // Returns true when the text holds an exponent marker ('e'/'E') or exactly
    // one '.'. Prints a diagnostic and returns false when the text ends with
    // '.' or holds more than one '.'; returns false for plain integers.
    inline bool is_float(std::string word) {
        int dot_count = 0;
        bool has_e = false;
        for (const char c : word) {
            if (c == '.')
                dot_count++;
            else if (c == 'e' || c == 'E')
                has_e = true;
        }
        // A literal must not end with '.'. Look at the real last character and
        // only when there is one (an index of -1 is far out of bounds).
        if (!word.empty() && word.back() == '.') {
            std::cerr << "[ERROR] line: " << module.s_cursor.line << ". column: " 
                << module.s_cursor.column <<". floating-point numbers cannot end with '.'";
            return false;
        }
        // An exponent marker alone makes the literal a float.
        if (has_e) {
            return true;
        }
        if (dot_count == 0) {
            return false;
        }
        if (dot_count > 1) {
            std::cerr << "[ERROR] line: " << module.s_cursor.line << ". column: " 
                << module.s_cursor.column <<". floating-point numbers have multiple '.'";
            return false;
        }
        return true;
    }
    // True when `c` is alphabetic according to std::isalpha.
    inline bool is_alpha(char c) {
        // <cctype> classifiers require a value representable as unsigned char;
        // passing a negative plain char (any byte >= 0x80) is undefined behaviour.
        return std::isalpha(static_cast<unsigned char>(c)) != 0;
    }
    // True when `c` is a decimal digit according to std::isdigit.
    inline bool is_number(char c) {
        // <cctype> classifiers require a value representable as unsigned char;
        // passing a negative plain char (any byte >= 0x80) is undefined behaviour.
        return std::isdigit(static_cast<unsigned char>(c)) != 0;
    }
    // True for one hexadecimal digit: 0-9, a-f or A-F.
    inline bool is_hex_number(char c) {
        if ('0' <= c && c <= '9') {
            return true;
        }
        if ('a' <= c && c <= 'f') {
            return true;
        }
        return 'A' <= c && c <= 'F';
    }
    // True for one octal digit: 0-7.
    inline bool is_oct_number(char c) {
        const bool below_range = c < '0';
        const bool above_range = c > '7';
        return !(below_range || above_range);
    }
    // True for one binary digit: 0 or 1.
    inline bool is_bin_number(char c) {
        switch (c) {
            case '0':
            case '1':
                return true;
            default:
                return false;
        }
    }
    // True when the guard has reached the end of the source text, or sits on a
    // NUL character.
    inline bool at_eof() {
        const std::string& text = module.s_cursor.source;
        // The bounds test comes first: indexing a std::string beyond size() is
        // undefined, and the guard can be pushed past the end by callers.
        return module.s_cursor.guard >= text.size() || text[module.s_cursor.guard] == '\0';
    }
    // Consumes the character under the guard: moves the guard forward by one,
    // grows the current lexeme length, and updates line/column bookkeeping.
    // Returns the character now under the guard, or '\0' past the end of input.
    inline char guard_advance() {
        auto& cursor = module.s_cursor;
        // Line bookkeeping is driven by the character being consumed. Counting
        // the newline the guard merely lands on would move the last token of
        // every line onto the following line and miss a newline at offset 0.
        const bool consumed_newline =
            cursor.guard < cursor.source.size() && cursor.source[cursor.guard] == '\n';
        cursor.guard++;
        cursor.length++;
        if (consumed_newline) {
            cursor.line++;
            cursor.column = 1; // the first character of a line is column 1
        } else {
            cursor.column++;
        }
        return cursor.guard < cursor.source.size() ? cursor.source[cursor.guard] : '\0';
    }
    // Consumes the character under the guard when it equals `expected`.
    // Returns true if it was consumed; false at end of input or on a mismatch.
    inline bool match(char expected) {
        if (at_eof()) 
            return false;
        // Read from the cursor's own text, like every other helper here, so the
        // scanner does not depend on module.source staying identical to it.
        if (module.s_cursor.source[module.s_cursor.guard] != expected) 
            return false;
        guard_advance();
        return true;
    }
    // Consumes a run of letters and digits starting at the guard and returns
    // the lexeme collected since `current`.
    inline std::string ident_advance() {
        for (;;) {
            if (at_eof()) {
                break;
            }
            const char c = module.s_cursor.source[module.s_cursor.guard];
            if (!is_alpha(c) && !is_number(c)) {
                break;
            }
            guard_advance();
        }
        return gen_word();
    }
    // Scans one operator or punctuation token starting at the guard.
    // Always consumes the first character, then greedily extends it to the
    // longest operator it starts (e.g. '<', '<<', '<<='). Returns
    // TOKEN_NOT_IN_THIS_TYPE for a character that starts no known operator.
    inline token_type_t special_char() {
        const char c = module.s_cursor.source[module.s_cursor.guard];
        guard_advance();
        switch (c) {
            case '(':
                return TOKEN_LEFT_PAREN;
            case ')':
                return TOKEN_RIGHT_PAREN;
            case '[':
                return TOKEN_LEFT_SQUARE;
            case ']':
                return TOKEN_RIGHT_SQUARE;
            case '{':
                return TOKEN_LEFT_CURLY;
            case '}':
                return TOKEN_RIGHT_CURLY;
            case ':':
                return TOKEN_COLON;
            case ';':
                return TOKEN_STMT_EOF;
            case ',':
                return TOKEN_COMMA;
            case '?':
                return TOKEN_QUESTION;
            case '.':
                return TOKEN_DOT;
            case '~':
                return TOKEN_TILDE;
            case '%':
                return match('=') ? TOKEN_PERSON_EQUAL : TOKEN_PERSON;
            case '+':
                return match('=') ? TOKEN_PLUS_EQUAL : TOKEN_PLUS;
            case '/':
                return match('=') ? TOKEN_SLASH_EQUAL : TOKEN_SLASH;
            case '*':
                return match('=') ? TOKEN_STAR_EQUAL : TOKEN_STAR;
            case '!':
                return match('=') ? TOKEN_NOT_EQUAL : TOKEN_NOT;
            case '=':
                return match('=') ? TOKEN_EQUAL_EQUAL : TOKEN_EQUAL;
            case '^':
                return match('=') ? TOKEN_XOR_EQUAL : TOKEN_XOR;
            case '-':
                if (match('=')) {
                    return TOKEN_MINUS_EQUAL; // -=
                }
                if (match('>')) {
                    return TOKEN_RIGHT_ARROW; // ->
                }
                return TOKEN_MINUS;
            case '<':
                if (match('<')) {
                    // << or <<=
                    return match('=') ? TOKEN_LEFT_SHIFT_EQUAL : TOKEN_LEFT_SHIFT;
                }
                return match('=') ? TOKEN_LESS_EQUAL : TOKEN_LEFT_ANGLE;
            case '>':
                if (match('>')) {
                    // The second '>' is consumed here, so a plain ">>" must be
                    // reported as a shift, mirroring the '<' case.
                    return match('=') ? TOKEN_RIGHT_SHIFT_EQUAL : TOKEN_RIGHT_SHIFT;
                }
                return match('=') ? TOKEN_GREATER_EQUAL : TOKEN_RIGHT_ANGLE;
            case '&':
                if (match('&')) {
                    return TOKEN_AND_AND; // &&
                }
                return match('=') ? TOKEN_AND_EQUAL : TOKEN_AND; // &= or &
            case '|':
                if (match('|')) {
                    return TOKEN_OR_OR; // ||
                }
                return match('=') ? TOKEN_OR_EQUAL : TOKEN_OR; // |= or |
            default:
                return token_type_t::TOKEN_NOT_IN_THIS_TYPE;
        }
    }
    inline std::string string_advance() {
        module.s_cursor.guard++;
        char escape_char = '\\';
        std::stringstream buf;
        while (module.s_cursor.source[module.s_cursor.guard] != '\"' && !at_eof()) {
            char guard = module.s_cursor.source[module.s_cursor.guard];
            if (guard == '\n') {
                std::cerr << "[ERROR] line: " << module.s_cursor.line << ". column: " 
                << module.s_cursor.column <<". string cannot newline.";
            }
            // 处理转义字符
            if (guard == escape_char) {
                // 跳过转义字符第一个
                module.s_cursor.guard++;
                guard = module.s_cursor.source[module.s_cursor.guard];
                switch (guard) {
                    case 'n':
                        guard = '\n';
                        break;
                    case 't':
                        guard = '\t';
                        break;
                    case 'r':
                        guard = '\r';
                        break;
                    case 'b':
                        guard = '\b';
                        break;
                    case 'f':
                        guard = '\f';
                        break;
                    case 'a':
                        guard = '\a';
                        break;
                    case 'v':
                        guard = '\v';
                        break;
                    case '0':
                        guard = '\0';
                        break;
                    case '\\':
                    case '\'':
                    case '\"':
                        break;
                    default:
                        std::cerr << "[ERROR] line: " << module.s_cursor.line << ". column: " 
                        << module.s_cursor.column <<". unknown escape char " << guard;
                }
            }
            buf << guard;
            guard_advance();
        }
        //跳过close char
        module.s_cursor.guard++;
        return buf.str();
    }
    // Converts `word` to a long in the given base with std::stol.
    // On a conversion failure a diagnostic carrying the current cursor line and
    // column is printed and 0 is returned.
    inline long number_convert(std::string word, int base) {
        const char* problem = nullptr;
        try {
            return std::stol(word, nullptr, base);
        } catch (const std::invalid_argument&) {
            problem = ". Invalid number: ";
        } catch (const std::out_of_range&) {
            problem = ". Number out of range: ";
        }
        std::cerr << "[ERROR] line: " << module.s_cursor.line << ". column: " 
            << module.s_cursor.column << problem << word << std::endl;
        return 0;
    }
    // Converts `word` to a double with std::stod.
    // On a conversion failure a diagnostic carrying the current cursor line and
    // column is printed and 0 is returned.
    inline double number_convert_float(std::string word) {
        const char* problem = nullptr;
        try {
            return std::stod(word);
        } catch (const std::invalid_argument&) {
            problem = ". Invalid number: ";
        } catch (const std::out_of_range&) {
            problem = ". Number out of range: ";
        }
        std::cerr << "[ERROR] line: " << module.s_cursor.line << ". column: "
            << module.s_cursor.column << problem << word << std::endl;
        return 0;
    }
    // Scans a hexadecimal literal whose two-character prefix starts at the
    // guard. Returns only the digits; the prefix is not part of the result.
    inline std::string hex_number_advance() {
        auto& cursor = module.s_cursor;
        // Step over "0x" directly: the prefix is not counted in `length`, so
        // `length` ends up being the digit count used by substr below.
        cursor.guard += 2;
        for (;;) {
            if (!is_hex_number(cursor.source[cursor.guard]) || at_eof()) {
                break;
            }
            guard_advance();
        }
        return cursor.source.substr(cursor.current + 2, cursor.length);
    }
    // Scans an octal literal whose two-character prefix starts at the guard.
    // Returns only the digits; the prefix is not part of the result.
    inline std::string oct_number_advance() {
        auto& cursor = module.s_cursor;
        // Step over "0o" directly: the prefix is not counted in `length`, so
        // `length` ends up being the digit count used by substr below.
        cursor.guard += 2;
        for (;;) {
            if (!is_oct_number(cursor.source[cursor.guard]) || at_eof()) {
                break;
            }
            guard_advance();
        }
        return cursor.source.substr(cursor.current + 2, cursor.length);
    }
    // Scans a binary literal whose two-character prefix starts at the guard.
    // Returns only the digits; the prefix is not part of the result.
    inline std::string bin_number_advance() {
        auto& cursor = module.s_cursor;
        // Step over "0b" directly: the prefix is not counted in `length`, so
        // `length` ends up being the digit count used by substr below.
        cursor.guard += 2;
        for (;;) {
            if (!is_bin_number(cursor.source[cursor.guard]) || at_eof()) {
                break;
            }
            guard_advance();
        }
        return cursor.source.substr(cursor.current + 2, cursor.length);
    }
    // Scans a decimal literal starting at the guard: digits, an optional
    // fraction ('.' followed by digits) and an optional exponent ('e'/'E', an
    // optional sign, then digits). Returns the lexeme collected since `current`.
    inline std::string number_advance() {
        const std::string& text = module.s_cursor.source;
        // Bounds-checked lookahead relative to the guard; '\0' past the end.
        auto peek = [&](std::string::size_type offset) -> char {
            const std::string::size_type pos = module.s_cursor.guard + offset;
            return pos < text.size() ? text[pos] : '\0';
        };
        auto consume_digits = [&]() {
            while (!at_eof() && is_number(peek(0))) {
                guard_advance();
            }
        };
        consume_digits();
        // Fraction: the '.' only belongs to the number when a digit follows.
        if (peek(0) == '.' && is_number(peek(1))) {
            guard_advance(); // skip the '.'
            consume_digits();
        }
        // Exponent: taken only when at least one digit follows the marker and
        // the optional sign, so that "1e+" is not swallowed as a number.
        if (peek(0) == 'e' || peek(0) == 'E') {
            const bool has_sign = peek(1) == '+' || peek(1) == '-';
            if (is_number(peek(has_sign ? 2 : 1))) {
                guard_advance(); // skip 'e' / 'E'
                if (has_sign) {
                    guard_advance(); // skip the sign
                }
                consume_digits();
            }
        }
        return gen_word();
    }
    // Classifies an identifier-shaped word: returns the keyword or built-in
    // type token whose spelling equals `word` exactly, otherwise TOKEN_IDENT.
    // `length` is kept for interface compatibility; the comparison uses the
    // word's own size.
    static token_type_t scanner_ident(std::string word, int length) {
        (void)length;
        struct keyword_entry {
            const char* text;
            token_type_t type;
        };
        // Exact-match table. A whole-word comparison cannot leak from one
        // candidate into the next, which the nested switch did through missing
        // breaks (e.g. "e32" -> f32, "i12" -> i32, "v8" -> u8).
        // NOTE(review): TOKEN_ARR is spelled "array" here, as in the previous
        // matcher, while token_str prints it as "arr" — confirm which is meant.
        static const keyword_entry keywords[] = {
            {"array", TOKEN_ARR},
            {"bool", TOKEN_BOOL},
            {"break", TOKEN_BREAK},
            {"continue", TOKEN_CONTINUE},
            {"else", TOKEN_ELSE},
            {"fn", TOKEN_FN},
            {"false", TOKEN_FALSE},
            {"f32", TOKEN_F32},
            {"f64", TOKEN_F64},
            {"for", TOKEN_FOR},
            {"if", TOKEN_IF},
            {"i8", TOKEN_I8},
            {"i16", TOKEN_I16},
            {"i32", TOKEN_I32},
            {"i64", TOKEN_I64},
            {"string", TOKEN_STRING},
            {"struct", TOKEN_STRUCT},
            {"type", TOKEN_TYPE},
            {"tup", TOKEN_TUP},
            {"true", TOKEN_TRUE},
            {"var", TOKEN_VAR},
            {"vec", TOKEN_VEC},
            {"u8", TOKEN_U8},
            {"u16", TOKEN_U16},
            {"u32", TOKEN_U32},
            {"u64", TOKEN_U64},
            {"map", TOKEN_MAP},
            {"return", TOKEN_RETURN},
        };
        for (const keyword_entry& entry : keywords) {
            if (word == entry.text) {
                return entry.type;
            }
        }
        return TOKEN_IDENT;
    }
    // True when the two characters at the guard are "*/", the block-comment terminator.
    inline bool multi_comment_end() {
        const std::string& text = module.s_cursor.source;
        const std::string::size_type pos = module.s_cursor.guard;
        if (text[pos] != '*') {
            return false;
        }
        return text[pos + 1] == '/';
    }
    // Skips blanks, newlines, "//" line comments and "/* ... */" block comments
    // starting at the guard. Records the character before the skipped run in
    // space_prev (when the guard has moved since `current`) and the character
    // the skip stopped on in space_next.
    // Returns true when at least one newline character was skipped; returns
    // false right after reporting an unterminated block comment.
    inline bool skip_space() {
        bool has_new = false;
        auto& cursor = module.s_cursor;
        if (cursor.guard != cursor.current) {
            cursor.space_prev = cursor.source[cursor.guard - 1];
        }
        while (true) {
            const char c = cursor.source[cursor.guard];
            switch (c) {
                case ' ':
                case '\r':
                case '\t': {
                    guard_advance();
                    break;
                }
                case '\n': {
                    guard_advance();
                    has_new = true;
                    break;
                }
                case '/': {
                    const char next = cursor.source[cursor.guard + 1];
                    if (next == '/') {
                        // Line comment: stop on the newline so the '\n' case
                        // above accounts for it.
                        while (cursor.source[cursor.guard] != '\n' && !at_eof()) {
                            guard_advance();
                        }
                        break;
                    }
                    if (next == '*') {
                        // Step over the opener first; otherwise its '*' could
                        // pair with a following '/' and "/*/" would count as a
                        // complete comment.
                        guard_advance(); // skip '/'
                        guard_advance(); // skip '*'
                        while (!multi_comment_end()) {
                            if (at_eof()) {
                                std::cerr << "[ERROR] line: " << module.s_cursor.line << ". column: " 
                                    << module.s_cursor.column <<". multi comment not end.";
                                return false;
                            }
                            guard_advance();
                        }
                        guard_advance(); // skip '*'
                        guard_advance(); // skip '/'
                        break;
                    }
                    // A lone '/' is an operator, not a comment.
                    cursor.space_next = cursor.source[cursor.guard];
                    return has_new;
                }
                default: {
                    cursor.space_next = cursor.source[cursor.guard];
                    return has_new;
                }
            }
        }
    }
    // Scans exactly one token starting at the guard and returns it.
    // Dispatches on the first character: letter -> identifier/keyword,
    // digit -> numeric literal, '"' -> string literal, anything else ->
    // operator/punctuation. The token carries the cursor's line and column as
    // they are after the lexeme has been consumed.
    inline token_t item() {
        module.s_cursor.length = 0; // restart the lexeme length
        module.s_cursor.current = module.s_cursor.guard; // the lexeme starts at the guard
        const char first = module.s_cursor.source[module.s_cursor.guard];
        if (is_alpha(first)) {
            std::string word = ident_advance();
            return token_t(scanner_ident(word, word.size()),word, module.s_cursor.line, module.s_cursor.column);
        }
        if (is_number(first)) {
            std::string word;
            token_type_t type = TOKEN_LITERAL_INT;
            // Only a leading '0' can introduce a radix prefix.
            const char prefix = (first == '0')
                ? module.s_cursor.source[module.s_cursor.guard + 1] : '\0';
            // The number_convert* calls below are made for their diagnostics;
            // the token keeps the textual form.
            if (prefix == 'x' || prefix == 'X') {
                // Radix-prefixed literals are always integers. Hex digits may
                // contain 'e'/'E', so they must never go through is_float().
                word = hex_number_advance();
                number_convert(word, 16);
            } else if (prefix == 'o' || prefix == 'O') {
                word = oct_number_advance();
                number_convert(word, 8);
            } else if (prefix == 'b' || prefix == 'B') {
                word = bin_number_advance();
                number_convert(word, 2);
            } else {
                word = number_advance();
                if (is_float(word)) {
                    // Keep the text intact: dropping the leading zero of "0.5"
                    // would leave ".5" and raise a bogus invalid-number error.
                    type = TOKEN_LITERAL_FLOAT;
                    number_convert_float(word);
                } else {
                    // Integers keep the previous behaviour of dropping one leading zero.
                    if (first == '0' && word.size() > 1) word = word.substr(1, word.size() - 1);
                    number_convert(word, 10);
                }
            }
            return token_t(type, word, module.s_cursor.line, module.s_cursor.column);
        }
        if (is_string(first)) {
            std::string word = string_advance();
            return token_t(TOKEN_LITERAL_STRING, word, module.s_cursor.line, module.s_cursor.column);
        }
        token_type_t type = special_char();
        return token_t(type, gen_word(), module.s_cursor.line, module.s_cursor.column);
    }
 };
--- a/include/syntax/token.h
+++ b/include/syntax/token.h
@ -0,0 +1,201 @@
 #pragma once 
 #include <iostream>
 #include <string>
 #include <unordered_map>
 #define DEBUG_SCANNER
 // Every token kind the scanner can produce. Values are implicit and
 // consecutive starting at 0, so the order of the enumerators is significant.
 enum token_type_t {
    TOKEN_NOT_IN_THIS_TYPE = 0,
    TOKEN_LEFT_PAREN,
    TOKEN_RIGHT_PAREN,// ()
    TOKEN_LEFT_SQUARE,
    TOKEN_RIGHT_SQUARE,// []
    TOKEN_LEFT_CURLY,
    TOKEN_RIGHT_CURLY,// {}
    TOKEN_LEFT_ANGLE, // <
    TOKEN_LESS_THAN,  // <
    TOKEN_RIGHT_ANGLE,// >
    TOKEN_COMMA,      // ,
    TOKEN_DOT,        // .
    TOKEN_MINUS,      // -
    TOKEN_PLUS,       // +
    TOKEN_COLON,      // :
    TOKEN_SEMICOLON,  // ;
    TOKEN_SLASH,      // /
    TOKEN_STAR,       //  a * b, *a
    TOKEN_PERSON,     // %
    TOKEN_QUESTION,   // ?
    TOKEN_RIGHT_ARROW,// ->
    TOKEN_NOT,// !
    TOKEN_NOT_EQUAL,
    TOKEN_EQUAL,
    TOKEN_EQUAL_EQUAL,
    TOKEN_GREATER_EQUAL,// >=
    TOKEN_LESS_EQUAL,   // <=
    TOKEN_AND_AND,      // &&
    TOKEN_OR_OR,        // ||
    TOKEN_PLUS_EQUAL,       // +=
    TOKEN_MINUS_EQUAL,      // -=
    TOKEN_STAR_EQUAL,       // *=
    TOKEN_SLASH_EQUAL,      // /=
    TOKEN_PERSON_EQUAL,     // %=
    TOKEN_AND_EQUAL,        // &=
    TOKEN_OR_EQUAL,         // |=
    TOKEN_XOR_EQUAL,        // ^=
    TOKEN_LEFT_SHIFT_EQUAL, // <<=
    TOKEN_RIGHT_SHIFT_EQUAL,// >>=
    // Bitwise operators
    TOKEN_TILDE,      // ~
    TOKEN_AND,        // &
    TOKEN_OR,         // |
    TOKEN_XOR,        // ^
    TOKEN_LEFT_SHIFT, // <<
    TOKEN_RIGHT_SHIFT,// >>
    // Literals
    TOKEN_IDENT,      // identifier
    TOKEN_LITERAL_STRING,
    TOKEN_LITERAL_FLOAT,
    TOKEN_LITERAL_INT,
    // Types
    TOKEN_STRING,
    TOKEN_BOOL,
    TOKEN_U8,
    TOKEN_U16,
    TOKEN_U32,
    TOKEN_U64,
    TOKEN_I8,
    TOKEN_I16,
    TOKEN_I32,
    TOKEN_I64,
    TOKEN_F32,
    TOKEN_F64,
    // Built-in compound types
    TOKEN_ARR,
    TOKEN_VEC,
    TOKEN_MAP,
    TOKEN_TUP,
    // Keywords
    TOKEN_VAR,
    TOKEN_TRUE,
    TOKEN_FALSE,
    TOKEN_TYPE,
    TOKEN_STRUCT,
    TOKEN_CONTINUE,
    TOKEN_BREAK,
    TOKEN_FOR,
    TOKEN_IN,
    TOKEN_IF,
    TOKEN_ELSE,
    TOKEN_ELSE_IF,
    TOKEN_FN,
    TOKEN_RETURN,
    TOKEN_STMT_EOF, // ;
    TOKEN_EOF,// TOKEN_EOF must stay the last enumerator, otherwise indexing overflows
 };
 inline static std::unordered_map<token_type_t, std::string> token_str = {
    {TOKEN_LEFT_PAREN, "("},
    {TOKEN_RIGHT_PAREN, ")"},
    {TOKEN_LEFT_SQUARE, "["},
    {TOKEN_RIGHT_SQUARE, "]"},
    {TOKEN_LEFT_CURLY, "{"},
    {TOKEN_RIGHT_CURLY, "}"},
    {TOKEN_LEFT_ANGLE, "<"},
    {TOKEN_LESS_THAN, "<"},
    {TOKEN_RIGHT_ANGLE, ">"},
    {TOKEN_COMMA, ","},
    {TOKEN_DOT, "."},
    {TOKEN_MINUS, "-"},
    {TOKEN_PLUS, "+"},
    {TOKEN_COLON, ":"},
    {TOKEN_SEMICOLON, ";"},
    {TOKEN_SLASH, "/"},
    {TOKEN_STAR, "*"},
    {TOKEN_PERSON, "%"},
    {TOKEN_QUESTION, "?"},
    {TOKEN_RIGHT_ARROW, "->"},
    {TOKEN_NOT, "!"},
    {TOKEN_NOT_EQUAL, "!="},
    {TOKEN_EQUAL, "="},
    {TOKEN_EQUAL_EQUAL, "=="},
    {TOKEN_GREATER_EQUAL, ">="},
    {TOKEN_LESS_EQUAL, "<="},
    {TOKEN_AND_AND, "&&"},
    {TOKEN_OR_OR, "||"},
    {TOKEN_PLUS_EQUAL, "+="},
    {TOKEN_MINUS_EQUAL, "-="},
    {TOKEN_STAR_EQUAL, "*="},
    {TOKEN_SLASH_EQUAL, "/="},
    {TOKEN_PERSON_EQUAL, "%="},
    {TOKEN_AND_EQUAL, "&="},
    {TOKEN_OR_EQUAL, "|="},
    {TOKEN_XOR_EQUAL, "^="},
    {TOKEN_LEFT_SHIFT_EQUAL, "<<="},
    {TOKEN_RIGHT_SHIFT_EQUAL, ">>="},
    {TOKEN_TILDE, "~"},
    {TOKEN_AND, "&"},
    {TOKEN_OR, "|"},
    {TOKEN_XOR, "^"},
    {TOKEN_LEFT_SHIFT, "<<"},
    {TOKEN_RIGHT_SHIFT, ">>"},
    {TOKEN_IDENT, "ident_literal"},
    {TOKEN_LITERAL_STRING, "string_literal"},
    {TOKEN_LITERAL_FLOAT, "float_literal"},
    {TOKEN_LITERAL_INT, "int_literal"},
    {TOKEN_STRING, "string"},
    {TOKEN_BOOL, "bool"},
    {TOKEN_U8, "u8"},
    {TOKEN_U16, "u16"},
    {TOKEN_U32, "u32"},
    {TOKEN_U64, "u64"},
    {TOKEN_I8, "i8"},
    {TOKEN_I16, "i16"},
    {TOKEN_I32, "i32"},
    {TOKEN_I64, "i64"},
    {TOKEN_F32, "f32"},
    {TOKEN_F64, "f64"},
    {TOKEN_ARR, "arr"},
    {TOKEN_VEC, "vec"},
    {TOKEN_MAP, "map"},
    {TOKEN_TUP, "tup"},
    {TOKEN_VAR, "var"},
    {TOKEN_TRUE, "true"},
    {TOKEN_FALSE, "false"},
    {TOKEN_TYPE, "type"},
    {TOKEN_STRUCT, "struct"},
    {TOKEN_CONTINUE, "continue"},
    {TOKEN_BREAK, "break"},
    {TOKEN_FOR, "for"},
    {TOKEN_IN, "in"},
    {TOKEN_IF, "if"},
    {TOKEN_ELSE, "else"},
    {TOKEN_ELSE_IF, "else if"},
    {TOKEN_FN, "fn"},
    {TOKEN_RETURN, "return"},
    {TOKEN_STMT_EOF, ";"},
    {TOKEN_EOF, "\0"}
 };
 // One scanned token: its type, its text and the cursor position recorded for it.
 struct token_t {
    token_type_t type;
    std::string literal;  // token text as handed over by the scanner
    int line;
    int column;
    int length;           // number of characters in `literal`
    // Builds a token; `length` is derived from `literal`. When DEBUG_SCANNER is
    // defined, a trace line is printed for every token constructed.
    token_t(token_type_t token_type, std::string literal, int line, int column) 
        : type(token_type), literal(literal), line(line), column(column),
          length(static_cast<int>(literal.size())) {
 #ifdef DEBUG_SCANNER
            // Look the spelling up without operator[], which would insert an
            // empty entry into the shared table for every unmapped type.
            const auto spelling = token_str.find(token_type);
            std::cout << "[DEBUG] SCANNER line: " << line << ", type: "
                << (spelling != token_str.end() ? spelling->second : std::string())
                << ", literal: " << literal << std::endl;
 #endif
        }
 };
--- a/include/types.hpp
+++ b/include/types.hpp
@ -0,0 +1,37 @@
 #pragma once
 #include <string>
 #include <vector>
 #include "syntax/token.h"
 /// Scanner position/state over one source text.
 ///
 /// Every scalar member has a zero default so that a default-constructed
 /// cursor never holds indeterminate values. module_t's constructor overrides
 /// line and column with 1 when it sets a cursor up for scanning.
 struct scanner_cursor_t {
    std::string source;                      // Text this cursor refers to.
    std::string::size_type current = 0;      // NOTE(review): presumably the offset of the next character to read — confirm in Scanner.
    std::string::size_type guard = 0;        // NOTE(review): presumably the offset where the current token starts — confirm in Scanner.
    int length = 0;                          // NOTE(review): presumably the length of the token being scanned — confirm in Scanner.
    int line = 0;                            // Line the scanner is currently on.
    int column = 0;                          // Column the scanner is currently on.
    char space_prev = '\0';                  // Records the character that preceded a blank line or comment.
    char space_next = '\0';                  // NOTE(review): presumably the character following that blank line/comment — confirm.
 };
 struct module_t {
    std::string source;
    scanner_cursor_t s_cursor;
    std::vector<token_t> token_list;
    module_t(std::string source)
        : source(source) {
            s_cursor.source = source;
            s_cursor.line = 1;
            s_cursor.column = 1;
            s_cursor.length = 0;
            s_cursor.current = 0;
            s_cursor.guard = 0;
            s_cursor.space_prev = '\0';
            s_cursor.space_next = '\0';
    }
 };
--- a/input.txt
+++ b/input.txt
@ -0,0 +1,52 @@
 //Merge Sort
 struct Point {
  x:i8;
  y:i8;
 }
 //  Struct
 /*
 ababa
 */
 [Point:105] tmp;
 fn MergeSort([Point:20] v,i8 l,i8 r) -> {
   if l>r {
      return ;   
   }
   var mid = l + r >>1;
   MergeSort(v,l,mid);
   MergeSort(v,mid+1,r);
   i8 i=l,j=mid+1,k=l;
   for ;i <= mid && j <= r;k+=1 {
      if v[l] < v[r]
      {
        tmp[k] = v[l];
        l +=1;
      }
      else{
        tmp[k] = v[r];
        r += 1;
      }
   }
   for ; i<=mid; {
    tmp[k] = tmp[i];
    k += 1,i+=1;
   }
   for ; j <=r ; {
    tmp[k] = tmp[j];
    k +=1 , j += 1;
   }
 }
 fn main()->i8{
   [Point:20] d;
   [Point] d;
   for i8 i = 0;i< 20; i++ {
      d[i] = {x:i * i,y:i};
   }
   MergeSort(d,d+20);
  0 
 }
--- a/src/Scanner.cpp
+++ b/src/Scanner.cpp
@ -1,6 +1,5 @@
 #include "Scanner.h"
 #include <sstream>
 #include <cctype>
 int Scanner::process_const_table(int index) {
    return 0;
@ -35,45 +34,7 @@ int Scanner::process_identifier_table(int index) {
 }
 // Keyword matcher. For every (id, keyword) entry in m_tables.KeyTable it
 // checks whether m_source_code contains that keyword at `index` and that the
 // keyword is not directly followed by an alphanumeric character or '_'.
 // The longest such keyword is appended to m_token_list as a KEY_TABLE token
 // and its length is returned; 0 is returned when nothing matched.
 // NOTE(review): the '-'/'+' pair below swaps the `max_len` declaration for an
 // unconditional `return 0;` — presumably the merge stubs this function out
 // and the matching logic is being removed; confirm against the hunk header.
 int Scanner::process_key_table(int index) {
-    int max_len = 0;
+    return 0;
    int found_key = -1; // id of the keyword that was matched
    // Walk the keyword table
    for (const auto& pair : m_tables.KeyTable) {
        const std::string& keyword = pair.second;
        int len = keyword.length();
        // Check that enough input remains for this keyword
        if (index + len > m_source_code.length()) {
            continue;
        }
        // Compare the substring at `index` against the keyword
        if (m_source_code.substr(index, len) == keyword) {
            // Check whether the keyword is immediately followed by a letter/digit/underscore
            if (index + len < m_source_code.length()) {
                char next_char = m_source_code[index + len];
                if (isalnum(next_char) || next_char == '_') {
                    continue; // part of an identifier, skip it
                }
            }
            // Keep the longest match (resolves the "float32" vs "float64" conflict)
            if (len > max_len) {
                max_len = len;
                found_key = pair.first;
            }
        }
    }
    // A valid keyword was found
    if (max_len > 0) {
        Token token({ found_key, KEY_TABLE });
        m_token_list.push_back(token);
        return max_len;
    }
    return 0; // no keyword recognized
 }
 int Scanner::process_punct_table(int index) {
--- a/src/Tbs.cpp
+++ b/src/Tbs.cpp
@ -1 +0,0 @@
 #include "Tbs.h"
--- a/src/input.txt
+++ b/src/input.txt
@ -0,0 +1,52 @@
 //Merge Sort
 struct Point {
  x:i8;
  y:i8;
 }
 //  Struct
 /*
 ababa
 */
 [Point:105] tmp;
 fn MergeSort([Point:20] v,i8 l,i8 r) -> {
   if l>r {
      return ;   
   }
   var mid = l + r >>1;
   MergeSort(v,l,mid);
   MergeSort(v,mid+1,r);
   i8 i=l,j=mid+1,k=l;
   for ;i <= mid && j <= r;k+=1 {
      if v[l] < v[r]
      {
        tmp[k] = v[l];
        l +=1;
      }
      else{
        tmp[k] = v[r];
        r += 1;
      }
   }
   for ; i<=mid; {
    tmp[k] = tmp[i];
    k += 1,i+=1;
   }
   for ; j <=r ; {
    tmp[k] = tmp[j];
    k +=1 , j += 1;
   }
 }
 fn main()->i8{
   [Point:20] d;
   [Point] d;
   for i8 i = 0;i< 20; i++ {
      d[i] = {x:i * i,y:i};
   }
   MergeSort(d,d+20);
  0 
 }
--- a/unit/input.txt
+++ b/unit/input.txt
@ -0,0 +1,52 @@
 //Merge Sort
 struct Point {
  x:i8;
  y:i8;
 }
 //  Struct
 /*
 ababa
 */
 [Point:105] tmp;
 fn MergeSort([Point:20] v,i8 l,i8 r) -> {
   if l>r {
      return ;   
   }
   var mid = l + r >>1;
   MergeSort(v,l,mid);
   MergeSort(v,mid+1,r);
   i8 i=l,j=mid+1,k=l;
   for ;i <= mid && j <= r;k+=1 {
      if v[l] < v[r]
      {
        tmp[k] = v[l];
        l +=1;
      }
      else{
        tmp[k] = v[r];
        r += 1;
      }
   }
   for ; i<=mid; {
    tmp[k] = tmp[i];
    k += 1,i+=1;
   }
   for ; j <=r ; {
    tmp[k] = tmp[j];
    k +=1 , j += 1;
   }
 }
 fn main()->i8{
   [Point:20] d;
   [Point] d;
   for i8 i = 0;i< 20; i++ {
      d[i] = {x:i * i,y:i};
   }
   MergeSort(d,d+20);
  0 
 }
--- a/unit/scanner_test.cpp
+++ b/unit/scanner_test.cpp
@ -1,30 +1,21 @@
-#include "Token.h"
+
 #include "doctest.h"
 #include "stdc++.h"
-#include "Scanner.h"
+#include "types.hpp"
-#include "Tbs.h"
+#include "syntax/Scanner.h"
 #include "syntax/token.h"
 #include <vector>
 using std::string,std::vector;
-TEST_CASE("Scanner test identifier table") {
+TEST_CASE("Scanner test") {
-    Tbs tables;
+    std::ifstream t("input.txt");
-    std::string src = "abcvljl laadfs fafarwrw";
+    std::stringstream buffer;
-    Scanner scan(src, tables);
+    buffer<<t.rdbuf();
-    scan.scan();
+    std::cout<<buffer.str();
    module_t module(buffer.str());
    Scanner scanner(module);
-    for (auto value_src: scan.get_token_list()) {
+    scanner.scan();
        std::cout << value_src.id << " " << value_src.type << "\n";
    }
 }
 // Runs the scanner over a string made only of punctuation characters and
 // prints, for each produced token, its id, its type and the PunctTable entry
 // looked up by that id. Nothing is asserted; the output is for inspection.
 TEST_CASE("Scanner test Punct table") {
    Tbs tables = {};
    std::string punct_input = "+=---<<=>>>===--((([]--<<<>.";

    Scanner scan(punct_input, tables);
    scan.scan();

    std::cout << "test\n";

    auto tokens = scan.get_token_list();
    for (auto tok : tokens) {
        std::cout << tok.id << " " << tok.type << " " << tables.PunctTable[tok.id] << "\n";
    }
 }