code cleanup

2023-11-20 17:26:37 -05:00 · 2023-11-20 17:26:37 -05:00 · 9402da5f79
parent 308f7876ba
commit 9402da5f79
7 changed files with 1569 additions and 258 deletions
--- a/include/bf_mips_codegen.h
+++ b/include/bf_mips_codegen.h
@ -11,6 +11,6 @@
 #include <bf_tokenizer.h>
 #include <fstream>

-void codegen(const std::vector<token_t>& tokens, std::ostream& out);
+void codegen(tokenizer& tokenizer, std::ostream& out);

 #endif //BRAINFUCK_MISC_BF_MIPS_CODEGEN_H
--- a/include/bf_tokenizer.h
+++ b/include/bf_tokenizer.h
@ -27,13 +27,53 @@ enum class bf_token
 struct token_t
 {
    bf_token token;
-    std::optional<std::string> name = {};
+    std::optional<uint64_t> name = {}; // numeric id; stays empty until one is assigned (tokenizer::bf_name() stores it on OPEN tokens)
    
-    explicit token_t(bf_token token): token(token) {}
+    explicit token_t(bf_token token): token(token) // explicit: a bare bf_token never converts to token_t implicitly
+    {}
 };

-std::vector<token_t> tokenize(const std::string& program);
-std::vector<token_t>& bf_name(std::vector<token_t>& tokens);
+typedef std::optional<std::reference_wrapper<const token_t>> optional_token;

+/**
+ * Holds the token stream of one Brainfuck program and exposes it through a
+ * forward-only cursor (hasNext / advance / next / peek).
+ *
+ * The constructor runs tokenize() on the program text and then bf_name().
+ * References handed out by next() and peek() point into the internal vector,
+ * so they are only usable while this object is alive.
+ */
+class tokenizer
+{
+    private:
+        std::vector<token_t> tokens;
+        // incremented by bf_name() to produce the numeric id stored on each OPEN token
+        size_t conditionalCount = 0;
+        // position of the cursor inside `tokens`
+        size_t currentIndex = 0;
+        
+        void tokenize(const std::string& program);
+        
+        void bf_name();
+    
+    public:
+        explicit tokenizer(const std::string& program)
+        {
+            tokenize(program);
+            bf_name();
+        }
+        
+        // True while the cursor has not moved past the last token.
+        // const: it only reads state, so it can be called through a const reference.
+        bool hasNext() const
+        {
+            return currentIndex < tokens.size();
+        }
+        
+        // Moves the cursor one position forward; performs no bounds check.
+        void advance()
+        {
+            currentIndex++;
+        }
+        
+        // Returns the token under the cursor and steps past it,
+        // or an empty optional once the cursor is at or beyond the end.
+        optional_token next()
+        {
+            if (currentIndex >= tokens.size())
+                return {};
+            return tokens[currentIndex++];
+        }
+        
+        // Returns the token under the cursor without consuming it,
+        // or an empty optional once the cursor is at or beyond the end.
+        optional_token peek() const
+        {
+            if (currentIndex >= tokens.size())
+                return {};
+            return tokens[currentIndex];
+        }
+        
+        // Writes an indented listing of the tokens from `index` onward to std::cout.
+        // Uses its own index argument and leaves the cursor untouched.
+        void print(size_t index = 0);
+};

 #endif //BRAINFUCK_MISC_BF_TOKENIZER_H
--- a/mips.asm
+++ b/mips.asm
--- a/squares.bf
+++ b/squares.bf
@ -0,0 +1,8 @@
++++[>+++++<-]>[<+++++>-]+<+[
+    >[>+>+<<-]++>>[<<+>>-]>>>[-]++>[-]+
+    >>>+[[-]++++++>>>]<<<[[<++++++++<++>>-]+<.<[>----<-]<]
+    <<[>>>>>[>>>[-]+++++++++<[>-<-]+++++++++>[-[<->-]+[<<<]]<[>+<-]>]<<-]<<-
+]
+[Outputs square numbers from 0 to 10000.
+Daniel B Cristofani (cristofdathevanetdotcom)
+http://www.hevanet.com/cristofd/brainfuck/]
--- a/src/bf_mips_codegen.cpp
+++ b/src/bf_mips_codegen.cpp
@ -27,22 +27,22 @@ std::string preamble = ".data\n"

 void process_print(const std::vector<token_t>& tokens, size_t index);

-void codegen(const std::vector<token_t>& tokens, std::ostream& out)
+void codegen(tokenizer& tokenizer, std::ostream& out)
 {
    out << preamble;
    size_t index = 0;
    // skip past comments
-    if (tokens[index].token == bf_token::OPEN)
+    if (tokenizer.next().value().get().token == bf_token::OPEN)
        while (tokens[index].token != bf_token::CLOSE)
            index++;
    
-    process_print(tokens, index);
+    tokenizer.print(index);
    
    size_t sp = 0;
    while (index < tokens.size())
    {
        auto& token = tokens[index++];
-        std::string name{"UNNAMED"};
+        uint64_t name = 0;
        if (token.name.has_value())
            name = token.name.value();
        switch (token.token)
@ -75,76 +75,17 @@ void codegen(const std::vector<token_t>& tokens, std::ostream& out)
                 break;
            case bf_token::OPEN:
                out       << "\tlb    $t1, ($t0)\n"
-                          << "\tbeqz  $t1, BF_CLOSE_" << name << "_" << std::to_string(sp) << '\n'
-                          << "BF_OPEN_" << name << "_" << std::to_string(sp) << ":\n";
+                          << "\tbeqz  $t1, BF_CLOSE_" << name << "_" << sp << '\n'
+                          << "BF_OPEN_" << name << "_" << sp << ":\n";
                sp++;
                break;
            case bf_token::CLOSE:
                sp--;
                out       << "\tlb    $t1, ($t0)\n"
-                          << "\tbnez  $t1, BF_OPEN_" << name << "_" << std::to_string(sp) << '\n'
-                          << "BF_CLOSE_" << name << "_" << std::to_string(sp) << ":\n";
+                          << "\tbnez  $t1, BF_OPEN_" << name << "_" << sp << '\n'
+                          << "BF_CLOSE_" << name << "_" << sp << ":\n";
                break;
        }
    }
 }

-inline void tabulate(size_t v)
-{
-    for (size_t i = 0; i < v; i++)
-        std::cout << '\t';
-}
-
-void process_print(const std::vector<token_t>& tokens, size_t index)
-{
-    size_t sp = 0;
-    while (index < tokens.size())
-    {
-        auto& token = tokens[index++];
-        switch (token.token)
-        {
-            case bf_token::INC_DP:
-                tabulate(sp);
-                std::cout << "Increase DP\n";
-                break;
-            case bf_token::DEC_DP:
-                tabulate(sp);
-                std::cout << "Decrease DP\n";
-                break;
-            case bf_token::INC_DV:
-                tabulate(sp);
-                std::cout << "Increase DV\n";
-                break;
-            case bf_token::DEC_DV:
-                tabulate(sp);
-                std::cout << "Decrease DV\n";
-                break;
-            case bf_token::PRINT:
-                tabulate(sp);
-                std::cout << "Print\n";
-                break;
-            case bf_token::READ:
-                tabulate(sp);
-                std::cout << "Read\n";
-                break;
-            case bf_token::OPEN:
-                tabulate(sp);
-                std::cout << "If(";
-                if (token.name.has_value())
-                    std::cout << token.name.value() << "\n";
-                else
-                    std::cout << "UNNAMED" << "\n";
-                sp++;
-                break;
-            case bf_token::CLOSE:
-                sp--;
-                tabulate(sp);
-                if (token.name.has_value())
-                    std::cout << token.name.value();
-                else
-                    std::cout << "UNNAMED";
-                std::cout << ")\n";
-                break;
-        }
-    }
-}
--- a/src/bf_tokenizer.cpp
+++ b/src/bf_tokenizer.cpp
@ -6,11 +6,25 @@
 #include <bf_tokenizer.h>
 #include <sstream>
 #include <random>
+#include <iostream>

-std::vector<token_t> tokenize(const std::string& program)
+std::string generateName() // builds a 24-character string of random hexadecimal digits
 {
-    std::vector<token_t> tokens;
+    std::stringstream ss;
+    ss << std::hex; // every value in 0..15 is then written as a single hex digit
+    static std::random_device rd; // statics: the generator is seeded once, on the first call, and reused afterwards
+    static std::seed_seq seed{rd(), rd(), rd(), rd()};
+    static std::mt19937_64 gen(seed);
+    static std::uniform_int_distribution<int> dis(0, 15);
    
+    for (int i = 0; i < 24; i++)
+        ss << dis(gen);
+    
+    return ss.str(); // NOTE(review): bf_name() stops calling this in the same change - check whether any caller remains
+}
+
+void tokenizer::tokenize(const std::string& program)
+{
    size_t index = 0;
    while (index < program.size())
    {
@ -46,33 +60,16 @@ std::vector<token_t> tokenize(const std::string& program)
        }
        index++;
    }
-    
-    return tokens;
 }

-std::string generateName()
-{
-    std::stringstream ss;
-    ss << std::hex;
-    std::random_device rd;
-    std::seed_seq seed{rd(), rd(), rd(), rd()};
-    std::mt19937_64 gen(seed);
-    std::uniform_int_distribution<int> dis(0, 15);
-    
-    for (int i = 0; i < 24; i++)
-        ss << dis(gen);
-    
-    return ss.str();
-}
-
-std::vector<token_t>& bf_name(std::vector<token_t>& tokens)
+void tokenizer::bf_name()
 {
    size_t search_index = 0;
    while (search_index < tokens.size())
    {
        if (tokens[search_index].token == bf_token::OPEN)
        {
-            auto name = generateName();
+            auto name = ++conditionalCount;
            size_t sp = 1;
            size_t search_2 = search_index;
            tokens[search_index].name = name;
@ -89,5 +86,64 @@ std::vector<token_t>& bf_name(std::vector<token_t>& tokens)
        }
        search_index++;
    }
-    return tokens;
+}
+
+// Writes `v` tab characters to std::cout; zero or negative values write nothing.
+inline void tabulate(signed long long v)
+{
+    signed long long remaining = v;
+    while (remaining > 0)
+    {
+        std::cout << '\t';
+        --remaining;
+    }
+}
+
+// Dumps the tokens from `index` to the end as an indented listing on std::cout.
+// Indentation starts at one tab and follows the OPEN / CLOSE nesting.
+void tokenizer::print(size_t index)
+{
+    // Writes the token's id, or the literal UNNAMED when it has none.
+    const auto emit_name = [](const token_t& t) {
+        if (t.name.has_value())
+            std::cout << t.name.value();
+        else
+            std::cout << "UNNAMED";
+    };
+    
+    // signed so that an unmatched CLOSE drives the depth below zero instead of wrapping around
+    signed long long depth = 1;
+    for (; index < tokens.size(); index++)
+    {
+        const auto& current = tokens[index];
+        // text for the simple one-line tokens; stays null for OPEN, CLOSE and anything unlisted
+        const char* label = nullptr;
+        switch (current.token)
+        {
+            case bf_token::INC_DP:
+                label = "Increase DP\n";
+                break;
+            case bf_token::DEC_DP:
+                label = "Decrease DP\n";
+                break;
+            case bf_token::INC_DV:
+                label = "Increase DV\n";
+                break;
+            case bf_token::DEC_DV:
+                label = "Decrease DV\n";
+                break;
+            case bf_token::PRINT:
+                label = "Print\n";
+                break;
+            case bf_token::READ:
+                label = "Read\n";
+                break;
+            case bf_token::OPEN:
+                // the opening line is printed at the outer depth, then the body is indented one level deeper
+                tabulate(depth);
+                std::cout << "If(";
+                emit_name(current);
+                std::cout << "\n";
+                depth++;
+                break;
+            case bf_token::CLOSE:
+                // step back out first so the closing line lines up with its opening line
+                depth--;
+                tabulate(depth);
+                emit_name(current);
+                std::cout << ")\n";
+                break;
+        }
+        if (label != nullptr)
+        {
+            tabulate(depth);
+            std::cout << label;
+        }
+    }
 }
--- a/src/main.cpp
+++ b/src/main.cpp
@ -8,16 +8,15 @@

 int main(int argc, const char** argv)
 {
-    std::string file{"../helloworld.bf"};
+    std::string file{"../life.bf"}; // default input program, replaced by argv[1] when an argument is given
    if (argc > 1)
        file = argv[1];
    auto program = blt::fs::loadBrainFuckFile(file);
    
    std::ofstream out{"../mips.asm"};
    
-    auto tokens = tokenize(program);
-    bf_name(tokens);
-    codegen(tokens, out);
+    tokenizer tokenizer(program); // the constructor runs tokenize() and bf_name(); NOTE(review): this variable hides the type name from here on
+    codegen(tokenizer, out); // writes the generated assembly to `out` (../mips.asm)
    
    return 0;
 }