From 7fe860d5d4b70a8da820bd0b3eb8b47ed3c62cff Mon Sep 17 00:00:00 2001 From: Brett Laptop Date: Mon, 20 Nov 2023 20:33:51 -0500 Subject: [PATCH] sexy new codegen --- mips.asm | 27 +++--- mips2.asm | 181 ++++++---------------------------------- src/bf_mips_codegen.cpp | 49 +++++------ src/bf_tokenizer.cpp | 70 +++++++++++----- 4 files changed, 112 insertions(+), 215 deletions(-) diff --git a/mips.asm b/mips.asm index 3cfa3fc..4695ee5 100644 --- a/mips.asm +++ b/mips.asm @@ -38,8 +38,11 @@ bf: addi $t1, $t1, 1 sb $t1, ($t0) lb $t1, ($t0) - beqz $t1, BF_CLOSE_1_0 -BF_OPEN_1_0: + addi $t1, $t1, 1 + sb $t1, ($t0) + lb $t1, ($t0) + beqz $t1, BF_CLOSE_03e0a731165036472c4dae07_0 +BF_OPEN_03e0a731165036472c4dae07_0: addi $t0, $t0, 1 lb $t1, ($t0) addi $t1, $t1, 1 @@ -54,8 +57,8 @@ BF_OPEN_1_0: addi $t1, $t1, 1 sb $t1, ($t0) lb $t1, ($t0) - beqz $t1, BF_CLOSE_2_1 -BF_OPEN_2_1: + beqz $t1, BF_CLOSE_972059b874ce5ef469e931bb_1 +BF_OPEN_972059b874ce5ef469e931bb_1: addi $t0, $t0, 1 lb $t1, ($t0) addi $t1, $t1, 1 @@ -95,8 +98,8 @@ BF_OPEN_2_1: subi $t1, $t1, 1 sb $t1, ($t0) lb $t1, ($t0) - bnez $t1, BF_OPEN_2_1 -BF_CLOSE_2_1: + bnez $t1, BF_OPEN_972059b874ce5ef469e931bb_1 +BF_CLOSE_972059b874ce5ef469e931bb_1: addi $t0, $t0, 1 lb $t1, ($t0) addi $t1, $t1, 1 @@ -115,19 +118,19 @@ BF_CLOSE_2_1: addi $t1, $t1, 1 sb $t1, ($t0) lb $t1, ($t0) - beqz $t1, BF_CLOSE_3_1 -BF_OPEN_3_1: + beqz $t1, BF_CLOSE_4f2d57da98e6bb1619411ff9_1 +BF_OPEN_4f2d57da98e6bb1619411ff9_1: subi $t0, $t0, 1 lb $t1, ($t0) - bnez $t1, BF_OPEN_3_1 -BF_CLOSE_3_1: + bnez $t1, BF_OPEN_4f2d57da98e6bb1619411ff9_1 +BF_CLOSE_4f2d57da98e6bb1619411ff9_1: subi $t0, $t0, 1 lb $t1, ($t0) subi $t1, $t1, 1 sb $t1, ($t0) lb $t1, ($t0) - bnez $t1, BF_OPEN_1_0 -BF_CLOSE_1_0: + bnez $t1, BF_OPEN_03e0a731165036472c4dae07_0 +BF_CLOSE_03e0a731165036472c4dae07_0: addi $t0, $t0, 1 addi $t0, $t0, 1 li $v0, 11 diff --git a/mips2.asm b/mips2.asm index c15b3d6..24e4f59 100644 --- a/mips2.asm +++ b/mips2.asm @@ -1,96 +1,48 @@ + .data - data_address: .word 0 - data_pointer: .word 0 - newline: .asciiz "\n" + data_pointer: .word 0 .text setup: li $v0, 9 li $a0, 30000 syscall - la $t1, data_address + la $t1, data_pointer sw $v0, ($t1) # t0 - current address (data_pointer) - # t1 - - # t2 - temp move $t0, $v0 bf: lb $t1, ($t0) - addi $t1, $t1, 1 - sb $t1, ($t0) - lb $t1, ($t0) - addi $t1, $t1, 1 - sb $t1, ($t0) - lb $t1, ($t0) - addi $t1, $t1, 1 - sb $t1, ($t0) - lb $t1, ($t0) - addi $t1, $t1, 1 - sb $t1, ($t0) - lb $t1, ($t0) - addi $t1, $t1, 1 - sb $t1, ($t0) - lb $t1, ($t0) - addi $t1, $t1, 1 - sb $t1, ($t0) - lb $t1, ($t0) - addi $t1, $t1, 1 + addi $t1, $t1, 8 sb $t1, ($t0) lb $t1, ($t0) beqz $t1, BF_CLOSE_1 BF_OPEN_1: addi $t0, $t0, 1 lb $t1, ($t0) - addi $t1, $t1, 1 - sb $t1, ($t0) - lb $t1, ($t0) - addi $t1, $t1, 1 - sb $t1, ($t0) - lb $t1, ($t0) - addi $t1, $t1, 1 - sb $t1, ($t0) - lb $t1, ($t0) - addi $t1, $t1, 1 + addi $t1, $t1, 4 sb $t1, ($t0) lb $t1, ($t0) beqz $t1, BF_CLOSE_2 BF_OPEN_2: addi $t0, $t0, 1 lb $t1, ($t0) - addi $t1, $t1, 1 + addi $t1, $t1, 2 sb $t1, ($t0) + addi $t0, $t0, 1 lb $t1, ($t0) - addi $t1, $t1, 1 + addi $t1, $t1, 3 + sb $t1, ($t0) + addi $t0, $t0, 1 + lb $t1, ($t0) + addi $t1, $t1, 3 sb $t1, ($t0) addi $t0, $t0, 1 lb $t1, ($t0) addi $t1, $t1, 1 sb $t1, ($t0) - lb $t1, ($t0) - addi $t1, $t1, 1 - sb $t1, ($t0) - lb $t1, ($t0) - addi $t1, $t1, 1 - sb $t1, ($t0) - addi $t0, $t0, 1 - lb $t1, ($t0) - addi $t1, $t1, 1 - sb $t1, ($t0) - lb $t1, ($t0) - addi $t1, $t1, 1 - sb $t1, ($t0) - lb $t1, ($t0) - addi $t1, $t1, 1 - sb $t1, ($t0) - addi $t0, $t0, 1 - lb $t1, ($t0) - addi $t1, $t1, 1 - sb $t1, ($t0) - subi $t0, $t0, 1 - subi $t0, $t0, 1 - subi $t0, $t0, 1 - subi $t0, $t0, 1 + subi $t0, $t0, 4 lb $t1, ($t0) subi $t1, $t1, 1 sb $t1, ($t0) @@ -109,8 +61,7 @@ BF_CLOSE_2: lb $t1, ($t0) subi $t1, $t1, 1 sb $t1, ($t0) - addi $t0, $t0, 1 - addi $t0, $t0, 1 + addi $t0, $t0, 2 lb $t1, ($t0) addi $t1, $t1, 1 sb $t1, ($t0) @@ -128,65 +79,30 @@ BF_CLOSE_3: lb $t1, ($t0) bnez $t1, BF_OPEN_1 BF_CLOSE_1: - addi $t0, $t0, 1 - addi $t0, $t0, 1 + addi $t0, $t0, 2 li $v0, 11 lb $a0, ($t0) syscall addi $t0, $t0, 1 lb $t1, ($t0) - subi $t1, $t1, 1 - sb $t1, ($t0) - lb $t1, ($t0) - subi $t1, $t1, 1 - sb $t1, ($t0) - lb $t1, ($t0) - subi $t1, $t1, 1 + subi $t1, $t1, 3 sb $t1, ($t0) li $v0, 11 lb $a0, ($t0) syscall lb $t1, ($t0) - addi $t1, $t1, 1 - sb $t1, ($t0) - lb $t1, ($t0) - addi $t1, $t1, 1 - sb $t1, ($t0) - lb $t1, ($t0) - addi $t1, $t1, 1 - sb $t1, ($t0) - lb $t1, ($t0) - addi $t1, $t1, 1 - sb $t1, ($t0) - lb $t1, ($t0) - addi $t1, $t1, 1 - sb $t1, ($t0) - lb $t1, ($t0) - addi $t1, $t1, 1 - sb $t1, ($t0) - lb $t1, ($t0) - addi $t1, $t1, 1 + addi $t1, $t1, 7 sb $t1, ($t0) li $v0, 11 lb $a0, ($t0) syscall - li $v0, 11 - lb $a0, ($t0) - syscall lb $t1, ($t0) - addi $t1, $t1, 1 - sb $t1, ($t0) - lb $t1, ($t0) - addi $t1, $t1, 1 - sb $t1, ($t0) - lb $t1, ($t0) - addi $t1, $t1, 1 + addi $t1, $t1, 3 sb $t1, ($t0) li $v0, 11 lb $a0, ($t0) syscall - addi $t0, $t0, 1 - addi $t0, $t0, 1 + addi $t0, $t0, 2 li $v0, 11 lb $a0, ($t0) syscall @@ -202,67 +118,24 @@ BF_CLOSE_1: lb $a0, ($t0) syscall lb $t1, ($t0) - addi $t1, $t1, 1 - sb $t1, ($t0) - lb $t1, ($t0) - addi $t1, $t1, 1 - sb $t1, ($t0) - lb $t1, ($t0) - addi $t1, $t1, 1 + addi $t1, $t1, 3 sb $t1, ($t0) li $v0, 11 lb $a0, ($t0) syscall lb $t1, ($t0) - subi $t1, $t1, 1 - sb $t1, ($t0) - lb $t1, ($t0) - subi $t1, $t1, 1 - sb $t1, ($t0) - lb $t1, ($t0) - subi $t1, $t1, 1 - sb $t1, ($t0) - lb $t1, ($t0) - subi $t1, $t1, 1 - sb $t1, ($t0) - lb $t1, ($t0) - subi $t1, $t1, 1 - sb $t1, ($t0) - lb $t1, ($t0) - subi $t1, $t1, 1 + subi $t1, $t1, 6 sb $t1, ($t0) li $v0, 11 lb $a0, ($t0) syscall lb $t1, ($t0) - subi $t1, $t1, 1 - sb $t1, ($t0) - lb $t1, ($t0) - subi $t1, $t1, 1 - sb $t1, ($t0) - lb $t1, ($t0) - subi $t1, $t1, 1 - sb $t1, ($t0) - lb $t1, ($t0) - subi $t1, $t1, 1 - sb $t1, ($t0) - lb $t1, ($t0) - subi $t1, $t1, 1 - sb $t1, ($t0) - lb $t1, ($t0) - subi $t1, $t1, 1 - sb $t1, ($t0) - lb $t1, ($t0) - subi $t1, $t1, 1 - sb $t1, ($t0) - lb $t1, ($t0) - subi $t1, $t1, 1 + subi $t1, $t1, 8 sb $t1, ($t0) li $v0, 11 lb $a0, ($t0) syscall - addi $t0, $t0, 1 - addi $t0, $t0, 1 + addi $t0, $t0, 2 lb $t1, ($t0) addi $t1, $t1, 1 sb $t1, ($t0) @@ -271,11 +144,11 @@ BF_CLOSE_1: syscall addi $t0, $t0, 1 lb $t1, ($t0) - addi $t1, $t1, 1 - sb $t1, ($t0) - lb $t1, ($t0) - addi $t1, $t1, 1 + addi $t1, $t1, 2 sb $t1, ($t0) li $v0, 11 lb $a0, ($t0) syscall + li $v0, 11 + lb $a0, ($t0) + syscall diff --git a/src/bf_mips_codegen.cpp b/src/bf_mips_codegen.cpp index 6776bd0..5fe8567 100644 --- a/src/bf_mips_codegen.cpp +++ b/src/bf_mips_codegen.cpp @@ -6,24 +6,21 @@ #include #include -std::string preamble = ".data\n" - "\tdata_address: \t.word \t0\n" - "\tdata_pointer:\t.word\t0\n" - "\tnewline: .asciiz \"\\n\"\n" - ".text\n" - "setup:\n" - "\tli $v0, 9\n" - "\tli $a0, 30000\n" - "\tsyscall\n" - "\t\n" - "\tla $t1, data_address\n" - "\tsw $v0, ($t1)\n" - "\t\n" - "\t# t0 - current address (data_pointer)\n" - "\t# t1 -\n" - "\t# t2 - temp\n" - "\tmove $t0, $v0\n" - "bf:\n"; +std::string preamble = R"(.data + data_pointer: .word 0 +.text +setup: + li $v0, 9 + li $a0, 30000 + syscall + + la $t1, data_pointer + sw $v0, ($t1) + + # t0 - current address (data_pointer) + move $t0, $v0 +bf: +)"; void process_print(const std::vector& tokens, size_t index); @@ -33,12 +30,12 @@ void codegen(tokenizer& tokenizer, std::ostream& out) if (!tokenizer.hasNext()) throw std::runtime_error("You failed to provide valid BF code"); // skip past comments - if (tokenizer.next().type == bf_token::OPEN) - while (tokenizer.hasNext() && tokenizer.next().type != bf_token::CLOSE); + if (tokenizer.peek().type == bf_token::OPEN) + while (tokenizer.hasNext() && tokenizer.peek().type != bf_token::CLOSE) + tokenizer.advance(); tokenizer.print(); - size_t sp = 0; while (tokenizer.hasNext()) { auto& token = tokenizer.next(); @@ -48,19 +45,19 @@ void codegen(tokenizer& tokenizer, std::ostream& out) switch (token.type) { case bf_token::INC_DP: - out << "\taddi $t0, $t0, 1\n"; + out << "\taddi $t0, $t0, " << token.offset << "\n"; break; case bf_token::DEC_DP: - out << "\tsubi $t0, $t0, 1\n"; + out << "\tsubi $t0, $t0, " << token.offset << "\n"; break; case bf_token::INC_DV: out << "\tlb $t1, ($t0)\n" - << "\taddi $t1, $t1, 1\n" + << "\taddi $t1, $t1, " << static_cast(token.offset) << "\n" << "\tsb $t1, ($t0)\n"; break; case bf_token::DEC_DV: out << "\tlb $t1, ($t0)\n" - << "\tsubi $t1, $t1, 1\n" + << "\tsubi $t1, $t1, " << static_cast(token.offset) << "\n" << "\tsb $t1, ($t0)\n"; break; case bf_token::PRINT: @@ -77,10 +74,8 @@ void codegen(tokenizer& tokenizer, std::ostream& out) out << "\tlb $t1, ($t0)\n" << "\tbeqz $t1, BF_CLOSE_" << name << '\n' << "BF_OPEN_" << name << ":\n"; - sp++; break; case bf_token::CLOSE: - sp--; out << "\tlb $t1, ($t0)\n" << "\tbnez $t1, BF_OPEN_" << name << '\n' << "BF_CLOSE_" << name << ":\n"; diff --git a/src/bf_tokenizer.cpp b/src/bf_tokenizer.cpp index d160c1c..c016ecf 100644 --- a/src/bf_tokenizer.cpp +++ b/src/bf_tokenizer.cpp @@ -8,64 +8,90 @@ #include #include -class characterizer { +struct char_pair +{ + char type; + size_t count; +}; + +class characterizer +{ private: std::string program; size_t currentIndex = 0; public: - inline bool hasNext(){ + explicit characterizer(std::string program): program(std::move(program)) + {} + + inline bool hasNext() + { return currentIndex < program.size(); } - inline size_t advance(){ + inline size_t advance() + { return currentIndex++; } - inline char next(){ + inline char next() + { return program[advance()]; } - inline char peek(){ + inline char_pair fetch() + { + size_t start = currentIndex; + char type = next(); + + while (peek() == type) + advance(); + + return {type, currentIndex - start}; + } + + inline char peek() + { return program[currentIndex]; } }; void tokenizer::tokenize(const std::string& program) { - size_t index = 0; - while (index < program.size()) + characterizer tk{program}; + while (tk.hasNext()) { - auto c = program[index]; - switch (c) + auto dv = tk.fetch(); + bf_token type = bf_token::PRINT; + switch (dv.type) { case '>': - tokens.emplace_back(bf_token::INC_DP); + type = (bf_token::INC_DP); break; case '<': - tokens.emplace_back(bf_token::DEC_DP); + type = (bf_token::DEC_DP); break; case '+': - tokens.emplace_back(bf_token::INC_DV); + type = (bf_token::INC_DV); break; case '-': - tokens.emplace_back(bf_token::DEC_DV); + type = (bf_token::DEC_DV); break; case '.': - tokens.emplace_back(bf_token::PRINT); + type = (bf_token::PRINT); break; case ',': - tokens.emplace_back(bf_token::READ); + type = (bf_token::READ); break; case '[': - tokens.emplace_back(bf_token::OPEN); + type = (bf_token::OPEN); break; case ']': - tokens.emplace_back(bf_token::CLOSE); + type = (bf_token::CLOSE); break; default: break; } - index++; + tokens.emplace_back(type, dv.count); } } @@ -111,19 +137,19 @@ void tokenizer::print(size_t index) { case bf_token::INC_DP: tabulate(sp); - std::cout << "Increase DP\n"; + std::cout << "Increase DP " << token.offset << "\n"; break; case bf_token::DEC_DP: tabulate(sp); - std::cout << "Decrease DP\n"; + std::cout << "Decrease DP " << token.offset << "\n"; break; case bf_token::INC_DV: tabulate(sp); - std::cout << "Increase DV\n"; + std::cout << "Increase DV " << token.offset << "\n"; break; case bf_token::DEC_DV: tabulate(sp); - std::cout << "Decrease DV\n"; + std::cout << "Decrease DV " << token.offset << "\n"; break; case bf_token::PRINT: tabulate(sp);