minor work on parser

main
Brett 2025-03-03 15:44:42 -05:00
parent 0490f50e3c
commit 4ac592beca
4 changed files with 349 additions and 86 deletions

View File

@ -1,6 +1,6 @@
cmake_minimum_required(VERSION 3.20) cmake_minimum_required(VERSION 3.20)
include(cmake/color.cmake) include(cmake/color.cmake)
set(BLT_VERSION 5.1.0) set(BLT_VERSION 5.1.1)
set(BLT_TARGET BLT) set(BLT_TARGET BLT)

View File

@ -26,39 +26,114 @@
namespace blt::logging namespace blt::logging
{ {
// Token categories a format string is split into; fmt_tokenizer_t::get_type maps a single character to one of these.
enum class fmt_token_type : u8 enum class fmt_token_type : u8
{ {
STRING, STRING,
NUMBER, NUMBER,
SPACE, SPACE,
COLON, COLON,
DOT, DOT,
MINUS, MINUS,
PLUS PLUS
}; };
struct fmt_token_t enum class fmt_sign_t : u8
{ {
fmt_token_type type; SPACE,
std::string_view value; PLUS,
}; MINUS
};
class fmt_tokenizer_t enum class fmt_type_t : u8
{ {
public: BINARY, // 'b'
explicit fmt_tokenizer_t(const std::string_view fmt): m_fmt(fmt) CHAR, // 'c'
{} DECIMAL, // 'd'
OCTAL, // 'o'
HEX, // 'x'
HEX_FLOAT, // 'a'
EXPONENT, // 'e'
FIXED_POINT, // 'f'
GENERAL // 'g'
};
static fmt_token_type get_type(char c); struct fmt_spec_t
{
i64 arg_id = -1;
i64 width = -1;
i64 precision = -1;
fmt_type_t type = fmt_type_t::DECIMAL;
fmt_sign_t sign = fmt_sign_t::MINUS;
bool leading_zeros = false;
bool uppercase = false;
};
std::optional<fmt_token_t> next(); struct fmt_token_t
{
fmt_token_type type;
std::string_view value;
};
std::vector<fmt_token_t> tokenize(); class fmt_tokenizer_t
{
public:
explicit fmt_tokenizer_t() = default;
private: static fmt_token_type get_type(char c);
size_t pos = 0;
std::string_view m_fmt; std::optional<fmt_token_t> next();
};
std::vector<fmt_token_t> tokenize(std::string_view fmt);
private:
size_t m_pos = 0;
std::string_view m_fmt;
};
/**
 * Parser that walks the tokens of one format field and fills in a fmt_spec_t.
 *
 * The cursor helpers peek(), next() and consume() perform no bounds checking;
 * test has_next() before calling them.
 */
class fmt_parser_t
{
public:
    explicit fmt_parser_t() = default;

    /** Token under the cursor; the cursor is not moved. */
    fmt_token_t& peek()
    {
        fmt_token_t& current = m_tokens[m_pos];
        return current;
    }

    /** True while the cursor has not run past the last token. */
    [[nodiscard]] bool has_next() const
    {
        return m_tokens.size() > m_pos;
    }

    /** Token under the cursor; the cursor then moves one token forward. */
    [[nodiscard]] fmt_token_t& next()
    {
        fmt_token_t& current = m_tokens[m_pos];
        ++m_pos;
        return current;
    }

    /** Moves the cursor one token forward without returning anything. */
    void consume()
    {
        m_pos += 1;
    }

    /** Parses `fmt` and returns a reference to the parser-owned result. */
    const fmt_spec_t& parse(std::string_view fmt);

private:
    // Entry point for one field, followed by the optional argument index.
    void parse_fmt_field();
    void parse_arg_id();

    // The spec after ':' is handled in successive stages.
    void parse_fmt_spec_stage_1();
    void parse_fmt_spec_stage_2();
    void parse_fmt_spec_stage_3();

    // Individual components of the spec.
    void parse_sign();
    void parse_width();
    void parse_precision();
    void parse_type();

    size_t m_pos = 0;                  // index of the current token in m_tokens
    std::vector<fmt_token_t> m_tokens; // tokens of the field being parsed
    fmt_tokenizer_t m_tokenizer;
    fmt_spec_t m_spec;                 // result returned by parse()
};
} }
#endif //BLT_LOGGING_FMT_TOKENIZER_H #endif //BLT_LOGGING_FMT_TOKENIZER_H

@ -1 +1 @@
Subproject commit 93201da2ba5a6aba0a6e57ada64973555629b3e3 Subproject commit 7ef2e733416953b222851f9a360d7fc72d068ee5

View File

@ -15,67 +15,255 @@
* You should have received a copy of the GNU General Public License * You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>. * along with this program. If not, see <https://www.gnu.org/licenses/>.
*/ */
#include <iostream>
#include <blt/logging/fmt_tokenizer.h> #include <blt/logging/fmt_tokenizer.h>
namespace blt::logging namespace blt::logging
{ {
// Classifies one character: '0'-'9' -> NUMBER, '+' -> PLUS, '-' -> MINUS, '.' -> DOT, ':' -> COLON,
// ' ' -> SPACE, and every other character -> STRING.
fmt_token_type fmt_tokenizer_t::get_type(const char c) fmt_token_type fmt_tokenizer_t::get_type(const char c)
{ {
switch (c) switch (c)
{ {
case '0': case '0':
case '1': case '1':
case '2': case '2':
case '3': case '3':
case '4': case '4':
case '5': case '5':
case '6': case '6':
case '7': case '7':
case '8': case '8':
case '9': case '9':
return fmt_token_type::NUMBER; return fmt_token_type::NUMBER;
case '+': case '+':
return fmt_token_type::PLUS; return fmt_token_type::PLUS;
case '-': case '-':
return fmt_token_type::MINUS; return fmt_token_type::MINUS;
case '.': case '.':
return fmt_token_type::DOT; return fmt_token_type::DOT;
case ':': case ':':
return fmt_token_type::COLON; return fmt_token_type::COLON;
case ' ': case ' ':
return fmt_token_type::SPACE; return fmt_token_type::SPACE;
default: default:
return fmt_token_type::STRING; return fmt_token_type::STRING;
} }
} }
// Produces the next token of m_fmt, or an empty optional once the position has reached the end.
// SPACE, PLUS, MINUS, DOT and COLON always yield a one-character token; any other type (NUMBER, STRING)
// yields the whole run of consecutive characters that share that type.
std::optional<fmt_token_t> fmt_tokenizer_t::next() std::optional<fmt_token_t> fmt_tokenizer_t::next()
{ {
if (pos >= m_fmt.size()) if (m_pos >= m_fmt.size())
return {}; return {};
switch (const auto base_type = get_type(m_fmt[pos])) switch (const auto base_type = get_type(m_fmt[m_pos]))
{ {
case fmt_token_type::SPACE: case fmt_token_type::SPACE:
case fmt_token_type::PLUS: case fmt_token_type::PLUS:
case fmt_token_type::MINUS: case fmt_token_type::MINUS:
case fmt_token_type::DOT: case fmt_token_type::DOT:
case fmt_token_type::COLON: case fmt_token_type::COLON:
return fmt_token_t{base_type, std::string_view{m_fmt.data() + pos++, 1}}; return fmt_token_t{base_type, std::string_view{m_fmt.data() + m_pos++, 1}};
default: default:
{ {
const auto begin = pos; const auto begin = m_pos;
for (; pos < m_fmt.size() && get_type(m_fmt[pos]) == base_type; ++pos) for (; m_pos < m_fmt.size() && get_type(m_fmt[m_pos]) == base_type; ++m_pos)
{} {
return fmt_token_t{base_type, std::string_view{m_fmt.data() + begin, pos - begin}}; }
} return fmt_token_t{base_type, std::string_view{m_fmt.data() + begin, m_pos - begin}};
} }
} }
}
// Collects tokens by calling next() until it returns an empty optional, and returns them in order.
// The returned tokens hold string_views, so they refer to the format string rather than owning text.
std::vector<fmt_token_t> fmt_tokenizer_t::tokenize() std::vector<fmt_token_t> fmt_tokenizer_t::tokenize(const std::string_view fmt)
{ {
std::vector<fmt_token_t> tokens; m_fmt = fmt;
while (auto token = next()) m_pos = 0;
tokens.push_back(*token); std::vector<fmt_token_t> tokens;
return tokens; while (auto token = next())
} tokens.push_back(*token);
return tokens;
}
/**
 * Parses one format field and returns the resulting specification.
 *
 * Every call starts from a clean state: the token list is rebuilt from `fmt`,
 * the cursor is rewound and the previous result is replaced by a default
 * fmt_spec_t. The returned reference points at a member of this parser, so it
 * is overwritten by the next call to parse().
 *
 * @param fmt text of the format field to parse
 * @return reference to the parser-owned specification
 */
const fmt_spec_t& fmt_parser_t::parse(const std::string_view fmt)
{
    m_tokens = m_tokenizer.tokenize(fmt);
    m_pos = 0;
    m_spec = fmt_spec_t{};

    parse_fmt_field();
    return m_spec;
}
/**
 * Parses a whole format field: an optional argument index, then an optional
 * ':' followed by the format spec, then (if tokens remain) the type.
 *
 * Accepted openings are a COLON (spec only) or a NUMBER (argument index,
 * optionally followed by ':' and a spec). Any other first token, an empty
 * token list, or a non-':' token directly after the argument index is
 * reported on stderr and terminates the process with EXIT_FAILURE.
 */
void fmt_parser_t::parse_fmt_field()
{
    if (!has_next())
    {
        std::cerr << "Expected token when parsing format field" << std::endl;
        std::exit(EXIT_FAILURE);
    }
    const auto [type, value] = peek();
    if (type == fmt_token_type::COLON)
    {
        consume();
        parse_fmt_spec_stage_1();
    }
    else if (type == fmt_token_type::NUMBER)
    {
        parse_arg_id();
        // An argument index on its own is valid; a spec is only parsed when something follows it.
        if (has_next())
        {
            if (peek().type == fmt_token_type::COLON)
            {
                consume();
                parse_fmt_spec_stage_1();
            }
            else
            {
                std::cerr << "Expected ':' when parsing format field after arg id!" << std::endl;
                std::exit(EXIT_FAILURE);
            }
        }
    }
    else
    {
        // Cast to int so the numeric token id is printed. The enum's underlying type is u8, which is
        // presumably an 8-bit character type and would be streamed as a raw character (confirm u8's definition).
        std::cerr << "Unexpected token '" << static_cast<int>(type) << "' value '" << value << "' when parsing format field" << std::endl;
        std::exit(EXIT_FAILURE);
    }
    // Whatever the spec stages left unconsumed is handed to the type parser.
    if (has_next())
        parse_type();
}
void fmt_parser_t::parse_arg_id()
{
if (!has_next())
{
std::cerr << "Missing token when parsing arg id" << std::endl;
std::exit(EXIT_FAILURE);
}
const auto [type, value] = next();
if (type != fmt_token_type::NUMBER)
{
std::cerr << "Expected number when parsing arg id, unexpected value '" << value << '\'' << std::endl;
std::exit(EXIT_FAILURE);
}
m_spec.arg_id = std::stoll(std::string(value));
}
/**
 * Stage 1 of the format spec: the first token after ':', where a sign may appear.
 *
 * - SPACE / MINUS / PLUS: parsed as the sign, then stage 2 continues.
 * - NUMBER: parsed as the width, then stage 3 continues.
 * - DOT: consumed, then the precision is parsed.
 * - STRING / COLON: reported on stderr and the process exits with EXIT_FAILURE.
 *
 * Returns immediately when no tokens are left.
 *
 * NOTE(review): a STRING token is rejected here even though parse_fmt_field()
 * calls parse_type() on leftover tokens afterwards. Confirm whether STRING
 * should be left unconsumed instead.
 */
void fmt_parser_t::parse_fmt_spec_stage_1()
{
    if (!has_next())
        return;
    const auto [type, value] = peek();
    switch (type)
    {
        case fmt_token_type::STRING:
        case fmt_token_type::COLON:
            // Cast to int so the numeric token id is printed. The enum's underlying type is u8, which is
            // presumably an 8-bit character type and would be streamed as a raw character (confirm u8's definition).
            std::cerr << "(Stage 1) Invalid token type " << static_cast<int>(type) << " value " << value << std::endl;
            std::exit(EXIT_FAILURE);
        case fmt_token_type::NUMBER:
            parse_width();
            parse_fmt_spec_stage_3();
            break;
        case fmt_token_type::DOT:
            consume();
            parse_precision();
            break;
        case fmt_token_type::SPACE:
        case fmt_token_type::MINUS:
        case fmt_token_type::PLUS:
            parse_sign();
            parse_fmt_spec_stage_2();
            break;
    }
}
/**
 * Stage 2 of the format spec: the token after the sign, where the width may appear.
 *
 * - NUMBER: parsed as the width, then stage 3 continues.
 * - DOT: consumed, then the precision is parsed.
 * - STRING / COLON / MINUS / PLUS / SPACE: reported on stderr and the process
 *   exits with EXIT_FAILURE (a second sign is not accepted).
 *
 * Returns immediately when no tokens are left.
 *
 * NOTE(review): a STRING token is rejected here even though parse_fmt_field()
 * calls parse_type() on leftover tokens afterwards. Confirm whether STRING
 * should be left unconsumed instead.
 */
void fmt_parser_t::parse_fmt_spec_stage_2()
{
    if (!has_next())
        return;
    const auto [type, value] = peek();
    switch (type)
    {
        case fmt_token_type::STRING:
        case fmt_token_type::COLON:
        case fmt_token_type::MINUS:
        case fmt_token_type::PLUS:
        case fmt_token_type::SPACE:
            // Cast to int so the numeric token id is printed. The enum's underlying type is u8, which is
            // presumably an 8-bit character type and would be streamed as a raw character (confirm u8's definition).
            std::cerr << "(Stage 2) Invalid token type " << static_cast<int>(type) << " value " << value << std::endl;
            std::exit(EXIT_FAILURE);
        case fmt_token_type::NUMBER:
            parse_width();
            parse_fmt_spec_stage_3();
            break;
        case fmt_token_type::DOT:
            consume();
            parse_precision();
            break;
    }
}
/**
 * Stage 3 of the format spec: the token after the width, where only a
 * precision may follow.
 *
 * - DOT: consumed, then the precision is parsed.
 * - Every other token type: reported on stderr and the process exits with EXIT_FAILURE.
 *
 * Returns immediately when no tokens are left.
 *
 * NOTE(review): a STRING token is rejected here even though parse_fmt_field()
 * calls parse_type() on leftover tokens afterwards. Confirm whether STRING
 * should be left unconsumed instead.
 */
void fmt_parser_t::parse_fmt_spec_stage_3()
{
    if (!has_next())
        return;
    const auto [type, value] = peek();
    switch (type)
    {
        case fmt_token_type::STRING:
        case fmt_token_type::COLON:
        case fmt_token_type::MINUS:
        case fmt_token_type::PLUS:
        case fmt_token_type::SPACE:
        case fmt_token_type::NUMBER:
            // Cast to int so the numeric token id is printed. The enum's underlying type is u8, which is
            // presumably an 8-bit character type and would be streamed as a raw character (confirm u8's definition).
            std::cerr << "(Stage 3) Invalid token type " << static_cast<int>(type) << " value " << value << std::endl;
            std::exit(EXIT_FAILURE);
        case fmt_token_type::DOT:
            consume();
            parse_precision();
            break;
    }
}
/**
 * Consumes the current token and records it as the sign in m_spec.sign.
 *
 * '+' maps to PLUS, '-' to MINUS and ' ' to SPACE. A token longer than one
 * character, or any other character, is reported on stderr and terminates the
 * process with EXIT_FAILURE. The caller must ensure that a token is available.
 */
void fmt_parser_t::parse_sign()
{
    const std::string_view text = next().value;
    if (text.size() > 1)
    {
        std::cerr << "Sign contains more than one character, we are not sure how to interpret this. Value '" << text << "'\n";
        std::exit(EXIT_FAILURE);
    }

    const char symbol = text[0];
    if (symbol == '+')
    {
        m_spec.sign = fmt_sign_t::PLUS;
    }
    else if (symbol == '-')
    {
        m_spec.sign = fmt_sign_t::MINUS;
    }
    else if (symbol == ' ')
    {
        m_spec.sign = fmt_sign_t::SPACE;
    }
    else
    {
        std::cerr << "Invalid sign " << symbol << std::endl;
        std::exit(EXIT_FAILURE);
    }
}
/**
 * Consumes the current token and stores its numeric value in m_spec.width.
 *
 * A token that starts with '0' additionally switches on m_spec.leading_zeros;
 * the flag is never cleared here. The caller must ensure that a non-empty
 * token is available. The conversion uses std::stoll, which throws
 * std::out_of_range if the digits do not fit into a long long.
 */
void fmt_parser_t::parse_width()
{
    const std::string_view digits = next().value;

    const bool zero_padded = digits[0] == '0';
    if (zero_padded)
    {
        m_spec.leading_zeros = true;
    }

    // std::stoll has no string_view overload, so the digits are copied into a std::string first.
    const std::string owned(digits);
    m_spec.width = std::stoll(owned);
}
// Placeholder for precision parsing, invoked by the spec stages after a '.' has been consumed.
// The body is empty: no token is consumed and m_spec.precision keeps its current value.
void fmt_parser_t::parse_precision()
{
}
// Placeholder for type parsing, invoked at the end of parse_fmt_field() when tokens remain.
// The body is empty: no token is consumed and m_spec.type / m_spec.uppercase keep their current values.
void fmt_parser_t::parse_type()
{
}
} }