minor work on parser
parent
0490f50e3c
commit
4ac592beca
|
@ -1,6 +1,6 @@
|
|||
cmake_minimum_required(VERSION 3.20)
|
||||
include(cmake/color.cmake)
|
||||
set(BLT_VERSION 5.1.0)
|
||||
set(BLT_VERSION 5.1.1)
|
||||
|
||||
set(BLT_TARGET BLT)
|
||||
|
||||
|
|
|
@ -37,6 +37,37 @@ namespace blt::logging
|
|||
PLUS
|
||||
};
|
||||
|
||||
enum class fmt_sign_t : u8
|
||||
{
|
||||
SPACE,
|
||||
PLUS,
|
||||
MINUS
|
||||
};
|
||||
|
||||
enum class fmt_type_t : u8
|
||||
{
|
||||
BINARY, // 'b'
|
||||
CHAR, // 'c'
|
||||
DECIMAL, // 'd'
|
||||
OCTAL, // 'o'
|
||||
HEX, // 'x'
|
||||
HEX_FLOAT, // 'a'
|
||||
EXPONENT, // 'e'
|
||||
FIXED_POINT, // 'f'
|
||||
GENERAL // 'g'
|
||||
};
|
||||
|
||||
struct fmt_spec_t
|
||||
{
|
||||
i64 arg_id = -1;
|
||||
i64 width = -1;
|
||||
i64 precision = -1;
|
||||
fmt_type_t type = fmt_type_t::DECIMAL;
|
||||
fmt_sign_t sign = fmt_sign_t::MINUS;
|
||||
bool leading_zeros = false;
|
||||
bool uppercase = false;
|
||||
};
|
||||
|
||||
struct fmt_token_t
|
||||
{
|
||||
fmt_token_type type;
|
||||
|
@ -46,19 +77,63 @@ namespace blt::logging
|
|||
class fmt_tokenizer_t
|
||||
{
|
||||
public:
|
||||
explicit fmt_tokenizer_t(const std::string_view fmt): m_fmt(fmt)
|
||||
{}
|
||||
explicit fmt_tokenizer_t() = default;
|
||||
|
||||
static fmt_token_type get_type(char c);
|
||||
|
||||
std::optional<fmt_token_t> next();
|
||||
|
||||
std::vector<fmt_token_t> tokenize();
|
||||
std::vector<fmt_token_t> tokenize(std::string_view fmt);
|
||||
|
||||
private:
|
||||
size_t pos = 0;
|
||||
size_t m_pos = 0;
|
||||
std::string_view m_fmt;
|
||||
};
|
||||
|
||||
|
||||
class fmt_parser_t
|
||||
{
|
||||
public:
|
||||
explicit fmt_parser_t() = default;
|
||||
|
||||
fmt_token_t& peek()
|
||||
{
|
||||
return m_tokens[m_pos];
|
||||
}
|
||||
|
||||
[[nodiscard]] bool has_next() const
|
||||
{
|
||||
return m_pos < m_tokens.size();
|
||||
}
|
||||
|
||||
[[nodiscard]] fmt_token_t& next()
|
||||
{
|
||||
return m_tokens[m_pos++];
|
||||
}
|
||||
|
||||
void consume()
|
||||
{
|
||||
++m_pos;
|
||||
}
|
||||
|
||||
const fmt_spec_t& parse(std::string_view fmt);
|
||||
|
||||
private:
|
||||
void parse_fmt_field();
|
||||
void parse_arg_id();
|
||||
void parse_fmt_spec_stage_1();
|
||||
void parse_fmt_spec_stage_2();
|
||||
void parse_fmt_spec_stage_3();
|
||||
void parse_sign();
|
||||
void parse_width();
|
||||
void parse_precision();
|
||||
void parse_type();
|
||||
|
||||
size_t m_pos = 0;
|
||||
std::vector<fmt_token_t> m_tokens;
|
||||
fmt_tokenizer_t m_tokenizer;
|
||||
fmt_spec_t m_spec;
|
||||
};
|
||||
}
|
||||
|
||||
#endif //BLT_LOGGING_FMT_TOKENIZER_H
|
||||
|
|
|
@ -1 +1 @@
|
|||
Subproject commit 93201da2ba5a6aba0a6e57ada64973555629b3e3
|
||||
Subproject commit 7ef2e733416953b222851f9a360d7fc72d068ee5
|
|
@ -15,6 +15,7 @@
|
|||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
#include <iostream>
|
||||
#include <blt/logging/fmt_tokenizer.h>
|
||||
|
||||
namespace blt::logging
|
||||
|
@ -51,31 +52,218 @@ namespace blt::logging
|
|||
|
||||
std::optional<fmt_token_t> fmt_tokenizer_t::next()
|
||||
{
|
||||
if (pos >= m_fmt.size())
|
||||
if (m_pos >= m_fmt.size())
|
||||
return {};
|
||||
switch (const auto base_type = get_type(m_fmt[pos]))
|
||||
switch (const auto base_type = get_type(m_fmt[m_pos]))
|
||||
{
|
||||
case fmt_token_type::SPACE:
|
||||
case fmt_token_type::PLUS:
|
||||
case fmt_token_type::MINUS:
|
||||
case fmt_token_type::DOT:
|
||||
case fmt_token_type::COLON:
|
||||
return fmt_token_t{base_type, std::string_view{m_fmt.data() + pos++, 1}};
|
||||
return fmt_token_t{base_type, std::string_view{m_fmt.data() + m_pos++, 1}};
|
||||
default:
|
||||
{
|
||||
const auto begin = pos;
|
||||
for (; pos < m_fmt.size() && get_type(m_fmt[pos]) == base_type; ++pos)
|
||||
{}
|
||||
return fmt_token_t{base_type, std::string_view{m_fmt.data() + begin, pos - begin}};
|
||||
const auto begin = m_pos;
|
||||
for (; m_pos < m_fmt.size() && get_type(m_fmt[m_pos]) == base_type; ++m_pos)
|
||||
{
|
||||
}
|
||||
return fmt_token_t{base_type, std::string_view{m_fmt.data() + begin, m_pos - begin}};
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<fmt_token_t> fmt_tokenizer_t::tokenize()
|
||||
std::vector<fmt_token_t> fmt_tokenizer_t::tokenize(const std::string_view fmt)
|
||||
{
|
||||
m_fmt = fmt;
|
||||
m_pos = 0;
|
||||
std::vector<fmt_token_t> tokens;
|
||||
while (auto token = next())
|
||||
tokens.push_back(*token);
|
||||
return tokens;
|
||||
}
|
||||
|
||||
const fmt_spec_t& fmt_parser_t::parse(const std::string_view fmt)
|
||||
{
|
||||
m_spec = {};
|
||||
m_pos = 0;
|
||||
m_tokens = m_tokenizer.tokenize(fmt);
|
||||
|
||||
parse_fmt_field();
|
||||
|
||||
return m_spec;
|
||||
}
|
||||
|
||||
void fmt_parser_t::parse_fmt_field()
|
||||
{
|
||||
if (!has_next())
|
||||
{
|
||||
std::cerr << "Expected token when parsing format field" << std::endl;
|
||||
std::exit(EXIT_FAILURE);
|
||||
}
|
||||
const auto [type, value] = peek();
|
||||
if (type == fmt_token_type::COLON)
|
||||
{
|
||||
consume();
|
||||
parse_fmt_spec_stage_1();
|
||||
}
|
||||
else if (type == fmt_token_type::NUMBER)
|
||||
{
|
||||
parse_arg_id();
|
||||
if (has_next())
|
||||
{
|
||||
if (peek().type == fmt_token_type::COLON)
|
||||
{
|
||||
consume();
|
||||
parse_fmt_spec_stage_1();
|
||||
}else
|
||||
{
|
||||
std::cerr << "Expected ':' when parsing format field after arg id!" << std::endl;
|
||||
std::exit(EXIT_FAILURE);
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
std::cerr << "Expected unknown token '" << static_cast<u8>(type) << "' value '" << value << "' when parsing format field" << std::endl;
|
||||
std::exit(EXIT_FAILURE);
|
||||
}
|
||||
if (has_next())
|
||||
parse_type();
|
||||
}
|
||||
|
||||
void fmt_parser_t::parse_arg_id()
|
||||
{
|
||||
if (!has_next())
|
||||
{
|
||||
std::cerr << "Missing token when parsing arg id" << std::endl;
|
||||
std::exit(EXIT_FAILURE);
|
||||
}
|
||||
const auto [type, value] = next();
|
||||
if (type != fmt_token_type::NUMBER)
|
||||
{
|
||||
std::cerr << "Expected number when parsing arg id, unexpected value '" << value << '\'' << std::endl;
|
||||
std::exit(EXIT_FAILURE);
|
||||
}
|
||||
m_spec.arg_id = std::stoll(std::string(value));
|
||||
}
|
||||
|
||||
// handle start of fmt, with sign
|
||||
void fmt_parser_t::parse_fmt_spec_stage_1()
|
||||
{
|
||||
if (!has_next())
|
||||
return;
|
||||
auto [type, value] = peek();
|
||||
switch (type)
|
||||
{
|
||||
case fmt_token_type::STRING:
|
||||
case fmt_token_type::COLON:
|
||||
std::cerr << "(Stage 1) Invalid token type " << static_cast<u8>(type) << " value " << value << std::endl;
|
||||
std::exit(EXIT_FAILURE);
|
||||
case fmt_token_type::NUMBER:
|
||||
parse_width();
|
||||
parse_fmt_spec_stage_3();
|
||||
break;
|
||||
case fmt_token_type::DOT:
|
||||
consume();
|
||||
parse_precision();
|
||||
break;
|
||||
case fmt_token_type::SPACE:
|
||||
case fmt_token_type::MINUS:
|
||||
case fmt_token_type::PLUS:
|
||||
parse_sign();
|
||||
parse_fmt_spec_stage_2();
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// handle width parsing
|
||||
void fmt_parser_t::parse_fmt_spec_stage_2()
|
||||
{
|
||||
if (!has_next())
|
||||
return;
|
||||
auto [type, value] = peek();
|
||||
switch (type)
|
||||
{
|
||||
case fmt_token_type::STRING:
|
||||
case fmt_token_type::COLON:
|
||||
case fmt_token_type::MINUS:
|
||||
case fmt_token_type::PLUS:
|
||||
case fmt_token_type::SPACE:
|
||||
std::cerr << "(Stage 2) Invalid token type " << static_cast<u8>(type) << " value " << value << std::endl;
|
||||
std::exit(EXIT_FAILURE);
|
||||
case fmt_token_type::NUMBER:
|
||||
parse_width();
|
||||
parse_fmt_spec_stage_3();
|
||||
break;
|
||||
case fmt_token_type::DOT:
|
||||
consume();
|
||||
parse_precision();
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void fmt_parser_t::parse_fmt_spec_stage_3()
|
||||
{
|
||||
if (!has_next())
|
||||
return;
|
||||
auto [type, value] = peek();
|
||||
switch (type)
|
||||
{
|
||||
case fmt_token_type::STRING:
|
||||
case fmt_token_type::COLON:
|
||||
case fmt_token_type::MINUS:
|
||||
case fmt_token_type::PLUS:
|
||||
case fmt_token_type::SPACE:
|
||||
case fmt_token_type::NUMBER:
|
||||
std::cerr << "(Stage 3) Invalid token type " << static_cast<u8>(type) << " value " << value << std::endl;
|
||||
std::exit(EXIT_FAILURE);
|
||||
case fmt_token_type::DOT:
|
||||
consume();
|
||||
parse_precision();
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void fmt_parser_t::parse_sign()
|
||||
{
|
||||
auto [_, value] = next();
|
||||
if (value.size() > 1)
|
||||
{
|
||||
std::cerr << "Sign contains more than one character, we are not sure how to interpret this. Value '" << value << "'\n";
|
||||
std::exit(EXIT_FAILURE);
|
||||
}
|
||||
switch (value[0])
|
||||
{
|
||||
case '+':
|
||||
m_spec.sign = fmt_sign_t::PLUS;
|
||||
break;
|
||||
case '-':
|
||||
m_spec.sign = fmt_sign_t::MINUS;
|
||||
break;
|
||||
case ' ':
|
||||
m_spec.sign = fmt_sign_t::SPACE;
|
||||
break;
|
||||
default:
|
||||
std::cerr << "Invalid sign " << value[0] << std::endl;
|
||||
std::exit(EXIT_FAILURE);
|
||||
}
|
||||
}
|
||||
|
||||
void fmt_parser_t::parse_width()
|
||||
{
|
||||
auto [_, value] = next();
|
||||
if (value.front() == '0')
|
||||
m_spec.leading_zeros = true;
|
||||
m_spec.width = std::stoll(std::string(value));
|
||||
}
|
||||
|
||||
void fmt_parser_t::parse_precision()
|
||||
{
|
||||
|
||||
}
|
||||
|
||||
void fmt_parser_t::parse_type()
|
||||
{
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue