Upgrade StringLexer with better design principles.

Strings are now processed in a much clearer, more intuitive way
main
Brett 2023-08-20 20:24:11 -04:00
parent 22a89031c3
commit dd96940c67
13 changed files with 242 additions and 144 deletions

Binary file not shown.

View File

@ -1,2 +1 @@
l5yQfzNDLXuq6Ic1 1692481044
7IgWbfRRG3liKhkP 1692489339
aubTl45vzPn5feHw 1692660008

View File

@ -1 +0,0 @@
{"clientID":"50a21c33-66c4-5a0f-902f-9434632025e6","clientToken":"TF/rwm67DntB0hrdGiPpYRPFvnZ786r8nrZ4+WQ6wUang4xbqNaZ0AUpXKcHeswaC+IwR0891JZtXP+4XcHsQA=="}

View File

@ -0,0 +1 @@
{"clientID":"50a21c33-66c4-5a0f-902f-9434632025e6","clientToken":"Tcl8i/S1Czz+UGS6NzeRu/Hyk66oJjYbsRsm3tPqd/AVt2yAVbFEEi/oGdaoIlTriQf5TX7heYPxqdcGMmLRVg=="}

View File

@ -1 +0,0 @@
{"clientID":"50a21c33-66c4-5a0f-902f-9434632025e6","clientToken":"qJNyHpcA1b1EY6/so2rH3FhBilMKDDnCJSliaeytHMh4V0kj2yy4Mb1UEO7dW/uYdgfNWn73dwCrioovCe1NHg=="}

View File

@ -3,6 +3,12 @@
<head>
<meta charset="UTF-8">
<link rel="stylesheet" href="/static/css/home.css">
<link rel="stylesheet" href="/static/css/bar.css">
<!-- scripts are loaded with <script>; <link rel="javascript"> is not a recognised link type -->
<script src="https://cdnjs.cloudflare.com/ajax/libs/jquery/2.1.3/jquery.min.js"></script>
<meta name="description" content="">
<meta name="keywords" content="">
<meta name="author" content="Brett">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>{{$SITE_TITLE}}</title>
</head>
<body class="blur-bgimage" style="background-image: url({{$SITE_BACKGROUND}})">
@ -14,6 +20,9 @@
</div>
<div class="center">
HAXsdsad
{{#_admin}}
Admin detected
{{/_admin}}
</div>
</div>
</div>

View File

@ -39,6 +39,7 @@ namespace cs {
* @return memory usage of the pages cache in bytes
*/
uint64_t calculateMemoryUsage();
/**
 * Expands the "{{@name}}" link templates found in the site data of the given page.
 * A link whose name ends in ".css", ".js" or ".part" is replaced by the result of fetch() for that file;
 * links with any other ending are removed from the output. The page's site data is overwritten with the result.
 * @param file path of the page being resolved, used to report a page that links to itself
 * @param page page whose site data is rewritten
 */
void resolveLinks(const std::string& file, HTMLPage& page);
void loadPage(const std::string& path);
/**
* Prunes the cache starting with the oldest pages we have loaded. (in bytes)

View File

@ -8,9 +8,56 @@
#include <memory>
#include <string>
#include <crowsite/config.h>
#include <utility>
namespace cs {
/**
 * Forward-only character cursor over a string, with helpers for recognising the
 * "{{<c>" ... "}}" template delimiters used inside page sources.
 *
 * The lexer stores its own copy of the text (the constructor takes the string by value and
 * moves it in), so it does not depend on the lifetime of the string it was created from.
 */
struct StringLexer
{
    private:
        std::string str;
        size_t index = 0;
    public:
        explicit StringLexer(std::string str): str(std::move(str))
        {}
        
        /**
         * @return true while at least one unread character remains
         */
        inline bool hasNext() const
        {
            return index < str.size();
        }
        
        /**
         * Checks, without consuming anything, whether the unread input starts with "{{" followed by c.
         * @param c character that must directly follow the two opening braces
         * @return true if all three characters are present at the current position
         */
        inline bool hasTemplatePrefix(char c) const
        {
            // three characters are required, so index + 2 must still be a valid position
            if (index + 2 >= str.size())
                return false;
            return str[index] == '{' && str[index + 1] == '{' && str[index + 2] == c;
        }
        
        /**
         * Checks, without consuming anything, whether the unread input starts with "}}".
         * @return true if both closing braces are present at the current position
         */
        inline bool hasTemplateSuffix() const
        {
            if (index + 1 >= str.size())
                return false;
            return str[index] == '}' && str[index + 1] == '}';
        }
        
        /**
         * Skips the three characters of a template prefix.
         * Nothing is verified here; call hasTemplatePrefix() first.
         */
        inline void consumeTemplatePrefix()
        {
            index += 3;
        }
        
        /**
         * Skips the two characters of a template suffix.
         * Nothing is verified here; call hasTemplateSuffix() first.
         */
        inline void consumeTemplateSuffix()
        {
            index += 2;
        }
        
        /**
         * Returns the current character and advances past it.
         * No bounds check is performed; call hasNext() first.
         */
        inline char consume()
        {
            return str[index++];
        }
};
class StaticContext {
private:
HASHMAP<std::string, std::string> replacements;
@ -46,7 +93,7 @@ namespace cs {
*/
std::string render(StaticContext& context);
inline std::string const& getRawSite() {
inline std::string& getRawSite() {
return m_SiteData;
}
};

View File

@ -8,23 +8,28 @@
#include <algorithm>
#include <blt/std/time.h>
namespace cs {
namespace cs
{
/**
 * Converts a duration given in nanoseconds into seconds.
 * @param v duration in nanoseconds
 * @return the same duration in (fractional) seconds
 */
double toSeconds(uint64_t v)
{
    return static_cast<double>(v) / 1000000000.0;
}
/**
 * Creates a cache engine that renders pages against the given static context.
 * @param context static context stored in m_Context
 * @param settings cache settings stored in m_Settings
 */
CacheEngine::CacheEngine(StaticContext& context, const CacheSettings& settings): m_Context(context),
                                                                                 m_Settings(settings)
{}
/**
 * Calculates how many bytes of character data a single cache entry holds.
 * Only the lengths of the key, the page's site data and the rendered page are counted.
 * @param path key the page is stored under
 * @param value cached entry for that key
 * @return combined size of the three strings in bytes
 */
uint64_t CacheEngine::calculateMemoryUsage(const std::string& path, const CacheEngine::CacheValue& value)
{
    uint64_t pageContentSize = path.size() * sizeof(char);
    pageContentSize += value.page->getRawSite().size() * sizeof(char);
    pageContentSize += value.renderedPage.size() * sizeof(char);
    return pageContentSize;
}
uint64_t CacheEngine::calculateMemoryUsage() {
uint64_t CacheEngine::calculateMemoryUsage()
{
auto pagesBaseSize = m_Pages.size() * sizeof(CacheValue);
uint64_t pageContentSizes = 0;
@ -34,24 +39,30 @@ namespace cs {
return pagesBaseSize + pageContentSizes;
}
const std::string& CacheEngine::fetch(const std::string& path) {
const std::string& CacheEngine::fetch(const std::string& path)
{
bool load = false;
auto find = m_Pages.find(path);
if (find == m_Pages.end()){
if (find == m_Pages.end())
{
BLT_DEBUG("Page '%s' was not found in cache, loading now!", path.c_str());
load = true;
} else {
} else
{
auto lastWrite = std::filesystem::last_write_time(cs::fs::createWebFilePath(path));
if (lastWrite != m_Pages[path].lastModified) {
if (lastWrite != m_Pages[path].lastModified)
{
load = true;
BLT_DEBUG("Page '%s' has been modified! Reloading now!", path.c_str());
}
}
if (load) {
if (load)
{
auto memory = calculateMemoryUsage();
if (memory > m_Settings.hardMaxMemory) {
if (memory > m_Settings.hardMaxMemory)
{
BLT_WARN("Hard memory limit was reached! Pruning to soft limit now!");
prune(
m_Settings.hardMaxMemory - m_Settings.softMaxMemory
@ -59,7 +70,8 @@ namespace cs {
);
}
if (memory > m_Settings.softMaxMemory) {
if (memory > m_Settings.softMaxMemory)
{
auto amount = std::min(m_Settings.softPruneAmount, memory - m_Settings.softMaxMemory);
BLT_INFO("Soft memory limit was reached! Pruning %d bytes of memory", amount);
prune(amount);
@ -73,11 +85,13 @@ namespace cs {
return m_Pages[path].renderedPage;
}
void CacheEngine::loadPage(const std::string& path) {
void CacheEngine::loadPage(const std::string& path)
{
auto start = blt::system::getCurrentTimeNanoseconds();
auto fullPath = cs::fs::createWebFilePath(path);
auto page = HTMLPage::load(fullPath);
resolveLinks(path, *page);
auto renderedPage = page->render(m_Context);
m_Pages[path] = CacheValue{
blt::system::getCurrentTimeNanoseconds(),
@ -90,8 +104,10 @@ namespace cs {
BLT_INFO("Loaded page %s in %fms", path.c_str(), (end - start) / 1000000.0);
}
void CacheEngine::prune(uint64_t amount) {
struct CacheSorting_t {
void CacheEngine::prune(uint64_t amount)
{
struct CacheSorting_t
{
uint64_t memoryUsage;
std::string key;
};
@ -100,15 +116,19 @@ namespace cs {
for (auto& page : m_Pages)
cachedPages.emplace_back(calculateMemoryUsage(page.first, page.second), page.first);
std::sort(cachedPages.begin(), cachedPages.end(), [&](const CacheSorting_t& i1, const CacheSorting_t& i2) -> bool {
std::sort(
cachedPages.begin(), cachedPages.end(), [&](const CacheSorting_t& i1, const CacheSorting_t& i2) -> bool {
return m_Pages[i1.key].cacheTime < m_Pages[i2.key].cacheTime;
});
}
);
uint64_t prunedAmount = 0;
uint64_t prunedPages = 0;
while (prunedAmount < amount){
while (prunedAmount < amount)
{
auto page = cachedPages[0];
BLT_TRACE("Pruning page (%d bytes) aged %f seconds", page.memoryUsage, toSeconds(blt::system::getCurrentTimeNanoseconds() - m_Pages[page.key].cacheTime));
BLT_TRACE("Pruning page (%d bytes) aged %f seconds", page.memoryUsage,
toSeconds(blt::system::getCurrentTimeNanoseconds() - m_Pages[page.key].cacheTime));
prunedAmount += page.memoryUsage;
m_Pages.erase(page.key);
prunedPages++;
@ -117,5 +137,49 @@ namespace cs {
BLT_INFO("Pruned %d pages", prunedPages);
}
/**
 * Expands "{{@name}}" link templates inside the site data of a page.
 *
 * Every character outside a link template is copied through unchanged. A link whose name ends in
 * one of the supported file endings is replaced by the result of fetch() for that file; links with
 * any other ending are removed from the output. The page's site data is overwritten with the result.
 *
 * A page that links to itself is reported and the link is skipped, since fetching it would
 * load (and therefore resolve) the same page again without end. Only this direct case is detected;
 * a cycle that goes through a second file is not.
 *
 * @param file path of the page being resolved, used for the self-link check and in log output
 * @param page page whose site data is rewritten
 */
void CacheEngine::resolveLinks(const std::string& file, HTMLPage& page)
{
    // only links to these kinds of files are expanded
    static constexpr const char* valid_file_endings[] = {
            ".css",
            ".js",
            ".part",
    };
    
    StringLexer lexer(page.getRawSite());
    std::string resolvedSite;
    
    while (lexer.hasNext())
    {
        if (!lexer.hasTemplatePrefix('@'))
        {
            resolvedSite += lexer.consume();
            continue;
        }
        
        lexer.consumeTemplatePrefix();
        std::string token;
        while (!lexer.hasTemplateSuffix())
        {
            if (!lexer.hasNext())
            {
                BLT_WARN("Invalid template syntax. EOF occurred before template was fully processed!");
                break;
            }
            token += lexer.consume();
        }
        // after an EOF this only moves the cursor further past the end, which hasNext() tolerates
        lexer.consumeTemplateSuffix();
        
        for (const auto& suffix : valid_file_endings)
        {
            if (!token.ends_with(suffix))
                continue;
            auto path = cs::fs::createWebFilePath(token);
            // NOTE(review): `file` is the key handed to loadPage while `path` went through createWebFilePath,
            // so both spellings are compared; confirm which form fetch() is meant to receive.
            if (path == file || token == file)
            {
                BLT_WARN("Recursive load detected!");
                BLT_WARN("Caching Engine will ignore this attempt, however, it is recommended that you remove the recursive call.");
                BLT_WARN("Detected in file '%s' offending link '%s'", file.c_str(), token.c_str());
            } else
                resolvedSite += fetch(path);
            break;
        }
    }
    
    page.getRawSite() = resolvedSite;
}
}

View File

@ -10,88 +10,48 @@
#include <sstream>
#include <algorithm>
namespace cs {
namespace cs
{
/**
 * Error raised for an extended-mustache template that is opened with '{{' but never closed by '}}'.
 * The text collected so far is included in the message to help locate the template.
 */
class LexerSyntaxException : public std::runtime_error
{
    public:
        /**
         * @param token text read after the opening braces before the error was detected
         */
        explicit LexerSyntaxException(const std::string& token):
                std::runtime_error(
                        "Extended-mustache syntax error! An opening '{{' must be closed by '}}'! (near: '" +
                        token + "')"
                )
        {}
};
/**
 * General error raised while processing extended-mustache syntax.
 * The supplied message is appended to a fixed prefix.
 */
class LexerException : public std::runtime_error
{
    public:
        /**
         * @param message description of what went wrong
         */
        explicit LexerException(const std::string& message):
                std::runtime_error("Extended-mustache syntax processing error! " + message)
        {}
};
/**
 * Error raised when a static context key contains the '$' character.
 * The message is fixed; no further detail is carried.
 */
class SyntaxException : public std::runtime_error
{
    public:
        explicit SyntaxException():
                std::runtime_error(
                        "Extended-mustache syntax error! Static context keys should not contain $"
                )
        {}
};
/**
 * Character-by-character reader over a string.
 * The lexer keeps a reference to the string it was constructed with rather than a copy,
 * so that string has to stay alive for as long as the lexer is used.
 */
class StringLexer {
private:
// referenced, not owned
const std::string& str;
// position of the next character to hand out
size_t pos = 0;
public:
explicit StringLexer(const std::string& str): str(str) {}
/**
* @return the next character and advances, or '\0' once the end of the string has been reached
*/
inline char nextToken() {
if (pos >= str.size())
return '\0';
return str[pos++];
}
/**
* @return true while unread characters remain
*/
inline bool hasTokens() {
return pos < str.size();
}
/**
* Tries to find the string 'match' and outputs all found characters to 'out'
* NOTE: the only return statement below yields false and the 'found == match' branch is empty,
* so a successful match is never reported; every character read is appended to 'out'.
* @param match string to match against
* @param out characters 'tokens' read by the lexer
* @return true if found false otherwise;
*/
inline bool findNext(const std::string& match, std::string& out) {
char c;
size_t p = 0;
std::string found;
// both loops stop when nextToken() returns '\0'
while ((c = nextToken())) {
// check for match, p should be 0 here!
if (c == match[p]) {
do {
found += c;
// emit token
out += c;
if (found == match){
// TODO?
}
// mismatch: forget the partial match and go back to scanning
if (c != match[p++]){
p = 0;
found = "";
break;
}
} while ((c = nextToken()));
} else // emit token
out += c;
}
return false;
}
};
std::unique_ptr<HTMLPage> HTMLPage::load(const std::string& path) {
std::unique_ptr<HTMLPage> HTMLPage::load(const std::string& path)
{
std::string htmlSource;
std::ifstream htmlFile;
if (!htmlFile.good())
BLT_ERROR("Input stream not good!\n");
// ensure we can throw exceptions:
htmlFile.exceptions(std::ifstream::failbit | std::ifstream::badbit);
try {
try
{
// open file
htmlFile.open(path);
std::stringstream htmlAsStream;
@ -101,7 +61,8 @@ namespace cs {
htmlFile.close();
// convert stream into std::string
htmlSource = htmlAsStream.str();
} catch (std::ifstream::failure& e) {
} catch (std::ifstream::failure& e)
{
BLT_ERROR("Unable to read file '%s'!\n", path.c_str());
BLT_ERROR("Exception: %s", e.what());
throw std::runtime_error("Failed to read file!\n");
@ -109,63 +70,53 @@ namespace cs {
return std::make_unique<HTMLPage>(HTMLPage(htmlSource));
}
/**
 * Creates a page from already loaded site data.
 * @param siteData page source; taken by value and moved into m_SiteData
 */
HTMLPage::HTMLPage(std::string siteData): m_SiteData(std::move(siteData))
{}
/**
 * Renders the page by replacing every "{{$key}}" template with the value the static context holds for key.
 *
 * All other text, including templates that do not start with "{{$", is copied to the output unchanged.
 * A key that is not present in the context is logged and replaced by nothing.
 *
 * @param context static context providing the replacement values
 * @return the page source with all "{{$key}}" templates substituted
 * @throws LexerSyntaxException if the source ends before an opened "{{$" template is closed by "}}"
 */
std::string HTMLPage::render(StaticContext& context)
{
    std::string buffer;
    // the lexer takes its input by value, so it already works on its own copy of the site data
    StringLexer lexer(m_SiteData);
    
    while (lexer.hasNext())
    {
        if (!lexer.hasTemplatePrefix('$'))
        {
            buffer += lexer.consume();
            continue;
        }
        
        lexer.consumeTemplatePrefix();
        std::string token;
        while (!lexer.hasTemplateSuffix())
        {
            if (!lexer.hasNext())
            {
                BLT_FATAL("Invalid template syntax. EOF occurred before template was fully processed!");
                throw LexerSyntaxException(token);
            }
            token += lexer.consume();
        }
        lexer.consumeTemplateSuffix();
        
        const bool known = std::find_if(
                context.begin(), context.end(),
                [&token](const auto& in) -> bool {
                    return token == in.first;
                }
        ) != context.end();
        
        if (!known)
        {
            // unable to find the token, we should throw an error to tell the user! (or admin in this case)
            BLT_WARN("Unable to find token '%s'!", token.c_str());
        } else
            buffer += context[token];
    }
    
    return buffer;
}
/**
 * Currently a no-op: the body contains no statements.
 */
void HTMLPage::resolveResources()
{
}
}

View File

@ -5,12 +5,16 @@
#include <blt/std/string.h>
#include <curl/curl.h>
namespace cs {
namespace cs
{
namespace parser {
Post::Post(const std::string& input) {
namespace parser
{
Post::Post(const std::string& input)
{
auto pairs = blt::string::split(input, "&");
for (const auto& pair : pairs) {
for (const auto& pair : pairs)
{
auto kv = blt::string::split(pair, "=");
auto key = kv[0];
auto value = kv[1];
@ -22,13 +26,16 @@ namespace cs {
}
}
/**
 * Looks up the value stored for a key of the parsed POST data.
 * NOTE(review): if m_Values is a standard map type, operator[] inserts an empty entry
 * for a key that is not present - confirm this is intended.
 * @param key name of the value to look up
 * @return reference to the value held in m_Values for that key
 */
const std::string& Post::operator[](const std::string& key)
{
    return m_Values[key];
}
std::string Post::dump() {
std::string Post::dump()
{
std::string out;
for (const auto& pair : m_Values) {
for (const auto& pair : m_Values)
{
out += "[";
out += pair.first;
out += "] = ";
@ -44,8 +51,10 @@ namespace cs {
}
}
namespace fs {
std::string createStaticFilePath(const std::string& file) {
namespace fs
{
std::string createStaticFilePath(const std::string& file)
{
auto path = std::string(CROW_STATIC_DIRECTORY);
if (!path.ends_with('/'))
path += '/';
@ -55,7 +64,9 @@ namespace cs {
throw std::runtime_error("Unable to create file path because file does not exist!");
return path;
}
std::string createWebFilePath(const std::string& file){
std::string createWebFilePath(const std::string& file)
{
auto path = std::string(SITE_FILES_PATH);
if (!path.ends_with('/'))
path += '/';

View File

@ -123,8 +123,24 @@ crow::response handle_root_page(const site_params& params)
// BLT_TRACE("URL: %s = %s", v.c_str(), req.url_params.get(v));
if (params.name.ends_with(".html"))
{
checkAndUpdateUserSession(params.app, params.req);
auto& session = params.app.get_context<Session>(params.req);
auto s_clientID = session.get("clientID", "");
auto s_clientToken = session.get("clientToken", "");
auto user_perms = cs::getUserPermissions(cs::getUserFromID(s_clientID));
crow::mustache::context ctx;
// we don't want to pass all get parameters to the context to prevent leaking
// pass perms in
if (user_perms & cs::PERM_ADMIN)
ctx["_admin"] = true;
if (cs::isUserLoggedIn(s_clientID, s_clientToken))
{
ctx["_logged_in"] = true;
}
// we don't want to pass all get parameters to the context to prevent leaking information
auto referer = params.req.url_params.get("referer");
if (referer)
ctx["referer"] = referer;
@ -132,14 +148,14 @@ crow::response handle_root_page(const site_params& params)
return page.render(ctx);
}
// crow::mustache::context ctx({{"person", name}});
// auto user_page = crow::mustache::compile(engine.fetch("index.html"));
return params.engine.fetch("default.html");
}
crow::response handle_auth_page(const site_params& params, uint32_t required_perms)
crow::response handle_auth_page(const site_params& params)
{
if (isUserAdmin(params.app, params.req))
return redirect("/login.html");
return handle_root_page(params);
@ -247,6 +263,7 @@ int main(int argc, const char** argv)
if (!cs::storeUserData(pp["username"], user_agent, data))
{
BLT_ERROR("Failed to update user data");
return redirect("login.html");
}
session.set("clientID", data.clientID);