Upgrade StringLexer with better design principles.

Strings are now processed in a much clearer, more intuitive way.
main
Brett 2023-08-20 20:24:11 -04:00
parent 22a89031c3
commit dd96940c67
13 changed files with 242 additions and 144 deletions

Binary file not shown.

View File

@ -1,2 +1 @@
l5yQfzNDLXuq6Ic1 1692481044 aubTl45vzPn5feHw 1692660008
7IgWbfRRG3liKhkP 1692489339

View File

@ -1 +0,0 @@
{"clientID":"50a21c33-66c4-5a0f-902f-9434632025e6","clientToken":"TF/rwm67DntB0hrdGiPpYRPFvnZ786r8nrZ4+WQ6wUang4xbqNaZ0AUpXKcHeswaC+IwR0891JZtXP+4XcHsQA=="}

View File

@ -0,0 +1 @@
{"clientID":"50a21c33-66c4-5a0f-902f-9434632025e6","clientToken":"Tcl8i/S1Czz+UGS6NzeRu/Hyk66oJjYbsRsm3tPqd/AVt2yAVbFEEi/oGdaoIlTriQf5TX7heYPxqdcGMmLRVg=="}

View File

@ -1 +0,0 @@
{"clientID":"50a21c33-66c4-5a0f-902f-9434632025e6","clientToken":"qJNyHpcA1b1EY6/so2rH3FhBilMKDDnCJSliaeytHMh4V0kj2yy4Mb1UEO7dW/uYdgfNWn73dwCrioovCe1NHg=="}

View File

@ -3,6 +3,12 @@
<head> <head>
<link rel="stylesheet" href="/static/css/home.css"> <link rel="stylesheet" href="/static/css/home.css">
<link rel="stylesheet" href="/static/css/bar.css"> <link rel="stylesheet" href="/static/css/bar.css">
<!-- jQuery has to be pulled in with a script element: rel="javascript" is not a link type that loads or runs a script, and without a scheme the URL would resolve relative to this page. -->
<script src="https://cdnjs.cloudflare.com/ajax/libs/jquery/2.1.3/jquery.min.js"></script>
<meta charset="UTF-8">
<meta name="description" content="">
<meta name="keywords" content="">
<meta name="author" content="Brett">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>{{$SITE_TITLE}}</title> <title>{{$SITE_TITLE}}</title>
</head> </head>
<body class="blur-bgimage" style="background-image: url({{$SITE_BACKGROUND}})"> <body class="blur-bgimage" style="background-image: url({{$SITE_BACKGROUND}})">
@ -14,6 +20,9 @@
</div> </div>
<div class="center"> <div class="center">
HAXsdsad HAXsdsad
{{#_admin}}
Admin detected
{{/_admin}}
</div> </div>
</div> </div>
</div> </div>

View File

@ -39,6 +39,7 @@ namespace cs {
* @return memory usage of the pages cache in bytes * @return memory usage of the pages cache in bytes
*/ */
uint64_t calculateMemoryUsage(); uint64_t calculateMemoryUsage();
void resolveLinks(const std::string& file, HTMLPage& page);
void loadPage(const std::string& path); void loadPage(const std::string& path);
/** /**
* Prunes the cache starting with the oldest pages we have loaded. (in bytes) * Prunes the cache starting with the oldest pages we have loaded. (in bytes)

View File

@ -8,9 +8,56 @@
#include <memory> #include <memory>
#include <string> #include <string>
#include <crowsite/config.h> #include <crowsite/config.h>
#include <utility>
namespace cs { namespace cs {
/**
 * Forward-only cursor over an owned copy of a string, with helpers for
 * recognising the extended-mustache delimiters "{{<c>" (prefix) and "}}" (suffix).
 *
 * The lexer never modifies the text; it only advances an index into it.
 * consumeTemplatePrefix() / consumeTemplateSuffix() do not validate what they skip,
 * so callers should check hasTemplatePrefix() / hasTemplateSuffix() first.
 */
struct StringLexer
{
    private:
        std::string str;
        size_t index = 0;
    public:
        /**
         * @param str text to scan; taken by value and moved into the lexer.
         */
        explicit StringLexer(std::string str): str(std::move(str))
        {}
        
        /**
         * @return true while at least one unread character remains.
         */
        bool hasNext() const
        {
            return index < str.size();
        }
        
        /**
         * @param c character expected directly after the opening "{{"
         * @return true if the unread text starts with "{{" followed by c.
         */
        bool hasTemplatePrefix(char c) const
        {
            // three characters are inspected, so the last one (index + 2) must be in range
            if (index + 2 >= str.size())
                return false;
            return str[index] == '{' && str[index + 1] == '{' && str[index + 2] == c;
        }
        
        /**
         * @return true if the unread text starts with "}}".
         */
        bool hasTemplateSuffix() const
        {
            // two characters are inspected, so the last one (index + 1) must be in range
            if (index + 1 >= str.size())
                return false;
            return str[index] == '}' && str[index + 1] == '}';
        }
        
        /**
         * Skips the three characters of a template prefix ("{{" plus the marker character).
         */
        void consumeTemplatePrefix()
        {
            index += 3;
        }
        
        /**
         * Skips the two characters of a template suffix ("}}").
         */
        void consumeTemplateSuffix()
        {
            index += 2;
        }
        
        /**
         * Returns the next unread character and advances past it.
         * @return the character, or '\0' once the input is exhausted. The cursor is not
         * advanced in that case, so repeated calls at the end never index out of bounds.
         */
        char consume()
        {
            if (!hasNext())
                return '\0';
            return str[index++];
        }
};
class StaticContext { class StaticContext {
private: private:
HASHMAP<std::string, std::string> replacements; HASHMAP<std::string, std::string> replacements;
@ -46,7 +93,7 @@ namespace cs {
*/ */
std::string render(StaticContext& context); std::string render(StaticContext& context);
inline std::string const& getRawSite() { inline std::string& getRawSite() {
return m_SiteData; return m_SiteData;
} }
}; };

View File

@ -8,23 +8,28 @@
#include <algorithm> #include <algorithm>
#include <blt/std/time.h> #include <blt/std/time.h>
namespace cs { namespace cs
{
double toSeconds(uint64_t v){ double toSeconds(uint64_t v)
return (double)(v) / 1000000000.0; {
return (double) (v) / 1000000000.0;
} }
CacheEngine::CacheEngine(StaticContext& context, const CacheSettings& settings): m_Context(context), CacheEngine::CacheEngine(StaticContext& context, const CacheSettings& settings): m_Context(context),
m_Settings((settings)) {} m_Settings((settings))
{}
uint64_t CacheEngine::calculateMemoryUsage(const std::string& path, const CacheEngine::CacheValue& value) { uint64_t CacheEngine::calculateMemoryUsage(const std::string& path, const CacheEngine::CacheValue& value)
{
uint64_t pageContentSize = path.size() * sizeof(char); uint64_t pageContentSize = path.size() * sizeof(char);
pageContentSize += value.page->getRawSite().size() * sizeof(char); pageContentSize += value.page->getRawSite().size() * sizeof(char);
pageContentSize += value.renderedPage.size() * sizeof(char); pageContentSize += value.renderedPage.size() * sizeof(char);
return pageContentSize; return pageContentSize;
} }
uint64_t CacheEngine::calculateMemoryUsage() { uint64_t CacheEngine::calculateMemoryUsage()
{
auto pagesBaseSize = m_Pages.size() * sizeof(CacheValue); auto pagesBaseSize = m_Pages.size() * sizeof(CacheValue);
uint64_t pageContentSizes = 0; uint64_t pageContentSizes = 0;
@ -34,24 +39,30 @@ namespace cs {
return pagesBaseSize + pageContentSizes; return pagesBaseSize + pageContentSizes;
} }
const std::string& CacheEngine::fetch(const std::string& path) { const std::string& CacheEngine::fetch(const std::string& path)
{
bool load = false; bool load = false;
auto find = m_Pages.find(path); auto find = m_Pages.find(path);
if (find == m_Pages.end()){ if (find == m_Pages.end())
{
BLT_DEBUG("Page '%s' was not found in cache, loading now!", path.c_str()); BLT_DEBUG("Page '%s' was not found in cache, loading now!", path.c_str());
load = true; load = true;
} else { } else
{
auto lastWrite = std::filesystem::last_write_time(cs::fs::createWebFilePath(path)); auto lastWrite = std::filesystem::last_write_time(cs::fs::createWebFilePath(path));
if (lastWrite != m_Pages[path].lastModified) { if (lastWrite != m_Pages[path].lastModified)
{
load = true; load = true;
BLT_DEBUG("Page '%s' has been modified! Reloading now!", path.c_str()); BLT_DEBUG("Page '%s' has been modified! Reloading now!", path.c_str());
} }
} }
if (load) { if (load)
{
auto memory = calculateMemoryUsage(); auto memory = calculateMemoryUsage();
if (memory > m_Settings.hardMaxMemory) { if (memory > m_Settings.hardMaxMemory)
{
BLT_WARN("Hard memory limit was reached! Pruning to soft limit now!"); BLT_WARN("Hard memory limit was reached! Pruning to soft limit now!");
prune( prune(
m_Settings.hardMaxMemory - m_Settings.softMaxMemory m_Settings.hardMaxMemory - m_Settings.softMaxMemory
@ -59,7 +70,8 @@ namespace cs {
); );
} }
if (memory > m_Settings.softMaxMemory) { if (memory > m_Settings.softMaxMemory)
{
auto amount = std::min(m_Settings.softPruneAmount, memory - m_Settings.softMaxMemory); auto amount = std::min(m_Settings.softPruneAmount, memory - m_Settings.softMaxMemory);
BLT_INFO("Soft memory limit was reached! Pruning %d bytes of memory", amount); BLT_INFO("Soft memory limit was reached! Pruning %d bytes of memory", amount);
prune(amount); prune(amount);
@ -73,11 +85,13 @@ namespace cs {
return m_Pages[path].renderedPage; return m_Pages[path].renderedPage;
} }
void CacheEngine::loadPage(const std::string& path) { void CacheEngine::loadPage(const std::string& path)
{
auto start = blt::system::getCurrentTimeNanoseconds(); auto start = blt::system::getCurrentTimeNanoseconds();
auto fullPath = cs::fs::createWebFilePath(path); auto fullPath = cs::fs::createWebFilePath(path);
auto page = HTMLPage::load(fullPath); auto page = HTMLPage::load(fullPath);
resolveLinks(path, *page);
auto renderedPage = page->render(m_Context); auto renderedPage = page->render(m_Context);
m_Pages[path] = CacheValue{ m_Pages[path] = CacheValue{
blt::system::getCurrentTimeNanoseconds(), blt::system::getCurrentTimeNanoseconds(),
@ -90,8 +104,10 @@ namespace cs {
BLT_INFO("Loaded page %s in %fms", path.c_str(), (end - start) / 1000000.0); BLT_INFO("Loaded page %s in %fms", path.c_str(), (end - start) / 1000000.0);
} }
void CacheEngine::prune(uint64_t amount) { void CacheEngine::prune(uint64_t amount)
struct CacheSorting_t { {
struct CacheSorting_t
{
uint64_t memoryUsage; uint64_t memoryUsage;
std::string key; std::string key;
}; };
@ -100,15 +116,19 @@ namespace cs {
for (auto& page : m_Pages) for (auto& page : m_Pages)
cachedPages.emplace_back(calculateMemoryUsage(page.first, page.second), page.first); cachedPages.emplace_back(calculateMemoryUsage(page.first, page.second), page.first);
std::sort(cachedPages.begin(), cachedPages.end(), [&](const CacheSorting_t& i1, const CacheSorting_t& i2) -> bool { std::sort(
return m_Pages[i1.key].cacheTime < m_Pages[i2.key].cacheTime; cachedPages.begin(), cachedPages.end(), [&](const CacheSorting_t& i1, const CacheSorting_t& i2) -> bool {
}); return m_Pages[i1.key].cacheTime < m_Pages[i2.key].cacheTime;
}
);
uint64_t prunedAmount = 0; uint64_t prunedAmount = 0;
uint64_t prunedPages = 0; uint64_t prunedPages = 0;
while (prunedAmount < amount){ while (prunedAmount < amount)
{
auto page = cachedPages[0]; auto page = cachedPages[0];
BLT_TRACE("Pruning page (%d bytes) aged %f seconds", page.memoryUsage, toSeconds(blt::system::getCurrentTimeNanoseconds() - m_Pages[page.key].cacheTime)); BLT_TRACE("Pruning page (%d bytes) aged %f seconds", page.memoryUsage,
toSeconds(blt::system::getCurrentTimeNanoseconds() - m_Pages[page.key].cacheTime));
prunedAmount += page.memoryUsage; prunedAmount += page.memoryUsage;
m_Pages.erase(page.key); m_Pages.erase(page.key);
prunedPages++; prunedPages++;
@ -117,5 +137,49 @@ namespace cs {
BLT_INFO("Pruned %d pages", prunedPages); BLT_INFO("Pruned %d pages", prunedPages);
} }
/**
 * Expands every "{{@<name>}}" include found in the page's raw text and writes the
 * result back into the page.
 *
 * A name is only expanded when it ends in one of the accepted file endings; it is then
 * replaced by whatever fetch() returns for it. Names with any other ending are removed
 * from the output without replacement. Text outside of "{{@ ... }}" is copied unchanged.
 *
 * Only a page including itself directly is detected and skipped; a cycle that goes
 * through another file (A includes B, B includes A) is not detected here.
 *
 * @param file name of the page being processed, as it was handed to loadPage()
 * @param page page whose raw text is rewritten in place
 */
void CacheEngine::resolveLinks(const std::string& file, HTMLPage& page)
{
    // endings a "{{@...}}" include must have in order to be expanded
    static constexpr const char* valid_file_endings[] = {
            ".css",
            ".js",
            ".part",
    };
    
    StringLexer lexer(page.getRawSite());
    std::string resolvedSite;
    
    while (lexer.hasNext())
    {
        if (!lexer.hasTemplatePrefix('@'))
        {
            // ordinary text, copy it through
            resolvedSite += lexer.consume();
            continue;
        }
        lexer.consumeTemplatePrefix();
        
        // collect everything up to the closing "}}"
        std::string token;
        while (lexer.hasNext() && !lexer.hasTemplateSuffix())
            token += lexer.consume();
        
        if (!lexer.hasNext())
        {
            // The input ended before "}}" was seen. There is no suffix to skip and the
            // partial token must not be treated as a file name, so stop here.
            BLT_WARN("Invalid template syntax. EOF occurred before template was fully processed!");
            break;
        }
        lexer.consumeTemplateSuffix();
        
        for (const char* suffix : valid_file_endings)
        {
            if (!token.ends_with(suffix))
                continue;
            auto path = cs::fs::createWebFilePath(token);
            // 'file' is the un-resolved name, so compare it against the token as well as
            // against the resolved path.
            if (token == file || path == file)
            {
                BLT_WARN("Recursive load detected!");
                BLT_WARN("Caching Engine will ignore this attempt, however, it is recommended that you remove the recursive call.");
                BLT_WARN("Detected in file '%s' offending link '%s'", file.c_str(), token.c_str());
                // actually ignore it: fetching the page from inside itself would never terminate
                break;
            }
            // NOTE(review): fetch() and loadPage() run cs::fs::createWebFilePath on their
            // argument themselves, so 'path' is resolved a second time there - confirm
            // whether 'token' should be passed instead.
            resolvedSite += fetch(path);
            break;
        }
    }
    
    page.getRawSite() = std::move(resolvedSite);
}
} }

View File

@ -10,88 +10,48 @@
#include <sstream> #include <sstream>
#include <algorithm> #include <algorithm>
namespace cs { namespace cs
{
class LexerSyntaxException : public std::runtime_error { class LexerSyntaxException : public std::runtime_error
{
public: public:
explicit LexerSyntaxException(const std::string& token): explicit LexerSyntaxException(const std::string& token):
std::runtime_error( std::runtime_error(
"Extended-mustache syntax error! An opening '{{' must be closed by '}}'! (near: '" + "Extended-mustache syntax error! An opening '{{' must be closed by '}}'! (near: '" +
token + "')" token + "')"
) {} )
{}
}; };
class LexerException : public std::runtime_error { class LexerException : public std::runtime_error
{
public: public:
explicit LexerException(const std::string& message): explicit LexerException(const std::string& message):
std::runtime_error("Extended-mustache syntax processing error! " + message) {} std::runtime_error("Extended-mustache syntax processing error! " + message)
{}
}; };
class SyntaxException : public std::runtime_error { class SyntaxException : public std::runtime_error
{
public: public:
explicit SyntaxException(): explicit SyntaxException():
std::runtime_error( std::runtime_error(
"Extended-mustache syntax error! Static context keys should not contain $" "Extended-mustache syntax error! Static context keys should not contain $"
) {} )
{}
}; };
class StringLexer { std::unique_ptr<HTMLPage> HTMLPage::load(const std::string& path)
private: {
const std::string& str;
size_t pos = 0;
public:
explicit StringLexer(const std::string& str): str(str) {}
inline char nextToken() {
if (pos >= str.size())
return '\0';
return str[pos++];
}
inline bool hasTokens() {
return pos < str.size();
}
/**
* Tries to find the string 'match' and outputs all found characters to 'out'
* @param match string to match against
* @param out characters 'tokens' read by the lexer
* @return true if found false otherwise;
*/
inline bool findNext(const std::string& match, std::string& out) {
char c;
size_t p = 0;
std::string found;
while ((c = nextToken())) {
// check for match, p should be 0 here!
if (c == match[p]) {
do {
found += c;
// emit token
out += c;
if (found == match){
// TODO?
}
if (c != match[p++]){
p = 0;
found = "";
break;
}
} while ((c = nextToken()));
} else // emit token
out += c;
}
return false;
}
};
std::unique_ptr<HTMLPage> HTMLPage::load(const std::string& path) {
std::string htmlSource; std::string htmlSource;
std::ifstream htmlFile; std::ifstream htmlFile;
if (!htmlFile.good()) if (!htmlFile.good())
BLT_ERROR("Input stream not good!\n"); BLT_ERROR("Input stream not good!\n");
// ensure we can throw exceptions: // ensure we can throw exceptions:
htmlFile.exceptions(std::ifstream::failbit | std::ifstream::badbit); htmlFile.exceptions(std::ifstream::failbit | std::ifstream::badbit);
try { try
{
// open file // open file
htmlFile.open(path); htmlFile.open(path);
std::stringstream htmlAsStream; std::stringstream htmlAsStream;
@ -101,7 +61,8 @@ namespace cs {
htmlFile.close(); htmlFile.close();
// convert stream into std::string // convert stream into std::string
htmlSource = htmlAsStream.str(); htmlSource = htmlAsStream.str();
} catch (std::ifstream::failure& e) { } catch (std::ifstream::failure& e)
{
BLT_ERROR("Unable to read file '%s'!\n", path.c_str()); BLT_ERROR("Unable to read file '%s'!\n", path.c_str());
BLT_ERROR("Exception: %s", e.what()); BLT_ERROR("Exception: %s", e.what());
throw std::runtime_error("Failed to read file!\n"); throw std::runtime_error("Failed to read file!\n");
@ -109,63 +70,53 @@ namespace cs {
return std::make_unique<HTMLPage>(HTMLPage(htmlSource)); return std::make_unique<HTMLPage>(HTMLPage(htmlSource));
} }
HTMLPage::HTMLPage(std::string siteData): m_SiteData(std::move(siteData)) {} HTMLPage::HTMLPage(std::string siteData): m_SiteData(std::move(siteData))
{}
std::string HTMLPage::render(StaticContext& context) { std::string HTMLPage::render(StaticContext& context)
{
std::string processedSiteData = m_SiteData; std::string processedSiteData = m_SiteData;
std::string buffer; std::string buffer;
StringLexer lexer(processedSiteData); StringLexer lexer(processedSiteData);
while (lexer.hasTokens()) { while (lexer.hasNext())
char c; {
switch ((c = lexer.nextToken())) { if (lexer.hasTemplatePrefix('$'))
case '{': {
// if we are dealing with a mustache template then we should process lexer.consumeTemplatePrefix();
if ((c = lexer.nextToken()) == '{') { std::string token;
// if it is not the extended syntax we are looking for, skip it as crow will handle it at request time! while (!lexer.hasTemplateSuffix())
if ((c = lexer.nextToken()) != '$') { {
buffer += "{{"; if (!lexer.hasNext())
buffer += c; {
break; BLT_FATAL("Invalid template syntax. EOF occurred before template was fully processed!");
} throw LexerSyntaxException(token);
std::string tokenString;
while ((c = lexer.nextToken())) {
if (c == '}') {
if (lexer.nextToken() != '}')
throw LexerSyntaxException(tokenString);
else {
if (std::find_if(
context.begin(), context.end(),
[&tokenString](auto in) -> bool {
return tokenString == in.first;
}
) == context.end()) {
// unable to find the token, we should throw an error to tell the user! (or admin in this case)
BLT_WARN("Unable to find token '%s'!", tokenString.c_str());
} else
buffer += context[tokenString];
break;
}
}
tokenString += c;
}
} else { // otherwise we should write out the characters since this isn't a extended template
buffer += '{';
buffer += c;
} }
break; token += lexer.consume();
default: }
buffer += c; lexer.consumeTemplateSuffix();
break; if (std::find_if(
} context.begin(), context.end(),
[&token](auto in) -> bool {
return token == in.first;
}
) == context.end())
{
// unable to find the token, we should throw an error to tell the user! (or admin in this case)
BLT_WARN("Unable to find token '%s'!", token.c_str());
} else
buffer += context[token];
} else
buffer += lexer.consume();
} }
return buffer; return buffer;
} }
void HTMLPage::resolveResources() { void HTMLPage::resolveResources()
{
} }
} }

View File

@ -5,12 +5,16 @@
#include <blt/std/string.h> #include <blt/std/string.h>
#include <curl/curl.h> #include <curl/curl.h>
namespace cs { namespace cs
{
namespace parser { namespace parser
Post::Post(const std::string& input) { {
Post::Post(const std::string& input)
{
auto pairs = blt::string::split(input, "&"); auto pairs = blt::string::split(input, "&");
for (const auto& pair : pairs) { for (const auto& pair : pairs)
{
auto kv = blt::string::split(pair, "="); auto kv = blt::string::split(pair, "=");
auto key = kv[0]; auto key = kv[0];
auto value = kv[1]; auto value = kv[1];
@ -22,13 +26,16 @@ namespace cs {
} }
} }
const std::string& Post::operator[](const std::string& key) { const std::string& Post::operator[](const std::string& key)
{
return m_Values[key]; return m_Values[key];
} }
std::string Post::dump() { std::string Post::dump()
{
std::string out; std::string out;
for (const auto& pair : m_Values) { for (const auto& pair : m_Values)
{
out += "["; out += "[";
out += pair.first; out += pair.first;
out += "] = "; out += "] = ";
@ -44,8 +51,10 @@ namespace cs {
} }
} }
namespace fs { namespace fs
std::string createStaticFilePath(const std::string& file) { {
std::string createStaticFilePath(const std::string& file)
{
auto path = std::string(CROW_STATIC_DIRECTORY); auto path = std::string(CROW_STATIC_DIRECTORY);
if (!path.ends_with('/')) if (!path.ends_with('/'))
path += '/'; path += '/';
@ -55,7 +64,9 @@ namespace cs {
throw std::runtime_error("Unable to create file path because file does not exist!"); throw std::runtime_error("Unable to create file path because file does not exist!");
return path; return path;
} }
std::string createWebFilePath(const std::string& file){
std::string createWebFilePath(const std::string& file)
{
auto path = std::string(SITE_FILES_PATH); auto path = std::string(SITE_FILES_PATH);
if (!path.ends_with('/')) if (!path.ends_with('/'))
path += '/'; path += '/';

View File

@ -123,23 +123,39 @@ crow::response handle_root_page(const site_params& params)
// BLT_TRACE("URL: %s = %s", v.c_str(), req.url_params.get(v)); // BLT_TRACE("URL: %s = %s", v.c_str(), req.url_params.get(v));
if (params.name.ends_with(".html")) if (params.name.ends_with(".html"))
{ {
checkAndUpdateUserSession(params.app, params.req);
auto& session = params.app.get_context<Session>(params.req);
auto s_clientID = session.get("clientID", "");
auto s_clientToken = session.get("clientToken", "");
auto user_perms = cs::getUserPermissions(cs::getUserFromID(s_clientID));
crow::mustache::context ctx; crow::mustache::context ctx;
// we don't want to pass all get parameters to the context to prevent leaking
// pass perms in
if (user_perms & cs::PERM_ADMIN)
ctx["_admin"] = true;
if (cs::isUserLoggedIn(s_clientID, s_clientToken))
{
ctx["_logged_in"] = true;
}
// we don't want to pass all get parameters to the context to prevent leaking information
auto referer = params.req.url_params.get("referer"); auto referer = params.req.url_params.get("referer");
if (referer) if (referer)
ctx["referer"] = referer; ctx["referer"] = referer;
auto page = crow::mustache::compile(params.engine.fetch(params.name)); auto page = crow::mustache::compile(params.engine.fetch(params.name));
return page.render(ctx); return page.render(ctx);
} }
// crow::mustache::context ctx({{"person", name}});
// auto user_page = crow::mustache::compile(engine.fetch("index.html"));
return params.engine.fetch("default.html"); return params.engine.fetch("default.html");
} }
crow::response handle_auth_page(const site_params& params, uint32_t required_perms) crow::response handle_auth_page(const site_params& params)
{ {
if (isUserAdmin(params.app, params.req))
return redirect("/login.html");
return handle_root_page(params); return handle_root_page(params);
@ -247,6 +263,7 @@ int main(int argc, const char** argv)
if (!cs::storeUserData(pp["username"], user_agent, data)) if (!cs::storeUserData(pp["username"], user_agent, data))
{ {
BLT_ERROR("Failed to update user data"); BLT_ERROR("Failed to update user data");
return redirect("login.html");
} }
session.set("clientID", data.clientID); session.set("clientID", data.clientID);