From dd96940c67cbda155f16c149b901b157f4cfdcc4 Mon Sep 17 00:00:00 2001 From: Brett Laptop <tri11paragon@tpgc.me> Date: Sun, 20 Aug 2023 20:24:11 -0400 Subject: [PATCH] Upgrade StringLexer with better design principles. Strings are now processed in a much clearer, more intuitive way --- crow_test/data/db/users.sqlite | Bin 20480 -> 20480 bytes crow_test/data/session/.expirations | 3 +- crow_test/data/session/7IgWbfRRG3liKhkP.json | 1 - crow_test/data/session/aubTl45vzPn5feHw.json | 1 + crow_test/data/session/l5yQfzNDLXuq6Ic1.json | 1 - crow_test/webcontent/index.html | 9 + ...{not_authorized.response => menu_bar.part} | 0 include/crowsite/site/cache.h | 1 + include/crowsite/site/web.h | 49 +++++- src/crowsite/site/cache.cpp | 106 +++++++++--- src/crowsite/site/web.cpp | 157 ++++++------------ src/crowsite/utility.cpp | 31 ++-- src/main.cpp | 27 ++- 13 files changed, 242 insertions(+), 144 deletions(-) delete mode 100644 crow_test/data/session/7IgWbfRRG3liKhkP.json create mode 100644 crow_test/data/session/aubTl45vzPn5feHw.json delete mode 100644 crow_test/data/session/l5yQfzNDLXuq6Ic1.json rename crow_test/webcontent/{not_authorized.response => menu_bar.part} (100%) diff --git a/crow_test/data/db/users.sqlite b/crow_test/data/db/users.sqlite index 9f8cabf2b68246bbbefed1e908515a05520caec0..e38c63435abb9c5d05c98923ae5467209db2678f 100644 GIT binary patch delta 121 zcmZozz}T>Wae_3X#6%fqMv09Hr{oy9Cu_^sg(l}%Wa<YSI#*R`hq?!w`BkL`mFjy` zW}BJidu2r?6$cgP8kYnVrsz9{l^9hzh9$YVx@PL<yQd`Pd**}`Wd^31hD4ZWq(%l* Z6s9D*`{w!tg{5!)C(kIr$i2uw0RUEfC%pgw delta 121 zcmZozz}T>Wae_3X_(U0JM)8dar{oy9CTq*rg}Uh%mFJq7yX2KP8DtctxMv0wL<R-8 zmE}d5TbLDD<P}AkXom-ym4_zgrJGbF75XJc890U(M0h8Aq!yPaI%|8D2N_sc8hS;Q ZL<DG?L?n9@2Rd&4C(kIr$hF8o0RTWICYk^M diff --git a/crow_test/data/session/.expirations b/crow_test/data/session/.expirations index 2af2d9c..63655a4 100644 --- a/crow_test/data/session/.expirations +++ b/crow_test/data/session/.expirations @@ -1,2 +1 @@ -l5yQfzNDLXuq6Ic1 1692481044 -7IgWbfRRG3liKhkP 1692489339 +aubTl45vzPn5feHw 1692660008 diff --git a/crow_test/data/session/7IgWbfRRG3liKhkP.json b/crow_test/data/session/7IgWbfRRG3liKhkP.json deleted file mode 100644 index c9db0f4..0000000 --- a/crow_test/data/session/7IgWbfRRG3liKhkP.json +++ /dev/null @@ -1 +0,0 @@ -{"clientID":"50a21c33-66c4-5a0f-902f-9434632025e6","clientToken":"TF/rwm67DntB0hrdGiPpYRPFvnZ786r8nrZ4+WQ6wUang4xbqNaZ0AUpXKcHeswaC+IwR0891JZtXP+4XcHsQA=="} \ No newline at end of file diff --git a/crow_test/data/session/aubTl45vzPn5feHw.json b/crow_test/data/session/aubTl45vzPn5feHw.json new file mode 100644 index 0000000..e225beb --- /dev/null +++ b/crow_test/data/session/aubTl45vzPn5feHw.json @@ -0,0 +1 @@ +{"clientID":"50a21c33-66c4-5a0f-902f-9434632025e6","clientToken":"Tcl8i/S1Czz+UGS6NzeRu/Hyk66oJjYbsRsm3tPqd/AVt2yAVbFEEi/oGdaoIlTriQf5TX7heYPxqdcGMmLRVg=="} \ No newline at end of file diff --git a/crow_test/data/session/l5yQfzNDLXuq6Ic1.json b/crow_test/data/session/l5yQfzNDLXuq6Ic1.json deleted file mode 100644 index 21f31f4..0000000 --- a/crow_test/data/session/l5yQfzNDLXuq6Ic1.json +++ /dev/null @@ -1 +0,0 @@ -{"clientID":"50a21c33-66c4-5a0f-902f-9434632025e6","clientToken":"qJNyHpcA1b1EY6/so2rH3FhBilMKDDnCJSliaeytHMh4V0kj2yy4Mb1UEO7dW/uYdgfNWn73dwCrioovCe1NHg=="} \ No newline at end of file diff --git a/crow_test/webcontent/index.html b/crow_test/webcontent/index.html index d4eebc7..83bfee4 100644 --- a/crow_test/webcontent/index.html +++ b/crow_test/webcontent/index.html @@ -3,6 +3,12 @@ <head> <link rel="stylesheet" href="/static/css/home.css"> <link rel="stylesheet" href="/static/css/bar.css"> + <link rel="javascript" href="cdnjs.cloudflare.com/ajax/libs/jquery/2.1.3/jquery.min.js"> + <meta charset="UTF-8"> + <meta name="description" content=""> + <meta name="keywords" content=""> + <meta name="author" content="Brett"> + <meta name="viewport" content="width=device-width, initial-scale=1.0"> <title>{{$SITE_TITLE}}</title> </head> <body class="blur-bgimage" style="background-image: url({{$SITE_BACKGROUND}})"> @@ -14,6 +20,9 @@ </div> <div class="center"> HAXsdsad + {{#_admin}} + Admin detected + {{/_admin}} </div> </div> </div> diff --git a/crow_test/webcontent/not_authorized.response b/crow_test/webcontent/menu_bar.part similarity index 100% rename from crow_test/webcontent/not_authorized.response rename to crow_test/webcontent/menu_bar.part diff --git a/include/crowsite/site/cache.h b/include/crowsite/site/cache.h index ce9e9de..cb1012e 100644 --- a/include/crowsite/site/cache.h +++ b/include/crowsite/site/cache.h @@ -39,6 +39,7 @@ namespace cs { * @return memory usage of the pages cache in bytes */ uint64_t calculateMemoryUsage(); + void resolveLinks(const std::string& file, HTMLPage& page); void loadPage(const std::string& path); /** * Prunes the cache starting with the oldest pages we have loaded. (in bytes) diff --git a/include/crowsite/site/web.h b/include/crowsite/site/web.h index 8c67ac7..8268669 100644 --- a/include/crowsite/site/web.h +++ b/include/crowsite/site/web.h @@ -8,9 +8,56 @@ #include <memory> #include <string> #include <crowsite/config.h> +#include <utility> namespace cs { + struct StringLexer + { + private: + std::string str; + size_t index = 0; + public: + explicit StringLexer(std::string str): str(std::move(str)) + {} + + inline bool hasNext() + { + if (index >= str.size()) + return false; + return true; + } + + inline bool hasTemplatePrefix(char c) + { + if (index + 2 >= str.size()) + return false; + return str[index] == '{' && str[index + 1] == '{' && str[index + 2] == c; + } + + inline bool hasTemplateSuffix() + { + if (index + 1 >= str.size()) + return false; + return str[index] == '}' && str[index + 1] == '}'; + } + + inline void consumeTemplatePrefix() + { + index += 3; + } + + inline void consumeTemplateSuffix() + { + index += 2; + } + + inline char consume() + { + return str[index++]; + } + }; + class StaticContext { private: HASHMAP<std::string, std::string> replacements; @@ -46,7 +93,7 @@ namespace cs { */ std::string render(StaticContext& context); - inline std::string const& getRawSite() { + inline std::string& getRawSite() { return m_SiteData; } }; diff --git a/src/crowsite/site/cache.cpp b/src/crowsite/site/cache.cpp index 60317fe..34fdd4d 100644 --- a/src/crowsite/site/cache.cpp +++ b/src/crowsite/site/cache.cpp @@ -8,23 +8,28 @@ #include <algorithm> #include <blt/std/time.h> -namespace cs { +namespace cs +{ - double toSeconds(uint64_t v){ - return (double)(v) / 1000000000.0; + double toSeconds(uint64_t v) + { + return (double) (v) / 1000000000.0; } CacheEngine::CacheEngine(StaticContext& context, const CacheSettings& settings): m_Context(context), - m_Settings((settings)) {} + m_Settings((settings)) + {} - uint64_t CacheEngine::calculateMemoryUsage(const std::string& path, const CacheEngine::CacheValue& value) { + uint64_t CacheEngine::calculateMemoryUsage(const std::string& path, const CacheEngine::CacheValue& value) + { uint64_t pageContentSize = path.size() * sizeof(char); pageContentSize += value.page->getRawSite().size() * sizeof(char); pageContentSize += value.renderedPage.size() * sizeof(char); return pageContentSize; } - uint64_t CacheEngine::calculateMemoryUsage() { + uint64_t CacheEngine::calculateMemoryUsage() + { auto pagesBaseSize = m_Pages.size() * sizeof(CacheValue); uint64_t pageContentSizes = 0; @@ -34,24 +39,30 @@ namespace cs { return pagesBaseSize + pageContentSizes; } - const std::string& CacheEngine::fetch(const std::string& path) { + const std::string& CacheEngine::fetch(const std::string& path) + { bool load = false; auto find = m_Pages.find(path); - if (find == m_Pages.end()){ + if (find == m_Pages.end()) + { BLT_DEBUG("Page '%s' was not found in cache, loading now!", path.c_str()); load = true; - } else { + } else + { auto lastWrite = std::filesystem::last_write_time(cs::fs::createWebFilePath(path)); - if (lastWrite != m_Pages[path].lastModified) { + if (lastWrite != m_Pages[path].lastModified) + { load = true; BLT_DEBUG("Page '%s' has been modified! Reloading now!", path.c_str()); } } - if (load) { + if (load) + { auto memory = calculateMemoryUsage(); - if (memory > m_Settings.hardMaxMemory) { + if (memory > m_Settings.hardMaxMemory) + { BLT_WARN("Hard memory limit was reached! Pruning to soft limit now!"); prune( m_Settings.hardMaxMemory - m_Settings.softMaxMemory @@ -59,7 +70,8 @@ namespace cs { ); } - if (memory > m_Settings.softMaxMemory) { + if (memory > m_Settings.softMaxMemory) + { auto amount = std::min(m_Settings.softPruneAmount, memory - m_Settings.softMaxMemory); BLT_INFO("Soft memory limit was reached! Pruning %d bytes of memory", amount); prune(amount); @@ -73,11 +85,13 @@ namespace cs { return m_Pages[path].renderedPage; } - void CacheEngine::loadPage(const std::string& path) { + void CacheEngine::loadPage(const std::string& path) + { auto start = blt::system::getCurrentTimeNanoseconds(); auto fullPath = cs::fs::createWebFilePath(path); auto page = HTMLPage::load(fullPath); + resolveLinks(path, *page); auto renderedPage = page->render(m_Context); m_Pages[path] = CacheValue{ blt::system::getCurrentTimeNanoseconds(), @@ -90,8 +104,10 @@ namespace cs { BLT_INFO("Loaded page %s in %fms", path.c_str(), (end - start) / 1000000.0); } - void CacheEngine::prune(uint64_t amount) { - struct CacheSorting_t { + void CacheEngine::prune(uint64_t amount) + { + struct CacheSorting_t + { uint64_t memoryUsage; std::string key; }; @@ -100,15 +116,19 @@ namespace cs { for (auto& page : m_Pages) cachedPages.emplace_back(calculateMemoryUsage(page.first, page.second), page.first); - std::sort(cachedPages.begin(), cachedPages.end(), [&](const CacheSorting_t& i1, const CacheSorting_t& i2) -> bool { - return m_Pages[i1.key].cacheTime < m_Pages[i2.key].cacheTime; - }); + std::sort( + cachedPages.begin(), cachedPages.end(), [&](const CacheSorting_t& i1, const CacheSorting_t& i2) -> bool { + return m_Pages[i1.key].cacheTime < m_Pages[i2.key].cacheTime; + } + ); uint64_t prunedAmount = 0; uint64_t prunedPages = 0; - while (prunedAmount < amount){ + while (prunedAmount < amount) + { auto page = cachedPages[0]; - BLT_TRACE("Pruning page (%d bytes) aged %f seconds", page.memoryUsage, toSeconds(blt::system::getCurrentTimeNanoseconds() - m_Pages[page.key].cacheTime)); + BLT_TRACE("Pruning page (%d bytes) aged %f seconds", page.memoryUsage, + toSeconds(blt::system::getCurrentTimeNanoseconds() - m_Pages[page.key].cacheTime)); prunedAmount += page.memoryUsage; m_Pages.erase(page.key); prunedPages++; @@ -117,5 +137,49 @@ namespace cs { BLT_INFO("Pruned %d pages", prunedPages); } + void CacheEngine::resolveLinks(const std::string& file, HTMLPage& page) + { + StringLexer lexer(page.getRawSite()); + std::string resolvedSite; + + const std::string valid_file_endings[3] = { + ".css", + ".js", + ".part", + }; + + while (lexer.hasNext()) + { + if (lexer.hasTemplatePrefix('@')) + { + lexer.consumeTemplatePrefix(); + std::string token; + while (!lexer.hasTemplateSuffix()) { + if (!lexer.hasNext()) { + BLT_WARN("Invalid template syntax. EOF occurred before template was fully processed!"); + break; + } + token += lexer.consume(); + } + lexer.consumeTemplateSuffix(); + for (const auto& suffix : valid_file_endings){ + if (token.ends_with(suffix)) { + auto path = cs::fs::createWebFilePath(token); + if (path == file){ + BLT_WARN("Recursive load detected!"); + BLT_WARN("Caching Engine will ignore this attempt, however, it is recommended that you remove the recursive call."); + BLT_WARN("Detected in file '%s' offending link '%s'", file.c_str(), token.c_str()); + } + resolvedSite += fetch(path); + break; + } + } + } else + resolvedSite += lexer.consume(); + } + + page.getRawSite() = resolvedSite; + } + } \ No newline at end of file diff --git a/src/crowsite/site/web.cpp b/src/crowsite/site/web.cpp index dfe6d04..286739f 100644 --- a/src/crowsite/site/web.cpp +++ b/src/crowsite/site/web.cpp @@ -10,88 +10,48 @@ #include <sstream> #include <algorithm> -namespace cs { +namespace cs +{ - class LexerSyntaxException : public std::runtime_error { + class LexerSyntaxException : public std::runtime_error + { public: explicit LexerSyntaxException(const std::string& token): std::runtime_error( "Extended-mustache syntax error! An opening '{{' must be closed by '}}'! (near: '" + token + "')" - ) {} + ) + {} }; - class LexerException : public std::runtime_error { + class LexerException : public std::runtime_error + { public: explicit LexerException(const std::string& message): - std::runtime_error("Extended-mustache syntax processing error! " + message) {} + std::runtime_error("Extended-mustache syntax processing error! " + message) + {} }; - class SyntaxException : public std::runtime_error { + class SyntaxException : public std::runtime_error + { public: explicit SyntaxException(): std::runtime_error( "Extended-mustache syntax error! Static context keys should not contain $" - ) {} + ) + {} }; - class StringLexer { - private: - const std::string& str; - size_t pos = 0; - public: - explicit StringLexer(const std::string& str): str(str) {} - - inline char nextToken() { - if (pos >= str.size()) - return '\0'; - return str[pos++]; - } - - inline bool hasTokens() { - return pos < str.size(); - } - /** - * Tries to find the string 'match' and outputs all found characters to 'out' - * @param match string to match against - * @param out characters 'tokens' read by the lexer - * @return true if found false otherwise; - */ - inline bool findNext(const std::string& match, std::string& out) { - char c; - size_t p = 0; - std::string found; - while ((c = nextToken())) { - // check for match, p should be 0 here! - if (c == match[p]) { - do { - found += c; - // emit token - out += c; - if (found == match){ - // TODO? - } - if (c != match[p++]){ - p = 0; - found = ""; - break; - } - } while ((c = nextToken())); - } else // emit token - out += c; - } - return false; - } - }; - - std::unique_ptr<HTMLPage> HTMLPage::load(const std::string& path) { + std::unique_ptr<HTMLPage> HTMLPage::load(const std::string& path) + { std::string htmlSource; std::ifstream htmlFile; if (!htmlFile.good()) BLT_ERROR("Input stream not good!\n"); // ensure we can throw exceptions: htmlFile.exceptions(std::ifstream::failbit | std::ifstream::badbit); - try { + try + { // open file htmlFile.open(path); std::stringstream htmlAsStream; @@ -101,7 +61,8 @@ namespace cs { htmlFile.close(); // convert stream into std::string htmlSource = htmlAsStream.str(); - } catch (std::ifstream::failure& e) { + } catch (std::ifstream::failure& e) + { BLT_ERROR("Unable to read file '%s'!\n", path.c_str()); BLT_ERROR("Exception: %s", e.what()); throw std::runtime_error("Failed to read file!\n"); @@ -109,63 +70,53 @@ namespace cs { return std::make_unique<HTMLPage>(HTMLPage(htmlSource)); } - HTMLPage::HTMLPage(std::string siteData): m_SiteData(std::move(siteData)) {} + HTMLPage::HTMLPage(std::string siteData): m_SiteData(std::move(siteData)) + {} - std::string HTMLPage::render(StaticContext& context) { + std::string HTMLPage::render(StaticContext& context) + { std::string processedSiteData = m_SiteData; std::string buffer; StringLexer lexer(processedSiteData); - while (lexer.hasTokens()) { - char c; - switch ((c = lexer.nextToken())) { - case '{': - // if we are dealing with a mustache template then we should process - if ((c = lexer.nextToken()) == '{') { - // if it is not the extended syntax we are looking for, skip it as crow will handle it at request time! - if ((c = lexer.nextToken()) != '$') { - buffer += "{{"; - buffer += c; - break; - } - std::string tokenString; - while ((c = lexer.nextToken())) { - if (c == '}') { - if (lexer.nextToken() != '}') - throw LexerSyntaxException(tokenString); - else { - if (std::find_if( - context.begin(), context.end(), - [&tokenString](auto in) -> bool { - return tokenString == in.first; - } - ) == context.end()) { - // unable to find the token, we should throw an error to tell the user! (or admin in this case) - BLT_WARN("Unable to find token '%s'!", tokenString.c_str()); - } else - buffer += context[tokenString]; - break; - } - } - tokenString += c; - } - } else { // otherwise we should write out the characters since this isn't a extended template - buffer += '{'; - buffer += c; + while (lexer.hasNext()) + { + if (lexer.hasTemplatePrefix('$')) + { + lexer.consumeTemplatePrefix(); + std::string token; + while (!lexer.hasTemplateSuffix()) + { + if (!lexer.hasNext()) + { + BLT_FATAL("Invalid template syntax. EOF occurred before template was fully processed!"); + throw LexerSyntaxException(token); } - break; - default: - buffer += c; - break; - } + token += lexer.consume(); + } + lexer.consumeTemplateSuffix(); + if (std::find_if( + context.begin(), context.end(), + [&token](auto in) -> bool { + return token == in.first; + } + ) == context.end()) + { + // unable to find the token, we should throw an error to tell the user! (or admin in this case) + BLT_WARN("Unable to find token '%s'!", token.c_str()); + } else + buffer += context[token]; + } else + buffer += lexer.consume(); } return buffer; } - void HTMLPage::resolveResources() { + void HTMLPage::resolveResources() + { } } diff --git a/src/crowsite/utility.cpp b/src/crowsite/utility.cpp index 6446123..9a3343c 100644 --- a/src/crowsite/utility.cpp +++ b/src/crowsite/utility.cpp @@ -5,12 +5,16 @@ #include <blt/std/string.h> #include <curl/curl.h> -namespace cs { +namespace cs +{ - namespace parser { - Post::Post(const std::string& input) { + namespace parser + { + Post::Post(const std::string& input) + { auto pairs = blt::string::split(input, "&"); - for (const auto& pair : pairs) { + for (const auto& pair : pairs) + { auto kv = blt::string::split(pair, "="); auto key = kv[0]; auto value = kv[1]; @@ -22,13 +26,16 @@ namespace cs { } } - const std::string& Post::operator[](const std::string& key) { + const std::string& Post::operator[](const std::string& key) + { return m_Values[key]; } - std::string Post::dump() { + std::string Post::dump() + { std::string out; - for (const auto& pair : m_Values) { + for (const auto& pair : m_Values) + { out += "["; out += pair.first; out += "] = "; @@ -44,8 +51,10 @@ namespace cs { } } - namespace fs { - std::string createStaticFilePath(const std::string& file) { + namespace fs + { + std::string createStaticFilePath(const std::string& file) + { auto path = std::string(CROW_STATIC_DIRECTORY); if (!path.ends_with('/')) path += '/'; @@ -55,7 +64,9 @@ namespace cs { throw std::runtime_error("Unable to create file path because file does not exist!"); return path; } - std::string createWebFilePath(const std::string& file){ + + std::string createWebFilePath(const std::string& file) + { auto path = std::string(SITE_FILES_PATH); if (!path.ends_with('/')) path += '/'; diff --git a/src/main.cpp b/src/main.cpp index a158511..b786452 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -123,23 +123,39 @@ crow::response handle_root_page(const site_params& params) // BLT_TRACE("URL: %s = %s", v.c_str(), req.url_params.get(v)); if (params.name.ends_with(".html")) { + checkAndUpdateUserSession(params.app, params.req); + auto& session = params.app.get_context<Session>(params.req); + auto s_clientID = session.get("clientID", ""); + auto s_clientToken = session.get("clientToken", ""); + auto user_perms = cs::getUserPermissions(cs::getUserFromID(s_clientID)); + crow::mustache::context ctx; - // we don't want to pass all get parameters to the context to prevent leaking + + // pass perms in + if (user_perms & cs::PERM_ADMIN) + ctx["_admin"] = true; + + if (cs::isUserLoggedIn(s_clientID, s_clientToken)) + { + ctx["_logged_in"] = true; + } + + // we don't want to pass all get parameters to the context to prevent leaking information auto referer = params.req.url_params.get("referer"); if (referer) ctx["referer"] = referer; auto page = crow::mustache::compile(params.engine.fetch(params.name)); return page.render(ctx); } - -// crow::mustache::context ctx({{"person", name}}); -// auto user_page = crow::mustache::compile(engine.fetch("index.html")); return params.engine.fetch("default.html"); } -crow::response handle_auth_page(const site_params& params, uint32_t required_perms) +crow::response handle_auth_page(const site_params& params) { + if (isUserAdmin(params.app, params.req)) + return redirect("/login.html"); + return handle_root_page(params); @@ -247,6 +263,7 @@ int main(int argc, const char** argv) if (!cs::storeUserData(pp["username"], user_agent, data)) { BLT_ERROR("Failed to update user data"); + return redirect("login.html"); } session.set("clientID", data.clientID);