Upgrade StringLexer with better design principles.

Strings are now processed in a much clearer, more intuitive way.
2023-08-20 20:24:11 -04:00 · 2023-08-20 20:24:11 -04:00 · dd96940c67
parent 22a89031c3
commit dd96940c67
13 changed files with 242 additions and 144 deletions
--- a/crow_test/data/db/users.sqlite
+++ b/crow_test/data/db/users.sqlite
--- a/crow_test/data/session/.expirations
+++ b/crow_test/data/session/.expirations
@ -1,2 +1 @@
-l5yQfzNDLXuq6Ic1 1692481044
+aubTl45vzPn5feHw 1692660008
 7IgWbfRRG3liKhkP 1692489339
--- a/crow_test/data/session/7IgWbfRRG3liKhkP.json
+++ b/crow_test/data/session/7IgWbfRRG3liKhkP.json
@ -1 +0,0 @@
 {"clientID":"50a21c33-66c4-5a0f-902f-9434632025e6","clientToken":"TF/rwm67DntB0hrdGiPpYRPFvnZ786r8nrZ4+WQ6wUang4xbqNaZ0AUpXKcHeswaC+IwR0891JZtXP+4XcHsQA=="}
--- a/crow_test/data/session/aubTl45vzPn5feHw.json
+++ b/crow_test/data/session/aubTl45vzPn5feHw.json
@ -0,0 +1 @@
 {"clientID":"50a21c33-66c4-5a0f-902f-9434632025e6","clientToken":"Tcl8i/S1Czz+UGS6NzeRu/Hyk66oJjYbsRsm3tPqd/AVt2yAVbFEEi/oGdaoIlTriQf5TX7heYPxqdcGMmLRVg=="}
--- a/crow_test/data/session/l5yQfzNDLXuq6Ic1.json
+++ b/crow_test/data/session/l5yQfzNDLXuq6Ic1.json
@ -1 +0,0 @@
 {"clientID":"50a21c33-66c4-5a0f-902f-9434632025e6","clientToken":"qJNyHpcA1b1EY6/so2rH3FhBilMKDDnCJSliaeytHMh4V0kj2yy4Mb1UEO7dW/uYdgfNWn73dwCrioovCe1NHg=="}
--- a/crow_test/webcontent/index.html
+++ b/crow_test/webcontent/index.html
@ -3,6 +3,12 @@
  <head>
    <link rel="stylesheet" href="/static/css/home.css">
    <link rel="stylesheet" href="/static/css/bar.css">
    <link rel="javascript" href="cdnjs.cloudflare.com/ajax/libs/jquery/2.1.3/jquery.min.js">
    <meta charset="UTF-8">
    <meta name="description" content="">
    <meta name="keywords" content="">
    <meta name="author" content="Brett">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>{{$SITE_TITLE}}</title>
  </head>
  <body class="blur-bgimage" style="background-image: url({{$SITE_BACKGROUND}})">
@ -14,6 +20,9 @@
            </div>
            <div class="center">
              HAXsdsad
              {{#_admin}}
                Admin detected
              {{/_admin}}
            </div>
        </div>
    </div>
--- a/crow_test/webcontent/not_authorized.response
+++ b/crow_test/webcontent/not_authorized.response
--- a/include/crowsite/site/cache.h
+++ b/include/crowsite/site/cache.h
@ -39,6 +39,7 @@ namespace cs {
             * @return memory usage of the pages cache in bytes
             */
            uint64_t calculateMemoryUsage();
            void resolveLinks(const std::string& file, HTMLPage& page);
            void loadPage(const std::string& path);
            /**
             * Prunes the cache starting with the oldest pages we have loaded. (in bytes)
--- a/include/crowsite/site/web.h
+++ b/include/crowsite/site/web.h
@ -8,9 +8,56 @@
 #include <memory>
 #include <string>
 #include <crowsite/config.h>
 #include <utility>
 namespace cs {
    /**
     * Forward-only, character-at-a-time lexer over an owned copy of a string.
     *
     * Besides plain character consumption it recognises the template delimiters
     * used by the site pages: a three character prefix `{{X` (where X is a caller
     * supplied marker such as '$' or '@') and the two character suffix `}}`.
     *
     * Invariant: the cursor never moves past the end of the input, so every
     * member is safe to call in any order, including at end of input.
     */
    struct StringLexer
    {
        private:
            std::string str;
            size_t index = 0;
            
            /**
             * @return number of characters that have not been consumed yet
             */
            [[nodiscard]] size_t remaining() const
            {
                return str.size() - index;
            }
            
            /**
             * Moves the cursor forward by `count` characters, stopping at the end of the input.
             */
            void advance(size_t count)
            {
                const size_t left = remaining();
                index += count < left ? count : left;
            }
        
        public:
            /**
             * @param str text to lex; taken by value and moved into the lexer
             */
            explicit StringLexer(std::string str): str(std::move(str))
            {}
            
            /**
             * @return true while at least one unconsumed character is left
             */
            [[nodiscard]] bool hasNext() const
            {
                return index < str.size();
            }
            
            /**
             * @param c marker character expected right after the opening braces
             * @return true if the unconsumed input starts with `{{` followed by `c`
             */
            [[nodiscard]] bool hasTemplatePrefix(char c) const
            {
                // a prefix is three characters long: '{', '{' and the marker
                if (remaining() < 3)
                    return false;
                return str[index] == '{' && str[index + 1] == '{' && str[index + 2] == c;
            }
            
            /**
             * @return true if the unconsumed input starts with `}}`
             */
            [[nodiscard]] bool hasTemplateSuffix() const
            {
                if (remaining() < 2)
                    return false;
                return str[index] == '}' && str[index + 1] == '}';
            }
            
            /**
             * Skips the three characters of a template prefix. Intended to be called after
             * hasTemplatePrefix() returned true; the cursor is clamped to the end of the input.
             */
            void consumeTemplatePrefix()
            {
                advance(3);
            }
            
            /**
             * Skips the two characters of a template suffix. Intended to be called after
             * hasTemplateSuffix() returned true; the cursor is clamped to the end of the input.
             */
            void consumeTemplateSuffix()
            {
                advance(2);
            }
            
            /**
             * Reads the next character and moves past it.
             * @return the character, or '\0' when the input is exhausted (the cursor does not move then)
             */
            char consume()
            {
                // guard against reading beyond the end; indexing past size() is undefined behaviour
                if (!hasNext())
                    return '\0';
                return str[index++];
            }
    };
    class StaticContext {
        private:
            HASHMAP<std::string, std::string> replacements;
@ -46,7 +93,7 @@ namespace cs {
             */
            std::string render(StaticContext& context);
-            inline std::string const& getRawSite() {
+            inline std::string& getRawSite() {
                return m_SiteData;
            }
    };
--- a/src/crowsite/site/cache.cpp
+++ b/src/crowsite/site/cache.cpp
@ -8,23 +8,28 @@
 #include <algorithm>
 #include <blt/std/time.h>
-namespace cs {
+namespace cs
 {
-    double toSeconds(uint64_t v){
+    double toSeconds(uint64_t v)
-        return (double)(v) / 1000000000.0;
+    {
        return (double) (v) / 1000000000.0;
    }
    CacheEngine::CacheEngine(StaticContext& context, const CacheSettings& settings): m_Context(context),
-                                                                                     m_Settings((settings)) {}
+                                                                                     m_Settings((settings))
    {}
-    uint64_t CacheEngine::calculateMemoryUsage(const std::string& path, const CacheEngine::CacheValue& value) {
+    uint64_t CacheEngine::calculateMemoryUsage(const std::string& path, const CacheEngine::CacheValue& value)
    {
        uint64_t pageContentSize = path.size() * sizeof(char);
        pageContentSize += value.page->getRawSite().size() * sizeof(char);
        pageContentSize += value.renderedPage.size() * sizeof(char);
        return pageContentSize;
    }
-    uint64_t CacheEngine::calculateMemoryUsage() {
+    uint64_t CacheEngine::calculateMemoryUsage()
    {
        auto pagesBaseSize = m_Pages.size() * sizeof(CacheValue);
        uint64_t pageContentSizes = 0;
@ -34,24 +39,30 @@ namespace cs {
        return pagesBaseSize + pageContentSizes;
    }
-    const std::string& CacheEngine::fetch(const std::string& path) {
+    const std::string& CacheEngine::fetch(const std::string& path)
    {
        bool load = false;
        auto find = m_Pages.find(path);
-        if (find == m_Pages.end()){
+        if (find == m_Pages.end())
        {
            BLT_DEBUG("Page '%s' was not found in cache, loading now!", path.c_str());
            load = true;
-        } else {
+        } else
        {
            auto lastWrite = std::filesystem::last_write_time(cs::fs::createWebFilePath(path));
-            if (lastWrite != m_Pages[path].lastModified) {
+            if (lastWrite != m_Pages[path].lastModified)
            {
                load = true;
                BLT_DEBUG("Page '%s' has been modified! Reloading now!", path.c_str());
            }
        }
-        if (load) {
+        if (load)
        {
            auto memory = calculateMemoryUsage();
-            if (memory > m_Settings.hardMaxMemory) {
+            if (memory > m_Settings.hardMaxMemory)
            {
                BLT_WARN("Hard memory limit was reached! Pruning to soft limit now!");
                prune(
                        m_Settings.hardMaxMemory - m_Settings.softMaxMemory
@ -59,7 +70,8 @@ namespace cs {
                );
            }
-            if (memory > m_Settings.softMaxMemory) {
+            if (memory > m_Settings.softMaxMemory)
            {
                auto amount = std::min(m_Settings.softPruneAmount, memory - m_Settings.softMaxMemory);
                BLT_INFO("Soft memory limit was reached! Pruning %d bytes of memory", amount);
                prune(amount);
@ -73,11 +85,13 @@ namespace cs {
        return m_Pages[path].renderedPage;
    }
-    void CacheEngine::loadPage(const std::string& path) {
+    void CacheEngine::loadPage(const std::string& path)
    {
        auto start = blt::system::getCurrentTimeNanoseconds();
        auto fullPath = cs::fs::createWebFilePath(path);
        auto page = HTMLPage::load(fullPath);
        resolveLinks(path, *page);
        auto renderedPage = page->render(m_Context);
        m_Pages[path] = CacheValue{
                blt::system::getCurrentTimeNanoseconds(),
@ -90,8 +104,10 @@ namespace cs {
        BLT_INFO("Loaded page %s in %fms", path.c_str(), (end - start) / 1000000.0);
    }
-    void CacheEngine::prune(uint64_t amount) {
+    void CacheEngine::prune(uint64_t amount)
-        struct CacheSorting_t {
+    {
        struct CacheSorting_t
        {
            uint64_t memoryUsage;
            std::string key;
        };
@ -100,15 +116,19 @@ namespace cs {
        for (auto& page : m_Pages)
            cachedPages.emplace_back(calculateMemoryUsage(page.first, page.second), page.first);
-        std::sort(cachedPages.begin(), cachedPages.end(), [&](const CacheSorting_t& i1, const CacheSorting_t& i2) -> bool {
+        std::sort(
-            return m_Pages[i1.key].cacheTime < m_Pages[i2.key].cacheTime;
+                cachedPages.begin(), cachedPages.end(), [&](const CacheSorting_t& i1, const CacheSorting_t& i2) -> bool {
-        });
+                    return m_Pages[i1.key].cacheTime < m_Pages[i2.key].cacheTime;
                }
        );
        uint64_t prunedAmount = 0;
        uint64_t prunedPages = 0;
-        while (prunedAmount < amount){
+        while (prunedAmount < amount)
        {
            auto page = cachedPages[0];
-            BLT_TRACE("Pruning page (%d bytes) aged %f seconds", page.memoryUsage, toSeconds(blt::system::getCurrentTimeNanoseconds() - m_Pages[page.key].cacheTime));
+            BLT_TRACE("Pruning page (%d bytes) aged %f seconds", page.memoryUsage,
                      toSeconds(blt::system::getCurrentTimeNanoseconds() - m_Pages[page.key].cacheTime));
            prunedAmount += page.memoryUsage;
            m_Pages.erase(page.key);
            prunedPages++;
@ -117,5 +137,49 @@ namespace cs {
        BLT_INFO("Pruned %d pages", prunedPages);
    }
    /**
     * Expands `{{@name}}` include directives in the raw source of `page`.
     *
     * The raw site text is scanned once. Ordinary characters are copied through unchanged.
     * A directive whose name ends in one of the accepted file endings is replaced by the
     * result of fetch() for that file; a directive with any other ending is removed from
     * the output without replacement. The page's raw site is overwritten with the result.
     *
     * @param file name of the page being resolved, used to detect a page that includes itself
     * @param page page whose raw site data is rewritten in place
     */
    void CacheEngine::resolveLinks(const std::string& file, HTMLPage& page)
    {
        // endings of the files that may be pulled into a page
        static constexpr const char* valid_file_endings[] = {
                ".css",
                ".js",
                ".part",
        };
        
        StringLexer lexer(page.getRawSite());
        std::string resolvedSite;
        
        while (lexer.hasNext())
        {
            if (!lexer.hasTemplatePrefix('@'))
            {
                resolvedSite += lexer.consume();
                continue;
            }
            
            lexer.consumeTemplatePrefix();
            
            // everything up to the closing '}}' is the name of the linked file
            std::string token;
            while (lexer.hasNext() && !lexer.hasTemplateSuffix())
                token += lexer.consume();
            
            if (!lexer.hasTemplateSuffix())
            {
                // unterminated directive: nothing is left to scan, and the partial token must not be treated as a link
                BLT_WARN("Invalid template syntax. EOF occurred before template was fully processed!");
                break;
            }
            lexer.consumeTemplateSuffix();
            
            for (const char* suffix : valid_file_endings)
            {
                if (!token.ends_with(suffix))
                    continue;
                
                auto path = cs::fs::createWebFilePath(token);
                // NOTE(review): `file` is the name handed to loadPage(), which looks like it is relative to the web
                // root while `path` is prefixed with it, so the unresolved token is compared as well - confirm.
                if (path == file || token == file)
                {
                    BLT_WARN("Recursive load detected!");
                    BLT_WARN("Caching Engine will ignore this attempt, however, it is recommended that you remove the recursive call.");
                    BLT_WARN("Detected in file '%s' offending link '%s'", file.c_str(), token.c_str());
                    // actually ignore the link; fetching it would load this page again and recurse without end
                    break;
                }
                // NOTE(review): fetch() itself passes its argument through createWebFilePath(); verify that handing it
                // the already-resolved `path` (rather than `token`) is intended.
                resolvedSite += fetch(path);
                break;
            }
        }
        
        page.getRawSite() = std::move(resolvedSite);
    }
 }
--- a/src/crowsite/site/web.cpp
+++ b/src/crowsite/site/web.cpp
@ -10,88 +10,48 @@
 #include <sstream>
 #include <algorithm>
-namespace cs {
+namespace cs
 {
-    class LexerSyntaxException : public std::runtime_error {
+    class LexerSyntaxException : public std::runtime_error
    {
        public:
            explicit LexerSyntaxException(const std::string& token):
                    std::runtime_error(
                            "Extended-mustache syntax error! An opening '{{' must be closed by '}}'! (near: '" +
                            token + "')"
-                    ) {}
+                    )
            {}
    };
-    class LexerException : public std::runtime_error {
+    class LexerException : public std::runtime_error
    {
        public:
            explicit LexerException(const std::string& message):
-                    std::runtime_error("Extended-mustache syntax processing error! " + message) {}
+                    std::runtime_error("Extended-mustache syntax processing error! " + message)
            {}
    };
-    class SyntaxException : public std::runtime_error {
+    class SyntaxException : public std::runtime_error
    {
        public:
            explicit SyntaxException():
                    std::runtime_error(
                            "Extended-mustache syntax error! Static context keys should not contain $"
-                    ) {}
+                    )
            {}
    };
-    class StringLexer {
+    std::unique_ptr<HTMLPage> HTMLPage::load(const std::string& path)
-        private:
+    {
            const std::string& str;
            size_t pos = 0;
        public:
            explicit StringLexer(const std::string& str): str(str) {}
            inline char nextToken() {
                if (pos >= str.size())
                    return '\0';
                return str[pos++];
            }
            inline bool hasTokens() {
                return pos < str.size();
            }
            /**
             * Tries to find the string 'match' and outputs all found characters to 'out'
             * @param match string to match against
             * @param out characters 'tokens' read by the lexer
             * @return true if found false otherwise;
             */
            inline bool findNext(const std::string& match, std::string& out) {
                char c;
                size_t p = 0;
                std::string found;
                while ((c = nextToken())) {
                    // check for match, p should be 0 here!
                    if (c == match[p]) {
                        do {
                            found += c;
                            // emit token
                            out += c;
                            if (found == match){
                                // TODO?
                            }
                            if (c != match[p++]){
                                p = 0;
                                found = "";
                                break;
                            }
                        } while ((c = nextToken()));
                    } else // emit token
                        out += c;
                }
                return false;
            }
    };
    std::unique_ptr<HTMLPage> HTMLPage::load(const std::string& path) {
        std::string htmlSource;
        std::ifstream htmlFile;
        if (!htmlFile.good())
            BLT_ERROR("Input stream not good!\n");
        // ensure we can throw exceptions:
        htmlFile.exceptions(std::ifstream::failbit | std::ifstream::badbit);
-        try {
+        try
        {
            // open file
            htmlFile.open(path);
            std::stringstream htmlAsStream;
@ -101,7 +61,8 @@ namespace cs {
            htmlFile.close();
            // convert stream into std::string
            htmlSource = htmlAsStream.str();
-        } catch (std::ifstream::failure& e) {
+        } catch (std::ifstream::failure& e)
        {
            BLT_ERROR("Unable to read file '%s'!\n", path.c_str());
            BLT_ERROR("Exception: %s", e.what());
            throw std::runtime_error("Failed to read file!\n");
@ -109,63 +70,53 @@ namespace cs {
        return std::make_unique<HTMLPage>(HTMLPage(htmlSource));
    }
-    HTMLPage::HTMLPage(std::string siteData): m_SiteData(std::move(siteData)) {}
+    HTMLPage::HTMLPage(std::string siteData): m_SiteData(std::move(siteData))
    {}
-    std::string HTMLPage::render(StaticContext& context) {
+    std::string HTMLPage::render(StaticContext& context)
    {
        std::string processedSiteData = m_SiteData;
        std::string buffer;
        StringLexer lexer(processedSiteData);
-        while (lexer.hasTokens()) {
+        while (lexer.hasNext())
-            char c;
+        {
-            switch ((c = lexer.nextToken())) {
+            if (lexer.hasTemplatePrefix('$'))
-                case '{':
+            {
-                    // if we are dealing with a mustache template then we should process
+                lexer.consumeTemplatePrefix();
-                    if ((c = lexer.nextToken()) == '{') {
+                std::string token;
-                        // if it is not the extended syntax we are looking for, skip it as crow will handle it at request time!
+                while (!lexer.hasTemplateSuffix())
-                        if ((c = lexer.nextToken()) != '$') {
+                {
-                            buffer += "{{";
+                    if (!lexer.hasNext())
-                            buffer += c;
+                    {
-                            break;
+                        BLT_FATAL("Invalid template syntax. EOF occurred before template was fully processed!");
-                        }
+                        throw LexerSyntaxException(token);
                        std::string tokenString;
                        while ((c = lexer.nextToken())) {
                            if (c == '}') {
                                if (lexer.nextToken() != '}')
                                    throw LexerSyntaxException(tokenString);
                                else {
                                    if (std::find_if(
                                            context.begin(), context.end(),
                                            [&tokenString](auto in) -> bool {
                                                return tokenString == in.first;
                                            }
                                    ) == context.end()) {
                                        // unable to find the token, we should throw an error to tell the user! (or admin in this case)
                                        BLT_WARN("Unable to find token '%s'!", tokenString.c_str());
                                    } else
                                        buffer += context[tokenString];
                                    break;
                                }
                            }
                            tokenString += c;
                        }
                    } else { // otherwise we should write out the characters since this isn't a extended template
                        buffer += '{';
                        buffer += c;
                    }
-                    break;
+                    token += lexer.consume();
-                default:
+                }
-                    buffer += c;
+                lexer.consumeTemplateSuffix();
-                    break;
+                if (std::find_if(
-            }
+                        context.begin(), context.end(),
                        [&token](auto in) -> bool {
                            return token == in.first;
                        }
                ) == context.end())
                {
                    // unable to find the token, we should throw an error to tell the user! (or admin in this case)
                    BLT_WARN("Unable to find token '%s'!", token.c_str());
                } else
                    buffer += context[token];
            } else
                buffer += lexer.consume();
        }
        return buffer;
    }
-    void HTMLPage::resolveResources() {
+    void HTMLPage::resolveResources()
    {
    }
 }
--- a/src/crowsite/utility.cpp
+++ b/src/crowsite/utility.cpp
@ -5,12 +5,16 @@
 #include <blt/std/string.h>
 #include <curl/curl.h>
-namespace cs {
+namespace cs
 {
-    namespace parser {
+    namespace parser
-        Post::Post(const std::string& input) {
+    {
        Post::Post(const std::string& input)
        {
            auto pairs = blt::string::split(input, "&");
-            for (const auto& pair : pairs) {
+            for (const auto& pair : pairs)
            {
                auto kv = blt::string::split(pair, "=");
                auto key = kv[0];
                auto value = kv[1];
@ -22,13 +26,16 @@ namespace cs {
            }
        }
-        const std::string& Post::operator[](const std::string& key) {
+        const std::string& Post::operator[](const std::string& key)
        {
            return m_Values[key];
        }
-        std::string Post::dump() {
+        std::string Post::dump()
        {
            std::string out;
-            for (const auto& pair : m_Values) {
+            for (const auto& pair : m_Values)
            {
                out += "[";
                out += pair.first;
                out += "] = ";
@ -44,8 +51,10 @@ namespace cs {
        }
    }
-    namespace fs {
+    namespace fs
-        std::string createStaticFilePath(const std::string& file) {
+    {
        std::string createStaticFilePath(const std::string& file)
        {
            auto path = std::string(CROW_STATIC_DIRECTORY);
            if (!path.ends_with('/'))
                path += '/';
@ -55,7 +64,9 @@ namespace cs {
                throw std::runtime_error("Unable to create file path because file does not exist!");
            return path;
        }
-        std::string createWebFilePath(const std::string& file){
+        
        std::string createWebFilePath(const std::string& file)
        {
            auto path = std::string(SITE_FILES_PATH);
            if (!path.ends_with('/'))
                path += '/';
--- a/src/main.cpp
+++ b/src/main.cpp
@ -123,23 +123,39 @@ crow::response handle_root_page(const site_params& params)
 //                    BLT_TRACE("URL: %s = %s", v.c_str(), req.url_params.get(v));
    if (params.name.ends_with(".html"))
    {
        checkAndUpdateUserSession(params.app, params.req);
        auto& session = params.app.get_context<Session>(params.req);
        auto s_clientID = session.get("clientID", "");
        auto s_clientToken = session.get("clientToken", "");
        auto user_perms = cs::getUserPermissions(cs::getUserFromID(s_clientID));
        crow::mustache::context ctx;
-        // we don't want to pass all get parameters to the context to prevent leaking
+        
        // pass perms in
        if (user_perms & cs::PERM_ADMIN)
            ctx["_admin"] = true;
        if (cs::isUserLoggedIn(s_clientID, s_clientToken))
        {
            ctx["_logged_in"] = true;
        }
        // we don't want to pass all get parameters to the context to prevent leaking information
        auto referer = params.req.url_params.get("referer");
        if (referer)
            ctx["referer"] = referer;
        auto page = crow::mustache::compile(params.engine.fetch(params.name));
        return page.render(ctx);
    }
 //                crow::mustache::context ctx({{"person", name}});
 //                auto user_page = crow::mustache::compile(engine.fetch("index.html"));
    return params.engine.fetch("default.html");
 }
-crow::response handle_auth_page(const site_params& params, uint32_t required_perms)
+crow::response handle_auth_page(const site_params& params)
 {
    if (isUserAdmin(params.app, params.req))
        return redirect("/login.html");
    return handle_root_page(params);
@ -247,6 +263,7 @@ int main(int argc, const char** argv)
                    if (!cs::storeUserData(pp["username"], user_agent, data))
                    {
                        BLT_ERROR("Failed to update user data");
                        return redirect("login.html");
                    }
                    session.set("clientID", data.clientID);
`@ -1,2 +1 @@`
	`l5yQfzNDLXuq6Ic1 1692481044`	`aubTl45vzPn5feHw 1692660008`
	`7IgWbfRRG3liKhkP 1692489339`
		`@ -1 +0,0 @@`
			`{"clientID":"50a21c33-66c4-5a0f-902f-9434632025e6","clientToken":"TF/rwm67DntB0hrdGiPpYRPFvnZ786r8nrZ4+WQ6wUang4xbqNaZ0AUpXKcHeswaC+IwR0891JZtXP+4XcHsQA=="}`
		`@ -0,0 +1 @@`
							`{"clientID":"50a21c33-66c4-5a0f-902f-9434632025e6","clientToken":"Tcl8i/S1Czz+UGS6NzeRu/Hyk66oJjYbsRsm3tPqd/AVt2yAVbFEEi/oGdaoIlTriQf5TX7heYPxqdcGMmLRVg=="}`