Upgrade StringLexer with better design principles.

Strings are now processed in a much clearer, more intuitive way.
2023-08-20 20:24:11 -04:00 · 2023-08-20 20:24:11 -04:00 · dd96940c67
parent 22a89031c3
commit dd96940c67
13 changed files with 242 additions and 144 deletions
--- a/crow_test/data/db/users.sqlite
+++ b/crow_test/data/db/users.sqlite
--- a/crow_test/data/session/.expirations
+++ b/crow_test/data/session/.expirations
@ -1,2 +1 @@
-l5yQfzNDLXuq6Ic1 1692481044
-7IgWbfRRG3liKhkP 1692489339
+aubTl45vzPn5feHw 1692660008
--- a/crow_test/data/session/7IgWbfRRG3liKhkP.json
+++ b/crow_test/data/session/7IgWbfRRG3liKhkP.json
@ -1 +0,0 @@
-{"clientID":"50a21c33-66c4-5a0f-902f-9434632025e6","clientToken":"TF/rwm67DntB0hrdGiPpYRPFvnZ786r8nrZ4+WQ6wUang4xbqNaZ0AUpXKcHeswaC+IwR0891JZtXP+4XcHsQA=="}
--- a/crow_test/data/session/aubTl45vzPn5feHw.json
+++ b/crow_test/data/session/aubTl45vzPn5feHw.json
@ -0,0 +1 @@
+{"clientID":"50a21c33-66c4-5a0f-902f-9434632025e6","clientToken":"Tcl8i/S1Czz+UGS6NzeRu/Hyk66oJjYbsRsm3tPqd/AVt2yAVbFEEi/oGdaoIlTriQf5TX7heYPxqdcGMmLRVg=="}
--- a/crow_test/data/session/l5yQfzNDLXuq6Ic1.json
+++ b/crow_test/data/session/l5yQfzNDLXuq6Ic1.json
@ -1 +0,0 @@
-{"clientID":"50a21c33-66c4-5a0f-902f-9434632025e6","clientToken":"qJNyHpcA1b1EY6/so2rH3FhBilMKDDnCJSliaeytHMh4V0kj2yy4Mb1UEO7dW/uYdgfNWn73dwCrioovCe1NHg=="}
--- a/crow_test/webcontent/index.html
+++ b/crow_test/webcontent/index.html
@ -3,6 +3,12 @@
  <head>
    <link rel="stylesheet" href="/static/css/home.css">
    <link rel="stylesheet" href="/static/css/bar.css">
+    <link rel="javascript" href="cdnjs.cloudflare.com/ajax/libs/jquery/2.1.3/jquery.min.js">
+    <meta charset="UTF-8">
+    <meta name="description" content="">
+    <meta name="keywords" content="">
+    <meta name="author" content="Brett">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>{{$SITE_TITLE}}</title>
  </head>
  <body class="blur-bgimage" style="background-image: url({{$SITE_BACKGROUND}})">
@ -14,6 +20,9 @@
            </div>
            <div class="center">
              HAXsdsad
+              {{#_admin}}
+                Admin detected
+              {{/_admin}}
            </div>
        </div>
    </div>
--- a/crow_test/webcontent/not_authorized.response
+++ b/crow_test/webcontent/not_authorized.response
--- a/include/crowsite/site/cache.h
+++ b/include/crowsite/site/cache.h
@ -39,6 +39,7 @@ namespace cs {
             * @return memory usage of the pages cache in bytes
             */
            uint64_t calculateMemoryUsage();
+            void resolveLinks(const std::string& file, HTMLPage& page);
            void loadPage(const std::string& path);
            /**
             * Prunes the cache starting with the oldest pages we have loaded. (in bytes)
--- a/include/crowsite/site/web.h
+++ b/include/crowsite/site/web.h
@ -8,9 +8,56 @@
 #include <memory>
 #include <string>
 #include <crowsite/config.h>
+#include <utility>

 namespace cs {
    
+    struct StringLexer // Character-at-a-time cursor over an owned string that can recognise {{<c> and }} template delimiters.
+    {
+        private:
+            std::string str; // owned copy of the text being scanned
+            size_t index = 0; // position of the next unread character in str
+        public:
+            explicit StringLexer(std::string str): str(std::move(str)) // takes the text by value and moves it into the lexer
+            {}
+            
+            inline bool hasNext() // true while index is still inside str
+            {
+                if (index >= str.size())
+                    return false;
+                return true;
+            }
+            
+            inline bool hasTemplatePrefix(char c) // true when the next three chars are {{ followed by c; does not advance
+            {
+                if (index + 2 >= str.size()) // need three readable chars: index, index + 1 and index + 2
+                    return false;
+                return str[index] == '{' && str[index + 1] == '{' && str[index + 2] == c;
+            }
+            
+            inline bool hasTemplateSuffix() // true when the next two chars are }}; does not advance
+            {
+                if (index + 1 >= str.size()) // need two readable chars: index and index + 1
+                    return false;
+                return str[index] == '}' && str[index + 1] == '}';
+            }
+            
+            inline void consumeTemplatePrefix() // skips {{ plus the one marker char; performs no bounds check
+            {
+                index += 3;
+            }
+            
+            inline void consumeTemplateSuffix() // skips }}; performs no bounds check
+            {
+                index += 2;
+            }
+            
+            inline char consume() // returns the current char and advances by one
+            {
+                return str[index++]; // NOTE(review): no bounds check here - callers appear to be expected to test hasNext() first
+            }
+    };
+    
    class StaticContext {
        private:
            HASHMAP<std::string, std::string> replacements;
@ -46,7 +93,7 @@ namespace cs {
             */
            std::string render(StaticContext& context);
            
-            inline std::string const& getRawSite() {
+            inline std::string& getRawSite() {
                return m_SiteData;
            }
    };
--- a/src/crowsite/site/cache.cpp
+++ b/src/crowsite/site/cache.cpp
@ -8,23 +8,28 @@
 #include <algorithm>
 #include <blt/std/time.h>

-namespace cs {
+namespace cs
+{
    
-    double toSeconds(uint64_t v){
-        return (double)(v) / 1000000000.0;
+    double toSeconds(uint64_t v)
+    {
+        return (double) (v) / 1000000000.0;
    }
    
    CacheEngine::CacheEngine(StaticContext& context, const CacheSettings& settings): m_Context(context),
-                                                                                     m_Settings((settings)) {}
+                                                                                     m_Settings((settings))
+    {}
    
-    uint64_t CacheEngine::calculateMemoryUsage(const std::string& path, const CacheEngine::CacheValue& value) {
+    uint64_t CacheEngine::calculateMemoryUsage(const std::string& path, const CacheEngine::CacheValue& value)
+    {
        uint64_t pageContentSize = path.size() * sizeof(char);
        pageContentSize += value.page->getRawSite().size() * sizeof(char);
        pageContentSize += value.renderedPage.size() * sizeof(char);
        return pageContentSize;
    }
    
-    uint64_t CacheEngine::calculateMemoryUsage() {
+    uint64_t CacheEngine::calculateMemoryUsage()
+    {
        auto pagesBaseSize = m_Pages.size() * sizeof(CacheValue);
        
        uint64_t pageContentSizes = 0;
@ -34,24 +39,30 @@ namespace cs {
        return pagesBaseSize + pageContentSizes;
    }
    
-    const std::string& CacheEngine::fetch(const std::string& path) {
+    const std::string& CacheEngine::fetch(const std::string& path)
+    {
        bool load = false;
        auto find = m_Pages.find(path);
-        if (find == m_Pages.end()){
+        if (find == m_Pages.end())
+        {
            BLT_DEBUG("Page '%s' was not found in cache, loading now!", path.c_str());
            load = true;
-        } else {
+        } else
+        {
            auto lastWrite = std::filesystem::last_write_time(cs::fs::createWebFilePath(path));
-            if (lastWrite != m_Pages[path].lastModified) {
+            if (lastWrite != m_Pages[path].lastModified)
+            {
                load = true;
                BLT_DEBUG("Page '%s' has been modified! Reloading now!", path.c_str());
            }
        }
        
-        if (load) {
+        if (load)
+        {
            auto memory = calculateMemoryUsage();
            
-            if (memory > m_Settings.hardMaxMemory) {
+            if (memory > m_Settings.hardMaxMemory)
+            {
                BLT_WARN("Hard memory limit was reached! Pruning to soft limit now!");
                prune(
                        m_Settings.hardMaxMemory - m_Settings.softMaxMemory
@ -59,7 +70,8 @@ namespace cs {
                );
            }
            
-            if (memory > m_Settings.softMaxMemory) {
+            if (memory > m_Settings.softMaxMemory)
+            {
                auto amount = std::min(m_Settings.softPruneAmount, memory - m_Settings.softMaxMemory);
                BLT_INFO("Soft memory limit was reached! Pruning %d bytes of memory", amount);
                prune(amount);
@ -73,11 +85,13 @@ namespace cs {
        return m_Pages[path].renderedPage;
    }
    
-    void CacheEngine::loadPage(const std::string& path) {
+    void CacheEngine::loadPage(const std::string& path)
+    {
        auto start = blt::system::getCurrentTimeNanoseconds();
        
        auto fullPath = cs::fs::createWebFilePath(path);
        auto page = HTMLPage::load(fullPath);
+        resolveLinks(path, *page);
        auto renderedPage = page->render(m_Context);
        m_Pages[path] = CacheValue{
                blt::system::getCurrentTimeNanoseconds(),
@ -90,8 +104,10 @@ namespace cs {
        BLT_INFO("Loaded page %s in %fms", path.c_str(), (end - start) / 1000000.0);
    }
    
-    void CacheEngine::prune(uint64_t amount) {
-        struct CacheSorting_t {
+    void CacheEngine::prune(uint64_t amount)
+    {
+        struct CacheSorting_t
+        {
            uint64_t memoryUsage;
            std::string key;
        };
@ -100,15 +116,19 @@ namespace cs {
        for (auto& page : m_Pages)
            cachedPages.emplace_back(calculateMemoryUsage(page.first, page.second), page.first);
        
-        std::sort(cachedPages.begin(), cachedPages.end(), [&](const CacheSorting_t& i1, const CacheSorting_t& i2) -> bool {
+        std::sort(
+                cachedPages.begin(), cachedPages.end(), [&](const CacheSorting_t& i1, const CacheSorting_t& i2) -> bool {
                    return m_Pages[i1.key].cacheTime < m_Pages[i2.key].cacheTime;
-        });
+                }
+        );
        
        uint64_t prunedAmount = 0;
        uint64_t prunedPages = 0;
-        while (prunedAmount < amount){
+        while (prunedAmount < amount)
+        {
            auto page = cachedPages[0];
-            BLT_TRACE("Pruning page (%d bytes) aged %f seconds", page.memoryUsage, toSeconds(blt::system::getCurrentTimeNanoseconds() - m_Pages[page.key].cacheTime));
+            BLT_TRACE("Pruning page (%d bytes) aged %f seconds", page.memoryUsage,
+                      toSeconds(blt::system::getCurrentTimeNanoseconds() - m_Pages[page.key].cacheTime));
            prunedAmount += page.memoryUsage;
            m_Pages.erase(page.key);
            prunedPages++;
@ -117,5 +137,49 @@ namespace cs {
        BLT_INFO("Pruned %d pages", prunedPages);
    }
    
+    void CacheEngine::resolveLinks(const std::string& file, HTMLPage& page) // Replaces each {{@<name>}} tag in the page's raw text with fetch() of that name; all other text is copied through.
+    {
+        StringLexer lexer(page.getRawSite()); // the lexer scans its own copy of the raw page text
+        std::string resolvedSite;
+        
+        const std::string valid_file_endings[3] = { // only tags whose name ends in one of these suffixes are expanded
+                ".css",
+                ".js",
+                ".part",
+        };
+        
+        while (lexer.hasNext())
+        {
+            if (lexer.hasTemplatePrefix('@')) // start of an {{@...}} include tag
+            {
+                lexer.consumeTemplatePrefix();
+                std::string token;
+                while (!lexer.hasTemplateSuffix()) { // collect the tag name up to the closing }}
+                    if (!lexer.hasNext()) {
+                        BLT_WARN("Invalid template syntax. EOF occurred before template was fully processed!");
+                        break; // unterminated tag: stop collecting and keep what was read so far
+                    }
+                    token += lexer.consume();
+                }
+                lexer.consumeTemplateSuffix(); // also runs after the EOF break above; the index then sits past the end and hasNext() ends the outer loop
+                for (const auto& suffix : valid_file_endings){ // a tag matching none of the endings contributes nothing to the output
+                    if (token.ends_with(suffix)) {
+                        auto path = cs::fs::createWebFilePath(token);
+                        if (path == file){ // NOTE(review): the warnings say the attempt is ignored, but fetch(path) below still runs - confirm intent
+                            BLT_WARN("Recursive load detected!");
+                            BLT_WARN("Caching Engine will ignore this attempt, however, it is recommended that you remove the recursive call.");
+                            BLT_WARN("Detected in file '%s' offending link '%s'", file.c_str(), token.c_str());
+                        }
+                        resolvedSite += fetch(path); // NOTE(review): fetch() receives a createWebFilePath() result here, while loadPage passes its un-prefixed path - confirm which form fetch expects
+                        break;
+                    }
+                }
+            } else
+                resolvedSite += lexer.consume(); // ordinary character: copy through unchanged
+        }
+        
+        page.getRawSite() = resolvedSite; // overwrite the page's raw text with the expanded version
+    }
+    
    
 }
--- a/src/crowsite/site/web.cpp
+++ b/src/crowsite/site/web.cpp
@ -10,88 +10,48 @@
 #include <sstream>
 #include <algorithm>

-namespace cs {
+namespace cs
+{
    
-    class LexerSyntaxException : public std::runtime_error {
+    class LexerSyntaxException : public std::runtime_error
+    {
        public:
            explicit LexerSyntaxException(const std::string& token):
                    std::runtime_error(
                            "Extended-mustache syntax error! An opening '{{' must be closed by '}}'! (near: '" +
                            token + "')"
-                    ) {}
+                    )
+            {}
    };
    
-    class LexerException : public std::runtime_error {
+    class LexerException : public std::runtime_error
+    {
        public:
            explicit LexerException(const std::string& message):
-                    std::runtime_error("Extended-mustache syntax processing error! " + message) {}
+                    std::runtime_error("Extended-mustache syntax processing error! " + message)
+            {}
    };
    
-    class SyntaxException : public std::runtime_error {
+    class SyntaxException : public std::runtime_error
+    {
        public:
            explicit SyntaxException():
                    std::runtime_error(
                            "Extended-mustache syntax error! Static context keys should not contain $"
-                    ) {}
+                    )
+            {}
    };
    
-    class StringLexer {
-        private:
-            const std::string& str;
-            size_t pos = 0;
-        public:
-            explicit StringLexer(const std::string& str): str(str) {}
-            
-            inline char nextToken() {
-                if (pos >= str.size())
-                    return '\0';
-                return str[pos++];
-            }
-            
-            inline bool hasTokens() {
-                return pos < str.size();
-            }
-            /**
-             * Tries to find the string 'match' and outputs all found characters to 'out'
-             * @param match string to match against
-             * @param out characters 'tokens' read by the lexer
-             * @return true if found false otherwise;
-             */
-            inline bool findNext(const std::string& match, std::string& out) {
-                char c;
-                size_t p = 0;
-                std::string found;
-                while ((c = nextToken())) {
-                    // check for match, p should be 0 here!
-                    if (c == match[p]) {
-                        do {
-                            found += c;
-                            // emit token
-                            out += c;
-                            if (found == match){
-                                // TODO?
-                            }
-                            if (c != match[p++]){
-                                p = 0;
-                                found = "";
-                                break;
-                            }
-                        } while ((c = nextToken()));
-                    } else // emit token
-                        out += c;
-                }
-                return false;
-            }
-    };
-    
-    std::unique_ptr<HTMLPage> HTMLPage::load(const std::string& path) {
+    std::unique_ptr<HTMLPage> HTMLPage::load(const std::string& path)
+    {
        std::string htmlSource;
        std::ifstream htmlFile;
        if (!htmlFile.good())
            BLT_ERROR("Input stream not good!\n");
        // ensure we can throw exceptions:
        htmlFile.exceptions(std::ifstream::failbit | std::ifstream::badbit);
-        try {
+        try
+        {
            // open file
            htmlFile.open(path);
            std::stringstream htmlAsStream;
@ -101,7 +61,8 @@ namespace cs {
            htmlFile.close();
            // convert stream into std::string
            htmlSource = htmlAsStream.str();
-        } catch (std::ifstream::failure& e) {
+        } catch (std::ifstream::failure& e)
+        {
            BLT_ERROR("Unable to read file '%s'!\n", path.c_str());
            BLT_ERROR("Exception: %s", e.what());
            throw std::runtime_error("Failed to read file!\n");
@ -109,63 +70,53 @@ namespace cs {
        return std::make_unique<HTMLPage>(HTMLPage(htmlSource));
    }
    
-    HTMLPage::HTMLPage(std::string siteData): m_SiteData(std::move(siteData)) {}
+    HTMLPage::HTMLPage(std::string siteData): m_SiteData(std::move(siteData))
+    {}
    
-    std::string HTMLPage::render(StaticContext& context) {
+    std::string HTMLPage::render(StaticContext& context)
+    {
        std::string processedSiteData = m_SiteData;
        
        std::string buffer;
        
        StringLexer lexer(processedSiteData);
        
-        while (lexer.hasTokens()) {
-            char c;
-            switch ((c = lexer.nextToken())) {
-                case '{':
-                    // if we are dealing with a mustache template then we should process
-                    if ((c = lexer.nextToken()) == '{') {
-                        // if it is not the extended syntax we are looking for, skip it as crow will handle it at request time!
-                        if ((c = lexer.nextToken()) != '$') {
-                            buffer += "{{";
-                            buffer += c;
-                            break;
+        while (lexer.hasNext())
+        {
+            if (lexer.hasTemplatePrefix('$'))
+            {
+                lexer.consumeTemplatePrefix();
+                std::string token;
+                while (!lexer.hasTemplateSuffix())
+                {
+                    if (!lexer.hasNext())
+                    {
+                        BLT_FATAL("Invalid template syntax. EOF occurred before template was fully processed!");
+                        throw LexerSyntaxException(token);
                    }
-                        std::string tokenString;
-                        while ((c = lexer.nextToken())) {
-                            if (c == '}') {
-                                if (lexer.nextToken() != '}')
-                                    throw LexerSyntaxException(tokenString);
-                                else {
+                    token += lexer.consume();
+                }
+                lexer.consumeTemplateSuffix();
                if (std::find_if(
                        context.begin(), context.end(),
-                                            [&tokenString](auto in) -> bool {
-                                                return tokenString == in.first;
+                        [&token](auto in) -> bool {
+                            return token == in.first;
                        }
-                                    ) == context.end()) {
+                ) == context.end())
+                {
                    // unable to find the token, we should throw an error to tell the user! (or admin in this case)
-                                        BLT_WARN("Unable to find token '%s'!", tokenString.c_str());
+                    BLT_WARN("Unable to find token '%s'!", token.c_str());
                } else
-                                        buffer += context[tokenString];
-                                    break;
-                                }
-                            }
-                            tokenString += c;
-                        }
-                    } else { // otherwise we should write out the characters since this isn't a extended template
-                        buffer += '{';
-                        buffer += c;
-                    }
-                    break;
-                default:
-                    buffer += c;
-                    break;
-            }
+                    buffer += context[token];
+            } else
+                buffer += lexer.consume();
        }
        
        return buffer;
    }
    
-    void HTMLPage::resolveResources() {
+    void HTMLPage::resolveResources()
+    {
    
    }
 }
--- a/src/crowsite/utility.cpp
+++ b/src/crowsite/utility.cpp
@ -5,12 +5,16 @@
 #include <blt/std/string.h>
 #include <curl/curl.h>

-namespace cs {
+namespace cs
+{
    
-    namespace parser {
-        Post::Post(const std::string& input) {
+    namespace parser
+    {
+        Post::Post(const std::string& input)
+        {
            auto pairs = blt::string::split(input, "&");
-            for (const auto& pair : pairs) {
+            for (const auto& pair : pairs)
+            {
                auto kv = blt::string::split(pair, "=");
                auto key = kv[0];
                auto value = kv[1];
@ -22,13 +26,16 @@ namespace cs {
            }
        }
        
-        const std::string& Post::operator[](const std::string& key) {
+        const std::string& Post::operator[](const std::string& key)
+        {
            return m_Values[key];
        }
        
-        std::string Post::dump() {
+        std::string Post::dump()
+        {
            std::string out;
-            for (const auto& pair : m_Values) {
+            for (const auto& pair : m_Values)
+            {
                out += "[";
                out += pair.first;
                out += "] = ";
@ -44,8 +51,10 @@ namespace cs {
        }
    }
    
-    namespace fs {
-        std::string createStaticFilePath(const std::string& file) {
+    namespace fs
+    {
+        std::string createStaticFilePath(const std::string& file)
+        {
            auto path = std::string(CROW_STATIC_DIRECTORY);
            if (!path.ends_with('/'))
                path += '/';
@ -55,7 +64,9 @@ namespace cs {
                throw std::runtime_error("Unable to create file path because file does not exist!");
            return path;
        }
-        std::string createWebFilePath(const std::string& file){
+        
+        std::string createWebFilePath(const std::string& file)
+        {
            auto path = std::string(SITE_FILES_PATH);
            if (!path.ends_with('/'))
                path += '/';
--- a/src/main.cpp
+++ b/src/main.cpp
@ -123,8 +123,24 @@ crow::response handle_root_page(const site_params& params)
 //                    BLT_TRACE("URL: %s = %s", v.c_str(), req.url_params.get(v));
    if (params.name.ends_with(".html"))
    {
+        checkAndUpdateUserSession(params.app, params.req);
+        auto& session = params.app.get_context<Session>(params.req);
+        auto s_clientID = session.get("clientID", "");
+        auto s_clientToken = session.get("clientToken", "");
+        auto user_perms = cs::getUserPermissions(cs::getUserFromID(s_clientID));
+        
        crow::mustache::context ctx;
-        // we don't want to pass all get parameters to the context to prevent leaking
+        
+        // pass perms in
+        if (user_perms & cs::PERM_ADMIN)
+            ctx["_admin"] = true;
+        
+        if (cs::isUserLoggedIn(s_clientID, s_clientToken))
+        {
+            ctx["_logged_in"] = true;
+        }
+        
+        // we don't want to pass all get parameters to the context to prevent leaking information
        auto referer = params.req.url_params.get("referer");
        if (referer)
            ctx["referer"] = referer;
@ -132,14 +148,14 @@ crow::response handle_root_page(const site_params& params)
        return page.render(ctx);
    }
    
-//                crow::mustache::context ctx({{"person", name}});
-//                auto user_page = crow::mustache::compile(engine.fetch("index.html"));
-    
    return params.engine.fetch("default.html");
 }

-crow::response handle_auth_page(const site_params& params, uint32_t required_perms)
+crow::response handle_auth_page(const site_params& params)
 {
+    if (isUserAdmin(params.app, params.req))
+        return redirect("/login.html");
+    
    
    
    return handle_root_page(params);
@ -247,6 +263,7 @@ int main(int argc, const char** argv)
                    if (!cs::storeUserData(pp["username"], user_agent, data))
                    {
                        BLT_ERROR("Failed to update user data");
+                        return redirect("login.html");
                    }
                    
                    session.set("clientID", data.clientID);
				`@ -1 +0,0 @@`
				`{"clientID":"50a21c33-66c4-5a0f-902f-9434632025e6","clientToken":"TF/rwm67DntB0hrdGiPpYRPFvnZ786r8nrZ4+WQ6wUang4xbqNaZ0AUpXKcHeswaC+IwR0891JZtXP+4XcHsQA=="}`
				`@ -0,0 +1 @@`
				`{"clientID":"50a21c33-66c4-5a0f-902f-9434632025e6","clientToken":"Tcl8i/S1Czz+UGS6NzeRu/Hyk66oJjYbsRsm3tPqd/AVt2yAVbFEEi/oGdaoIlTriQf5TX7heYPxqdcGMmLRVg=="}`