working on new lexers to allow for multiple conditions in statements
parent
477a230974
commit
aaf29c5fe7
|
@ -7,17 +7,31 @@
|
||||||
|
|
||||||
#include <crowsite/site/web.h>
|
#include <crowsite/site/web.h>
|
||||||
#include <filesystem>
|
#include <filesystem>
|
||||||
|
#include <blt/std/hashmap.h>
|
||||||
|
|
||||||
namespace cs {
|
namespace cs
|
||||||
|
{
|
||||||
|
|
||||||
class LexerSyntaxError : public std::runtime_error {
|
using RuntimeContext = HASHMAP<std::string, std::string>;
|
||||||
|
|
||||||
|
class LexerSyntaxError : public std::runtime_error
|
||||||
|
{
|
||||||
public:
|
public:
|
||||||
LexerSyntaxError(): std::runtime_error("Invalid template syntax. EOF occurred before template was fully processed!"){}
|
LexerSyntaxError(): std::runtime_error("Invalid template syntax. EOF occurred before template was fully processed!")
|
||||||
|
{}
|
||||||
|
};
|
||||||
|
|
||||||
|
class LexerSearchFailure : public std::runtime_error
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
explicit LexerSearchFailure(const std::string& str): std::runtime_error("The lexer failed to find ending for tag " + str)
|
||||||
|
{}
|
||||||
};
|
};
|
||||||
|
|
||||||
constexpr uint64_t toMB = 1024 * 1024;
|
constexpr uint64_t toMB = 1024 * 1024;
|
||||||
|
|
||||||
struct CacheSettings {
|
struct CacheSettings
|
||||||
|
{
|
||||||
// amount to hard prune at when reached, note: the engine will reduce all the way down to soft max memory
|
// amount to hard prune at when reached, note: the engine will reduce all the way down to soft max memory
|
||||||
uint64_t hardMaxMemory = 2048 * toMB;
|
uint64_t hardMaxMemory = 2048 * toMB;
|
||||||
// it's more likely this will never be exceeded but the engine will make no attempt to prune more than softPruneAmount
|
// it's more likely this will never be exceeded but the engine will make no attempt to prune more than softPruneAmount
|
||||||
|
@ -26,9 +40,11 @@ namespace cs {
|
||||||
uint64_t softPruneAmount = 2 * toMB;
|
uint64_t softPruneAmount = 2 * toMB;
|
||||||
};
|
};
|
||||||
|
|
||||||
class CacheEngine {
|
class CacheEngine
|
||||||
|
{
|
||||||
private:
|
private:
|
||||||
struct CacheValue {
|
struct CacheValue
|
||||||
|
{
|
||||||
int64_t cacheTime;
|
int64_t cacheTime;
|
||||||
std::filesystem::file_time_type lastModified;
|
std::filesystem::file_time_type lastModified;
|
||||||
std::unique_ptr<HTMLPage> page;
|
std::unique_ptr<HTMLPage> page;
|
||||||
|
@ -40,19 +56,27 @@ namespace cs {
|
||||||
HASHMAP<std::string, CacheValue> m_Pages;
|
HASHMAP<std::string, CacheValue> m_Pages;
|
||||||
|
|
||||||
static uint64_t calculateMemoryUsage(const std::string& path, const CacheValue& value);
|
static uint64_t calculateMemoryUsage(const std::string& path, const CacheValue& value);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @return memory usage of the pages cache in bytes
|
* @return memory usage of the pages cache in bytes
|
||||||
*/
|
*/
|
||||||
uint64_t calculateMemoryUsage();
|
uint64_t calculateMemoryUsage();
|
||||||
|
|
||||||
void resolveLinks(const std::string& file, HTMLPage& page);
|
void resolveLinks(const std::string& file, HTMLPage& page);
|
||||||
|
|
||||||
void loadPage(const std::string& path);
|
void loadPage(const std::string& path);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Prunes the cache starting with the oldest pages we have loaded. (in bytes)
|
* Prunes the cache starting with the oldest pages we have loaded. (in bytes)
|
||||||
*/
|
*/
|
||||||
void prune(uint64_t amount);
|
void prune(uint64_t amount);
|
||||||
|
|
||||||
public:
|
public:
|
||||||
explicit CacheEngine(StaticContext& context, const CacheSettings& settings = {});
|
explicit CacheEngine(StaticContext& context, const CacheSettings& settings = {});
|
||||||
|
|
||||||
const std::string& fetch(const std::string& path);
|
const std::string& fetch(const std::string& path);
|
||||||
|
|
||||||
|
std::string fetch(const std::string& path, const RuntimeContext& context);
|
||||||
};
|
};
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -4,34 +4,46 @@
|
||||||
#include <crowsite/site/cache.h>
|
#include <crowsite/site/cache.h>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include "blt/std/logging.h"
|
#include "blt/std/logging.h"
|
||||||
|
#include "blt/std/string.h"
|
||||||
#include "crowsite/utility.h"
|
#include "crowsite/utility.h"
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <blt/std/time.h>
|
#include <blt/std/time.h>
|
||||||
#include <blt/parse/mustache.h>
|
#include <optional>
|
||||||
|
|
||||||
namespace cs
|
namespace cs
|
||||||
{
|
{
|
||||||
struct StringLexer
|
class LexerBase
|
||||||
{
|
{
|
||||||
private:
|
protected:
|
||||||
std::string str;
|
std::string str;
|
||||||
size_t index = 0;
|
size_t index = 0;
|
||||||
public:
|
public:
|
||||||
explicit StringLexer(std::string str): str(std::move(str))
|
explicit LexerBase(std::string str): str(std::move(str))
|
||||||
{}
|
{}
|
||||||
|
|
||||||
inline bool hasNext()
|
inline bool hasNext()
|
||||||
{
|
{
|
||||||
if (index >= str.size())
|
return index < str.size();
|
||||||
return false;
|
|
||||||
return true;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
inline bool hasTemplatePrefix(char c)
|
inline char peekPrefix()
|
||||||
{
|
{
|
||||||
if (index + 2 >= str.size())
|
return str[index + 2];
|
||||||
return false;
|
}
|
||||||
return str[index] == '{' && str[index + 1] == '{' && str[index + 2] == c;
|
|
||||||
|
inline char consume()
|
||||||
|
{
|
||||||
|
return str[index++];
|
||||||
|
}
|
||||||
|
|
||||||
|
inline void consumeTemplatePrefix()
|
||||||
|
{
|
||||||
|
index += 3;
|
||||||
|
}
|
||||||
|
|
||||||
|
inline void consumeTemplateSuffix()
|
||||||
|
{
|
||||||
|
index += 2;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline bool hasTemplateSuffix()
|
inline bool hasTemplateSuffix()
|
||||||
|
@ -41,28 +53,19 @@ namespace cs
|
||||||
return str[index] == '}' && str[index + 1] == '}';
|
return str[index] == '}' && str[index + 1] == '}';
|
||||||
}
|
}
|
||||||
|
|
||||||
inline void consumeTemplatePrefix()
|
|
||||||
{
|
|
||||||
// because any custom mustache syntax will have to have a prefix like '$' or '@'
|
|
||||||
// it is fine that we make the assumption of 3 characters consumed.
|
|
||||||
index += 3;
|
|
||||||
}
|
|
||||||
|
|
||||||
inline void consumeTemplateSuffix()
|
|
||||||
{
|
|
||||||
index += 2;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* This function assumes hasTemplatePrefix(char) has returned true and will consume both the prefix and suffix
|
* This function assumes hasTemplatePrefix(char) has returned true and will consume the prefix / suffix
|
||||||
* @return the token found between the prefix and suffix
|
* @return the token found between the prefix and suffix
|
||||||
* @throws LexerSyntaxError if the parser is unable to process a full token
|
* @throws LexerSyntaxError if the parser is unable to process a full token
|
||||||
*/
|
*/
|
||||||
inline std::string consumeToken(){
|
inline std::string consumeToken()
|
||||||
|
{
|
||||||
consumeTemplatePrefix();
|
consumeTemplatePrefix();
|
||||||
std::string token;
|
std::string token;
|
||||||
while (!hasTemplateSuffix()) {
|
while (!hasTemplateSuffix())
|
||||||
if (!hasNext()) {
|
{
|
||||||
|
if (!hasNext())
|
||||||
|
{
|
||||||
throw LexerSyntaxError();
|
throw LexerSyntaxError();
|
||||||
}
|
}
|
||||||
token += consume();
|
token += consume();
|
||||||
|
@ -70,11 +73,129 @@ namespace cs
|
||||||
consumeTemplateSuffix();
|
consumeTemplateSuffix();
|
||||||
return token;
|
return token;
|
||||||
}
|
}
|
||||||
|
};
|
||||||
|
|
||||||
inline char consume()
|
class CacheLexer : public LexerBase
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
explicit CacheLexer(std::string str): LexerBase(std::move(str))
|
||||||
|
{}
|
||||||
|
|
||||||
|
static inline bool isCharNext(char c)
|
||||||
{
|
{
|
||||||
return str[index++];
|
switch (c)
|
||||||
|
{
|
||||||
|
case '$':
|
||||||
|
case '@':
|
||||||
|
return true;
|
||||||
|
default:
|
||||||
|
return false;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
inline bool hasTemplatePrefix()
|
||||||
|
{
|
||||||
|
if (index + 2 >= str.size())
|
||||||
|
return false;
|
||||||
|
return str[index] == '{' && str[index + 1] == '{' && isCharNext(str[index + 2]);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
class RuntimeLexer : public LexerBase
|
||||||
|
{
|
||||||
|
private:
|
||||||
|
/**
 * Splits a condition string into its logical operator tokens: "&&", "||", "!", "(" and ")".
 *
 * Every other character (operand names, whitespace, a lone '&' or '|') is skipped for now.
 * TODO(review): operands are not tokenized yet; Token::value looks reserved for them - confirm intent.
 */
class LogicalEval
{
    private:
        enum class TokenType
        {
            AND,    // &&
            OR,     // ||
            NOT,    // !
            OPEN,   // (
            CLOSE   // )
        };
        
        struct Token
        {
            TokenType type;
            std::optional<std::string> value;
        };
        
        std::vector<Token> tokens;
        // not read anywhere in this class yet
        size_t m_index = 0;
        
        /** Appends an operator token that carries no value. */
        void pushOperator(TokenType type)
        {
            // brace-init keeps Token a plain aggregate and does not depend on
            // C++20 parenthesized aggregate initialization inside emplace_back
            tokens.push_back(Token{type, std::nullopt});
        }
    
    public:
        /**
         * Scans str from left to right and appends one token per operator found.
         * Tokens accumulate across calls; nothing is cleared.
         * @param str condition text to scan
         */
        void processString(const std::string& str)
        {
            size_t index = 0;
            while (index < str.size())
            {
                const char c = str[index++];
                switch (c)
                {
                    case '&':
                    case '|':
                        // binary operators are two identical characters; a lone one is ignored
                        if (index < str.size() && str[index] == c)
                        {
                            ++index;
                            pushOperator(c == '&' ? TokenType::AND : TokenType::OR);
                        }
                        break;
                    case '!':
                        pushOperator(TokenType::NOT);
                        break;
                    case '(':
                        pushOperator(TokenType::OPEN);
                        break;
                    case ')':
                        pushOperator(TokenType::CLOSE);
                        break;
                    default:
                        break;
                }
            }
        }
        
        /**
         * @return number of tokens collected so far
         */
        size_t tokenCount() const
        {
            return tokens.size();
        }
};
|
||||||
|
|
||||||
|
public:
|
||||||
|
explicit RuntimeLexer(std::string str): LexerBase(std::move(str))
|
||||||
|
{}
|
||||||
|
|
||||||
|
inline bool hasTemplatePrefix(char c = '%')
|
||||||
|
{
|
||||||
|
if (index + 2 >= str.size())
|
||||||
|
return false;
|
||||||
|
return str[index] == '{' && str[index + 1] == '{' && str[index + 2] == c;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Placeholder: not implemented yet.
 * TODO(review): intended behaviour is not visible here - presumably consumes up to the closing tag of tokenName; confirm.
 * @param tokenName currently unused
 * @return always an empty string
 */
static std::string consumeToEndTemplate([[maybe_unused]] const std::string& tokenName)
{
    // an explicit return is required: flowing off the end of a non-void function is undefined behaviour
    return {};
}
|
||||||
|
|
||||||
|
static size_t findLastTagLocation(const std::string& tag, const std::string& data)
|
||||||
|
{
|
||||||
|
std::vector<size_t> tagLocations{};
|
||||||
|
RuntimeLexer lexer(data);
|
||||||
|
while (lexer.hasNext())
|
||||||
|
{
|
||||||
|
if (lexer.hasTemplatePrefix('/'))
|
||||||
|
{
|
||||||
|
auto loc = lexer.index;
|
||||||
|
auto token = lexer.consumeToken();
|
||||||
|
if (tag == token)
|
||||||
|
tagLocations.push_back(loc);
|
||||||
|
} else
|
||||||
|
lexer.consume();
|
||||||
|
}
|
||||||
|
if (tagLocations.empty())
|
||||||
|
throw LexerSearchFailure(tag);
|
||||||
|
return tagLocations[tagLocations.size() - 1];
|
||||||
|
}
|
||||||
|
|
||||||
|
static std::string searchAndReplace(const std::string& data, const RuntimeContext& context)
|
||||||
|
{
|
||||||
|
RuntimeLexer lexer(data);
|
||||||
|
std::string results;
|
||||||
|
while (lexer.hasNext())
|
||||||
|
{
|
||||||
|
if (lexer.hasTemplatePrefix())
|
||||||
|
{
|
||||||
|
auto token = lexer.consumeToken();
|
||||||
|
auto searchField = lexer.str.substr(lexer.index);
|
||||||
|
auto endTokenLoc = RuntimeLexer::findLastTagLocation(token, searchField);
|
||||||
|
|
||||||
|
auto
|
||||||
|
} else
|
||||||
|
results += lexer.consume();
|
||||||
|
}
|
||||||
|
return results;
|
||||||
|
}
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
@ -206,7 +327,7 @@ namespace cs
|
||||||
|
|
||||||
void CacheEngine::resolveLinks(const std::string& file, HTMLPage& page)
|
void CacheEngine::resolveLinks(const std::string& file, HTMLPage& page)
|
||||||
{
|
{
|
||||||
StringLexer lexer(page.getRawSite());
|
CacheLexer lexer(page.getRawSite());
|
||||||
std::string resolvedSite;
|
std::string resolvedSite;
|
||||||
|
|
||||||
const std::string valid_file_endings[3] = {
|
const std::string valid_file_endings[3] = {
|
||||||
|
@ -217,13 +338,14 @@ namespace cs
|
||||||
|
|
||||||
while (lexer.hasNext())
|
while (lexer.hasNext())
|
||||||
{
|
{
|
||||||
if (lexer.hasTemplatePrefix('@'))
|
if (lexer.hasTemplatePrefix())
|
||||||
{
|
{
|
||||||
|
auto prefix = lexer.peekPrefix();
|
||||||
auto token = lexer.consumeToken();
|
auto token = lexer.consumeToken();
|
||||||
for (const auto& suffix : valid_file_endings)
|
|
||||||
|
switch (prefix)
|
||||||
{
|
{
|
||||||
if (token.ends_with(suffix))
|
case '@':
|
||||||
{
|
|
||||||
if (token == file)
|
if (token == file)
|
||||||
{
|
{
|
||||||
BLT_WARN("Recursive load detected!");
|
BLT_WARN("Recursive load detected!");
|
||||||
|
@ -231,24 +353,31 @@ namespace cs
|
||||||
BLT_WARN("Detected in file '%s' offending link '%s'", file.c_str(), token.c_str());
|
BLT_WARN("Detected in file '%s' offending link '%s'", file.c_str(), token.c_str());
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
resolvedSite += fetch(token);
|
for (const auto& suffix : valid_file_endings)
|
||||||
break;
|
{
|
||||||
}
|
if (token.ends_with(suffix))
|
||||||
}
|
{
|
||||||
} else if (lexer.hasTemplatePrefix('$'))
|
resolvedSite += fetch(token);
|
||||||
{
|
break;
|
||||||
auto token = lexer.consumeToken();
|
}
|
||||||
if (std::find_if(
|
|
||||||
m_Context.begin(), m_Context.end(),
|
|
||||||
[&token](auto in) -> bool {
|
|
||||||
return token == in.first;
|
|
||||||
}
|
}
|
||||||
) == m_Context.end())
|
break;
|
||||||
{
|
case '$':
|
||||||
// unable to find the token, we should throw an error to tell the user! (or admin in this case)
|
if (std::find_if(
|
||||||
BLT_WARN("Unable to find token '%s'!", token.c_str());
|
m_Context.begin(), m_Context.end(),
|
||||||
} else
|
[&token](auto in) -> bool {
|
||||||
resolvedSite += m_Context[token];
|
return token == in.first;
|
||||||
|
}
|
||||||
|
) == m_Context.end())
|
||||||
|
{
|
||||||
|
// unable to find the token, we should throw an error to tell the user! (or admin in this case)
|
||||||
|
BLT_WARN("Unable to find token '%s'!", token.c_str());
|
||||||
|
} else
|
||||||
|
resolvedSite += m_Context[token];
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
} else
|
} else
|
||||||
resolvedSite += lexer.consume();
|
resolvedSite += lexer.consume();
|
||||||
}
|
}
|
||||||
|
@ -256,5 +385,11 @@ namespace cs
|
||||||
page.getRawSite() = resolvedSite;
|
page.getRawSite() = resolvedSite;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::string CacheEngine::fetch(const std::string& path, const RuntimeContext& context)
|
||||||
|
{
|
||||||
|
auto fetched = fetch(path);
|
||||||
|
return RuntimeLexer::searchAndReplace(fetched, context);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
Loading…
Reference in New Issue