Compare commits

...

8 Commits

Author SHA1 Message Date
Brett c99b4dbd4a check 2025-07-08 23:11:44 -04:00
Brett 0e2321952e hope 2025-07-08 22:17:55 -04:00
Brett d2bd5290ee i hope final news 2025-07-08 22:17:03 -04:00
Brett 8f069f17f4 even even more news 2025-07-08 22:13:12 -04:00
Brett e97ca2a41c even more 2025-07-08 22:12:40 -04:00
Brett 12bf471068 more news 2025-07-08 22:11:28 -04:00
Brett d534ce6a07 hello 2025-07-08 22:08:40 -04:00
Brett 5d54ef4cc3 finished 2025-07-08 21:43:59 -04:00
9 changed files with 671 additions and 21 deletions

View File

@ -312,12 +312,31 @@ async def on_ready() -> None:
LOGGER.info("Playwright pool ready")
LOGGER.info("------")
async def process_articles(message: discord.Message) -> None:
    """Re-run article processing for every article reported as incomplete.

    The URLs come from ``server.article_repository.fetch_incomplete()`` and
    each one is handed to ``handle_article_url``. Progress is reported in the
    channel the triggering message was sent in.

    This coroutine is launched with ``asyncio.create_task`` and never
    awaited, so an exception escaping it would be dropped without feedback
    and would abandon the remaining URLs. A failure on one URL is therefore
    logged, reported to the channel, and the loop continues.

    Args:
        message: The Discord message that requested the run.
    """
    await message.channel.send("Processing incomplete articles...")
    LOGGER.info("Fetching incomplete articles")
    urls = await server.article_repository.fetch_incomplete()
    for url in urls:
        LOGGER.info("Processing incomplete article %s", url)
        await message.channel.send(f"Processing incomplete article {url}")
        try:
            await handle_article_url(message, url)
        except Exception:
            # Task boundary: nothing upstream observes this coroutine, so
            # record the failure here and keep going with the next URL.
            LOGGER.exception("Failed to process incomplete article %s", url)
            await message.channel.send(f"Failed to process {url}")
    await message.channel.send("Done!")
@bot.event
async def on_message(message: discord.Message) -> None:
# Ignore our own messages
if message.author == bot.user:
return
if message.author.id != 199680010267656192:
await message.channel.send("Only authorized users are allowed to use this bot.")
return
if message.content.startswith("!"):
if message.content == "!process":
asyncio.create_task(process_articles(message))
return
is_dm = message.guild is None
overwrite = False
@ -337,10 +356,6 @@ async def on_message(message: discord.Message) -> None:
if not (is_dm or is_mention):
return
if is_dm and message.author.id != 199680010267656192:
await message.channel.send("Only authorized users are allowed to use this bot.")
return
url = extract_first_url(message.content)
if not url:
await message.channel.send("Please send me a link to a news article.")

View File

@ -149,6 +149,17 @@ class ArticleRepository:
# public API
# ------------------------------------------------------------------ #
async def fetch_incomplete(self) -> list[str]:
    """Return the URLs of articles that lack a summary or lack paragraphs.

    An article is reported when no ``summaries`` row, or no ``paragraphs``
    row, references its id.

    Returns:
        The ``url`` column of every matching ``articles`` row.
    """
    async with self._lock:
        cur = self._conn.cursor()
        # NOT EXISTS is equivalent to the former COUNT(*) = 0 test but can
        # stop at the first child row instead of counting all of them.
        cur.execute(
            """
            SELECT url FROM articles AS a
            WHERE NOT EXISTS (SELECT 1 FROM summaries WHERE article_id = a.id)
               OR NOT EXISTS (SELECT 1 FROM paragraphs WHERE article_id = a.id)
            """
        )
        # Read from the cursor itself: DB-API leaves the return value of
        # execute() undefined.
        return [row[0] for row in cur.fetchall()]
async def fetch_article(self, url: str) -> tuple[str, str]:
async with self._lock:
result = self._get_article(url)
@ -216,6 +227,42 @@ class ArticleRepository:
return row.fetchall()
async def search_articles(self, text: str, count: int, last: int) -> list:
    """Return articles whose URL, title or processed HTML contains *text*.

    Matching is a SQL ``LIKE`` with *text* wrapped in ``%`` on both sides, so
    ``%`` and ``_`` inside *text* still act as wildcards. Rows are ordered by
    descending id.

    Both paging modes share one statement. The pattern is always bound as a
    parameter to each ``LIKE`` rather than selected as a ``text`` alias and
    referenced from ``WHERE``, and the ordering is applied to the statement
    that is actually returned.

    Args:
        text: Substring to look for.
        count: Maximum number of rows to return.
        last: When greater than zero, only rows with ``id < last`` are
            returned; any other value starts from the highest id.

    Returns:
        Rows of ``(id, url, title, processed_html)``, highest id first.
    """
    pattern = "%" + text + "%"
    marker = self.cursor_type
    # Only the placeholder marker is interpolated into the SQL; every value
    # is passed as a bound parameter.
    conditions = (
        f"(url LIKE {marker} OR title LIKE {marker}"
        f" OR processed_html LIKE {marker})"
    )
    params = [pattern, pattern, pattern]
    if last > 0:
        conditions += f" AND id < {marker}"
        params.append(last)
    params.append(count)
    query = (
        "SELECT id, url, title, processed_html FROM articles"
        f" WHERE {conditions} ORDER BY id DESC LIMIT {marker}"
    )
    async with self._lock:
        cur = self._conn.cursor()
        cur.execute(query, tuple(params))
        return cur.fetchall()
async def get_paragraphs(self, article_url : str) -> ArticleParagraphs | None:
async with self._lock:
cur = self._conn.cursor()

View File

@ -16,49 +16,91 @@ article_repository = ArticleRepository()
LOGGER = logging.getLogger("server")
@app.route("/")
@app.route("/news")
async def index():
return await send_from_directory("static", "index.html")
@app.route("/index.html")
@app.route("/news/")
async def index_root():
    """Answer ``/news/`` with whatever ``index`` returns."""
    response = await index()
    return response


@app.route("/news/index.html")
async def index_html():
    """Answer ``/news/index.html`` with whatever ``index`` returns."""
    response = await index()
    return response
@app.route("/view.html")
@app.route("/news/view.html")
async def view_html():
return await send_from_directory("static", "view.html")
@app.route("/view")
@app.route("/news/view")
async def view():
return await view_html()
@app.route("/api/health")
@app.route("/news/browse.html")
async def browse_html():
    """Serve ``browse.html`` from the ``static`` directory."""
    page = await send_from_directory("static", "browse.html")
    return page


@app.route("/news/browse")
async def browse():
    """Extension-less alias that returns the ``browse_html`` response."""
    page = await browse_html()
    return page


@app.route("/news/search.html")
async def search_html():
    """Serve ``search.html`` from the ``static`` directory."""
    page = await send_from_directory("static", "search.html")
    return page


@app.route("/news/search")
async def search():
    """Extension-less alias that returns the ``search_html`` response."""
    page = await search_html()
    return page
@app.route("/news/api/health")
async def health():
    """Health-check endpoint; always returns ``{"status": "ok"}``."""
    status = {"status": "ok"}
    return status
@app.route("/api/article/<path:article_url>", methods=["GET"])
@app.route("/news/api/article/<path:article_url>", methods=["GET"])
async def get_article(article_url: str):
    """Return the stored article for *article_url* as JSON.

    Aborts with 404 when the repository returns ``None`` for the URL.
    """
    found = await article_repository.get_article(article_url)
    if found is not None:
        return jsonify(found)
    abort(404, description="Article not found")
@app.route("/api/articles", methods=["GET"])
@app.route("/news/api/articles", methods=["GET"])
async def get_articles():
count = min(int(request.args.get("count") or "25"), 125)
last = int(request.args.get("last") or "-1")
articles = await article_repository.get_latest_articles(count, last)
json_obj = []
for _, url, title, processed_html in articles:
for _id, url, title, processed_html in articles:
json_obj.append({url: {
"title": title,
"processed_text": processed_html,
"id": _id
}})
return jsonify(json_obj)
@app.route("/api/view_article", methods=["GET"])
@app.route("/news/api/search", methods=["GET"])
async def search_articles():
    """Search stored articles and return one page of matches as JSON.

    Query parameters:
        text: Required search string; a missing or empty value is rejected
            with 400.
        count: Page size. Defaults to 25 and is clamped to 0-125.
        last: Paging cursor forwarded to the repository. Defaults to -1.

    Returns:
        A JSON array of single-key objects, each mapping an article URL to
        its ``title``, ``processed_text`` and ``id``.
    """
    # Check the required parameter first so an invalid request is rejected
    # before any numeric parsing or database work.
    text = request.args.get("text")
    if not text:
        abort(400, description="`text` query parameter is required")

    # type=int falls back to the default when the value is missing, empty or
    # not a number, instead of raising and surfacing as a 500.
    count = request.args.get("count", default=25, type=int)
    last = request.args.get("last", default=-1, type=int)
    # Clamp from below as well: SQLite treats a negative LIMIT as "no
    # limit", so ?count=-1 would otherwise bypass the 125-row cap.
    count = max(0, min(count, 125))

    articles = await article_repository.search_articles(text, count, last)
    LOGGER.info("Found %d articles for search query: %s", len(articles), text)
    payload = [
        {
            url: {
                "title": title,
                "processed_text": processed_html,
                "id": article_id,
            }
        }
        for article_id, url, title, processed_html in articles
    ]
    return jsonify(payload)
@app.route("/news/api/view_article", methods=["GET"])
async def view_article():
url = request.args.get("url")
if not url:
@ -90,7 +132,7 @@ async def view_article():
return jsonify(article)
@app.route("/article-by-url", methods=["GET"])
@app.route("/news/article-by-url", methods=["GET"])
async def get_article_by_url():
url = request.args.get("url")
if not url:

197
news/static/browse.html Normal file
View File

@ -0,0 +1,197 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>Browse Articles</title>
<style>
/* ─────────── Global title bar ─────────── */
.navbar{
position:fixed;
top:0; left:0; right:0;
height:3rem;
background:#0d47a1;
color:#fff;
display:flex;
align-items:center;
justify-content:space-between;
padding:0 1rem;
box-shadow:0 1px 4px rgba(0,0,0,.15);
z-index:100;
}
.navbar .brand{
font-size:1.15rem;
font-weight:700;
color:#fff;
text-decoration:none;
}
.navbar .nav-link{
color:#fff;
text-decoration:none;
margin-left:1rem;
font-size:.95rem;
}
.navbar .nav-link:hover{
text-decoration:underline;
}
body{
font-family: sans-serif;
margin:0;
padding: 4rem 1rem 2rem;
background:#f7f7f7;
}
.article{
background:#fff;
margin:0.5rem auto;
max-width:700px;
padding:1rem 1.25rem;
border-radius:4px;
box-shadow:0 1px 4px rgba(0,0,0,.08);
}
.article h2{
margin:0 0 .5rem;
font-size:1.2rem;
color:#333;
}
.article .meta{
font-size:.85rem;
color:#666;
margin-bottom:.75rem;
}
.article p{
margin:0;
line-height:1.5;
}
#loader{
text-align:center;
padding:2rem 0;
color:#777;
}
.article-link,
.article-link:visited,
.article-link:hover,
.article-link:active,
.article-link:focus {
text-decoration: none;
color: inherit; /* keep the original text color */
}
#endMarker{
text-align:center;
color:#555;
padding:1.5rem 0;
}
</style>
</head>
<body>
<header class="navbar">
<a href="/news/" class="brand">Newsulizer</a>
<nav>
<a href="/news/" class="nav-link">Home</a>
<a href="/news/browse" class="nav-link">Browse</a>
<a href="/news/search" class="nav-link">Search</a>
</nav>
</header>
<div style="display: flex; justify-content: center;">
<h1>Browse Articles</h1>
</div>
<main id="feed"></main>
<div id="loader" hidden>Loading…</div>
<div id="endMarker" hidden>End of results</div>
<script type="module">
// Elements this script reads and updates.
const feed = document.getElementById('feed');
const loader = document.getElementById('loader');
const endMarker = document.getElementById('endMarker');
// Paging state shared by loadMore() and the scroll handler below.
let isFetching = false;
let reachedEnd = false;
let lastId; // undefined until the first batch returns
/* -------- kick-off -------- */
// Request the first page once the document has been parsed.
window.addEventListener('DOMContentLoaded', loadMore);
/* -------- infinite scroll -------- */
window.addEventListener('scroll', () => {
// Nothing to do while a request is running or once the end was reached.
if (reachedEnd || isFetching) return;
// Trigger when the bottom of the viewport is within 600px of the end of the body.
const nearBottom = window.innerHeight + window.scrollY >= document.body.offsetHeight - 600;
if (nearBottom) loadMore();
});
/* Fetch the next page of articles and append it to the feed.
   Does nothing while a request is in flight or once the server has returned
   an empty page. Errors are logged and leave the paging state untouched so
   a later scroll can retry. */
async function loadMore(){
  // Guard in the function itself so every caller is safe, not only the
  // scroll handler.
  if (reachedEnd || isFetching) return;
  isFetching = true;
  toggleLoader(true);
  let appended = false;
  try{
    const url = new URL('/news/api/articles', window.location.origin);
    if (lastId !== undefined) url.searchParams.set('last', lastId);
    url.searchParams.set("count", "5");

    const res = await fetch(url);
    if (!res.ok) throw new Error('Failed to load articles');
    const articles = await res.json();   // expecting an array
    if (!Array.isArray(articles)) throw new Error('Invalid payload');

    if (articles.length === 0){
      reachedEnd = true;
      endMarker.hidden = false;
    } else {
      render(articles);
      // Each item is { <url>: { title, processed_text, id } }; remember the
      // id of the last one as the cursor for the next request.
      lastId = Object.entries(articles[articles.length - 1])[0][1].id;
      appended = true;
    }
  }catch(err){
    console.error(err);
  }finally{
    // Runs on every path, so the loader never needs hiding elsewhere.
    isFetching = false;
    toggleLoader(false);
  }
  // A page of five cards can be shorter than the viewport; without a
  // scrollbar no scroll event ever fires, so keep loading until the feed
  // extends past the trigger distance. Only chain after a successful
  // append so a failing request is not retried in a loop.
  if (appended && window.innerHeight + window.scrollY >= document.body.offsetHeight - 600){
    loadMore();
  }
}
/* Append one linked card per item to the feed.
   Each item is a single-key object { <article url>: { title, processed_text, id } }. */
function render(items){
  const frag = document.createDocumentFragment();
  for (const item of items){
    const [url, meta] = Object.entries(item)[0];

    const link = document.createElement('a');
    link.className = 'article-link';
    link.href = '/news/view?url=' + encodeURIComponent(url);

    // An article without processed text used to throw on .replace(), which
    // aborted rendering of the whole batch; treat it as empty text instead.
    const source = String(meta.processed_text ?? '');
    // Drop parenthesised runs that contain characters outside the allowed
    // set, then bracketed runs, then any remaining disallowed characters.
    let excerpt = source.replace(/(\(.*[^\w:_; '.,"\s]+.*\))/g, '');
    excerpt = excerpt.replace(/(\[.*])/g, '');
    excerpt = excerpt.replace(/([^\w:_; '.,"\s/]+)/g, '');
    // Keep the first 60 whitespace-separated tokens and end on "...".
    excerpt = excerpt.split(/\s/g).slice(0, 60).join(' ').trim();
    excerpt += excerpt.endsWith('.') ? '..' : '...';

    const article = document.createElement('article');
    article.className = 'article';
    // Fall back to the URL so a missing title does not render as "undefined".
    article.innerHTML = `
      <h2>${escapeHtml(meta.title || url)}</h2>
      <p>${escapeHtml(excerpt)}</p>
    `;

    link.appendChild(article);
    frag.appendChild(link);
  }
  feed.appendChild(frag);
}
/* Show the loading indicator when `show` is truthy, hide it otherwise. */
function toggleLoader(show){
  if (show){
    loader.hidden = false;
  } else {
    loader.hidden = true;
  }
}
/* Escape a string for use as HTML text content: assign it as plain text to
   a detached <textarea> and read back the serialised markup. */
const escapeScratch = document.createElement('textarea');
function escapeHtml(str){
  escapeScratch.textContent = str;
  const escaped = escapeScratch.innerHTML;
  return escaped;
}
</script>
</body>
</html>

View File

@ -6,10 +6,40 @@
<!-- Tiny bit of styling so it already looks decent -->
<style>
/* ─────────── Global title bar ─────────── */
.navbar{
position:fixed;
top:0; left:0; right:0;
height:3rem;
background:#0d47a1;
color:#fff;
display:flex;
align-items:center;
justify-content:space-between;
padding:0 1rem;
box-shadow:0 1px 4px rgba(0,0,0,.15);
z-index:100;
}
.navbar .brand{
font-size:1.15rem;
font-weight:700;
color:#fff;
text-decoration:none;
}
.navbar .nav-link{
color:#fff;
text-decoration:none;
margin-left:1rem;
font-size:.95rem;
}
.navbar .nav-link:hover{
text-decoration:underline;
}
body{
font-family: Arial, sans-serif;
margin: 0;
padding: 0 1rem;
padding: 4rem 1rem 2rem;
background:#f6f8fa;
}
h1{color:#333;text-align:center;margin-top:1.5rem;}
@ -59,6 +89,15 @@
</style>
</head>
<header class="navbar">
<a href="/news/" class="brand">Newsulizer</a>
<nav>
<a href="/news/" class="nav-link">Home</a>
<a href="/news/browse" class="nav-link">Browse</a>
<a href="/news/search" class="nav-link">Search</a>
</nav>
</header>
<body>
<div style="display: flex; justify-content: center;">
<h1>Newsulizer</h1>
@ -76,7 +115,7 @@
const errorEl = document.getElementById('error');
// Change this to the full path of your API if it differs
const ENDPOINT = '/api/articles?count=25';
const ENDPOINT = '/news/api/articles?count=25';
fetch(ENDPOINT)
.then(res => {
@ -95,7 +134,7 @@
the whole card is a single coherent link */
const link = document.createElement('a');
link.className = 'article-link';
link.href = '/view?url=' + encodeURIComponent(url);
link.href = '/news/view?url=' + encodeURIComponent(url);
const card = document.createElement('article');
card.className = 'article-card';
@ -104,7 +143,17 @@
h2.textContent = meta.title || url;
const p = document.createElement('p');
p.textContent = truncate(meta.processed_text, 280);
const txt = meta.processed_text;
let reg = txt.replace(/(\(.*[^\w:_; '.,"\s]+.*\))/g, '')
reg = reg.replace(/(\[.*])/g, '')
reg = reg.replace(/([^\w:_; '.,"\s/]+)/g, '')
const words = reg.split(/\s/g)
reg = words.slice(0, Math.min(60, words.length)).join(' ').trim();
if (reg.endsWith('.'))
reg += "..";
else
reg += "...";
p.textContent = reg;
card.appendChild(h2);
card.appendChild(p);

298
news/static/search.html Normal file
View File

@ -0,0 +1,298 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8" />
<title>Search</title>
<!-- Simple styling; remove or move to an external sheet as you prefer -->
<style>
/* ─────────── Global title bar ─────────── */
.navbar{
position:fixed;
top:0; left:0; right:0;
height:3rem;
background:#0d47a1;
color:#fff;
display:flex;
align-items:center;
justify-content:space-between;
padding:0 1rem;
box-shadow:0 1px 4px rgba(0,0,0,.15);
z-index:100;
}
.navbar .brand{
font-size:1.15rem;
font-weight:700;
color:#fff;
text-decoration:none;
}
.navbar .nav-link{
color:#fff;
text-decoration:none;
margin-left:1rem;
font-size:.95rem;
}
.navbar .nav-link:hover{
text-decoration:underline;
}
:root {
--accent: #2563eb;
--border-radius: 0.5rem;
}
* { box-sizing: border-box; }
body {
margin: 0;
font-family: system-ui, sans-serif;
padding: 4rem 1rem 2rem;
}
/* ────────── Search bar ────────── */
.search-wrapper{
display:flex;
justify-content:center;
margin-bottom:1.5rem;
}
.search-container {
display: flex;
align-items: center;
width: min(32rem, 90vw);
border: 2px solid #e5e7eb;
border-radius: var(--border-radius);
overflow: hidden;
background: #fff;
transition: border-color 0.2s;
}
.search-container:focus-within {
border-color: var(--accent);
box-shadow: 0 0 0 3px color-mix(in srgb, var(--accent) 30%, transparent);
}
.search-container input {
flex: 1;
border: none;
padding: 0.75rem 1rem;
font-size: 1rem;
outline: none;
}
.search-container button {
background: none;
border: none;
padding: 0 0.75rem;
cursor: pointer;
color: var(--accent);
height: 100%;
display: flex;
align-items: center;
justify-content: center;
}
.search-container svg {
width: 1.25rem;
height: 1.25rem;
}
/* ────────── Article cards (copied from browse) ────────── */
.article{
background:#fff;
margin:0.5rem auto;
max-width:700px;
padding:1rem 1.25rem;
border-radius:4px;
box-shadow:0 1px 4px rgba(0,0,0,.08);
}
.article h2{
margin:0 0 .5rem;
font-size:1.2rem;
color:#333;
}
.article .meta{
font-size:.85rem;
color:#666;
margin-bottom:.75rem;
}
.article p{
margin:0;
line-height:1.5;
}
.article-link,
.article-link:visited,
.article-link:hover,
.article-link:active,
.article-link:focus {
text-decoration: none;
color: inherit;
}
#loader{
text-align:center;
padding:2rem 0;
color:#777;
}
#endMarker{
text-align:center;
color:#555;
padding:1.5rem 0;
}
</style>
</head>
<body>
<!-- ===== NAVBAR ===== -->
<header class="navbar">
<a href="/news/" class="brand">Newsulizer</a>
<nav>
<a href="/news/" class="nav-link">Home</a>
<a href="/news/browse" class="nav-link">Browse</a>
<a href="/news/search" class="nav-link">Search</a>
</nav>
</header>
<!-- ────────── Search bar ────────── -->
<div class="search-wrapper">
<div class="search-container">
<input
type="search"
id="searchInput"
placeholder="Search..."
autocomplete="off"
aria-label="Search"
/>
<button id="searchButton" aria-label="Submit search">
<!-- Simple magnifying-glass icon -->
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" stroke="currentColor" fill="none" stroke-width="2">
<circle cx="11" cy="11" r="7" />
<line x1="16.65" y1="16.65" x2="21" y2="21" />
</svg>
</button>
</div>
</div>
<!-- ────────── Results feed ────────── -->
<main id="feed"></main>
<div id="loader" hidden>Loading…</div>
<div id="endMarker" hidden>End of results</div>
<!-- ────────── Logic ────────── -->
<script type="module">
// Elements this script reads and updates.
const searchInput = document.getElementById('searchInput');
const searchBtn = document.getElementById('searchButton');
const feed = document.getElementById('feed');
const loader = document.getElementById('loader');
const endMarker = document.getElementById('endMarker');
// Number of results requested per call; a shorter page marks the end of results.
const PAGE_SIZE = 5;
// Search and paging state shared by performSearch(), resetFeed() and loadMore().
let currentQuery = '';
let isFetching = false;
let reachedEnd = false;
let lastId; // undefined until first batch returns
/* ---------- helpers ---------- */
/* Show the loading indicator when `show` is truthy, hide it otherwise. */
function toggleLoader(show){
  if (show){
    loader.hidden = false;
  } else {
    loader.hidden = true;
  }
}
/* Return the page to its pre-search state: no cursor, no end marker and an
   empty feed. */
function resetFeed(){
  lastId = undefined;
  reachedEnd = false;
  endMarker.hidden = true;
  feed.innerHTML = '';
}
/* Append one linked card for a search result to the feed.
   `a` is a single-key object { <article url>: { title, processed_text, id } }. */
function renderArticle(a){
  const [url, meta] = Object.entries(a)[0];

  const link = document.createElement('a');
  link.className = 'article-link';
  link.href = '/news/view?url=' + encodeURIComponent(url);

  // A result without processed text used to throw on .replace(), which
  // stopped the remaining results of the page from rendering; treat it as
  // empty text instead.
  const source = String(meta.processed_text ?? '');
  // Drop parenthesised runs that contain characters outside the allowed
  // set, then bracketed runs, then any remaining disallowed characters.
  let excerpt = source.replace(/(\(.*[^\w:_; '.,"\s]+.*\))/g, '');
  excerpt = excerpt.replace(/(\[.*])/g, '');
  excerpt = excerpt.replace(/([^\w:_; '.,"\s/]+)/g, '');
  // Keep the first 60 whitespace-separated tokens and end on "...".
  excerpt = excerpt.split(/\s/g).slice(0, 60).join(' ').trim();
  excerpt += excerpt.endsWith('.') ? '..' : '...';

  const article = document.createElement('article');
  article.className = 'article';
  // Fall back to the URL so a missing title does not render as "undefined".
  article.innerHTML = `
    <h2>${escapeHtml(meta.title || url)}</h2>
    <p>${escapeHtml(excerpt)}</p>
  `;

  link.appendChild(article);
  feed.appendChild(link);
}
/* ---------- fetch & scroll ---------- */
/* Fetch the next page of results for the current query and append it.
   Does nothing while a request is in flight, after the end of the results,
   or when there is no query. Errors are logged and leave the paging state
   untouched so a later scroll can retry. */
async function loadMore(){
  if (reachedEnd || isFetching || !currentQuery.trim()) return;

  // Snapshot what this request is for. A new search can reset the feed
  // while the request is in flight; its own loadMore() call is rejected by
  // the isFetching guard, so the response below must not be rendered into
  // the fresh feed and the new state still needs its request.
  const query = currentQuery;
  const cursor = lastId;
  const superseded = () => query !== currentQuery || cursor !== lastId;

  isFetching = true;
  toggleLoader(true);
  let applied = false;
  try{
    const url = new URL('/news/api/search', window.location.origin);
    url.searchParams.set('text', query);
    url.searchParams.set('count', PAGE_SIZE);
    if (cursor !== undefined) url.searchParams.set('last', cursor);

    const resp = await fetch(url);
    if (!resp.ok) throw new Error(`Server responded ${resp.status}`);
    const data = await resp.json();   // expecting an array
    if (!Array.isArray(data)) throw new Error('Invalid payload');

    if (!superseded()){
      applied = true;
      if (data.length === 0){
        reachedEnd = true;
        endMarker.hidden = false;
      } else {
        data.forEach(renderArticle);
        // Each item is { <url>: { title, processed_text, id } }; the id of
        // the last one is the cursor for the next request.
        lastId = Object.entries(data[data.length - 1])[0][1].id;
        if (data.length < PAGE_SIZE){
          reachedEnd = true;
          endMarker.hidden = false;
        }
      }
    }
  }catch(err){
    console.error(err);
  }finally{
    isFetching = false;
    toggleLoader(false);
  }

  if (!applied){
    // Either the request failed (no automatic retry) or its response was
    // discarded because the search changed; in that case fetch the page
    // the new state is waiting for.
    if (superseded()) loadMore();
  } else if (!reachedEnd && window.innerHeight + window.scrollY >= document.body.offsetHeight - 600){
    // A short page may not overflow the viewport, so no scroll event would
    // ever request the next one; keep going until the feed is tall enough.
    loadMore();
  }
}
/* Request the next page once the bottom of the viewport is within 600px of
   the end of the body. loadMore() decides whether a request may start. */
function handleScroll(){
  const viewportBottom = window.innerHeight + window.scrollY;
  const triggerLine = document.body.offsetHeight - 600;
  if (viewportBottom >= triggerLine) loadMore();
}
window.addEventListener('scroll', handleScroll);
/* ---------- search triggers ---------- */
/* Start a new search from the trimmed input value; an empty input is
   ignored and leaves the current results in place. */
function performSearch(){
  const typed = searchInput.value.trim();
  if (typed){
    currentQuery = typed;
    resetFeed();
    loadMore();
  }
}
/* Escape a string for use as HTML text content: assign it as plain text to
   a detached <textarea> and read back the serialised markup. */
const escapeScratch = document.createElement('textarea');
function escapeHtml(str){
  escapeScratch.textContent = str;
  const escaped = escapeScratch.innerHTML;
  return escaped;
}
let debounceTimer;
/* Run the search right away and drop any debounced run still pending.
   Without this, typing and then pressing Enter (or clicking the button)
   within the debounce window ran the same search twice and wiped the feed
   in between. */
function submitSearch(){
  clearTimeout(debounceTimer);
  performSearch();
}
searchBtn.addEventListener('click', submitSearch);
searchInput.addEventListener('keydown', e => {
  if (e.key === 'Enter') submitSearch();
});
searchInput.addEventListener("input", () => {
  clearTimeout(debounceTimer);
  debounceTimer = setTimeout(performSearch, 400); // ~0.4 s debounce; adjust as needed
});
/* optional: focus input on page load */
window.addEventListener('DOMContentLoaded', () => searchInput.focus());
</script>
</body>
</html>

View File

@ -178,9 +178,11 @@
<body>
<header class="navbar">
<a href="/" class="brand">Newsulizer</a>
<a href="/news/" class="brand">Newsulizer</a>
<nav>
<a href="/" class="nav-link">Home</a>
<a href="/news/" class="nav-link">Home</a>
<a href="/news/browse" class="nav-link">Browse</a>
<a href="/news/search" class="nav-link">Search</a>
</nav>
</header>
@ -216,7 +218,7 @@
(function main(){
const qs = new URLSearchParams(window.location.search);
const url = qs.get('url');
const API = '/api/view_article?url=' + encodeURIComponent(url ?? '');
const API = '/news/api/view_article?url=' + encodeURIComponent(url ?? '');
const elTopics = document.getElementById('topics');
const elSummary = document.getElementById('summary');