# ---------------------------------------------------------------------------
# NOTE(review): this file arrived as a git diff whose newlines were lost
# (the whole patch collapsed onto a few physical lines), so it cannot be
# applied with `git apply`.  Below is a cleaned-up reconstruction of the
# *added* Python code only.  Patch metadata, the binary
# news/__pycache__/*.pyc hunks, and the truncated news/static/browse.html
# payload (198 new lines, almost all missing from this copy) are
# intentionally omitted — they cannot be recovered from this copy.
# ---------------------------------------------------------------------------


# --- news/main.py additions -------------------------------------------------

async def process_articles(message: discord.Message) -> None:
    """Re-process every stored article that lacks summaries or paragraphs.

    Triggered by the ``!process`` chat command; streams progress messages
    back to the channel the command came from.
    """
    await message.channel.send("Processing incomplete articles...")
    LOGGER.info("Fetching incomplete articles")
    urls = await server.article_repository.fetch_incomplete()
    for url in urls:
        LOGGER.info(f"Processing incomplete article {url}")
        await message.channel.send(f"Processing incomplete article {url}")
        await handle_article_url(message, url)
    await message.channel.send("Done!")


# NOTE(review): the patch also edits on_message(), which is only partially
# visible in the diff (the hunk cuts the function off), so it is not
# reconstructed here.  The added lines were, after the self-message guard:
#
#     if message.content.startswith("!"):
#         if message.content == "!process":
#             # Fire-and-forget.  NOTE(review): repeated `!process` commands
#             # run concurrently — consider guarding with a flag or lock.
#             asyncio.create_task(process_articles(message))
#         return


# --- news/pool.py additions (methods added to ArticleRepository) ------------
# Partial class: only the methods introduced by the patch are shown.

class ArticleRepository:

    async def fetch_incomplete(self) -> list[str]:
        """Return URLs of articles missing either summaries or paragraphs.

        Used by the ``!process`` bot command to find articles whose
        processing pipeline did not complete.
        """
        async with self._lock:
            cur = self._conn.cursor()
            # Constant SQL: no f-string needed (the original used one).
            # The original also split `results = ` / `row.fetchall()` across
            # a broken line — rejoined here.
            rows = cur.execute(
                """
                SELECT url
                FROM articles AS a
                WHERE (SELECT COUNT(*) FROM summaries  WHERE article_id = a.id) = 0
                   OR (SELECT COUNT(*) FROM paragraphs WHERE article_id = a.id) = 0
                """
            )
            return [url for (url,) in rows.fetchall()]

    async def search_articles(self, text: str, count: int, last: int):
        """LIKE-search articles by url/title/processed_html.

        ``last`` > 0 acts as a keyset-pagination cursor (only rows with
        ``id < last`` are returned); ``count`` caps the page size.
        Returns rows of ``(id, url, title, processed_html)`` newest-first.
        """
        async with self._lock:
            pattern = "%" + text + "%"
            cur = self._conn.cursor()
            # self.cursor_type is the DB driver's parameter marker (e.g. "?");
            # interpolating it is safe — the user text is always bound.
            if last > 0:
                # Cursor page.  NOTE(review): the original ordered only
                # inside a subquery, which does not guarantee outer row
                # order; flattened with an explicit outer ORDER BY.
                rows = cur.execute(
                    f"""
                    SELECT id, url, title, processed_html
                    FROM articles
                    WHERE (url LIKE {self.cursor_type}
                           OR title LIKE {self.cursor_type}
                           OR processed_html LIKE {self.cursor_type})
                      AND id < {self.cursor_type}
                    ORDER BY id DESC
                    LIMIT {self.cursor_type}
                    """,
                    (pattern, pattern, pattern, last, count),
                )
            else:
                # First page: same predicate without the id cursor.
                # NOTE(review): the original bound the pattern once as a
                # `? AS text` alias and referenced the alias in WHERE —
                # non-portable SQL (aliases are not generally visible in
                # WHERE); normalized to the same three-placeholder form as
                # the cursor branch.
                rows = cur.execute(
                    f"""
                    SELECT id, url, title, processed_html
                    FROM articles
                    WHERE url LIKE {self.cursor_type}
                       OR title LIKE {self.cursor_type}
                       OR processed_html LIKE {self.cursor_type}
                    ORDER BY id DESC
                    LIMIT {self.cursor_type}
                    """,
                    (pattern, pattern, pattern, count),
                )
            return rows.fetchall()


# --- news/server.py additions -----------------------------------------------

@app.route("/browse.html")
async def browse_html():
    """Serve the static article-browser page."""
    return await send_from_directory("static", "browse.html")


@app.route("/browse")
async def browse():
    """Alias for /browse.html."""
    return await browse_html()


@app.route("/search.html")
async def search_html():
    """Serve the static search page."""
    return await send_from_directory("static", "search.html")


@app.route("/search")
async def search():
    """Alias for /search.html."""
    return await search_html()


def _articles_json(articles):
    """Shape DB rows ``(id, url, title, processed_html)`` for the JSON API.

    Shared by /api/articles and /api/search so the two endpoints cannot
    drift apart (the patch duplicated this loop in both handlers).
    """
    return [
        {url: {"title": title, "processed_text": processed_html, "id": _id}}
        for _id, url, title, processed_html in articles
    ]


# NOTE(review): the patch also changes get_articles() to unpack the row id
# and emit it as "id" in the JSON payload; that function is only partially
# visible in the diff, so it is not reconstructed here.  Its loop body is
# the same shape as _articles_json() above and should call it.


@app.route("/api/search", methods=["GET"])
async def search_articles():
    """GET /api/search?text=...&count=25&last=-1 — paginated LIKE search.

    ``count`` is clamped to 125; ``last`` is a keyset cursor (see
    ArticleRepository.search_articles).  400 if ``text`` is missing.
    """
    # Validate before parsing numeric params (the original parsed first,
    # so a missing `text` with a bad `count` raised ValueError, not 400).
    text = request.args.get("text")
    if not text:
        abort(400, description="`text` query parameter is required")
    count = min(int(request.args.get("count") or "25"), 125)
    last = int(request.args.get("last") or "-1")

    articles = await article_repository.search_articles(text, count, last)
    LOGGER.info(f"Found {len(articles)} articles for search query: {text}")
    return jsonify(_articles_json(articles))
+ +