diff --git a/news/__pycache__/pool.cpython-311.pyc b/news/__pycache__/pool.cpython-311.pyc index e6ee685..7ff9aef 100644 Binary files a/news/__pycache__/pool.cpython-311.pyc and b/news/__pycache__/pool.cpython-311.pyc differ diff --git a/news/__pycache__/server.cpython-311.pyc b/news/__pycache__/server.cpython-311.pyc index 7b9411f..935bcde 100644 Binary files a/news/__pycache__/server.cpython-311.pyc and b/news/__pycache__/server.cpython-311.pyc differ diff --git a/news/main.py b/news/main.py index c89ef45..ffb4539 100644 --- a/news/main.py +++ b/news/main.py @@ -196,13 +196,13 @@ async def handle_article_url(message: discord.Message, url: str) -> None: LOGGER.info("Received URL from %s: %s", message.author, url) try: - title, processed_html = await server.article_repository.get_article_async(url) - if await server.article_repository.has_paragraphs(url): await message.channel.send("This article has already been processed.") LOGGER.info(f"Article {url} already processed") return + title, processed_html = await server.article_repository.fetch_article(url) + LOGGER.info(f"Article {url} has not been processed. Beginning now!") summary_bot = ChatBot(summary_system_prompt) @@ -347,22 +347,24 @@ async def on_message(message: discord.Message) -> None: # Launch the processing task without blocking Discord’s event loop asyncio.create_task(handle_article_url(message, url)) -def _run_flask_blocking() -> NoReturn: # helper returns never - server.app.run(host="0.0.0.0", port=8000, debug=False, use_reloader=False) +async def start_discord(): + await bot.start(DISCORD_TOKEN) - -def main() -> None: +async def main(): if DISCORD_TOKEN is None: raise RuntimeError("Set the DISCORD_TOKEN environment variable or add it to a .env file.") - thread = threading.Thread(target=_run_flask_blocking, daemon=True, name="flask-api") - thread.start() - try: - bot.run(DISCORD_TOKEN) + web_task = server.app.run_task(host="0.0.0.0", port=8000, debug=False) + discord_task = start_discord() + + await asyncio.gather(web_task, discord_task) finally: - asyncio.run(PlaywrightPool.stop()) + await PlaywrightPool.stop() server.article_repository.close() + if not bot.is_closed(): + await bot.close() + if __name__ == "__main__": - main() + asyncio.run(main()) diff --git a/news/pool.py b/news/pool.py index be99a49..be38b29 100644 --- a/news/pool.py +++ b/news/pool.py @@ -137,9 +137,9 @@ class ArticleRepository: # public API # ------------------------------------------------------------------ # - async def get_article_async(self, url: str) -> tuple[str, str]: + async def fetch_article(self, url: str) -> tuple[str, str]: async with self._lock: - result = self._get_article(url) + result = await self.get_article(url) if result: return result @@ -163,34 +163,17 @@ class ArticleRepository: return title, processed_html - def get_article(self, url: str) -> tuple[str, str] | None: - try: - self._lock.acquire() - return self._get_article(url) - except Exception as exc: - LOGGER.exception(f"[ArticleRepository] Error while getting article for {url}") - LOGGER.exception(exc) + async def get_article(self, url: str) -> tuple[str, str] | None: + async with self._lock: + # Single writer at a time when using sqlite3 – avoids `database is locked` + row = self._row_for_url(url) + + if row: # row = (id, url, title, raw, processed) + LOGGER.info(f"[ArticleRepository] Found cached article for {url}") + return row[2], row[4] # processed_html already present + + LOGGER.info(f"[ArticleRepository] Article was not found for {url}") return None - finally: - if self._lock.locked(): - self._lock.release() - - def _get_article(self, url: str) -> tuple[str, str] | None: - """ - Main entry point. - • Returns the processed text if it is already cached. - • Otherwise downloads it, processes it, stores it, and returns it. - """ - - # Single writer at a time when using sqlite3 – avoids `database is locked` - row = self._row_for_url(url) - - if row: # row = (id, url, title, raw, processed) - LOGGER.info(f"[ArticleRepository] Found cached article for {url}") - return row[2], row[4] # processed_html already present - - LOGGER.info(f"[ArticleRepository] Article was not found for {url}") - return None async def has_paragraphs(self, url) -> bool: async with self._lock: @@ -206,15 +189,12 @@ class ArticleRepository: return False return True - def get_latest_articles(self, count): - try: - self._lock.acquire() + async def get_latest_articles(self, count): + async with self._lock: cur = self._conn.cursor() row = cur.execute(f"SELECT id, url, title, processed_html FROM articles ORDER BY id DESC LIMIT {self.cursor_type}", (count,)) return row.fetchall() - finally: - self._lock.release() async def set_paragraphs(self, url, paragraphs, summary, summary_ratings, topics, topic_ratings): async with self._lock: diff --git a/news/server.py b/news/server.py index 6afb151..f61e84e 100644 --- a/news/server.py +++ b/news/server.py @@ -1,4 +1,4 @@ -from flask import Flask, request, jsonify, abort +from quart import Quart, request, jsonify, abort from pathlib import Path import logging @@ -6,27 +6,31 @@ import logging # Adjust the relative import path if pool.py lives in a package. from pool import ArticleRepository -app = Flask(__name__) +app = Quart(__name__) article_repository = ArticleRepository() LOGGER = logging.getLogger("server") +@app.route("/health") +async def health(): + return {"status": "ok"} + @app.route("/articles/", methods=["GET"]) -def get_article(article_url: str): - article = article_repository.get_article(article_url) +async def get_article(article_url: str): + article = await article_repository.get_article(article_url) if article is None: abort(404, description="Article not found") return jsonify(article) @app.route("/article-by-url", methods=["GET"]) -def get_article_by_url(): +async def get_article_by_url(): url = request.args.get("url") if not url: abort(400, description="`url` query parameter is required") LOGGER.info(f"Fetching article by URL: {url}") - article = article_repository.get_article(url) + article = await article_repository.get_article(url) if article is None: abort(404, description="Article not found") return jsonify(article) diff --git a/news/shell.nix b/news/shell.nix index 4b323cd..4c980b8 100644 --- a/news/shell.nix +++ b/news/shell.nix @@ -10,6 +10,7 @@ in pkgs.mkShell { trafilatura playwright flask + quart ])) ]; propagatedBuildInputs = with pkgs; [