i hate async

main
Brett 2025-07-03 22:03:14 -04:00
parent 301483810e
commit ef258f05ad
6 changed files with 39 additions and 52 deletions

View File

@ -196,13 +196,13 @@ async def handle_article_url(message: discord.Message, url: str) -> None:
LOGGER.info("Received URL from %s: %s", message.author, url) LOGGER.info("Received URL from %s: %s", message.author, url)
try: try:
title, processed_html = await server.article_repository.get_article_async(url)
if await server.article_repository.has_paragraphs(url): if await server.article_repository.has_paragraphs(url):
await message.channel.send("This article has already been processed.") await message.channel.send("This article has already been processed.")
LOGGER.info(f"Article {url} already processed") LOGGER.info(f"Article {url} already processed")
return return
title, processed_html = await server.article_repository.fetch_article(url)
LOGGER.info(f"Article {url} has not been processed. Beginning now!") LOGGER.info(f"Article {url} has not been processed. Beginning now!")
summary_bot = ChatBot(summary_system_prompt) summary_bot = ChatBot(summary_system_prompt)
@ -347,22 +347,24 @@ async def on_message(message: discord.Message) -> None:
# Launch the processing task without blocking Discords event loop # Launch the processing task without blocking Discords event loop
asyncio.create_task(handle_article_url(message, url)) asyncio.create_task(handle_article_url(message, url))
def _run_flask_blocking() -> NoReturn: # helper returns never async def start_discord():
server.app.run(host="0.0.0.0", port=8000, debug=False, use_reloader=False) await bot.start(DISCORD_TOKEN)
async def main():
def main() -> None:
if DISCORD_TOKEN is None: if DISCORD_TOKEN is None:
raise RuntimeError("Set the DISCORD_TOKEN environment variable or add it to a .env file.") raise RuntimeError("Set the DISCORD_TOKEN environment variable or add it to a .env file.")
thread = threading.Thread(target=_run_flask_blocking, daemon=True, name="flask-api")
thread.start()
try: try:
bot.run(DISCORD_TOKEN) web_task = server.app.run_task(host="0.0.0.0", port=8000, debug=False)
discord_task = start_discord()
await asyncio.gather(web_task, discord_task)
finally: finally:
asyncio.run(PlaywrightPool.stop()) await PlaywrightPool.stop()
server.article_repository.close() server.article_repository.close()
if not bot.is_closed():
await bot.close()
if __name__ == "__main__": if __name__ == "__main__":
main() asyncio.run(main())

View File

@ -137,9 +137,9 @@ class ArticleRepository:
# public API # public API
# ------------------------------------------------------------------ # # ------------------------------------------------------------------ #
async def get_article_async(self, url: str) -> tuple[str, str]: async def fetch_article(self, url: str) -> tuple[str, str]:
async with self._lock: async with self._lock:
result = self._get_article(url) result = await self.get_article(url)
if result: if result:
return result return result
@ -163,34 +163,17 @@ class ArticleRepository:
return title, processed_html return title, processed_html
def get_article(self, url: str) -> tuple[str, str] | None: async def get_article(self, url: str) -> tuple[str, str] | None:
try: async with self._lock:
self._lock.acquire() # Single writer at a time when using sqlite3 avoids `database is locked`
return self._get_article(url) row = self._row_for_url(url)
except Exception as exc:
LOGGER.exception(f"[ArticleRepository] Error while getting article for {url}") if row: # row = (id, url, title, raw, processed)
LOGGER.exception(exc) LOGGER.info(f"[ArticleRepository] Found cached article for {url}")
return row[2], row[4] # processed_html already present
LOGGER.info(f"[ArticleRepository] Article was not found for {url}")
return None return None
finally:
if self._lock.locked():
self._lock.release()
def _get_article(self, url: str) -> tuple[str, str] | None:
"""
Main entry point.
Returns the processed text if it is already cached.
Otherwise downloads it, processes it, stores it, and returns it.
"""
# Single writer at a time when using sqlite3 avoids `database is locked`
row = self._row_for_url(url)
if row: # row = (id, url, title, raw, processed)
LOGGER.info(f"[ArticleRepository] Found cached article for {url}")
return row[2], row[4] # processed_html already present
LOGGER.info(f"[ArticleRepository] Article was not found for {url}")
return None
async def has_paragraphs(self, url) -> bool: async def has_paragraphs(self, url) -> bool:
async with self._lock: async with self._lock:
@ -206,15 +189,12 @@ class ArticleRepository:
return False return False
return True return True
def get_latest_articles(self, count): async def get_latest_articles(self, count):
try: async with self._lock:
self._lock.acquire()
cur = self._conn.cursor() cur = self._conn.cursor()
row = cur.execute(f"SELECT id, url, title, processed_html FROM articles ORDER BY id DESC LIMIT {self.cursor_type}", (count,)) row = cur.execute(f"SELECT id, url, title, processed_html FROM articles ORDER BY id DESC LIMIT {self.cursor_type}", (count,))
return row.fetchall() return row.fetchall()
finally:
self._lock.release()
async def set_paragraphs(self, url, paragraphs, summary, summary_ratings, topics, topic_ratings): async def set_paragraphs(self, url, paragraphs, summary, summary_ratings, topics, topic_ratings):
async with self._lock: async with self._lock:

View File

@ -1,4 +1,4 @@
from flask import Flask, request, jsonify, abort from quart import Quart, request, jsonify, abort
from pathlib import Path from pathlib import Path
import logging import logging
@ -6,27 +6,31 @@ import logging
# Adjust the relative import path if pool.py lives in a package. # Adjust the relative import path if pool.py lives in a package.
from pool import ArticleRepository from pool import ArticleRepository
app = Flask(__name__) app = Quart(__name__)
article_repository = ArticleRepository() article_repository = ArticleRepository()
LOGGER = logging.getLogger("server") LOGGER = logging.getLogger("server")
@app.route("/health")
async def health():
return {"status": "ok"}
@app.route("/articles/<article_url>", methods=["GET"]) @app.route("/articles/<article_url>", methods=["GET"])
def get_article(article_url: str): async def get_article(article_url: str):
article = article_repository.get_article(article_url) article = await article_repository.get_article(article_url)
if article is None: if article is None:
abort(404, description="Article not found") abort(404, description="Article not found")
return jsonify(article) return jsonify(article)
@app.route("/article-by-url", methods=["GET"]) @app.route("/article-by-url", methods=["GET"])
def get_article_by_url(): async def get_article_by_url():
url = request.args.get("url") url = request.args.get("url")
if not url: if not url:
abort(400, description="`url` query parameter is required") abort(400, description="`url` query parameter is required")
LOGGER.info(f"Fetching article by URL: {url}") LOGGER.info(f"Fetching article by URL: {url}")
article = article_repository.get_article(url) article = await article_repository.get_article(url)
if article is None: if article is None:
abort(404, description="Article not found") abort(404, description="Article not found")
return jsonify(article) return jsonify(article)

View File

@ -10,6 +10,7 @@ in pkgs.mkShell {
trafilatura trafilatura
playwright playwright
flask flask
quart
])) ]))
]; ];
propagatedBuildInputs = with pkgs; [ propagatedBuildInputs = with pkgs; [