From ef258f05ad77ed9cdddc9f43435fbf012b1ff853 Mon Sep 17 00:00:00 2001 From: Brett Laptop Date: Thu, 3 Jul 2025 22:03:14 -0400 Subject: [PATCH] i hate async --- news/__pycache__/pool.cpython-311.pyc | Bin 21031 -> 20974 bytes news/__pycache__/server.cpython-311.pyc | Bin 1916 -> 2225 bytes news/main.py | 26 +++++++------ news/pool.py | 48 +++++++----------------- news/server.py | 16 +++++--- news/shell.nix | 1 + 6 files changed, 39 insertions(+), 52 deletions(-) diff --git a/news/__pycache__/pool.cpython-311.pyc b/news/__pycache__/pool.cpython-311.pyc index e6ee685346a918b04dc420da314bb245f34e45f4..7ff9aef28ed8acd4751ea4e1d62de9ecd3228f5b 100644 GIT binary patch delta 1662 zcma)+ZAe>J7{|}K_og86C1yw#<~#KkgL8SPxmY9u7t&F#yA zFB>|vGFIm!v>&#$!?BirD72Ri_GPOgTiK^X92H?88;sEpBZ8%leb~-P>DH)gy*d2O zJ@1YK}^Jw@tno;_$C_9q_YVzExg-O=I|j& zm2H56<)u2_a-VlD^UfQ-0^hpAx8|$156NU*#=MEVGGp;0c7gFa$+UzNenrKS8v7N% z)!E?wfXWy=qMI@J-_JDPr;l8eZSyz9}SI{|DbdD+C`T<=L zF>3qITEk`KtTJEo>OCI;83k>0MH|g)qpQa1D{XVpHN8L^ z?cuy8cA=q`aM7gj)>SOpFyR{aRyv)82xpLH8y{di4kONHJOMYIVSH!)PbUMtTouDn z8Q7U23=izJ(Aso>Wri78;LR)=;-KePEjPjrv+(9C24d8}RkZ`S7j0%Hoa^ojgaW=3 z*2F-w^{kL=wu%Wc8HaCKcDVwuBK1!$J zPo1S*{O;0lHQSZT$?J;fxmeX1;L~RF*l0ePSxFA(Ye&f_BIISd7&3X7AhUsx{y4K) zBGI+paM?ZU{`kn;k)nKO_zjwJ^Z9wVl{T<#{p@8)Njr1@_fl3zKFN$n(;+=N=&h(8C#Kz6CQLz}OTZW5d)L?a2WjMRGpAgW2QI|o88MMg!L80#<`I(C zJHM6VFX5`+g|YOL-^jF=cV3tx@f+Dv(*+OBMO&U_37#}t7zayNJ$6aHE)9!2rRZ@Z zgYUpl$SMDdxUI4A3o-aCqNPDH30?{t#ZCCDCuKQLZ>K3f zqL`;}Q@lel4pZR{Y=gz{DHr)OQ^=l%Nky0CsFBHtbpC|~PDXy&cZ-I)4c^{iyd-_s Y+lY^ju(+~_cIQUv`R4yv1)c=^2Y({T#{d8T delta 1736 zcmb7Ee{54l9KU<7-PZQ4?P$xkZfv*qt*&k(8rX!nfUx3ZW93IlWc!2VrG2nM({A@3 zc3Xx;gF@JZ;SuByV}_UzqXw~Rj8POvlt0_1NtY!gghXRvVrrCRfBL-+=4Ld}yL+E^ z-=Fur`+o0!y@s#9hppEuE6oVnq4uLA=QrQ9K6K*C%;4Hm>5Hs8Z5T2|o`^V48`8b5 zqET=QY`15B`f^=L6r^FczVVE+>Zqy@D^!n$W9qP?O~k{KUbbJ};9e$Xz_aEO5c(PV zDYW3IzSPBDX=0|(FVag} z1Y>A2qA#5k*r6A^?3Sk*{vLLx+Fps#Ac~-q!pp)MUv-atvAL%WbsPNXL2AfJ4OxlgC6bW{WUb(ka}Iefly&&? z4u4TXcIV7w!A5d6lC^pAHct`Z%H{>T^OCt>_vGxJtXuY1I$WQd>1Bfp(z2)v8xqM=B}?YofM`7+O7+T9qw*qUwb-@A@rDf z)?1T`0PQb3@iADMA)}=>phV+Djq7xRj3%OSedCJc!Kru20aZUi;~LRNR5D5vVO7)A z2+`GXokWk2D4<4^7*&belHl(WK>g0B$3<}k0q3d24-z5&Xy@nPf&Gv zs>CCNK3X-pDw@f5UjOV%Z%7RKMdp;dQw=+opN<3SXhMsEi;0cHtE%%8*rpZ(fa}$TjjIjeT%h-1d|5}B`vKowM7Zf_|*9Ua*|AWSo89x6Yw-!*V2Y( z*!Gqnp1*d!MM!_s{#?0x8WiX#hYK7gIm9{4vf2)cFS41A&v7>0)7fS)($_%1CcB;z zFiwBib<2d`XU78_*pj{)s1`Q;-2o`CzQ6P$FxK2(I?aB0-ix=VZM{|z2ht6F)dK#E zb?#|3eZfOG!?MXy)(%yE)*(AF&?>;*DoHgFiI% zO%B&NbaNQvFwG7Ix8f(+`QYKM<{~m1S3`}>=q#eO;E{}Q&DTObYufvu>?Zg3DKqVx c!r!M~-{;5Mdkq-u{~3QP@&EW2X(>^E0>@hN0{{R3 diff --git a/news/__pycache__/server.cpython-311.pyc b/news/__pycache__/server.cpython-311.pyc index 7b9411f845eec4faa80fa230bf63c35a3380b058..935bcded819dc103984aaffd55013c2b4e7e9efc 100644 GIT binary patch delta 773 zcmZuv&ubGw6rP!#-Ly@1lh{q|nu;O*Tqpz*Y!S8f6v384ZSBEZ=&qY$BsjZ^2N8-E z6+LK}Lyy)=tW-q*0Wbao8x0715=2nEM7)Uy-|U)z(b;+5?!1}#zK{3zm-afVJyX>& z1VgPj^$WD6P2#22@u$T$MraQr9#&!twc&HGpFo#0t(Qh=#2R(*i?KF=Er@tnNjrOm zSg9>KbY&l0IjjEniuMoxxFxM1GyPR?nlzM`{TlQ5B0jf}DAny+!>h0V(TiR}{SL$r z4PXIbw2drmkqrza(o(3nUd{8}kTkD8M@$t5ArOB{%G03|^RWzM(hzPI;0rvP=wnKM zYz4`DK=Vv6^N09+D$;oXE70K~J_!j6-IUhQwse>BG}*Z{4x3>D3~#JAY*%*8FGU_8}B0N*7HYIBk3WH~ko9$BD>7)Z1e=u$VfKmoY5!58@q zukaZHt<~ZMsn8U@#hM28;RiN1ACURCIMY*0eDc%NQ;UkEL44Mt{L+%tTg=5JMG!xM zOkc@R1kzq43M77U*yQG?l;)(`6=?vuj6hucgJE(Pd;erd4iTmq!IQ%|!dO7c0rbFT ASO5S3 diff --git a/news/main.py b/news/main.py index c89ef45..ffb4539 100644 --- a/news/main.py +++ b/news/main.py @@ -196,13 +196,13 @@ async def handle_article_url(message: discord.Message, url: str) -> None: LOGGER.info("Received URL from %s: %s", message.author, url) try: - title, processed_html = await server.article_repository.get_article_async(url) - if await server.article_repository.has_paragraphs(url): await message.channel.send("This article has already been processed.") LOGGER.info(f"Article {url} already processed") return + title, processed_html = await server.article_repository.fetch_article(url) + LOGGER.info(f"Article {url} has not been processed. Beginning now!") summary_bot = ChatBot(summary_system_prompt) @@ -347,22 +347,24 @@ async def on_message(message: discord.Message) -> None: # Launch the processing task without blocking Discord’s event loop asyncio.create_task(handle_article_url(message, url)) -def _run_flask_blocking() -> NoReturn: # helper returns never - server.app.run(host="0.0.0.0", port=8000, debug=False, use_reloader=False) +async def start_discord(): + await bot.start(DISCORD_TOKEN) - -def main() -> None: +async def main(): if DISCORD_TOKEN is None: raise RuntimeError("Set the DISCORD_TOKEN environment variable or add it to a .env file.") - thread = threading.Thread(target=_run_flask_blocking, daemon=True, name="flask-api") - thread.start() - try: - bot.run(DISCORD_TOKEN) + web_task = server.app.run_task(host="0.0.0.0", port=8000, debug=False) + discord_task = start_discord() + + await asyncio.gather(web_task, discord_task) finally: - asyncio.run(PlaywrightPool.stop()) + await PlaywrightPool.stop() server.article_repository.close() + if not bot.is_closed(): + await bot.close() + if __name__ == "__main__": - main() + asyncio.run(main()) diff --git a/news/pool.py b/news/pool.py index be99a49..be38b29 100644 --- a/news/pool.py +++ b/news/pool.py @@ -137,9 +137,9 @@ class ArticleRepository: # public API # ------------------------------------------------------------------ # - async def get_article_async(self, url: str) -> tuple[str, str]: + async def fetch_article(self, url: str) -> tuple[str, str]: async with self._lock: - result = self._get_article(url) + result = await self.get_article(url) if result: return result @@ -163,34 +163,17 @@ class ArticleRepository: return title, processed_html - def get_article(self, url: str) -> tuple[str, str] | None: - try: - self._lock.acquire() - return self._get_article(url) - except Exception as exc: - LOGGER.exception(f"[ArticleRepository] Error while getting article for {url}") - LOGGER.exception(exc) + async def get_article(self, url: str) -> tuple[str, str] | None: + async with self._lock: + # Single writer at a time when using sqlite3 – avoids `database is locked` + row = self._row_for_url(url) + + if row: # row = (id, url, title, raw, processed) + LOGGER.info(f"[ArticleRepository] Found cached article for {url}") + return row[2], row[4] # processed_html already present + + LOGGER.info(f"[ArticleRepository] Article was not found for {url}") return None - finally: - if self._lock.locked(): - self._lock.release() - - def _get_article(self, url: str) -> tuple[str, str] | None: - """ - Main entry point. - • Returns the processed text if it is already cached. - • Otherwise downloads it, processes it, stores it, and returns it. - """ - - # Single writer at a time when using sqlite3 – avoids `database is locked` - row = self._row_for_url(url) - - if row: # row = (id, url, title, raw, processed) - LOGGER.info(f"[ArticleRepository] Found cached article for {url}") - return row[2], row[4] # processed_html already present - - LOGGER.info(f"[ArticleRepository] Article was not found for {url}") - return None async def has_paragraphs(self, url) -> bool: async with self._lock: @@ -206,15 +189,12 @@ class ArticleRepository: return False return True - def get_latest_articles(self, count): - try: - self._lock.acquire() + async def get_latest_articles(self, count): + async with self._lock: cur = self._conn.cursor() row = cur.execute(f"SELECT id, url, title, processed_html FROM articles ORDER BY id DESC LIMIT {self.cursor_type}", (count,)) return row.fetchall() - finally: - self._lock.release() async def set_paragraphs(self, url, paragraphs, summary, summary_ratings, topics, topic_ratings): async with self._lock: diff --git a/news/server.py b/news/server.py index 6afb151..f61e84e 100644 --- a/news/server.py +++ b/news/server.py @@ -1,4 +1,4 @@ -from flask import Flask, request, jsonify, abort +from quart import Quart, request, jsonify, abort from pathlib import Path import logging @@ -6,27 +6,31 @@ import logging # Adjust the relative import path if pool.py lives in a package. from pool import ArticleRepository -app = Flask(__name__) +app = Quart(__name__) article_repository = ArticleRepository() LOGGER = logging.getLogger("server") +@app.route("/health") +async def health(): + return {"status": "ok"} + @app.route("/articles/", methods=["GET"]) -def get_article(article_url: str): - article = article_repository.get_article(article_url) +async def get_article(article_url: str): + article = await article_repository.get_article(article_url) if article is None: abort(404, description="Article not found") return jsonify(article) @app.route("/article-by-url", methods=["GET"]) -def get_article_by_url(): +async def get_article_by_url(): url = request.args.get("url") if not url: abort(400, description="`url` query parameter is required") LOGGER.info(f"Fetching article by URL: {url}") - article = article_repository.get_article(url) + article = await article_repository.get_article(url) if article is None: abort(404, description="Article not found") return jsonify(article) diff --git a/news/shell.nix b/news/shell.nix index 4b323cd..4c980b8 100644 --- a/news/shell.nix +++ b/news/shell.nix @@ -10,6 +10,7 @@ in pkgs.mkShell { trafilatura playwright flask + quart ])) ]; propagatedBuildInputs = with pkgs; [