From 5d54ef4cc38b6b1a11d34dbbc0f1cdce1762a606 Mon Sep 17 00:00:00 2001 From: Brett Laptop Date: Tue, 8 Jul 2025 21:43:59 -0400 Subject: [PATCH] finished --- news/__pycache__/pool.cpython-311.pyc | Bin 25183 -> 28416 bytes news/__pycache__/server.cpython-311.pyc | Bin 5973 -> 8417 bytes news/main.py | 15 ++ news/pool.py | 47 ++++ news/server.py | 40 +++- news/static/browse.html | 198 ++++++++++++++++ news/static/index.html | 53 ++++- news/static/search.html | 299 ++++++++++++++++++++++++ news/static/view.html | 2 + 9 files changed, 651 insertions(+), 3 deletions(-) create mode 100644 news/static/browse.html create mode 100644 news/static/search.html diff --git a/news/__pycache__/pool.cpython-311.pyc b/news/__pycache__/pool.cpython-311.pyc index acbcadf0d80145e24f1a10485540a6990f8bbab7..1df12af71df864f6821c52d9c63df55e1d4b2c95 100644 GIT binary patch delta 3460 zcmbtWdrVu`8NcWHfiZ8}*gzg!Fl4@v7)TmQ5&{@wnx+Y)z|tfObzI*II5C)LleX`>2J>pS zNV|sL`Ofz`-#Pc3-?{JqoQ(a6r%o2&sszR#kz?XjKG zE-7nVcSP?11N!!hd z`{H8Jm{zZVGaX@XZB6O}Ao>ZrRbk-`giRVO?8YvahC~TlG!(H959ew}`6$nhHCfsF zVxB=G@#9=n)5a#9rw?d4BJ)OJ!_)0K;&Jx~BQz+qcXb{Wd^8;J2W3TYbqhY>nM0l~ zkKk~y%T>F#`J4es6b=ftLT8uYSPpb|ezK#-Q6;je`U-2>LtHx!`}^aK79Qtt96;m~ zyt9_Jg2-ALib*|lmKVhWL;ld z3HJ<{Zci&HI*2+t1`QnrNa+0|R0+{UhI~=>kA&rfp-&F`2YkVxsHM-LhXypfjInsT%U&!Gm?7n<5nxEXIT@*Hx#PuQR$1WXv}a!1!0U{X;{>*koBv7QPvJ z8R2>Mo3a+Kx7)U>lz}Ua5+Q_@{w(1jF;dTA$hM_5U^m!iP z3ILn0Db%ZBSJ@wHI!Gd3UHdXA^5ZU9d2&Jc49AgwzO!;9^9v+6lM&EMDvpvUYksB- z&SXIjoXK-)H1$NaaBOXCcu$@rX`|fPbkeVIl5Rs^zoDP8p>MdE6@@Y3r-|}?y4!Hz z-hltqGpZXBmj^qxH_s^SlH9&)MZ{)_F8LyW=6Lq?k1FFIHDp z$eymLWZ|+xy~-R8P!%t$igVYK37YgNGC?uhfqZl1$)`L*q!|@_XBRd(;XpVjFB=Z6 zF|dxZ+{|TtqlgBrtD{XoAtN9OjY6ar<&w^>Hcyw(dQt#oo2T0iAxEF=5guAGrjU=k zhz-21wyG-XXB~s=} zF?*Lh98v<|5WVDF@1y(`awvb;NMvQ1;!hN%K#xl8oR&&p6ztVQ!WSy>h&t(6t z$$rmNxnQbf7i!HhKDV$anax!=7VLYHIoRP4^0`WJq2jS*L0V(soVyox9ZVLcHAP&h zePO#ZS)A6CaCLhZYFm<9(i%0{R&Gn_taeuVX4_ol-QtGD(mdP#B@SRh6PwT^%aK^m zO`S_}M!V&MmbQhKV@Vx|YCmb>T1XpNXzom!(>ioaS~%O58P7!TVxiGivBUvDt|HE2 zyWTj}IN3DalysBLOSFi~wv=8w;Q7ry|F=%1_F!w6G~+`og?6wjj6kANBJ0K)2^B+fkh+Q}e$Y2O9jNaDKv z2MpwkaaZeAK1ROe4UO=u#4;e@U)81jYb6o#5pwX~wII<7jq zA7fXo;Cm>krD3HBEe%~z5=LcYaL`9Dd0DR~w=#}~%wZq(^;6&QfVwmZO}`wZKfvH0 zBK(L=c)CbS{4XB6mW;BD!)`Jhf9mjDrhWBt#Cqtj02DZoTmk|4Qp;Z3{FpW-TjG$ufIQUurD09lQ-jU9eRg_hn zt|C8Uou_Z_P`_`(;6iaU`e}q62%i8D`IH=BdA%j|*VkxnqD<(mHL9iF{BHj8z7P4^-3YX*Gn&9FU#BGYCYodcDRmu zs2`nPLijSm%LvyHW)WUP_%^~%5PpGh5CLDH`gxMbR>D5`(jJt<10hMI_^Cjn2*(k& zBH;HYJ--5YSd=L9J$uNKse^CJ^Z%IjRfT;`eFQ)Goz&Wtd0OTel#ws`lXMts#Ew6_P}nr delta 1520 zcmZ{k`)}J+6vuOI$7_vdlxW9G=M7anPmgyhL3+Rg5-jx!1OQ5{PporuR@b@x! zBC(FzBJ<+$IN1U6GDBe}{U;Ws*Lp(es{2#Vf-ubUeFWpYsI2CRZ4vS|Q@py2=BTxm>Xshm72{ONOx$>-|Oo9=fL?+Id|S)t{Q?K4WKQe$Ir?^%^fq0d+25~<*oh)l__%R diff --git a/news/__pycache__/server.cpython-311.pyc b/news/__pycache__/server.cpython-311.pyc index 1e2f024434389d64464da1e523569815fc75ced1..09856877c8589bebe5208cbabdb9c00eed8fcf93 100644 GIT binary patch delta 2697 zcmbtVT}%{L6u$Gbzq9PJEGzP}umTnqbiu!(DAXSi0u)kFtELtgW?9!FOYe?pi@Sv= zsiZMgbG4-r6Q1agnzZ4?zS%xB=|huUvyEgDWBXuY+ZT-OLroug&dlyA3w>yMckZ`$ z&U|yuJ?DP+&QJbd_Siqm%d-+_t>2wKIh%jQzLw7yzpj1ERnBvS+=Q+-**Qh#VR$oR zAjj6SNxs-ski&CL5$5N8&cYnKn~n1I&NVsKBK8$u->_CL4so|i=6UcBU2k$}uxyx@ z%cjJfxE_q_^Q`(#`HESVYwU_u+1?fG3XQ#PRd(eHwinqARjabASFqQ~TtD%xU$6>q z3VF7MK^l1{8uXu3PLHsN&^JNzAn$SM1v9mSg*p)O5u5;OnlOOx(NeHfL4R5c@ECrR zqLMqR_#Bx2Ftsxo=(4mvrboo%XYn&I|5^rzDc33DySRkFV|h2LjmA@TZN~T$i^erLvS%mJrMIvP%ttv2eQd83jRnLa;C< ztumcF+hZzZ51pHf6qEK$1I3aZvtFCOm0w=0F-{hiTV!$hxe(`aTwM9a(Fm^@m%kS0 zVU7vtvzICG%u?L3jmS>Z7GMEF3A3koVM-R3OpTU((h zTNO8Jv6gJTPGl)Yu9F+$@~|8Uk+>KW+Q{+BH$+0>hPW|qikoBNoCF@+6ULY+W{w$T z+v-A5iZc-jiRJi)(<84}6;Ulz)5kX8(w7lXDReu)ub1J-9J3NPmv-xfABf||{gE@HvUj}9n-Q3II6}SpHqqZ~_R<|d_2p5>gwR(Ib|bVP zpx~$xp%FlJq<3n@n`-Ro>*#1dpo-zq;fN}X1V`y!WE&;`eHPkN>wSTYO?Kt&2de*)^WAx zeqlB1llD$5N{%JM8QQrA=Dg{lM8&>@xh-jKOPSkX@W5Sj-`#Z2-E>F16TEvc>F!Fo zyAqDvYqIl^XuZ&Vt~-(6b;C7tqiEv!`aWM(dnl+3pnyU&Fm^JkugC#((vNTuLG$@Kj8tcqIY0wScr+B%mR#j& zP&;_30a!k>wz#S-t&{;dpakekf}DSi&rc@{ zq}Pz?XBJn*p63Z$a+RuO@+wkMC7M$8vs{}Augh#RJMF4A$Ur#HK6c%?<(X-<%x#;i bnJi9nbt$fHi7!Al0J7_|mRk1Pnz#M|B0^+? delta 958 zcmZvaO-vI(6vuaGKcJ;7mapBGF06pig;G<|M5(A40ga6$AQ(xcuHB_w!M4t>Mlrzy z#1La5W-c5wF)?BcdLWHw&n8|-(2zNpc+*>nns{`kg~UTAdB2@`Z)V>6&%WXHT`M!UHv*>m|^$%%v?C zcA+IHEcOsD>_eN9;$f4Ry94K86m3gKy>SwV5_2%BuUo#T$BGOu_)6HSKD5HF^sfr5 zi#yc^p4L6CTWi>93huNEiz>UYIYgUu@|h@zBa7{wGha*V7ofEE@V%Ydm26o^Bw zhO}zz+xwwO({K$KMz@uSc%EKx0liW>g>f<+EPPhJf`@cF9Hcl!L3J(!Ml3g#A?0d& z*JwBHYm?)N0qwsh@KjQQ#l+a?=q^sOVrQ$%TI2a~5_B#!!p@1+&9M@1h z^xmUl{-%@5!UWn4P5MjE;vvtH$feU3gqP73wJk`S7jupa%Qw?5fK(8iLT^>I#voJL kicTC9w$w{3`q&0#AFCD_f?bXQecNoL2&gB7?rL-Y0KA{#cK`qY diff --git a/news/main.py b/news/main.py index b61f360..2ce840b 100644 --- a/news/main.py +++ b/news/main.py @@ -312,12 +312,27 @@ async def on_ready() -> None: LOGGER.info("Playwright pool ready") LOGGER.info("------") +async def process_articles(message: discord.Message): + await message.channel.send("Processing incomplete articles...") + LOGGER.info("Fetching incomplete articles") + urls = await server.article_repository.fetch_incomplete() + for url in urls: + LOGGER.info(f"Processing incomplete article {url}") + await message.channel.send(f"Processing incomplete article {url}") + await handle_article_url(message, url) + await message.channel.send("Done!") + @bot.event async def on_message(message: discord.Message) -> None: # Ignore our own messages if message.author == bot.user: return + if message.content.startswith("!"): + if message.content == "!process": + asyncio.create_task(process_articles(message)) + return + is_dm = message.guild is None overwrite = False diff --git a/news/pool.py b/news/pool.py index 7c77290..36406ed 100644 --- a/news/pool.py +++ b/news/pool.py @@ -149,6 +149,17 @@ class ArticleRepository: # public API # ------------------------------------------------------------------ # + async def fetch_incomplete(self) -> list[str]: + async with self._lock: + cur = self._conn.cursor() + row = cur.execute(f""" + SELECT url FROM articles AS a WHERE ((SELECT COUNT(*) FROM summaries WHERE article_id = a.id) = 0 OR (SELECT COUNT(*) FROM paragraphs WHERE article_id = a.id) = 0) + """) + + results = row.fetchall() + + return [url[0] for url in results] + async def fetch_article(self, url: str) -> tuple[str, str]: async with self._lock: result = self._get_article(url) @@ -216,6 +227,42 @@ class ArticleRepository: return row.fetchall() + async def search_articles(self, text, count, last): + async with self._lock: + text = "%" + text + "%" + cur = self._conn.cursor() + if last > 0: + row = cur.execute( + f""" + SELECT id, url, title, processed_html + FROM ( + SELECT id, url, title, processed_html + FROM articles + WHERE + (url LIKE {self.cursor_type} + OR + title LIKE {self.cursor_type} + OR + processed_html LIKE {self.cursor_type}) + AND + id < {self.cursor_type} + ORDER BY id DESC LIMIT {self.cursor_type}) + """, (text, text, text, last, count)) + else: + row = cur.execute(f""" + SELECT id, url, title, processed_html FROM ( + SELECT id, url, title, processed_html, {self.cursor_type} AS text + FROM articles + WHERE + processed_html LIKE text + OR + title LIKE text + OR + url LIKE text) ORDER BY id DESC LIMIT {self.cursor_type} + """, (text, count)) + + return row.fetchall() + async def get_paragraphs(self, article_url : str) -> ArticleParagraphs | None: async with self._lock: cur = self._conn.cursor() diff --git a/news/server.py b/news/server.py index 9148a45..ca6f974 100644 --- a/news/server.py +++ b/news/server.py @@ -32,6 +32,22 @@ async def view_html(): async def view(): return await view_html() +@app.route("/browse.html") +async def browse_html(): + return await send_from_directory("static", "browse.html") + +@app.route("/browse") +async def browse(): + return await browse_html() + +@app.route("/search.html") +async def search_html(): + return await send_from_directory("static", "search.html") + +@app.route("/search") +async def search(): + return await search_html() + @app.route("/api/health") async def health(): return {"status": "ok"} @@ -50,10 +66,32 @@ async def get_articles(): articles = await article_repository.get_latest_articles(count, last) json_obj = [] - for _, url, title, processed_html in articles: + for _id, url, title, processed_html in articles: json_obj.append({url: { "title": title, "processed_text": processed_html, + "id": _id + }}) + + return jsonify(json_obj) + +@app.route("/api/search", methods=["GET"]) +async def search_articles(): + text = request.args.get("text") + count = min(int(request.args.get("count") or "25"), 125) + last = int(request.args.get("last") or "-1") + if not text: + abort(400, description="`text` query parameter is required") + articles = await article_repository.search_articles(text, count, last) + + LOGGER.info(f"Found {len(articles)} articles for search query: {text}") + + json_obj = [] + for _id, url, title, processed_html in articles: + json_obj.append({url: { + "title": title, + "processed_text": processed_html, + "id": _id }}) return jsonify(json_obj) diff --git a/news/static/browse.html b/news/static/browse.html new file mode 100644 index 0000000..7497d04 --- /dev/null +++ b/news/static/browse.html @@ -0,0 +1,198 @@ + + + + + Browse Articles + + + + + +
+

Browse Articles

+
+ +
+ + + + + + \ No newline at end of file diff --git a/news/static/index.html b/news/static/index.html index b57988e..3439cef 100644 --- a/news/static/index.html +++ b/news/static/index.html @@ -6,10 +6,40 @@ + +

Newsulizer

@@ -104,7 +143,17 @@ h2.textContent = meta.title || url; const p = document.createElement('p'); - p.textContent = truncate(meta.processed_text, 280); + const txt = meta.processed_text; + let reg = txt.replace(/(\(.*[^\w:_; '.,’"\s]+.*\))/g, '') + reg = reg.replace(/(\[.*])/g, '') + reg = reg.replace(/([^\w:_; '.,’"\s/]+)/g, '') + const words = reg.split(/\s/g) + reg = words.slice(0, Math.min(60, words.length)).join(' ').trim(); + if (reg.endsWith('.')) + reg += ".."; + else + reg += "..."; + p.textContent = reg; card.appendChild(h2); card.appendChild(p); diff --git a/news/static/search.html b/news/static/search.html new file mode 100644 index 0000000..7061068 --- /dev/null +++ b/news/static/search.html @@ -0,0 +1,299 @@ + + + + + Search + + + + + + + + + + +
+
+ + +
+
+ + +
+ + + + + + + \ No newline at end of file diff --git a/news/static/view.html b/news/static/view.html index 0ea6658..e1a4928 100644 --- a/news/static/view.html +++ b/news/static/view.html @@ -181,6 +181,8 @@ Newsulizer