From 406741c80a1017fd3e4053c9f012cabfb039ceaf Mon Sep 17 00:00:00 2001 From: Brett Date: Mon, 30 Jun 2025 14:11:09 -0400 Subject: [PATCH] hello --- news/.gitignore | 1 + news/__pycache__/pool.cpython-312.pyc | Bin 12525 -> 12520 bytes news/main.py | 42 +++++++++++++++++++++----- newsulizer.sqlite3 | Bin 0 -> 16384 bytes 4 files changed, 35 insertions(+), 8 deletions(-) create mode 100644 newsulizer.sqlite3 diff --git a/news/.gitignore b/news/.gitignore index 6061583..786dfee 100644 --- a/news/.gitignore +++ b/news/.gitignore @@ -1 +1,2 @@ *.sqlite3 +.env \ No newline at end of file diff --git a/news/__pycache__/pool.cpython-312.pyc b/news/__pycache__/pool.cpython-312.pyc index a6efef44e2dcfede381923fbf1000c5b610f704f..99eacd234f25e24f50e0532a4fc711c0fa876116 100644 GIT binary patch delta 43 ycmaEx_#%<_G%qg~0}vE3CS_dT$a|KNg}J1nWb$Li7i^{{L@p$yZNA7PqXz&;>koMV delta 48 zcmaEn_%@OEG%qg~0}zO8PRO{kk@qYkCr55#QFcmxdEVrEj4#-%PNZB&&)$57Nk$I< DsE!gc diff --git a/news/main.py b/news/main.py index bdb4669..690dc5a 100644 --- a/news/main.py +++ b/news/main.py @@ -7,7 +7,7 @@ import os from pathlib import Path from typing import Final, Optional, List, NamedTuple from dataclasses import dataclass -from textwrap import wrap +from textwrap import wrap, fill import discord from dotenv import load_dotenv @@ -84,6 +84,9 @@ relevance_system_prompt = ("You are a specialized analysis program designed to d "Your response to this would then look like:\n" "100") +relevance_system_prompt_2 = "\n".join(["You are a specialized analysis program designed to determine if a paragraph is relevant to the topic of the article.", + "You will be given different inputs and prompts by the user, and you MUST respond with either YES for it is relevant to the paragraph or NO for it is not relevant to the paragraph."]) + @dataclass(frozen=True) class Response: response: ChatResponse @@ -132,6 +135,10 @@ class ChatBot: async def set_model(self, model : str): self.model = model + def set_system(self, system : str): + self.system = system + self.clear() + def clear(self): self.messages = [] self.messages.append({"role": "system", "content": self.system}) @@ -220,13 +227,24 @@ async def handle_article_url(message: discord.Message, url: str) -> None: "num_ctx": 4096 }) + summary_bot.set_system("You are a specialized analysis program designed to summarize articles into their key points.\n " + "You WILL only output a comma seperated list of key points, up to a max of 10 key points. ") + + parts = ",".join(sumr.content() for sumr in await summary_bot.multi_summary(processed_html, options={ + "temperature": 0.5, + "num_ctx": 4096 + })) + + print(parts) + summary_parts_string = [part.content() for part in summary_parts] - summary = "\nSummary: ".join(summary_parts_string) + summary = "\n".join(summary_parts_string) paragraphs = [para for para in processed_html.split("\n") if len(para.strip()) > 0] relevance_bot = ChatBot(relevance_system_prompt) + relevance_bot2 = ChatBot(relevance_system_prompt_2) paragraph_relevance = [] @@ -236,18 +254,26 @@ async def handle_article_url(message: discord.Message, url: str) -> None: summary, "-----\n", "Paragraph:\n ", paragraph, "-----\n"])) - paragraph_relevance.append(response.content()) + print(await relevance_bot2.send_message("The Paragraph you will analyze is as follows. DO NOT RESPOND TO THIS MESSAGE.\n\n" + paragraph)) + res = await relevance_bot2.send_message("Given the following summary, how relevant is the paragraph to the article? Remember, please respond with either YES or NO.\n\n" + summary) + print(res) + keywords = parts.split(",") + restutions = [] + for keyword in keywords: + restutions.append(await relevance_bot2.send_message("Given the following keyword, how relevant is the paragraph to the article? Remember, please respond with either YES or NO.\n\n" + keyword)) + paragraph_relevance.append((response.content(), [*restutions, res])) for i, x in enumerate(paragraph_relevance): - paragraph_relevance[i] = str(int(x)) + paragraph_relevance[i] = (int(x[0]), x[1]) + + average_relevance = sum(x[0] for x in paragraph_relevance) / len(paragraph_relevance) + median_relevance = int(sorted(ref[0] for ref in paragraph_relevance)[len(paragraph_relevance) // 2]) - average_relevance = sum(int(x) for x in paragraph_relevance) / len(paragraph_relevance) - median_relevance = int(sorted(paragraph_relevance)[len(paragraph_relevance) // 2]) relevance_cutoff = min(average_relevance, median_relevance) - LOGGER.info(f"Relevance cutoff: {relevance_cutoff}") + LOGGER.info(f"Relevance cutoff: {relevance_cutoff} From ({average_relevance}, {median_relevance})") - relevance_content = [para + " (" + res + "%)" for para, res in zip(paragraphs, paragraph_relevance) if int(res) >= relevance_cutoff] + relevance_content = [fill(para + " (" + str(res[0]) + "%) [" + res[1] + "]", 80) for para, res in zip(paragraphs, paragraph_relevance)] relevance_prompt = "\n\n".join(relevance_content) # social = await send_chat_with_system("social", processed_html, social_system_prompt, tools) diff --git a/newsulizer.sqlite3 b/newsulizer.sqlite3 new file mode 100644 index 0000000000000000000000000000000000000000..0be78800265ede08d25ccad9cb1838f997c2af5a GIT binary patch literal 16384 zcmeI%O;5rw7zglH2pSSzZk$MZgg|_WU%){cvp5FKF3M?{P~(yTGDMM3ms)SNYCRzEx(P^YrA?%j$am~ z$~(!GPf2-QdD9&V1Rwwb2tWV=5P$##AOHaf`~!hOu25^W=t%i@pf|TdRVL1>FZ6B(~ndz)~VC}Ad@9M%Nj+tQG!Y4 zglP+Y$sIOu%%0(l*cBf!!xeqg)(Lyu7VWJJt0?@LLhxI`T-zMFoY{TBY}c~3gH)hG z`5l%7?VcU^Pu_!?ruX?{_G2{Hs!KWfjLFQ-Jbm-zpit}9DG6p1`TRxguhcvld#Rdt zmLBEwQag&5TPbd;af1Q@2tWV=5P$##AOHafKmY;|fWW>A6yq1e{(oOTFWw6R5P$## sAOHafKmY;|fB*y_kO<)aKNbQ6AOHafKmY;|fB*y_009U