sorta working relevance

2025-06-29 21:32:23 -04:00 · 2025-06-29 21:32:23 -04:00 · fdffd314fc
parent 1dafa31a60
commit fdffd314fc
3 changed files with 120 additions and 28 deletions
--- a/news/pycache/pool.cpython-311.pyc
+++ b/news/pycache/pool.cpython-311.pyc
--- a/news/main.py
+++ b/news/main.py
@ -4,7 +4,10 @@ import asyncio
 import collections
 import logging
 import os
-from typing import Final, Optional, List
+from pathlib import Path
 from typing import Final, Optional, List, NamedTuple
 from dataclasses import dataclass
 from textwrap import wrap
 import discord
 from dotenv import load_dotenv
@ -62,6 +65,36 @@ facts_system_prompt = ("You are a specialized analysis program designed to deter
                       "If the article only presents opinions about genocide, then it is not accurately representing what happened). "
                       "You WILL give rating of this article by calling the increment tool if you read a paragraph (seperated by newlines) which is accurately representing facts, and decrement if it is not.")
 # System prompt for the summarisation bot: asks for a summary of the
 # supplied article in under 300 words.
 summary_system_prompt = ("You are a specialized analysis program designed to summarize articles. "
                          "You WILL be given an article and you WILL output a summary of the article in less than 300 words. "
                          "Do NOT include that you are attempting to meet the word count in your response.")
 # System prompt for the per-paragraph relevance bot: the model receives a
 # summary plus one paragraph and must answer with a bare number from 0 to 100.
 # Adjacent literals are joined by implicit concatenation, so every sentence
 # fragment must carry its own trailing space (or newline).
 relevance_system_prompt = ("You are a specialized analysis program designed to determine if a paragraph is relevant to the topic of the article. "
                            "You will be given a summary of the article and a paragraph of text. "
                            "You WILL respond with a number between 0 and 100 indicating how relevant the paragraph is to the article. "
                            "You WILL NOT output anything else besides the number. "
                            "An example response to a given input would be:\n "
                            "-----\n"
                            "Summary:\n "
                            "The president of the United States has been in the White House for 20 years.\n"
                            "-----\n"
                            "Paragraph:\n"
                            "\"The president of the United States has been in the White House for 20 years.\"\n"
                            "-----\n"
                            "Your response to this would then look like:\n"
                            "100")
@dataclass(frozen=True)
class Response:
    """Immutable wrapper around a single chat reply.

    The wrapped object is only ever read through nested subscripting
    (``response["message"][...]``), so any object supporting that access
    pattern can be wrapped.
    """

    # Raw reply object handed to the constructor.
    response: ChatResponse

    def content(self):
        """Return the value stored under ``message -> content``."""
        return self.response["message"]["content"]

    def tools(self):
        """Return the tool calls attached to the reply.

        A reply without tool calls (the ``tool_calls`` key is missing, or is
        present but ``None``) yields an empty list rather than raising
        ``KeyError`` / returning ``None``, so callers can always iterate
        over the result.
        """
        try:
            calls = self.response["message"]["tool_calls"]
        except KeyError:
            return []
        return calls if calls is not None else []
 class ChatBot:
    def __init__(self, system : str, host : str="192.168.69.3:11434"):
        self.client = AsyncClient(host=host)
@ -70,18 +103,36 @@ class ChatBot:
        self.model = "llama3.2:3b"
        self.clear()
-    async def send_message(self, message : str):
+    async def send_message(self, message : str, **kwargs) -> Response:
        self.messages.append({"role": "user", "content": message})
        response = await self.client.chat(
            model=self.model,
            messages=self.messages,
-            stream=False)
+            stream=False,
            **kwargs)
        self.messages.append({"role": "assistant", "content": response["message"]["content"]})
        return Response(response)
    async def single_chat(self, message : str, **kwargs) -> Response:
        """Send one stateless request: the system prompt plus ``message``.

        ``self.messages`` is neither read nor modified, so consecutive calls
        do not share any conversation context.

        Args:
            message: Text sent as the single user turn.
            **kwargs: Forwarded unchanged to ``self.client.chat``.

        Returns:
            The client's reply wrapped in a ``Response``.
        """
        system_turn = {"role": "system", "content": self.system}
        user_turn = {"role": "user", "content": message}
        reply = await self.client.chat(
            model=self.model,
            messages=[system_turn, user_turn],
            stream=False,
            **kwargs
        )
        return Response(reply)
    async def multi_summary(self, message: str, **kwargs) -> list[Response]:
        """Split ``message`` into chunks and run ``single_chat`` on each one.

        Chunks are produced by ``textwrap.wrap`` and processed one after
        another, so the returned list follows chunk order.

        Args:
            message: Text to split.
            **kwargs: Passed through to every ``single_chat`` call.

        Returns:
            One entry per chunk; empty when ``wrap`` produces no chunks.
        """
        # Per-chunk character budget: 4096 minus the system prompt length
        # minus a 64-character margin.
        # NOTE(review): wrap() counts characters; if 4096 is meant to mirror a
        # token-based context window this is only an approximation - confirm.
        budget = 4096 - len(self.system) - 64
        return [
            await self.single_chat(piece, **kwargs)
            for piece in wrap(message, width=budget)
        ]
    async def set_model(self, model : str) -> None:
        """Store ``model`` as the model name on this instance.

        Declared ``async`` although it awaits nothing; callers still have to
        ``await`` it.
        """
        self.model = model
-    async def clear(self):
+    def clear(self):
        self.messages = []
        self.messages.append({"role": "system", "content": self.system})
@ -101,10 +152,18 @@ async def send_chat_with_system(model, message, system, tools = None):
    messages = [{'role': 'system', 'content': system}, {'role': 'user', 'content': message}]
    return await send_chat(model, messages, tools)
-async def send_text_file(channel: discord.abc.Messageable, content: str | collections.abc.Sequence[str], message: str = "📄 Full article attached:", filename: str = "article.md") -> None:
+async def send_text_file(channel: discord.abc.Messageable, *args: str | tuple[str,str], message: str = "📄 Full article attached:") -> None:
-    fp = io.BytesIO(content.encode("utf-8"))
+    strings = []
-    file = discord.File(fp, filename=filename)
+    names = []
-    await channel.send(message, file=file)
+    for i, arg in enumerate(args):
        if isinstance(arg, tuple):
            strings.append(arg[1])
            names.append(arg[0])
        else:
            strings.append(arg)
            names.append("Unnamed_file_" + str(i) + ".txt")
    files = [discord.File(io.BytesIO(text.encode("utf-8")), filename=name) for name, text in zip(names, strings)]
    await channel.send(message, files=files)
 def tally_responses(tools):
    increment = 0
@ -153,35 +212,68 @@ async def handle_article_url(message: discord.Message, url: str) -> None:
            }
        ]
-        social = await send_chat_with_system("social", processed_html, social_system_prompt, tools)
+        summary_bot = ChatBot(summary_system_prompt)
        capital = await send_chat_with_system("capital", processed_html, capital_system_prompt, tools)
        facts = await send_chat_with_system("facts", processed_html, facts_system_prompt, tools)
-        print(social)
+        summary_parts = await summary_bot.multi_summary(processed_html, options={
-        print(capital)
+            "temperature": 0.5,
-        print(facts)
+            "num_predict": 300,
            "num_ctx": 4096
        })
-        social_increment, social_decrement = tally_responses(social['message']["tool_calls"])
+        summary_parts_string = [part.content() for part in summary_parts]
-        capital_increment, capital_decrement = tally_responses(capital['message']["tool_calls"])
+
-        facts_increment, facts_decrement = tally_responses(facts['message']["tool_calls"])
+        summary = "\nSummary: ".join(summary_parts_string)
        paragraphs = [para for para in processed_html.split("\n") if len(para.strip()) > 0]
        relevance_bot = ChatBot(relevance_system_prompt)
        paragraph_relevance = []
        for paragraph in paragraphs:
            response = await relevance_bot.single_chat("".join(["-----\n",
                                               "Summary:\n ",
                                               summary, "-----\n",
                                               "Paragraph:\n ",
                                               paragraph, "-----\n"]))
            paragraph_relevance.append(response.content())
        for i, x in enumerate(paragraph_relevance):
            paragraph_relevance[i] = str(int(x))
        average_relevance = sum(int(x) for x in paragraph_relevance) / len(paragraph_relevance)
        median_relevance = int(sorted(paragraph_relevance)[len(paragraph_relevance) // 2])
        relevance_cutoff = min(average_relevance, median_relevance)
        LOGGER.info(f"Relevance cutoff: {relevance_cutoff}")
        relevance_content = [para + " (" + res + "%)" for para, res in zip(paragraphs, paragraph_relevance) if int(res) >= relevance_cutoff]
        relevance_prompt = "\n\n".join(relevance_content)
        # social = await send_chat_with_system("social", processed_html, social_system_prompt, tools)
        # capital = await send_chat_with_system("capital", processed_html, capital_system_prompt, tools)
        # facts = await send_chat_with_system("facts", processed_html, facts_system_prompt, tools)
        # print(social)
        # print(capital)
        # print(facts)
        # social_increment, social_decrement = tally_responses(social['message']["tool_calls"])
        # capital_increment, capital_decrement = tally_responses(capital['message']["tool_calls"])
        # facts_increment, facts_decrement = tally_responses(facts['message']["tool_calls"])
        # TODO: parse `html`, summarise, etc.
        await message.channel.send(f"✅ Article downloaded – {len(processed_html):,} bytes.")
        # time.sleep(0.1)
        # await message.channel.send(f"Social+ {social_increment} | Social- {social_decrement} + Capital+ {capital_increment} | Capital- {capital_decrement} + Facts+ {facts_increment} | Facts- {facts_decrement}")
        time.sleep(0.1)
-        await send_text_file(message.channel, processed_html)
+        # await send_text_file(message.channel, [processed_html, summary, social["message"]["content"], capital["message"]["content"], facts["message"]["content"]], "Files")
-        time.sleep(0.1)
+        await send_text_file(message.channel, ("Raw Article.txt", processed_html), ("Summary.txt", summary), ("Paragraph Relevance.txt", relevance_prompt), message="Files")
        await send_text_file(message.channel, social["message"]["content"], "Social calculations:")
        time.sleep(0.1)
        await message.channel.send(f"Social+ {social_increment} | Social- {social_decrement} + Capital+ {capital_increment} | Capital- {capital_decrement} + Facts+ {facts_increment} | Facts- {facts_decrement}")
        time.sleep(0.1)
        await send_text_file(message.channel, capital["message"]["content"], "capital calculations:")
        time.sleep(0.1)
        await send_text_file(message.channel, facts["message"]["content"], "facts calculations:")
        time.sleep(0.1)
    except Exception as exc:
        await message.channel.send("❌ Sorry, an internal error has occurred. Please try again later or contact an administrator.")
        await message.channel.send(f"```\n{exc}\n```")
        LOGGER.error(exc, exc_info=True)
        await message.channel.send(f"```\n{exc}\n```")
 def extract_first_url(text: str) -> Optional[str]:
--- a/news/pool.py
+++ b/news/pool.py
@ -11,7 +11,7 @@ from typing import Final, Optional, Union, Protocol, Any, Tuple
 import logging
 def process_html(html):
-    return trafilatura.extract(html, output_format='markdown', include_images=True, include_formatting=True,
+    return trafilatura.extract(html, output_format='txt', include_images=True, include_formatting=True,
                        include_tables=True, include_comments=False, favor_recall=True)
 LOGGER = logging.getLogger("pool")