sorta working relevance

main
Brett 2025-06-29 21:32:23 -04:00
parent 1dafa31a60
commit fdffd314fc
3 changed files with 120 additions and 28 deletions

View File

@ -4,7 +4,10 @@ import asyncio
import collections import collections
import logging import logging
import os import os
from typing import Final, Optional, List from pathlib import Path
from typing import Final, Optional, List, NamedTuple
from dataclasses import dataclass
from textwrap import wrap
import discord import discord
from dotenv import load_dotenv from dotenv import load_dotenv
@ -62,6 +65,36 @@ facts_system_prompt = ("You are a specialized analysis program designed to deter
"If the article only presents opinions about genocide, then it is not accurately representing what happened). " "If the article only presents opinions about genocide, then it is not accurately representing what happened). "
"You WILL give rating of this article by calling the increment tool if you read a paragraph (seperated by newlines) which is accurately representing facts, and decrement if it is not.") "You WILL give rating of this article by calling the increment tool if you read a paragraph (seperated by newlines) which is accurately representing facts, and decrement if it is not.")
summary_system_prompt = ("You are a specialized analysis program designed to summarize articles. "
"You WILL be given an article and you WILL output a summary of the article in less than 300 words. "
"Do NOT include that you are attempting to meet the word count in your response.")
relevance_system_prompt = ("You are a specialized analysis program designed to determine if a paragraph is relevant to the topic of the article. "
"You will be given a summary of the article and a paragraph of text. "
"You WILL respond with number between 0 and 100 indicating how relevant the paragraph is to the article."
"You WILL NOT output anything else besides the number. "
"An example response to a given input would be:\n "
"-----\n"
"Summary:\n "
"The president of the United States has been in the White House for 20 years.\n"
"-----\n"
"Paragraph:\n"
"\"The president of the United States has been in the White House for 20 years.\"\n"
"-----\n"
"Your response to this would then look like:\n"
"100")
@dataclass(frozen=True)
class Response:
response: ChatResponse
def content(self):
return self.response["message"]["content"]
def tools(self):
return self.response["message"]["tool_calls"]
class ChatBot: class ChatBot:
def __init__(self, system : str, host : str="192.168.69.3:11434"): def __init__(self, system : str, host : str="192.168.69.3:11434"):
self.client = AsyncClient(host=host) self.client = AsyncClient(host=host)
@ -70,18 +103,36 @@ class ChatBot:
self.model = "llama3.2:3b" self.model = "llama3.2:3b"
self.clear() self.clear()
async def send_message(self, message : str): async def send_message(self, message : str, **kwargs) -> Response:
self.messages.append({"role": "user", "content": message}) self.messages.append({"role": "user", "content": message})
response = await self.client.chat( response = await self.client.chat(
model=self.model, model=self.model,
messages=self.messages, messages=self.messages,
stream=False) stream=False,
**kwargs)
self.messages.append({"role": "assistant", "content": response["message"]["content"]}) self.messages.append({"role": "assistant", "content": response["message"]["content"]})
return Response(response)
async def single_chat(self, message : str, **kwargs) -> Response:
messages = [{"role": "system", "content": self.system}, {"role": "user", "content": message}]
return Response(await self.client.chat(
model=self.model,
messages=messages,
stream=False,
**kwargs
))
async def multi_summary(self, message: str, **kwargs) -> list[Response]:
chunks = wrap(message, width=(4096 - len(self.system) - 64))
responses = []
for chunk in chunks:
responses.append(await self.single_chat(chunk, **kwargs))
return responses
async def set_model(self, model : str): async def set_model(self, model : str):
self.model = model self.model = model
async def clear(self): def clear(self):
self.messages = [] self.messages = []
self.messages.append({"role": "system", "content": self.system}) self.messages.append({"role": "system", "content": self.system})
@ -101,10 +152,18 @@ async def send_chat_with_system(model, message, system, tools = None):
messages = [{'role': 'system', 'content': system}, {'role': 'user', 'content': message}] messages = [{'role': 'system', 'content': system}, {'role': 'user', 'content': message}]
return await send_chat(model, messages, tools) return await send_chat(model, messages, tools)
async def send_text_file(channel: discord.abc.Messageable, content: str | collections.abc.Sequence[str], message: str = "📄 Full article attached:", filename: str = "article.md") -> None: async def send_text_file(channel: discord.abc.Messageable, *args: str | tuple[str,str], message: str = "📄 Full article attached:") -> None:
fp = io.BytesIO(content.encode("utf-8")) strings = []
file = discord.File(fp, filename=filename) names = []
await channel.send(message, file=file) for i, arg in enumerate(args):
if isinstance(arg, tuple):
strings.append(arg[1])
names.append(arg[0])
else:
strings.append(arg)
names.append("Unnamed_file_" + str(i) + ".txt")
files = [discord.File(io.BytesIO(text.encode("utf-8")), filename=name) for name, text in zip(names, strings)]
await channel.send(message, files=files)
def tally_responses(tools): def tally_responses(tools):
increment = 0 increment = 0
@ -153,35 +212,68 @@ async def handle_article_url(message: discord.Message, url: str) -> None:
} }
] ]
social = await send_chat_with_system("social", processed_html, social_system_prompt, tools) summary_bot = ChatBot(summary_system_prompt)
capital = await send_chat_with_system("capital", processed_html, capital_system_prompt, tools)
facts = await send_chat_with_system("facts", processed_html, facts_system_prompt, tools)
print(social) summary_parts = await summary_bot.multi_summary(processed_html, options={
print(capital) "temperature": 0.5,
print(facts) "num_predict": 300,
"num_ctx": 4096
})
social_increment, social_decrement = tally_responses(social['message']["tool_calls"]) summary_parts_string = [part.content() for part in summary_parts]
capital_increment, capital_decrement = tally_responses(capital['message']["tool_calls"])
facts_increment, facts_decrement = tally_responses(facts['message']["tool_calls"]) summary = "\nSummary: ".join(summary_parts_string)
paragraphs = [para for para in processed_html.split("\n") if len(para.strip()) > 0]
relevance_bot = ChatBot(relevance_system_prompt)
paragraph_relevance = []
for paragraph in paragraphs:
response = await relevance_bot.single_chat("".join(["-----\n",
"Summary:\n ",
summary, "-----\n",
"Paragraph:\n ",
paragraph, "-----\n"]))
paragraph_relevance.append(response.content())
for i, x in enumerate(paragraph_relevance):
paragraph_relevance[i] = str(int(x))
average_relevance = sum(int(x) for x in paragraph_relevance) / len(paragraph_relevance)
median_relevance = int(sorted(paragraph_relevance)[len(paragraph_relevance) // 2])
relevance_cutoff = min(average_relevance, median_relevance)
LOGGER.info(f"Relevance cutoff: {relevance_cutoff}")
relevance_content = [para + " (" + res + "%)" for para, res in zip(paragraphs, paragraph_relevance) if int(res) >= relevance_cutoff]
relevance_prompt = "\n\n".join(relevance_content)
# social = await send_chat_with_system("social", processed_html, social_system_prompt, tools)
# capital = await send_chat_with_system("capital", processed_html, capital_system_prompt, tools)
# facts = await send_chat_with_system("facts", processed_html, facts_system_prompt, tools)
# print(social)
# print(capital)
# print(facts)
# social_increment, social_decrement = tally_responses(social['message']["tool_calls"])
# capital_increment, capital_decrement = tally_responses(capital['message']["tool_calls"])
# facts_increment, facts_decrement = tally_responses(facts['message']["tool_calls"])
# TODO: parse `html`, summarise, etc. # TODO: parse `html`, summarise, etc.
await message.channel.send(f"✅ Article downloaded {len(processed_html):,} bytes.") await message.channel.send(f"✅ Article downloaded {len(processed_html):,} bytes.")
# time.sleep(0.1)
# await message.channel.send(f"Social+ {social_increment} | Social- {social_decrement} + Capital+ {capital_increment} | Capital- {capital_decrement} + Facts+ {facts_increment} | Facts- {facts_decrement}")
time.sleep(0.1) time.sleep(0.1)
await send_text_file(message.channel, processed_html) # await send_text_file(message.channel, [processed_html, summary, social["message"]["content"], capital["message"]["content"], facts["message"]["content"]], "Files")
time.sleep(0.1) await send_text_file(message.channel, ("Raw Article.txt", processed_html), ("Summary.txt", summary), ("Paragraph Relevance.txt", relevance_prompt), message="Files")
await send_text_file(message.channel, social["message"]["content"], "Social calculations:")
time.sleep(0.1)
await message.channel.send(f"Social+ {social_increment} | Social- {social_decrement} + Capital+ {capital_increment} | Capital- {capital_decrement} + Facts+ {facts_increment} | Facts- {facts_decrement}")
time.sleep(0.1)
await send_text_file(message.channel, capital["message"]["content"], "capital calculations:")
time.sleep(0.1)
await send_text_file(message.channel, facts["message"]["content"], "facts calculations:")
time.sleep(0.1) time.sleep(0.1)
except Exception as exc: except Exception as exc:
await message.channel.send("❌ Sorry, an internal error has occurred. Please try again later or contact an administrator.") await message.channel.send("❌ Sorry, an internal error has occurred. Please try again later or contact an administrator.")
await message.channel.send(f"```\n{exc}\n```")
LOGGER.error(exc, exc_info=True) LOGGER.error(exc, exc_info=True)
await message.channel.send(f"```\n{exc}\n```")
def extract_first_url(text: str) -> Optional[str]: def extract_first_url(text: str) -> Optional[str]:

View File

@ -11,7 +11,7 @@ from typing import Final, Optional, Union, Protocol, Any, Tuple
import logging import logging
def process_html(html): def process_html(html):
return trafilatura.extract(html, output_format='markdown', include_images=True, include_formatting=True, return trafilatura.extract(html, output_format='txt', include_images=True, include_formatting=True,
include_tables=True, include_comments=False, favor_recall=True) include_tables=True, include_comments=False, favor_recall=True)
LOGGER = logging.getLogger("pool") LOGGER = logging.getLogger("pool")