Compare commits

...

3 Commits

Author SHA1 Message Date
Brett a61a67c426 hello there 2025-06-30 20:47:18 -04:00
Brett 0950cda099 join 2025-06-30 14:21:57 -04:00
Brett 406741c80a hello 2025-06-30 14:11:09 -04:00
4 changed files with 69 additions and 49 deletions

news/.gitignore vendored
@@ -1 +1,2 @@
 *.sqlite3
+.env


@@ -7,7 +7,7 @@ import os
 from pathlib import Path
 from typing import Final, Optional, List, NamedTuple
 from dataclasses import dataclass
-from textwrap import wrap
+from textwrap import wrap, fill

 import discord
 from dotenv import load_dotenv
@@ -19,6 +19,7 @@ from ollama import ChatResponse
 from ollama import Client
 from ollama import AsyncClient
 import time
+import json

 load_dotenv()
@@ -34,7 +35,8 @@ bot = discord.Client(intents=intents)
 LOGGER = logging.getLogger("main")
 logging.basicConfig(
     level=logging.INFO,
-    format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
+    format="%(asctime)s [%(levelname)s] %(name)s (in %(filename)s:%(lineno)d): %(message)s",
+    datefmt="%Y-%m-%d %H:%M:%S"
 )

 article_repository = ArticleRepository()
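With the new format string and datefmt, a record logged from this module would render along these lines (the filename and line number here are hypothetical):

    2025-06-30 14:21:57 [INFO] main (in main.py:142): Relevance cutoff: 55.0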
@@ -84,6 +86,11 @@ relevance_system_prompt = ("You are a specialized analysis program designed to d
                            "Your response to this would then look like:\n"
                            "100")

+relevance_system_prompt_2 = "\n".join(["You are a specialized analysis program designed to determine if a paragraph is relevant to the "
+                                       "topic of the article based on various snippets and meta-information gathered from the article.",
+                                       "You will be given different inputs and prompts by the user where you MUST respond with a "
+                                       "YES or NO depending on if that input is relevant to the paragraph."])
+
 @dataclass(frozen=True)
 class Response:
     response: ChatResponse
@@ -132,6 +139,10 @@ class ChatBot:
     async def set_model(self, model : str):
         self.model = model

+    def set_system(self, system : str):
+        self.system = system
+        self.clear()
+
     def clear(self):
         self.messages = []
         self.messages.append({"role": "system", "content": self.system})
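A minimal usage sketch of the new set_system() method, assuming only the ChatBot construction pattern already shown in this diff (the replacement prompt is illustrative):

    bot = ChatBot(summary_system_prompt)
    # ... chat turns accumulate in bot.messages ...
    bot.set_system("You are a keyword extractor.")  # hypothetical prompt
    # set_system() calls clear(), which re-seeds the history, so bot.messages
    # is now just [{"role": "system", "content": "You are a keyword extractor."}]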
@@ -165,18 +176,18 @@ async def send_text_file(channel: discord.abc.Messageable, *args: str | tuple[st
     files = [discord.File(io.BytesIO(text.encode("utf-8")), filename=name) for name, text in zip(names, strings)]
     await channel.send(message, files=files)

-def tally_responses(tools):
-    increment = 0
-    decrement = 0
-    if tools:
-        for tool in tools:
-            if tool['function']['name'] == "increment":
-                increment += 1
-            elif tool['function']['name'] == "decrement":
-                decrement += 1
-            else:
-                LOGGER.warning(f"Unknown tool: {tool}")
-    return increment, decrement
+def tally_responses(array: list[str]):
+    yes = 0
+    no = 0
+    err = 0
+    for a in array:
+        if a.upper() == "YES":
+            yes += 1
+        elif a.upper() == "NO":
+            no += 1
+        else:
+            err += 1
+    return yes, no, err


 async def handle_article_url(message: discord.Message, url: str) -> None:
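A quick sketch of how the rewritten tally_responses() feeds the per-paragraph percentage computed further down in this diff (the sample verdicts are hypothetical):

    votes = ["YES", "no", "YES", "maybe"]    # "maybe" falls through to err
    yes, no, err = tally_responses(votes)    # case-insensitive -> (2, 1, 1)
    total = yes + no + err
    keypoint_score = (yes / total) * 100     # -> 50.0, as appended to paragraph_keypoints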
@@ -185,33 +196,6 @@ async def handle_article_url(message: discord.Message, url: str) -> None:
     try:
         title, processed_html = await article_repository.get_article(url)

-        tools = [
-            {
-                'type': 'function',
-                'function': {
-                    'name': 'increment',
-                    'description': 'increment internal counter by 1',
-                    'parameters': {
-                        'type': 'object',
-                        'properties': {},
-                        'required': []
-                    }
-                }
-            },
-            {
-                'type': 'function',
-                'function': {
-                    'name': 'decrement',
-                    'description': 'decrement internal counter by 1',
-                    'parameters': {
-                        'type': 'object',
-                        'properties': {},
-                        'required': []
-                    }
-                }
-            }
-        ]
-
         summary_bot = ChatBot(summary_system_prompt)
         summary_parts = await summary_bot.multi_summary(processed_html, options={
@@ -220,34 +204,69 @@ async def handle_article_url(message: discord.Message, url: str) -> None:
             "num_ctx": 4096
         })

+        summary_bot.set_system("You are a specialized analysis program designed to summarize articles into their key points.\n "
+                               "You WILL only output a JSON list of key points, structured as {key_points: [\"keypoint1\", \"keypoint2\",...]}. ")
+
+        try:
+            keywords = [item for sublist in (json.loads(sumr.content())["key_points"] for sumr in await summary_bot.multi_summary(processed_html, options={
+                "temperature": 0.5,
+                "num_ctx": 4096
+            })) for item in sublist]
+            LOGGER.info(keywords)
+        except Exception as exc:
+            LOGGER.error("Failed to correctly parse LLM output. It is likely that it has failed.")
+            LOGGER.error(exc, exc_info=True)
+            keywords = []
+
         summary_parts_string = [part.content() for part in summary_parts]
-        summary = "\nSummary: ".join(summary_parts_string)
+        summary = "\n".join(summary_parts_string)

         paragraphs = [para for para in processed_html.split("\n") if len(para.strip()) > 0]

         relevance_bot = ChatBot(relevance_system_prompt)
+        relevance_bot2 = ChatBot(relevance_system_prompt_2)

         paragraph_relevance = []
+        paragraph_restitutions = []
+        paragraph_keypoints = []

         for paragraph in paragraphs:
             response = await relevance_bot.single_chat("".join(["-----\n",
                                                                 "Summary:\n ",
                                                                 summary, "-----\n",
                                                                 "Paragraph:\n ",
-                                                                paragraph, "-----\n"]))
+                                                                paragraph, "-----\n"
+                                                                "REMEMBER: You WILL respond with number between 0 and 100 indicating how relevant the paragraph is to the article."]))
+            relevance_bot2.clear()
+            LOGGER.info(await relevance_bot2.send_message("DO NOT RESPOND TO THIS MESSAGE. The Paragraph you will analyze is as follows:\n\n" + paragraph))
+            res = await relevance_bot2.send_message("Is the paragraph relevant to the following summary? Remember, please respond with either YES or NO.\n\n" + summary)
+            LOGGER.info(f"Summary Relevancy using chat: {res.content()}")
+            restitutions = []
+            for keypoint in keywords:
+                keypoint_is_rev = await relevance_bot2.send_message(
+                    "A key point is an idea that the article is communicating to the reader. Given that, is the paragraph relevant to the following key point. Remember: Please respond with either YES or NO.\n\n" + keypoint)
+                LOGGER.info(f"Running for keyword {keypoint} got response {keypoint_is_rev.content()}")
+                restitutions.append(keypoint_is_rev.content())
+            restitutions.append(res.content())
+            yes, no, err = tally_responses(restitutions)
+            total = yes + no + err
             paragraph_relevance.append(response.content())
+            paragraph_restitutions.append(restitutions)
+            paragraph_keypoints.append((yes / total) * 100)

         for i, x in enumerate(paragraph_relevance):
-            paragraph_relevance[i] = str(int(x))
+            paragraph_relevance[i] = int(x)

-        average_relevance = sum(int(x) for x in paragraph_relevance) / len(paragraph_relevance)
-        median_relevance = int(sorted(paragraph_relevance)[len(paragraph_relevance) // 2])
+        average_relevance = (sum(int(x) for x in paragraph_relevance) / len(paragraph_relevance) + sum(paragraph_keypoints)) / 2
+        median_relevance = sorted(int(ref) for ref in paragraph_relevance)[len(paragraph_relevance) // 2]
+        median_relevance2 = sorted(paragraph_keypoints)[len(paragraph_keypoints) // 2]

-        relevance_cutoff = min(average_relevance, median_relevance)
-        LOGGER.info(f"Relevance cutoff: {relevance_cutoff}")
-        relevance_content = [para + " (" + res + "%)" for para, res in zip(paragraphs, paragraph_relevance) if int(res) >= relevance_cutoff]
+        relevance_cutoff = min(average_relevance, (median_relevance + median_relevance2) / 2)
+        LOGGER.info(f"Relevance cutoff: {relevance_cutoff} From ({average_relevance}, {(median_relevance + median_relevance2) / 2})")
+        relevance_content = [fill(para + " (" + str(res) + "%) [" + ",".join(li) + "]" + "(" + str(point) + "%)", 80) for para, res, li, point, in zip(paragraphs, paragraph_relevance, paragraph_restitutions, paragraph_keypoints)]

         relevance_prompt = "\n\n".join(relevance_content)
         # social = await send_chat_with_system("social", processed_html, social_system_prompt, tools)
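To make the new cutoff arithmetic concrete, a worked sketch with hypothetical values for three paragraphs:

    paragraph_relevance = [80, 40, 60]        # 0-100 scores from relevance_bot, after int()
    paragraph_keypoints = [50.0, 100.0, 0.0]  # (yes / total) * 100 per paragraph
    # average_relevance: mean of the scores plus the sum (not the mean) of the
    # key-point percentages, halved -> (60 + 150) / 2 = 105.0
    # median_relevance:  sorted([40, 60, 80])[1]       -> 60
    # median_relevance2: sorted([0.0, 50.0, 100.0])[1] -> 50.0
    # relevance_cutoff:  min(105.0, (60 + 50.0) / 2)   -> 55.0
    # Unlike the removed version, relevance_content no longer filters paragraphs
    # by the cutoff; the cutoff is only computed and logged.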

newsulizer.sqlite3 (binary, normal file)

Binary file not shown.