Compare commits


No commits in common. "a61a67c4263f9899520de293e28d422d729aa2f2" and "fdffd314fcb5995ef36fcb8d11ba7306bfe25131" have entirely different histories.

4 changed files with 49 additions and 69 deletions

news/.gitignore vendored

@@ -1,2 +1 @@
*.sqlite3
.env


@@ -7,7 +7,7 @@ import os
from pathlib import Path
from typing import Final, Optional, List, NamedTuple
from dataclasses import dataclass
from textwrap import wrap, fill
from textwrap import wrap
import discord
from dotenv import load_dotenv
@@ -19,7 +19,6 @@ from ollama import ChatResponse
from ollama import Client
from ollama import AsyncClient
import time
import json
load_dotenv()
@@ -35,8 +34,7 @@ bot = discord.Client(intents=intents)
LOGGER = logging.getLogger("main")
logging.basicConfig(
level=logging.INFO,
format="%(asctime)s [%(levelname)s] %(name)s (in %(filename)s:%(lineno)d): %(message)s",
datefmt="%Y-%m-%d %H:%M:%S"
format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
)
article_repository = ArticleRepository()
@@ -86,11 +84,6 @@ relevance_system_prompt = ("You are a specialized analysis program designed to d
"Your response to this would then look like:\n"
"100")
relevance_system_prompt_2 = "\n".join(["You are a specialized analysis program designed to determine if a paragraph is relevant to the "
"topic of the article based on various snippets and meta-information gathered from the article.",
"You will be given different inputs and prompts by the user where you MUST respond with a "
"YES or NO depending on if that input is relevant to the paragraph."])
@dataclass(frozen=True)
class Response:
response: ChatResponse
@@ -139,10 +132,6 @@ class ChatBot:
async def set_model(self, model : str):
self.model = model
def set_system(self, system : str):
self.system = system
self.clear()
def clear(self):
self.messages = []
self.messages.append({"role": "system", "content": self.system})
@@ -176,18 +165,18 @@ async def send_text_file(channel: discord.abc.Messageable, *args: str | tuple[st
files = [discord.File(io.BytesIO(text.encode("utf-8")), filename=name) for name, text in zip(names, strings)]
await channel.send(message, files=files)
def tally_responses(array: list[str]):
yes = 0
no = 0
err = 0
for a in array:
if a.upper() == "YES":
yes += 1
elif a.upper() == "NO":
no += 1
else:
err += 1
return yes, no, err
def tally_responses(tools):
increment = 0
decrement = 0
if tools:
for tool in tools:
if tool['function']['name'] == "increment":
increment += 1
elif tool['function']['name'] == "decrement":
decrement += 1
else:
LOGGER.warning(f"Unknown tool: {tool}")
return increment, decrement
async def handle_article_url(message: discord.Message, url: str) -> None:
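The rewritten tally_responses above counts increment/decrement tool calls instead of parsing YES/NO strings. A minimal usage sketch follows; the hand-built tool_calls list is illustrative only and merely mirrors the ['function']['name'] dict shape the function indexes into, it is not taken from a real model response.

# Illustrative only: dicts shaped like the tool calls the new tally_responses expects.
tool_calls = [
    {'function': {'name': 'increment', 'arguments': {}}},
    {'function': {'name': 'increment', 'arguments': {}}},
    {'function': {'name': 'decrement', 'arguments': {}}},
    {'function': {'name': 'unknown', 'arguments': {}}},  # hits the warning branch
]

increment, decrement = tally_responses(tool_calls)
assert (increment, decrement) == (2, 1)

# A missing or empty tool-call list tallies to (0, 0).
assert tally_responses(None) == (0, 0)
assert tally_responses([]) == (0, 0)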
@@ -196,6 +185,33 @@ async def handle_article_url(message: discord.Message, url: str) -> None:
try:
title, processed_html = await article_repository.get_article(url)
tools = [
{
'type': 'function',
'function': {
'name': 'increment',
'description': 'increment internal counter by 1',
'parameters': {
'type': 'object',
'properties': {},
'required': []
}
}
},
{
'type': 'function',
'function': {
'name': 'decrement',
'description': 'decrement internal counter by 1',
'parameters': {
'type': 'object',
'properties': {},
'required': []
}
}
}
]
summary_bot = ChatBot(summary_system_prompt)
summary_parts = await summary_bot.multi_summary(processed_html, options={
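The increment/decrement schemas defined in this hunk are not yet consumed at this point; below is a sketch, not part of the diff, of how they could be handed to a chat call and tallied. It reuses the tally_responses and AsyncClient names from this file, assumes the ollama-python tool-calling interface (chat(..., tools=...)), and uses placeholder model and prompt values.

# Sketch only (not in the diff): let the model vote by calling the tools,
# then tally its calls. Assumes ollama-python's chat(..., tools=...) API.
async def count_votes(system_prompt: str, paragraph: str, tools: list[dict]) -> tuple[int, int]:
    response: ChatResponse = await AsyncClient().chat(
        model="llama3.1",  # placeholder model name
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": paragraph},
        ],
        tools=tools,
    )
    # tool_calls is None when the model answers in plain text instead of calling a tool.
    return tally_responses(response.message.tool_calls or [])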
@@ -204,69 +220,34 @@ async def handle_article_url(message: discord.Message, url: str) -> None:
"num_ctx": 4096
})
summary_bot.set_system("You are a specialized analysis program designed to summarize articles into their key points.\n "
"You WILL only output a JSON list of key points, structured as {key_points: [\"keypoint1\", \"keypoint2\",...]}. ")
try:
keywords = [item for sublist in (json.loads(sumr.content())["key_points"] for sumr in await summary_bot.multi_summary(processed_html, options={
"temperature": 0.5,
"num_ctx": 4096
})) for item in sublist]
LOGGER.info(keywords)
except Exception as exc:
LOGGER.error("Failed to correctly parse LLM output. It is likely that it has failed.")
LOGGER.error(exc, exc_info=True)
keywords = []
summary_parts_string = [part.content() for part in summary_parts]
summary = "\n".join(summary_parts_string)
summary = "\nSummary: ".join(summary_parts_string)
paragraphs = [para for para in processed_html.split("\n") if len(para.strip()) > 0]
relevance_bot = ChatBot(relevance_system_prompt)
relevance_bot2 = ChatBot(relevance_system_prompt_2)
paragraph_relevance = []
paragraph_restitutions = []
paragraph_keypoints = []
for paragraph in paragraphs:
response = await relevance_bot.single_chat("".join(["-----\n",
"Summary:\n ",
summary, "-----\n",
"Paragraph:\n ",
paragraph, "-----\n"
"REMEMBER: You WILL respond with number between 0 and 100 indicating how relevant the paragraph is to the article."]))
relevance_bot2.clear()
LOGGER.info(await relevance_bot2.send_message("DO NOT RESPOND TO THIS MESSAGE. The Paragraph you will analyze is as follows:\n\n" + paragraph))
res = await relevance_bot2.send_message("Is the paragraph relevant to the following summary? Remember, please respond with either YES or NO.\n\n" + summary)
LOGGER.info(f"Summary Relevancy using chat: {res.content()}")
restitutions = []
for keypoint in keywords:
keypoint_is_rev = await relevance_bot2.send_message(
"A key point is an idea that the article is communicating to the reader. Given that, is the paragraph relevant to the following key point. Remember: Please respond with either YES or NO.\n\n" + keypoint)
LOGGER.info(f"Running for keyword {keypoint} got response {keypoint_is_rev.content()}")
restitutions.append(keypoint_is_rev.content())
restitutions.append(res.content())
yes, no, err = tally_responses(restitutions)
total = yes + no + err
paragraph, "-----\n"]))
paragraph_relevance.append(response.content())
paragraph_restitutions.append(restitutions)
paragraph_keypoints.append((yes / total) * 100)
for i, x in enumerate(paragraph_relevance):
paragraph_relevance[i] = int(x)
paragraph_relevance[i] = str(int(x))
average_relevance = (sum(int(x) for x in paragraph_relevance) / len(paragraph_relevance) + sum(paragraph_keypoints)) / 2
median_relevance = sorted(int(ref) for ref in paragraph_relevance)[len(paragraph_relevance) // 2]
median_relevance2 = sorted(paragraph_keypoints)[len(paragraph_keypoints) // 2]
average_relevance = sum(int(x) for x in paragraph_relevance) / len(paragraph_relevance)
median_relevance = int(sorted(paragraph_relevance)[len(paragraph_relevance) // 2])
relevance_cutoff = min(average_relevance, median_relevance)
LOGGER.info(f"Relevance cutoff: {relevance_cutoff}")
relevance_cutoff = min(average_relevance, (median_relevance + median_relevance2) / 2)
LOGGER.info(f"Relevance cutoff: {relevance_cutoff} From ({average_relevance}, {(median_relevance + median_relevance2) / 2})")
relevance_content = [fill(para + " (" + str(res) + "%) [" + ",".join(li) + "]" + "(" + str(point) + "%)", 80) for para, res, li, point, in zip(paragraphs, paragraph_relevance, paragraph_restitutions, paragraph_keypoints)]
relevance_content = [para + " (" + res + "%)" for para, res in zip(paragraphs, paragraph_relevance) if int(res) >= relevance_cutoff]
relevance_prompt = "\n\n".join(relevance_content)
# social = await send_chat_with_system("social", processed_html, social_system_prompt, tools)
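For reference, the simplified scoring in this revision keeps a paragraph when its score is at least min(mean, median) of all per-paragraph scores; a small worked sketch with made-up numbers:

# Made-up scores, only to illustrate the new min(mean, median) cutoff.
scores = ["90", "40", "75", "10", "85"]              # paragraph_relevance after str(int(x))
average = sum(int(x) for x in scores) / len(scores)  # 60.0
median = int(sorted(scores)[len(scores) // 2])       # 75 (string sort; fine here, all two digits)
cutoff = min(average, median)                        # 60.0
kept = [s for s in scores if int(s) >= cutoff]       # ["90", "75", "85"]

Note that sorted(paragraph_relevance) orders strings lexicographically, so with mixed-width scores (for example "5" and "100") the picked median can differ from the numeric median that the previous sorted(int(ref) ...) version produced.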

Binary file not shown.