Compare commits
No commits in common. "a61a67c4263f9899520de293e28d422d729aa2f2" and "fdffd314fcb5995ef36fcb8d11ba7306bfe25131" have entirely different histories.
a61a67c426
...
fdffd314fc
|
@ -1,2 +1 @@
|
||||||
*.sqlite3
|
*.sqlite3
|
||||||
.env
|
|
Binary file not shown.
117
news/main.py
117
news/main.py
|
@ -7,7 +7,7 @@ import os
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Final, Optional, List, NamedTuple
|
from typing import Final, Optional, List, NamedTuple
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from textwrap import wrap, fill
|
from textwrap import wrap
|
||||||
|
|
||||||
import discord
|
import discord
|
||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
|
@ -19,7 +19,6 @@ from ollama import ChatResponse
|
||||||
from ollama import Client
|
from ollama import Client
|
||||||
from ollama import AsyncClient
|
from ollama import AsyncClient
|
||||||
import time
|
import time
|
||||||
import json
|
|
||||||
|
|
||||||
load_dotenv()
|
load_dotenv()
|
||||||
|
|
||||||
|
@ -35,8 +34,7 @@ bot = discord.Client(intents=intents)
|
||||||
LOGGER = logging.getLogger("main")
|
LOGGER = logging.getLogger("main")
|
||||||
logging.basicConfig(
|
logging.basicConfig(
|
||||||
level=logging.INFO,
|
level=logging.INFO,
|
||||||
format="%(asctime)s [%(levelname)s] %(name)s (in %(filename)s:%(lineno)d): %(message)s",
|
format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
|
||||||
datefmt="%Y-%m-%d %H:%M:%S"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
article_repository = ArticleRepository()
|
article_repository = ArticleRepository()
|
||||||
|
@ -86,11 +84,6 @@ relevance_system_prompt = ("You are a specialized analysis program designed to d
|
||||||
"Your response to this would then look like:\n"
|
"Your response to this would then look like:\n"
|
||||||
"100")
|
"100")
|
||||||
|
|
||||||
relevance_system_prompt_2 = "\n".join(["You are a specialized analysis program designed to determine if a paragraph is relevant to the "
|
|
||||||
"topic of the article based on various snippets and meta-information gathered from the article.",
|
|
||||||
"You will be given different inputs and prompts by the user where you MUST respond with a "
|
|
||||||
"YES or NO depending on if that input is relevant to the paragraph."])
|
|
||||||
|
|
||||||
@dataclass(frozen=True)
|
@dataclass(frozen=True)
|
||||||
class Response:
|
class Response:
|
||||||
response: ChatResponse
|
response: ChatResponse
|
||||||
|
@ -139,10 +132,6 @@ class ChatBot:
|
||||||
async def set_model(self, model : str):
|
async def set_model(self, model : str):
|
||||||
self.model = model
|
self.model = model
|
||||||
|
|
||||||
def set_system(self, system : str):
|
|
||||||
self.system = system
|
|
||||||
self.clear()
|
|
||||||
|
|
||||||
def clear(self):
|
def clear(self):
|
||||||
self.messages = []
|
self.messages = []
|
||||||
self.messages.append({"role": "system", "content": self.system})
|
self.messages.append({"role": "system", "content": self.system})
|
||||||
|
@ -176,18 +165,18 @@ async def send_text_file(channel: discord.abc.Messageable, *args: str | tuple[st
|
||||||
files = [discord.File(io.BytesIO(text.encode("utf-8")), filename=name) for name, text in zip(names, strings)]
|
files = [discord.File(io.BytesIO(text.encode("utf-8")), filename=name) for name, text in zip(names, strings)]
|
||||||
await channel.send(message, files=files)
|
await channel.send(message, files=files)
|
||||||
|
|
||||||
def tally_responses(array: list[str]):
|
def tally_responses(tools):
|
||||||
yes = 0
|
increment = 0
|
||||||
no = 0
|
decrement = 0
|
||||||
err = 0
|
if tools:
|
||||||
for a in array:
|
for tool in tools:
|
||||||
if a.upper() == "YES":
|
if tool['function']['name'] == "increment":
|
||||||
yes += 1
|
increment += 1
|
||||||
elif a.upper() == "NO":
|
elif tool['function']['name'] == "decrement":
|
||||||
no += 1
|
decrement += 1
|
||||||
else:
|
else:
|
||||||
err += 1
|
LOGGER.warning(f"Unknown tool: {tool}")
|
||||||
return yes, no, err
|
return increment, decrement
|
||||||
|
|
||||||
|
|
||||||
async def handle_article_url(message: discord.Message, url: str) -> None:
|
async def handle_article_url(message: discord.Message, url: str) -> None:
|
||||||
|
@ -196,6 +185,33 @@ async def handle_article_url(message: discord.Message, url: str) -> None:
|
||||||
try:
|
try:
|
||||||
title, processed_html = await article_repository.get_article(url)
|
title, processed_html = await article_repository.get_article(url)
|
||||||
|
|
||||||
|
tools = [
|
||||||
|
{
|
||||||
|
'type': 'function',
|
||||||
|
'function': {
|
||||||
|
'name': 'increment',
|
||||||
|
'description': 'increment internal counter by 1',
|
||||||
|
'parameters': {
|
||||||
|
'type': 'object',
|
||||||
|
'properties': {},
|
||||||
|
'required': []
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
'type': 'function',
|
||||||
|
'function': {
|
||||||
|
'name': 'decrement',
|
||||||
|
'description': 'decrement internal counter by 1',
|
||||||
|
'parameters': {
|
||||||
|
'type': 'object',
|
||||||
|
'properties': {},
|
||||||
|
'required': []
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
|
||||||
summary_bot = ChatBot(summary_system_prompt)
|
summary_bot = ChatBot(summary_system_prompt)
|
||||||
|
|
||||||
summary_parts = await summary_bot.multi_summary(processed_html, options={
|
summary_parts = await summary_bot.multi_summary(processed_html, options={
|
||||||
|
@ -204,69 +220,34 @@ async def handle_article_url(message: discord.Message, url: str) -> None:
|
||||||
"num_ctx": 4096
|
"num_ctx": 4096
|
||||||
})
|
})
|
||||||
|
|
||||||
summary_bot.set_system("You are a specialized analysis program designed to summarize articles into their key points.\n "
|
|
||||||
"You WILL only output a JSON list of key points, structured as {key_points: [\"keypoint1\", \"keypoint2\",...]}. ")
|
|
||||||
|
|
||||||
try:
|
|
||||||
keywords = [item for sublist in (json.loads(sumr.content())["key_points"] for sumr in await summary_bot.multi_summary(processed_html, options={
|
|
||||||
"temperature": 0.5,
|
|
||||||
"num_ctx": 4096
|
|
||||||
})) for item in sublist]
|
|
||||||
LOGGER.info(keywords)
|
|
||||||
except Exception as exc:
|
|
||||||
LOGGER.error("Failed to correctly parse LLM output. It is likely that it has failed.")
|
|
||||||
LOGGER.error(exc, exc_info=True)
|
|
||||||
keywords = []
|
|
||||||
|
|
||||||
summary_parts_string = [part.content() for part in summary_parts]
|
summary_parts_string = [part.content() for part in summary_parts]
|
||||||
|
|
||||||
summary = "\n".join(summary_parts_string)
|
summary = "\nSummary: ".join(summary_parts_string)
|
||||||
|
|
||||||
paragraphs = [para for para in processed_html.split("\n") if len(para.strip()) > 0]
|
paragraphs = [para for para in processed_html.split("\n") if len(para.strip()) > 0]
|
||||||
|
|
||||||
relevance_bot = ChatBot(relevance_system_prompt)
|
relevance_bot = ChatBot(relevance_system_prompt)
|
||||||
relevance_bot2 = ChatBot(relevance_system_prompt_2)
|
|
||||||
|
|
||||||
paragraph_relevance = []
|
paragraph_relevance = []
|
||||||
paragraph_restitutions = []
|
|
||||||
paragraph_keypoints = []
|
|
||||||
|
|
||||||
for paragraph in paragraphs:
|
for paragraph in paragraphs:
|
||||||
response = await relevance_bot.single_chat("".join(["-----\n",
|
response = await relevance_bot.single_chat("".join(["-----\n",
|
||||||
"Summary:\n ",
|
"Summary:\n ",
|
||||||
summary, "-----\n",
|
summary, "-----\n",
|
||||||
"Paragraph:\n ",
|
"Paragraph:\n ",
|
||||||
paragraph, "-----\n"
|
paragraph, "-----\n"]))
|
||||||
"REMEMBER: You WILL respond with number between 0 and 100 indicating how relevant the paragraph is to the article."]))
|
|
||||||
relevance_bot2.clear()
|
|
||||||
LOGGER.info(await relevance_bot2.send_message("DO NOT RESPOND TO THIS MESSAGE. The Paragraph you will analyze is as follows:\n\n" + paragraph))
|
|
||||||
res = await relevance_bot2.send_message("Is the paragraph relevant to the following summary? Remember, please respond with either YES or NO.\n\n" + summary)
|
|
||||||
LOGGER.info(f"Summary Relevancy using chat: {res.content()}")
|
|
||||||
restitutions = []
|
|
||||||
for keypoint in keywords:
|
|
||||||
keypoint_is_rev = await relevance_bot2.send_message(
|
|
||||||
"A key point is an idea that the article is communicating to the reader. Given that, is the paragraph relevant to the following key point. Remember: Please respond with either YES or NO.\n\n" + keypoint)
|
|
||||||
LOGGER.info(f"Running for keyword {keypoint} got response {keypoint_is_rev.content()}")
|
|
||||||
restitutions.append(keypoint_is_rev.content())
|
|
||||||
restitutions.append(res.content())
|
|
||||||
yes, no, err = tally_responses(restitutions)
|
|
||||||
total = yes + no + err
|
|
||||||
paragraph_relevance.append(response.content())
|
paragraph_relevance.append(response.content())
|
||||||
paragraph_restitutions.append(restitutions)
|
|
||||||
paragraph_keypoints.append((yes / total) * 100)
|
|
||||||
|
|
||||||
for i, x in enumerate(paragraph_relevance):
|
for i, x in enumerate(paragraph_relevance):
|
||||||
paragraph_relevance[i] = int(x)
|
paragraph_relevance[i] = str(int(x))
|
||||||
|
|
||||||
average_relevance = (sum(int(x) for x in paragraph_relevance) / len(paragraph_relevance) + sum(paragraph_keypoints)) / 2
|
average_relevance = sum(int(x) for x in paragraph_relevance) / len(paragraph_relevance)
|
||||||
median_relevance = sorted(int(ref) for ref in paragraph_relevance)[len(paragraph_relevance) // 2]
|
median_relevance = int(sorted(paragraph_relevance)[len(paragraph_relevance) // 2])
|
||||||
median_relevance2 = sorted(paragraph_keypoints)[len(paragraph_keypoints) // 2]
|
|
||||||
|
|
||||||
|
relevance_cutoff = min(average_relevance, median_relevance)
|
||||||
|
LOGGER.info(f"Relevance cutoff: {relevance_cutoff}")
|
||||||
|
|
||||||
relevance_cutoff = min(average_relevance, (median_relevance + median_relevance2) / 2)
|
relevance_content = [para + " (" + res + "%)" for para, res in zip(paragraphs, paragraph_relevance) if int(res) >= relevance_cutoff]
|
||||||
LOGGER.info(f"Relevance cutoff: {relevance_cutoff} From ({average_relevance}, {(median_relevance + median_relevance2) / 2})")
|
|
||||||
|
|
||||||
relevance_content = [fill(para + " (" + str(res) + "%) [" + ",".join(li) + "]" + "(" + str(point) + "%)", 80) for para, res, li, point, in zip(paragraphs, paragraph_relevance, paragraph_restitutions, paragraph_keypoints)]
|
|
||||||
relevance_prompt = "\n\n".join(relevance_content)
|
relevance_prompt = "\n\n".join(relevance_content)
|
||||||
|
|
||||||
# social = await send_chat_with_system("social", processed_html, social_system_prompt, tools)
|
# social = await send_chat_with_system("social", processed_html, social_system_prompt, tools)
|
||||||
|
|
Binary file not shown.
Loading…
Reference in New Issue