big think
parent
de6f9efbed
commit
275ac6e4ed
Binary file not shown.
36
news/main.py
36
news/main.py
|
@ -34,14 +34,14 @@ logging.basicConfig(
|
|||
|
||||
article_repository = ArticleRepository()
|
||||
|
||||
async def send_chat(messages):
|
||||
async def send_chat(messages, fmt):
|
||||
return await AsyncClient(host="192.168.69.3:11434").chat(
|
||||
model="deepseek-r1:8b",
|
||||
messages=messages,
|
||||
stream=False,
|
||||
options={
|
||||
'temperature': 0.5,
|
||||
"num_ctx": 128000
|
||||
# "num_ctx": 128000
|
||||
},
|
||||
think=True)
|
||||
|
||||
|
@ -60,13 +60,39 @@ async def handle_article_url(message: discord.Message, url: str) -> None:
|
|||
LOGGER.info("Received URL from %s: %s", message.author, url)
|
||||
|
||||
try:
|
||||
processed_html = await article_repository.get_article(url)
|
||||
title, processed_html = await article_repository.get_article(url)
|
||||
paragraphs = processed_html.split("\n")
|
||||
paragraphs = [f"\"Paragraph ({i + 1})\": {paragraph.strip()}" for i, paragraph in enumerate(paragraphs)]
|
||||
processed_graphs = [{"role": "user", "content": paragraph} for paragraph in paragraphs]
|
||||
# print(paragraphs)
|
||||
# print(processed_graphs)
|
||||
|
||||
messages = [
|
||||
{"role": "system", "content": "You are an expert article-analysis assistant."
|
||||
# "You WILL respond in JSON format."
|
||||
"Your job is to analyse paragraphs in the article and look for provocative, emotionally charged, and loaded language"
|
||||
"You will analyse the paragraphs, determine if they are provocative, and if so, output a capital X for each problematic word."
|
||||
"Questions you should ask yourself while reading the paragraph:"
|
||||
"1. What is the literal meaning of the questionable word or phrase?"
|
||||
"2. What is the emotional or social context of the questionable word or phrase?"
|
||||
"3. Does that word or phrase have any connotations, that is, associations that are positive or negative?"
|
||||
"4. What group (sometimes called a “discourse community”) favors one locution over another, and why?"
|
||||
"5. Is the word or phrase “loaded”? How far does it steer us from neutral?"
|
||||
"6. Does the word or phrase help me see, or does it prevent me from seeing? (This is important)"
|
||||
"You will now be provided with the headline of the article then a paragraph from the article."
|
||||
"The headline (title of the page) will be provided as \"Headline\": \"EXAMPLE HEADLINE\"."
|
||||
"The paragraphs will be provided as \"Paragraph (numbered index)\": \"EXAMPLE PARAGRAPH\"."},
|
||||
]
|
||||
messages.extend(processed_graphs)
|
||||
response = await send_chat(messages, "json")
|
||||
print(response)
|
||||
# TODO: parse `html`, summarise, etc.
|
||||
await message.channel.send(f"✅ Article downloaded – {len(processed_html):,} bytes.")
|
||||
await send_text_file(message.channel, processed_html)
|
||||
except:
|
||||
LOGGER.exception("Playwright failed")
|
||||
await send_text_file(message.channel, response['response'])
|
||||
except Exception as exc:
|
||||
await message.channel.send("❌ Sorry, I couldn't fetch that page.")
|
||||
await message.channel.send(f"```\n{exc}\n```")
|
||||
|
||||
|
||||
def extract_first_url(text: str) -> Optional[str]:
|
||||
|
|
Binary file not shown.
25
news/pool.py
25
news/pool.py
|
@ -61,7 +61,7 @@ class PlaywrightPool:
|
|||
await cls._pw.stop()
|
||||
|
||||
@classmethod
|
||||
async def fetch_html(cls, url: str) -> str:
|
||||
async def fetch_html(cls, url: str) -> tuple[str, str]:
|
||||
if cls._browser is None:
|
||||
await cls.start()
|
||||
|
||||
|
@ -70,7 +70,8 @@ class PlaywrightPool:
|
|||
try:
|
||||
await page.goto(url, wait_until="load", timeout=60_000)
|
||||
html = await page.content()
|
||||
return html
|
||||
title = await page.title()
|
||||
return title, html
|
||||
finally:
|
||||
await page.close()
|
||||
|
||||
|
@ -139,7 +140,7 @@ class ArticleRepository:
|
|||
# ------------------------------------------------------------------ #
|
||||
# public API
|
||||
# ------------------------------------------------------------------ #
|
||||
async def get_article(self, url: str) -> str:
|
||||
async def get_article(self, url: str) -> tuple[str, str]:
|
||||
"""
|
||||
Main entry point.
|
||||
• Returns the processed text if it is already cached.
|
||||
|
@ -150,29 +151,30 @@ class ArticleRepository:
|
|||
async with self._lock:
|
||||
row = self._row_for_url(url)
|
||||
|
||||
if row and row[3]: # row = (id, url, raw, processed)
|
||||
if row: # row = (id, url, title, raw, processed)
|
||||
LOGGER.info(f"[ArticleRepository] Found cached article for {url}")
|
||||
return row[3] # processed_html already present
|
||||
return row[2], row[4] # processed_html already present
|
||||
|
||||
LOGGER.info(f"[ArticleRepository] Downloading article for {url}")
|
||||
raw_html = await PlaywrightPool.fetch_html(url)
|
||||
title, raw_html = await PlaywrightPool.fetch_html(url)
|
||||
processed_html = process_html(raw_html)
|
||||
|
||||
async with self._lock:
|
||||
# Upsert:
|
||||
self._conn.execute(
|
||||
f"""
|
||||
INSERT INTO {self._TABLE_NAME} (url, raw_html, processed_html)
|
||||
VALUES ({self.cursor_type}, {self.cursor_type}, {self.cursor_type})
|
||||
INSERT INTO {self._TABLE_NAME} (url, title, raw_html, processed_html)
|
||||
VALUES ({self.cursor_type}, {self.cursor_type}, {self.cursor_type}, {self.cursor_type})
|
||||
ON CONFLICT(url) DO UPDATE SET
|
||||
title=EXCLUDED.title,
|
||||
raw_html=EXCLUDED.raw_html,
|
||||
processed_html=EXCLUDED.processed_html
|
||||
""",
|
||||
(url, raw_html, processed_html),
|
||||
(url, title, raw_html, processed_html),
|
||||
)
|
||||
self._conn.commit()
|
||||
|
||||
return processed_html
|
||||
return title, processed_html
|
||||
|
||||
def close(self) -> None:
|
||||
"""Close the underlying DB connection."""
|
||||
|
@ -193,6 +195,7 @@ class ArticleRepository:
|
|||
CREATE TABLE IF NOT EXISTS {self._TABLE_NAME} (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
url TEXT UNIQUE NOT NULL,
|
||||
title TEXT NOT NULL,
|
||||
raw_html TEXT NOT NULL,
|
||||
processed_html TEXT NOT NULL
|
||||
)
|
||||
|
@ -202,7 +205,7 @@ class ArticleRepository:
|
|||
|
||||
def _row_for_url(self, url: str) -> Optional[Tuple[Any, ...]]:
|
||||
cur = self._conn.cursor()
|
||||
cur.execute(f"SELECT id, url, raw_html, processed_html FROM {self._TABLE_NAME} WHERE url = {self.cursor_type}", (url,))
|
||||
cur.execute(f"SELECT id, url, title, raw_html, processed_html FROM {self._TABLE_NAME} WHERE url = {self.cursor_type}", (url,))
|
||||
return cur.fetchone()
|
||||
|
||||
@staticmethod
|
||||
|
|
Loading…
Reference in New Issue