Compare commits

No commits in common. "de6f9efbedbda6aa68b2eefa2964906e855ef98c" and "48cc36011e9d6670384f754a11e66dbcc4fd906b" have entirely different histories.

6 changed files with 10 additions and 206 deletions

.gitignore

@@ -1 +0,0 @@
-.env

news/.gitignore

@@ -1 +0,0 @@
-*.sqlite3

@@ -8,12 +8,11 @@ from typing import Final, Optional, List
 import discord
 from dotenv import load_dotenv
 import re
-from pool import PlaywrightPool, ArticleRepository
+from pool import PlaywrightPool
 import trafilatura
 import io
 from ollama import chat
 from ollama import ChatResponse
 from ollama import Client
 from ollama import AsyncClient
 from playwright.async_api import async_playwright, Browser, BrowserContext, Page

 load_dotenv()
@@ -26,25 +25,12 @@ intents.message_content = True
 bot = discord.Client(intents=intents)

-LOGGER = logging.getLogger("main")
+LOGGER = logging.getLogger("Newsulizer")
 logging.basicConfig(
     level=logging.INFO,
     format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
 )

-article_repository = ArticleRepository()
-
-async def send_chat(messages):
-    return await AsyncClient(host="192.168.69.3:11434").chat(
-        model="deepseek-r1:8b",
-        messages=messages,
-        stream=False,
-        options={
-            'temperature': 0.5,
-            "num_ctx": 128000
-        },
-        think=True)

 async def send_text_file(channel: discord.abc.Messageable, content: str, filename: str = "article.md") -> None:
     fp = io.BytesIO(content.encode("utf-8"))
     file = discord.File(fp, filename=filename)
@@ -60,10 +46,10 @@ async def handle_article_url(message: discord.Message, url: str) -> None:
     LOGGER.info("Received URL from %s: %s", message.author, url)

     try:
-        processed_html = await article_repository.get_article(url)
+        html = await PlaywrightPool.fetch_html(url)
+        # TODO: parse `html`, summarise, etc.

-        await message.channel.send(f"✅ Article downloaded {len(processed_html):,} bytes.")
-        await send_text_file(message.channel, processed_html)
+        await message.channel.send(f"✅ Article downloaded {len(html):,} bytes.")
+        await send_text_file(message.channel, trafilatura.extract(html, output_format='markdown', include_images=True, include_formatting=True, include_comments=False, favor_recall=True))
     except:
         LOGGER.exception("Playwright failed")
         await message.channel.send("❌ Sorry, I couldn't fetch that page.")
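
Note: both revisions share the same entry point, handle_article_url; the hunk above only swaps its body (a cached ArticleRepository lookup on the de6f9 side versus a direct fetch plus inline trafilatura extraction on the 48cc3 side). A minimal sketch of how such a handler is typically dispatched from the bot's event loop, reusing the bot, discord, and handle_article_url names from this file; the URL regex and the on_message wiring are illustrative assumptions, not code from either commit:

import re

URL_PATTERN = re.compile(r"https?://\S+")  # hypothetical pattern, not taken from the diff

@bot.event
async def on_message(message: discord.Message) -> None:
    # Skip the bot's own messages to avoid feedback loops.
    if message.author == bot.user:
        return
    match = URL_PATTERN.search(message.content)
    if match:
        await handle_article_url(message, match.group(0))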

Binary file not shown.

pool.py

@@ -1,25 +1,7 @@
 from __future__ import annotations
 from playwright.async_api import async_playwright, Browser, BrowserContext, Page
-import asyncio
-import os
-import sqlite3
-import trafilatura
-import types
-from typing import Final, Optional, Union, Protocol, Any, Tuple
-import logging
-
-def process_html(html):
-    return trafilatura.extract(html, output_format='markdown', include_images=True, include_formatting=True,
-                               include_tables=True, include_comments=False, favor_recall=True)
-
-LOGGER = logging.getLogger("pool")
-
-# logging.basicConfig(
-#     level=logging.INFO,
-#     format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
-# )
+from typing import Final, Optional

 class PlaywrightPool:
     _pw = None  # playwright instance
     _browser: Optional[Browser] = None
@@ -73,165 +55,3 @@ class PlaywrightPool:
             return html
         finally:
             await page.close()
-
-class DBConnectionInfo:
-    def __init__(
-        self,
-        dbname: str,
-        user: str,
-        password: str,
-        host: str = "localhost",
-        port: int = 5432,
-    ) -> None:
-        self.host = host
-        self.port = port
-        self.dbname = dbname
-        self.user = user
-        self.password = password
-
-class ArticleRepository:
-    """
-    A very small wrapper around a database that maintains a single table
-    called 'articles' inside a database called 'newsulizer'.
-
-    If you pass an existing DB-API connection, it will be used as-is.
-    If you don't pass anything, a local SQLite file called
-    './newsulizer.sqlite3' is created/used automatically.
-    """
-
-    _CREATE_DB_SQLITE = "newsulizer.sqlite3"
-    _TABLE_NAME = "articles"
-
-    def __init__(
-        self,
-        connection_info: Optional[DBConnectionInfo] = None,
-        sqlite_path: Optional[str] = None,
-    ) -> None:
-        """
-        Parameters
-        ----------
-        sqlite_path:
-            Path to an SQLite file. Defaults to ./newsulizer.sqlite3
-            when *connection_info* is omitted.
-        """
-        if connection_info is None:
-            sqlite_path = sqlite_path or self._CREATE_DB_SQLITE
-            connection = self._make_sqlite_conn(sqlite_path)
-            self.cursor_type = "?"
-        else:
-            connection = self._make_postgres_conn(
-                host=connection_info.host,
-                port=connection_info.port,
-                dbname=connection_info.dbname,
-                user=connection_info.user,
-                password=connection_info.password,
-            )
-            self.cursor_type = "%s"
-
-        self._conn = connection
-        self._ensure_schema()
-        # Protect SQLite (which is not async-safe) by one lock
-        self._lock = asyncio.Lock()
-
-    # ------------------------------------------------------------------ #
-    # public API
-    # ------------------------------------------------------------------ #
-    async def get_article(self, url: str) -> str:
-        """
-        Main entry point.
-
-        Returns the processed text if it is already cached.
-        Otherwise downloads it, processes it, stores it, and returns it.
-        """
-        # Single writer at a time when using sqlite3 avoids `database is locked`
-        async with self._lock:
-            row = self._row_for_url(url)
-
-            if row and row[3]:  # row = (id, url, raw, processed)
-                LOGGER.info(f"[ArticleRepository] Found cached article for {url}")
-                return row[3]  # processed_html already present
-
-            LOGGER.info(f"[ArticleRepository] Downloading article for {url}")
-
-        raw_html = await PlaywrightPool.fetch_html(url)
-        processed_html = process_html(raw_html)
-
-        async with self._lock:
-            # Upsert:
-            self._conn.execute(
-                f"""
-                INSERT INTO {self._TABLE_NAME} (url, raw_html, processed_html)
-                VALUES ({self.cursor_type}, {self.cursor_type}, {self.cursor_type})
-                ON CONFLICT(url) DO UPDATE SET
-                    raw_html=EXCLUDED.raw_html,
-                    processed_html=EXCLUDED.processed_html
-                """,
-                (url, raw_html, processed_html),
-            )
-            self._conn.commit()
-            return processed_html
-
-    def close(self) -> None:
-        """Close the underlying DB connection."""
-        try:
-            self._conn.close()
-        except Exception:
-            pass
-
-    # ------------------------------------------------------------------ #
-    # internals
-    # ------------------------------------------------------------------ #
-    def _ensure_schema(self) -> None:
-        """Create the articles table if it does not yet exist."""
-        # Simple feature detection for DBs that do not support
-        # `ON CONFLICT` (mainly older MySQL) could be added here.
-        self._conn.execute(
-            f"""
-            CREATE TABLE IF NOT EXISTS {self._TABLE_NAME} (
-                id INTEGER PRIMARY KEY AUTOINCREMENT,
-                url TEXT UNIQUE NOT NULL,
-                raw_html TEXT NOT NULL,
-                processed_html TEXT NOT NULL
-            )
-            """
-        )
-        self._conn.commit()
-
-    def _row_for_url(self, url: str) -> Optional[Tuple[Any, ...]]:
-        cur = self._conn.cursor()
-        cur.execute(f"SELECT id, url, raw_html, processed_html FROM {self._TABLE_NAME} WHERE url = {self.cursor_type}", (url,))
-        return cur.fetchone()
-
-    @staticmethod
-    def _make_sqlite_conn(sqlite_path: str) -> sqlite3.Connection:
-        first_time = not os.path.exists(sqlite_path)
-        connection = sqlite3.connect(sqlite_path, check_same_thread=False)
-        # Enforce basic integrity
-        connection.execute("PRAGMA foreign_keys = ON")
-        connection.execute("PRAGMA busy_timeout = 5000")
-        if first_time:
-            # Ensure a human-readable filename, not an unnamed ATTACH
-            LOGGER.info(f"[ArticleRepository] Created fresh local database at '{sqlite_path}'")
-        else:
-            LOGGER.info(f"[ArticleRepository] Reusing existing local database at '{sqlite_path}'")
-        return connection
-
-    @staticmethod
-    def _make_postgres_conn(*, host: str, port: int, dbname: str, user: str, password: Optional[str]):
-        try:
-            import psycopg2
-        except ModuleNotFoundError as exc:
-            raise RuntimeError(
-                "psycopg2 is required for PostgreSQL support; "
-                "run `pip install psycopg2-binary`"
-            ) from exc
-        conn = psycopg2.connect(
-            host=host, port=port, dbname=dbname, user=user, password=password
-        )
-        conn.autocommit = False
-        return conn
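
Taken together, the deleted lines form a small cache-aside layer: get_article returns the processed text from the articles table when it is already cached, and otherwise fetches through PlaywrightPool, runs trafilatura, and upserts the result under the URL's UNIQUE key. A minimal usage sketch against the de6f9 side, assuming PlaywrightPool.fetch_html starts the shared browser on first use (its setup is outside this hunk); the URL is a placeholder:

import asyncio
from pool import ArticleRepository

async def main() -> None:
    repo = ArticleRepository()  # no DBConnectionInfo -> ./newsulizer.sqlite3 with "?" placeholders
    try:
        # First call fetches via PlaywrightPool and upserts into `articles`;
        # repeating it returns the cached processed_html with no network I/O.
        text = await repo.get_article("https://example.com/story")  # placeholder URL
        print(f"{len(text):,} characters of processed markdown")
    finally:
        repo.close()

asyncio.run(main())

The cursor_type field papers over the placeholder-style difference between sqlite3 ("?") and psycopg2 ("%s"), which is why the INSERT above is assembled with f-string placeholders rather than a hard-coded parameter style.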