From f772eed7b2a99ff8e2a9bb93db482e76ca14c4a5 Mon Sep 17 00:00:00 2001 From: mac Date: Sat, 2 May 2026 03:32:52 +0800 Subject: [PATCH 1/2] #02-rename paperbot as paperscout --- .env.example | 2 +- .github/workflows/cd.yml | 6 +- .github/workflows/ci.yml | 2 +- .github/workflows/db-backup.yml | 6 +- Dockerfile | 8 +- README.md | 86 ++++++++++---------- deploy/SERVER_SETUP.md | 62 +++++++------- deploy/{paperbot.conf => paperscout.conf} | 8 +- docker-compose.yml | 2 +- pyproject.toml | 8 +- run | 2 +- src/{paperbot => paperscout}/__init__.py | 2 +- src/{paperbot => paperscout}/__main__.py | 16 ++-- src/{paperbot => paperscout}/config.py | 2 +- src/{paperbot => paperscout}/db.py | 0 src/{paperbot => paperscout}/health.py | 0 src/{paperbot => paperscout}/models.py | 0 src/{paperbot => paperscout}/monitor.py | 0 src/{paperbot/bot.py => paperscout/scout.py} | 16 ++-- src/{paperbot => paperscout}/sources.py | 0 src/{paperbot => paperscout}/storage.py | 0 tests/conftest.py | 12 +-- tests/test_health.py | 4 +- tests/test_models.py | 4 +- tests/test_monitor.py | 16 ++-- tests/{test_bot.py => test_scout.py} | 60 +++++++------- tests/test_sources.py | 30 +++---- tests/test_storage.py | 18 ++-- 28 files changed, 186 insertions(+), 186 deletions(-) rename deploy/{paperbot.conf => paperscout.conf} (77%) rename src/{paperbot => paperscout}/__init__.py (75%) rename src/{paperbot => paperscout}/__main__.py (88%) rename src/{paperbot => paperscout}/config.py (97%) rename src/{paperbot => paperscout}/db.py (100%) rename src/{paperbot => paperscout}/health.py (100%) rename src/{paperbot => paperscout}/models.py (100%) rename src/{paperbot => paperscout}/monitor.py (100%) rename src/{paperbot/bot.py => paperscout/scout.py} (96%) rename src/{paperbot => paperscout}/sources.py (100%) rename src/{paperbot => paperscout}/storage.py (100%) rename tests/{test_bot.py => test_scout.py} (93%) diff --git a/.env.example b/.env.example index 2077299..9ec0472 100644 --- a/.env.example +++ 
b/.env.example @@ -7,7 +7,7 @@ PORT=3000 # Database (required) — shared PostgreSQL on the host. # When running in Docker, use host.docker.internal to reach the host: -DATABASE_URL=postgresql://paperbot:secret@host.docker.internal:5432/paperbot +DATABASE_URL=postgresql://paperscout:secret@host.docker.internal:5432/paperscout # Scheduling POLL_INTERVAL_MINUTES=30 diff --git a/.github/workflows/cd.yml b/.github/workflows/cd.yml index f4ac6d1..8888748 100644 --- a/.github/workflows/cd.yml +++ b/.github/workflows/cd.yml @@ -20,7 +20,7 @@ jobs: run: pip install -e ".[dev]" - name: Run tests - run: python -m pytest tests/ --cov=paperbot --cov-fail-under=90 -v + run: python -m pytest tests/ --cov=paperscout --cov-fail-under=90 -v deploy: name: Deploy @@ -35,9 +35,9 @@ jobs: key: ${{ secrets.SERVER_SSH_KEY }} port: ${{ secrets.SERVER_PORT || 22 }} script: | - cd /opt/paperbot + cd /opt/paperscout git pull origin main - docker compose up -d --build paperbot + docker compose up -d --build paperscout - name: Health check uses: appleboy/ssh-action@v1 diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f8bf389..d11cb54 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -30,7 +30,7 @@ jobs: - name: Run tests with coverage run: | python -m pytest tests/ \ - --cov=paperbot \ + --cov=paperscout \ --cov-report=term-missing \ --cov-report=xml \ --cov-fail-under=90 \ diff --git a/.github/workflows/db-backup.yml b/.github/workflows/db-backup.yml index c2a6f21..8a62644 100644 --- a/.github/workflows/db-backup.yml +++ b/.github/workflows/db-backup.yml @@ -19,8 +19,8 @@ jobs: port: ${{ secrets.SERVER_PORT || 22 }} script: | set -euo pipefail - DUMP="/tmp/paperbot-$(date +%Y%m%d).dump" + DUMP="/tmp/paperscout-$(date +%Y%m%d).dump" - sudo -u postgres pg_dump -Fc paperbot > "$DUMP" - gsutil cp "$DUMP" "gs://paperbot-backups/paperbot-$(date +%Y%m%d).dump" + sudo -u postgres pg_dump -Fc paperscout > "$DUMP" + gsutil cp "$DUMP" 
"gs://paperscout-backups/paperscout-$(date +%Y%m%d).dump" rm -f "$DUMP" diff --git a/Dockerfile b/Dockerfile index b5d4511..a8b8078 100644 --- a/Dockerfile +++ b/Dockerfile @@ -17,16 +17,16 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ libpq5 \ && rm -rf /var/lib/apt/lists/* -RUN useradd --create-home --shell /bin/bash paperbot +RUN useradd --create-home --shell /bin/bash paperscout WORKDIR /app COPY --from=builder /usr/local/lib/python3.12/site-packages /usr/local/lib/python3.12/site-packages COPY --from=builder /usr/local/bin /usr/local/bin COPY src/ src/ -RUN mkdir -p /app/data && chown paperbot:paperbot /app/data +RUN mkdir -p /app/data && chown paperscout:paperscout /app/data -USER paperbot +USER paperscout EXPOSE 3000 8080 -ENTRYPOINT ["python", "-m", "paperbot"] +ENTRYPOINT ["python", "-m", "paperscout"] diff --git a/README.md b/README.md index e33f5f0..6ab0bf7 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ -# paperbot-python +# paperscout-python -[![CI](https://github.com/CppDigest/paperbot-python/actions/workflows/ci.yml/badge.svg)](https://github.com/CppDigest/paperbot-python/actions/workflows/ci.yml) -[![CD](https://github.com/CppDigest/paperbot-python/actions/workflows/cd.yml/badge.svg)](https://github.com/CppDigest/paperbot-python/actions/workflows/cd.yml) +[![CI](https://github.com/cppalliance/paperscout-python/actions/workflows/ci.yml/badge.svg)](https://github.com/cppalliance/paperscout-python/actions/workflows/ci.yml) +[![CD](https://github.com/cppalliance/paperscout-python/actions/workflows/cd.yml/badge.svg)](https://github.com/cppalliance/paperscout-python/actions/workflows/cd.yml) WG21 C++ paper tracker with ISO draft probing and Slack notifications. 
@@ -9,7 +9,7 @@ A Python project that probes the isocpp.org paper system for unpublished D-paper ## Features -- **Per-user watchlists** -- each user manages their own list of authors and paper numbers via DM; the bot sends a personal DM when a match is found +- **Per-user watchlists** -- each user manages their own list of authors and paper numbers via DM; the scout sends a personal DM when a match is found - **ISO draft probing** -- Three-tier async HEAD requests to `isocpp.org/files/papers/` detect unpublished D-papers - **Frontier monitoring** -- Automatically probes newly assigned paper numbers beyond the current highest - **30-minute polling** -- Fetches wg21.link/index.json every 30 minutes (configurable) @@ -23,7 +23,7 @@ A Python project that probes the isocpp.org paper system for unpublished D-paper 1. Go to [https://api.slack.com/apps](https://api.slack.com/apps) and click **Create New App** 2. Choose **From scratch** -3. Name it `paperbot` (or whatever you prefer), select your workspace, click **Create App** +3. Name it `paperscout` (or whatever you prefer), select your workspace, click **Create App** ### 2. Configure Bot Permissions @@ -32,17 +32,17 @@ Go to **OAuth & Permissions** in the left sidebar. 
Under **Bot Token Scopes**, a | Scope | Why | |-------|-----| | `chat:write` | Post messages to channels and send DMs | -| `chat:write.public` | Post to public channels the bot hasn't been invited to | -| `im:history` | Read messages in 1:1 DMs with the bot | +| `chat:write.public` | Post to public channels the scout hasn't been invited to | +| `im:history` | Read messages in 1:1 DMs with the scout | | `im:write` | Open 1:1 DM conversations to deliver watchlist alerts | -| `mpim:history` | Read messages in group DMs the bot has been invited to | +| `mpim:history` | Read messages in group DMs the scout has been invited to | | `mpim:write` | Reply in group DMs | | `channels:history` | Read messages in public channels | -| `groups:history` | Read messages in private channels the bot is invited to | +| `groups:history` | Read messages in private channels the scout is invited to | | `groups:write` | Reply in private channels | -| `app_mentions:read` | Respond when someone `@paperbot`s | +| `app_mentions:read` | Respond when someone `@paperscout`s | -> **Note on group DMs (`mpim`):** When the bot is invited to a group DM, `watchlist` commands are rejected with a friendly error telling the user to use a 1:1 DM instead. `status` and `help` work normally. The `mpim:history` and `mpim:write` scopes are needed to receive and reply to those messages. +> **Note on group DMs (`mpim`):** When the scout is invited to a group DM, `watchlist` commands are rejected with a friendly error telling the user to use a 1:1 DM instead. `status` and `help` work normally. The `mpim:history` and `mpim:write` scopes are needed to receive and reply to those messages. ### 3. Enable Events @@ -54,8 +54,8 @@ Go to **Event Subscriptions** in the left sidebar: - `message.groups` (messages in private channels) - `message.im` (1:1 direct messages) - `message.mpim` (group direct messages) - - `app_mention` (when someone @mentions the bot) -3. 
You will set the **Request URL** after the bot is running (step 7) + - `app_mention` (when someone @mentions the scout) +3. You will set the **Request URL** after the scout is running (step 7) ### 4. Enable DMs @@ -72,10 +72,10 @@ Go to **App Home** in the left sidebar: 4. Copy the **Bot User OAuth Token** (starts with `xoxb-`) 5. Go to **Basic Information** and copy the **Signing Secret** -### 6. Configure and Start the Bot +### 6. Configure and Start the Scout ```bash -cd paperbot-python +cd paperscout-python cp .env.example .env ``` @@ -87,7 +87,7 @@ SLACK_BOT_TOKEN=xoxb- PORT=3000 # PostgreSQL connection string (required) -DATABASE_URL=postgresql://user:password@localhost:5432/paperbot +DATABASE_URL=postgresql://user:password@localhost:5432/paperscout # Slack channel ID for general notifications (new frontier drafts, D→P transitions). # To find it: open the channel in Slack, click the channel name @@ -101,16 +101,16 @@ Install and run: ```bash pip install -e . -python -m paperbot +python -m paperscout ``` ### 7. Set the Request URL -Once the bot is running and reachable at a public URL: +Once the scout is running and reachable at a public URL: 1. Go back to **Event Subscriptions** in the Slack app config 2. Set **Request URL** to `https://your-server.com/slack/events` -3. Slack will send a challenge request -- the bot responds automatically +3. Slack will send a challenge request -- the scout responds automatically 4. Click **Save Changes** For local testing with ngrok: @@ -120,26 +120,26 @@ ngrok http 3000 # Use the ngrok URL: https://abc123.ngrok.io/slack/events ``` -### 8. Invite the Bot +### 8. Invite the Scout -- **Public channel notifications:** The bot posts to `NOTIFICATION_CHANNEL` automatically (via `chat:write.public`). No invite needed. -- **Private channels:** Type `/invite @paperbot` in the private channel for `@mention` support. -- **Watchlist DMs (required):** Each user must open a 1:1 DM with `paperbot` to manage their personal watchlist. 
The bot will also DM users proactively when their watchlist matches a new paper. -- **Group DMs:** The bot can be invited, but `watchlist` commands will be rejected with a message directing the user to use a 1:1 DM. +- **Public channel notifications:** The scout posts to `NOTIFICATION_CHANNEL` automatically (via `chat:write.public`). No invite needed. +- **Private channels:** Type `/invite @paperscout` in the private channel for `@mention` support. +- **Watchlist DMs (required):** Each user must open a 1:1 DM with `paperscout` to manage their personal watchlist. The scout will also DM users proactively when their watchlist matches a new paper. +- **Group DMs:** The scout can be invited, but `watchlist` commands will be rejected with a message directing the user to use a 1:1 DM. ### 9. Verify It Works -1. DM the bot: `status` — should reply with papers loaded, last poll time, and probe stats -2. DM the bot: `watchlist add Niebler` — should confirm the author was added (as an **author** entry) -3. DM the bot: `watchlist add 2300` — should confirm the paper was added (as a **paper number** entry) -4. DM the bot: `watchlist list` — should show both entries with their types -5. DM the bot: `watchlist remove Niebler` — should confirm removal -6. Type `@paperbot status` in a channel — should reply in-thread +1. DM the scout: `status` — should reply with papers loaded, last poll time, and probe stats +2. DM the scout: `watchlist add Niebler` — should confirm the author was added (as an **author** entry) +3. DM the scout: `watchlist add 2300` — should confirm the paper was added (as a **paper number** entry) +4. DM the scout: `watchlist list` — should show both entries with their types +5. DM the scout: `watchlist remove Niebler` — should confirm removal +6. Type `@paperscout status` in a channel — should reply in-thread 7. 
Check your notification channel after 30 minutes — frontier hits and D→P transitions appear there; personal watchlist matches arrive as DMs ### Production Deployment -The bot runs as a Docker container deployed via CD on every push to `main`. It connects to the host's shared PostgreSQL and sits behind nginx (TLS on `:443`). +The scout runs as a Docker container deployed via CD on every push to `main`. It connects to the host's shared PostgreSQL and sits behind nginx (TLS on `:443`). ``` Push to main → CI tests → SSH into server → git pull → docker compose up --build → Health check @@ -149,8 +149,8 @@ Quick start on a fresh server: ```bash # On the server (after Docker, PostgreSQL, and nginx are set up) -git clone https://github.com/CppDigest/paperbot-python.git /opt/paperbot -cd /opt/paperbot +git clone https://github.com/cppalliance/paperscout-python.git /opt/paperscout +cd /opt/paperscout cp .env.example .env # edit with real credentials docker compose up -d --build curl -sf http://localhost:9101/health @@ -160,9 +160,9 @@ See [`deploy/SERVER_SETUP.md`](deploy/SERVER_SETUP.md) for the full Ubuntu 22.04 Database backups run daily via [`.github/workflows/db-backup.yml`](.github/workflows/db-backup.yml), uploading `pg_dump` snapshots to Google Cloud Storage. -## Bot Commands +## Scout Commands -Watchlist commands only work in a **1:1 DM** with the bot (each user has their own independent watchlist). `status` and `help` work everywhere — DMs, group DMs, and channels via `@paperbot`. +Watchlist commands only work in a **1:1 DM** with the scout (each user has their own independent watchlist). `status` and `help` work everywhere — DMs, group DMs, and channels via `@paperscout`. | Command | Where | Description | |---------|-------|-------------| @@ -268,20 +268,20 @@ All parameters are configurable via environment variables or a `.env` file. 
See ## Architecture ``` -paperbot-python/ - src/paperbot/ +paperscout-python/ + src/paperscout/ __main__.py Entry point; wires together all components config.py All settings via pydantic-settings models.py Paper dataclass, PaperPrefix/PaperType/FileExt enums sources.py WG21Index (PaperCache-backed), ISOProber, open-std.org scraper monitor.py Scheduler, diff engine, PerUserMatches, PollResult - bot.py Slack Bolt app, MessageQueue, notify_channel, notify_users + scout.py Slack Bolt app, MessageQueue, notify_channel, notify_users storage.py PaperCache, ProbeState, UserWatchlist (all PostgreSQL-backed) db.py ThreadedConnectionPool init and schema DDL health.py HTTP health-check endpoint (GET /health on port 8080) data/ Log files (gitignored); all other state lives in PostgreSQL deploy/ - paperbot.conf Reference nginx site config (443 → 3000, /health → 8080) + paperscout.conf Reference nginx site config (443 → 3000, /health → 8080) SERVER_SETUP.md Full Ubuntu 22.04 server provisioning guide tests/ Dockerfile Multi-stage build (python:3.12-slim) @@ -318,7 +318,7 @@ Typical per-cycle request count: **~1,600–2,000 HEAD requests** (~8–10 s at ### Alerting by Last-Modified -When a HEAD probe returns 200, the bot reads the `Last-Modified` response header. It only sends a Slack notification if the file was modified within `ALERT_MODIFIED_HOURS` (default 24 h). This means: +When a HEAD probe returns 200, the scout reads the `Last-Modified` response header. It only sends a Slack notification if the file was modified within `ALERT_MODIFIED_HOURS` (default 24 h). This means: - A D-paper uploaded today → **alert sent** - A D-paper uploaded 6 months ago that we hadn't tracked → **silently added to discovered, no alert** @@ -347,8 +347,8 @@ The `Last-Modified` timestamp is shown in every notification message. 
### Setup ```bash -git clone https://github.com/CppDigest/paperbot-python.git -cd paperbot-python +git clone https://github.com/cppalliance/paperscout-python.git +cd paperscout-python python -m venv .venv source .venv/bin/activate # Windows: .venv\Scripts\activate pip install -e ".[dev]" @@ -409,7 +409,7 @@ The app container connects to the host's shared PostgreSQL via `host.docker.inte The `.github/workflows/db-backup.yml` workflow runs daily at 3 AM UTC (and supports manual dispatch): 1. SSHes into the server and runs `pg_dump` on the host's PostgreSQL -2. Uploads the dump to Google Cloud Storage (`gs://paperbot-backups/`) +2. Uploads the dump to Google Cloud Storage (`gs://paperscout-backups/`) 3. Old backups are auto-pruned by a GCS lifecycle rule (30 days) Required GitHub Secrets for CD and backups are documented in [`deploy/SERVER_SETUP.md`](deploy/SERVER_SETUP.md#9-github-secrets-checklist). diff --git a/deploy/SERVER_SETUP.md b/deploy/SERVER_SETUP.md index cce1f47..2486248 100644 --- a/deploy/SERVER_SETUP.md +++ b/deploy/SERVER_SETUP.md @@ -1,7 +1,7 @@ # Server Setup — Ubuntu 22.04 Step-by-step guide for provisioning a fresh Ubuntu 22.04 server to run -paperbot alongside other apps that share the same PostgreSQL and nginx. +paperscout alongside other apps that share the same PostgreSQL and nginx. --- @@ -50,7 +50,7 @@ newgrp docker ## 3. PostgreSQL 16 (shared instance) If PostgreSQL is already running for other apps, skip the install and jump -to **Create the paperbot database**. +to **Create the paperscout database**. 
```bash # Add PGDG repo @@ -66,43 +66,43 @@ sudo apt install -y postgresql-16 sudo systemctl enable --now postgresql ``` -### Create the paperbot database +### Create the paperscout database ```bash sudo -u postgres psql <<'SQL' -CREATE USER paperbot WITH PASSWORD ; -CREATE DATABASE paperbot OWNER paperbot; +CREATE USER paperscout WITH PASSWORD ; +CREATE DATABASE paperscout OWNER paperscout; SQL ``` ### Migrate data from an existing deployment (optional) -If you are replacing an old server that already has a running paperbot +If you are replacing an old server that already has a running paperscout database, dump it on the **old** server and restore it on the new one: ```bash # --- On the OLD server --- -pg_dump -U postgres -Fc paperbot > /tmp/paperbot.dump +pg_dump -U postgres -Fc paperscout > /tmp/paperscout.dump # or on Windows -"C:/Program Files/PostgreSQL/18/bin/pg_dump" -U postgres -Fc paperbot > paperbot.dump +"C:/Program Files/PostgreSQL/18/bin/pg_dump" -U postgres -Fc paperscout > paperscout.dump # Copy the dump to the new server -scp /tmp/paperbot.dump @:/tmp/paperbot.dump +scp /tmp/paperscout.dump @:/tmp/paperscout.dump ``` ```bash # --- On the NEW server (after creating the database above) --- -pg_restore -U paperbot -d paperbot --no-owner paperbot.dump -rm /tmp/paperbot.dump +pg_restore -U paperscout -d paperscout --no-owner /tmp/paperscout.dump +rm /tmp/paperscout.dump ``` If the dump is stored in GCS (from the daily backup workflow), download it directly on the new server instead: ```bash -gsutil cp gs://paperbot-backup/paperbot-.dump /tmp/paperbot.dump -pg_restore -U paperbot -h localhost -d paperbot --no-owner /tmp/paperbot.dump -rm /tmp/paperbot.dump +gsutil cp gs://paperscout-backups/paperscout-.dump /tmp/paperscout.dump +pg_restore -U paperscout -h localhost -d paperscout --no-owner /tmp/paperscout.dump +rm /tmp/paperscout.dump ``` ### Allow Docker containers to connect @@ -117,7 +117,7 @@ sudo sed -i "s/^#listen_addresses.*/listen_addresses = '*'/" \
/etc/postgresql/16/main/postgresql.conf # pg_hba.conf — allow the Docker bridge subnet -echo "host paperbot paperbot 172.16.0.0/12 scram-sha-256" | \ +echo "host paperscout paperscout 172.16.0.0/12 scram-sha-256" | \ sudo tee -a /etc/postgresql/16/main/pg_hba.conf sudo systemctl restart postgresql @@ -140,7 +140,7 @@ sudo certbot --nginx -d dev.cppdigest.org ``` Certbot creates a server block for `dev.cppdigest.org` in the default -nginx config. Add the paperbot location blocks **inside that existing +nginx config. Add the paperscout location blocks **inside that existing server block** (do NOT create a separate server block -- nginx will ignore it in favour of the first match). @@ -148,12 +148,12 @@ Open the config and find the `dev.cppdigest.org` server block with `listen 443 ssl;`. Add these lines before its closing `}`: ```nginx - # --- paperbot --- - location /paperbot/health { + # --- paperscout --- + location /paperscout/health { proxy_pass http://127.0.0.1:9101/health; } - location /paperbot/ { + location /paperscout/ { proxy_pass http://127.0.0.1:9100/; proxy_set_header Host $host; proxy_set_header X-Real-IP $remote_addr; @@ -163,7 +163,7 @@ Open the config and find the `dev.cppdigest.org` server block with ``` A reference copy of these blocks lives in -[`deploy/paperbot.conf`](paperbot.conf). +[`deploy/paperscout.conf`](paperscout.conf). ```bash sudo nginx -t && sudo systemctl reload nginx @@ -173,18 +173,18 @@ sudo nginx -t && sudo systemctl reload nginx ## 5. 
App deployment directory -Clone the repo into `/opt/paperbot`: +Clone the repo into `/opt/paperscout`: ```bash sudo mkdir -p /opt -sudo git clone https://github.com/CppDigest/paperbot-python.git /opt/paperbot -sudo chown -R gcp-cppdigest:gcp-cppdigest /opt/paperbot +sudo git clone https://github.com/cppalliance/paperscout-python.git /opt/paperscout +sudo chown -R gcp-cppdigest:gcp-cppdigest /opt/paperscout ``` Create the `.env` file: ```bash -cd /opt/paperbot +cd /opt/paperscout cp .env.example .env # Edit with real credentials: # SLACK_SIGNING_SECRET, SLACK_BOT_TOKEN, DATABASE_URL, NOTIFICATION_CHANNEL @@ -194,7 +194,7 @@ nano .env The `DATABASE_URL` should use `host.docker.internal`: ``` -DATABASE_URL=postgresql://paperbot:@host.docker.internal:5432/paperbot +DATABASE_URL=postgresql://paperscout:@host.docker.internal:5432/paperscout ``` > **Note:** If the password contains special characters, they must be @@ -206,13 +206,13 @@ DATABASE_URL=postgresql://paperbot:@host.docker.internal:5432/paperbot ## 6. First launch ```bash -cd /opt/paperbot +cd /opt/paperscout docker compose up -d --build # Verify sleep 5 curl -sf http://localhost:9101/health | python3 -m json.tool -docker compose logs -f paperbot +docker compose logs -f paperscout ``` --- @@ -222,9 +222,9 @@ docker compose logs -f paperbot If migrating from another server with an existing database: ```bash -gsutil cp gs://paperbot-backup/paperbot-.dump /tmp/paperbot.dump -pg_restore -U paperbot -h localhost -d paperbot -c /tmp/paperbot.dump -rm /tmp/paperbot.dump +gsutil cp gs://paperscout-backups/paperscout-.dump /tmp/paperscout.dump +pg_restore -U paperscout -h localhost -d paperscout -c /tmp/paperscout.dump +rm /tmp/paperscout.dump ``` --- @@ -235,7 +235,7 @@ The `db-backup.yml` GitHub Actions workflow SSHes into the server daily and runs `pg_dump` + `gsutil cp` to upload to GCS. The VM's service account handles authentication automatically — no credentials needed.
-The GCS bucket `paperbot-backup` should have a lifecycle rule to +The GCS bucket `paperscout-backups` should have a lifecycle rule to auto-delete objects older than 30 days (configured in the Cloud Console under the bucket's **Lifecycle** tab). diff --git a/deploy/paperbot.conf b/deploy/paperscout.conf similarity index 77% rename from deploy/paperbot.conf rename to deploy/paperscout.conf index b67b401..578f4cc 100644 --- a/deploy/paperbot.conf +++ b/deploy/paperscout.conf @@ -5,15 +5,15 @@ server { ssl_certificate /etc/letsencrypt/live/dev.cppdigest.org/fullchain.pem; ssl_certificate_key /etc/letsencrypt/live/dev.cppdigest.org/privkey.pem; - # Health endpoint — must come before the general /paperbot/ block + # Health endpoint — takes precedence over the general /paperscout/ block # because nginx uses longest-prefix matching. - location /paperbot/health { + location /paperscout/health { proxy_pass http://127.0.0.1:9101/health; } # Slack events — trailing slashes on both location and proxy_pass - # strip the /paperbot prefix so Bolt receives /slack/events. - location /paperbot/ { + # strip the /paperscout prefix so Bolt receives /slack/events. + location /paperscout/ { proxy_pass http://127.0.0.1:9100/; proxy_set_header Host $host; proxy_set_header X-Real-IP $remote_addr; diff --git a/docker-compose.yml b/docker-compose.yml index 7994f34..caf690c 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,5 +1,5 @@ services: - paperbot: + paperscout: build: .
ports: - "127.0.0.1:9100:3000" diff --git a/pyproject.toml b/pyproject.toml index 77d929c..e8ce903 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -3,7 +3,7 @@ requires = ["hatchling"] build-backend = "hatchling.build" [project] -name = "paperbot" +name = "paperscout" version = "0.1.0" description = "WG21 C++ paper tracker with ISO draft probing and Slack notifications" requires-python = ">=3.10" @@ -24,15 +24,15 @@ dev = [ ] [tool.hatch.build.targets.wheel] -packages = ["src/paperbot"] +packages = ["src/paperscout"] [tool.pytest.ini_options] asyncio_mode = "auto" testpaths = ["tests"] [tool.coverage.run] -source = ["paperbot"] -omit = ["*/paperbot/__main__.py"] +source = ["paperscout"] +omit = ["*/paperscout/__main__.py"] [tool.coverage.report] show_missing = true diff --git a/run b/run index 23b0f39..83ddedf 100644 --- a/run +++ b/run @@ -22,7 +22,7 @@ case "$target" in cov) "$PYTHON" -m pytest tests/ \ - --cov=paperbot \ + --cov=paperscout \ --cov-report=term-missing \ --cov-report=xml \ --cov-fail-under=90 \ diff --git a/src/paperbot/__init__.py b/src/paperscout/__init__.py similarity index 75% rename from src/paperbot/__init__.py rename to src/paperscout/__init__.py index e4cd208..7c69677 100644 --- a/src/paperbot/__init__.py +++ b/src/paperscout/__init__.py @@ -1,6 +1,6 @@ from importlib.metadata import version, PackageNotFoundError try: - __version__ = version("paperbot") + __version__ = version("paperscout") except PackageNotFoundError: __version__ = "0.0.0-dev" diff --git a/src/paperbot/__main__.py b/src/paperscout/__main__.py similarity index 88% rename from src/paperbot/__main__.py rename to src/paperscout/__main__.py index 99f5ff1..f8ff456 100644 --- a/src/paperbot/__main__.py +++ b/src/paperscout/__main__.py @@ -1,4 +1,4 @@ -"""Entry point: python -m paperbot""" +"""Entry point: python -m paperscout""" from __future__ import annotations import asyncio @@ -11,14 +11,14 @@ from datetime import datetime, timezone from .config import settings -from 
.bot import MessageQueue, create_app, notify_channel, notify_users, register_handlers +from .scout import MessageQueue, create_app, notify_channel, notify_users, register_handlers from .db import init_db, init_pool from .health import start_health_server from .monitor import Scheduler from .sources import ISOProber, WG21Index from .storage import ProbeState, UserWatchlist -log = logging.getLogger("paperbot") +log = logging.getLogger("paperscout") def _setup_logging(data_dir: Path, console_level: str = "INFO", @@ -26,7 +26,7 @@ def _setup_logging(data_dir: Path, console_level: str = "INFO", """Configure root logger with: • Console (stderr) — at *console_level*, for interactive monitoring. - • Rotating file (data_dir/paperbot.log) — at *console_level*, rotated + • Rotating file (data_dir/paperscout.log) — at *console_level*, rotated midnight each day, keeping *retention_days* days of history. Noisy third-party libraries are silenced to WARNING regardless. @@ -39,7 +39,7 @@ def _setup_logging(data_dir: Path, console_level: str = "INFO", ) fh = logging.handlers.TimedRotatingFileHandler( - filename=data_dir / "paperbot.log", + filename=data_dir / "paperscout.log", when="midnight", backupCount=retention_days, encoding="utf-8", @@ -73,9 +73,9 @@ async def _async_main() -> None: ) log.info( - "=== Paperbot starting port=%d poll=%dmin data=%s log=%s ===", + "=== Paperscout starting port=%d poll=%dmin data=%s log=%s ===", settings.port, settings.poll_interval_minutes, - data_dir, data_dir / "paperbot.log", + data_dir, data_dir / "paperscout.log", ) log.info( "Settings: hot_lookback=%dmo hot_depth=%d cold_divisor=%d " @@ -131,7 +131,7 @@ def main() -> None: try: asyncio.run(_async_main()) except KeyboardInterrupt: - log.info("=== Paperbot shutting down (KeyboardInterrupt) ===") + log.info("=== Paperscout shutting down (KeyboardInterrupt) ===") sys.exit(0) diff --git a/src/paperbot/config.py b/src/paperscout/config.py similarity index 97% rename from src/paperbot/config.py 
rename to src/paperscout/config.py index 6455915..6ed93e6 100644 --- a/src/paperbot/config.py +++ b/src/paperscout/config.py @@ -83,7 +83,7 @@ class Settings(BaseSettings): cache_ttl_hours: int = 1 # -- Logging -- - # Console log level. The rotating file (data_dir/paperbot.log) always + # Console log level. The rotating file (data_dir/paperscout.log) always # captures DEBUG so nothing is lost for post-hoc analysis. log_level: str = "INFO" # Days of log files to keep (one file per day). diff --git a/src/paperbot/db.py b/src/paperscout/db.py similarity index 100% rename from src/paperbot/db.py rename to src/paperscout/db.py diff --git a/src/paperbot/health.py b/src/paperscout/health.py similarity index 100% rename from src/paperbot/health.py rename to src/paperscout/health.py diff --git a/src/paperbot/models.py b/src/paperscout/models.py similarity index 100% rename from src/paperbot/models.py rename to src/paperscout/models.py diff --git a/src/paperbot/monitor.py b/src/paperscout/monitor.py similarity index 100% rename from src/paperbot/monitor.py rename to src/paperscout/monitor.py diff --git a/src/paperbot/bot.py b/src/paperscout/scout.py similarity index 96% rename from src/paperbot/bot.py rename to src/paperscout/scout.py index e2b822c..cc7ef57 100644 --- a/src/paperbot/bot.py +++ b/src/paperscout/scout.py @@ -276,9 +276,9 @@ def _dispatch(text: str, user_id: str, channel_type: str, say, reply_opts: dict) "Commands:\n" "• `watchlist add|remove|list [name-or-paper-number]` — " "manage your personal watchlist (DM only)\n" - "• `status` — show bot status\n" - "• `version` — show bot version\n" - "• `uptime` — show how long the bot has been running\n" + "• `status` — show scout status\n" + "• `version` — show scout version\n" + "• `uptime` — show how long the scout has been running\n" "• `help` — this message" ), **reply_opts, @@ -329,14 +329,14 @@ def handle_message(event, context, say): user_id = event.get("user", "") if channel_type == "im": - # Strip bot mention 
if present (e.g. user typed @bot watchlist ...) + # Strip scout mention if present (e.g. user typed @scout watchlist ...) if bot_id and f"<@{bot_id}>" in text: text = text.split(f"<@{bot_id}>", 1)[-1].strip() if text: _dispatch(text, user_id, channel_type, say=say, reply_opts=_reply_opts(event)) elif channel_type == "mpim": - # Only respond if the bot is mentioned + # Only respond if the scout is mentioned if bot_id and f"<@{bot_id}>" in text: text = text.split(f"<@{bot_id}>", 1)[-1].strip() if text: @@ -418,7 +418,7 @@ def _handle_status(state: ProbeState, paper_count_fn, say, reply_opts: dict) -> last_str = _dt.fromtimestamp(last).strftime("%Y-%m-%d %H:%M:%S") if last else "never" say( text=( - f"*Paperbot Status*\n" + f"*Paperscout Status*\n" f"• Papers loaded: {paper_count_fn():,}\n" f"• Last poll: {last_str}\n" f"• Poll interval: {settings.poll_interval_minutes} min\n" @@ -433,7 +433,7 @@ def _handle_status(state: ProbeState, paper_count_fn, say, reply_opts: dict) -> def _handle_version(say, reply_opts: dict) -> None: from . 
import __version__ - say(text=f"Paperbot v{__version__}", **reply_opts) + say(text=f"Paperscout v{__version__}", **reply_opts) def _format_uptime(delta) -> str: @@ -458,6 +458,6 @@ def _handle_uptime(launch_time: datetime | None, say, reply_opts: dict) -> None: delta = now - launch_time started_str = launch_time.strftime("%Y-%m-%d %H:%M:%S UTC") say( - text=f"Paperbot started {_format_uptime(delta)} ago ({started_str})", + text=f"Paperscout started {_format_uptime(delta)} ago ({started_str})", **reply_opts, ) diff --git a/src/paperbot/sources.py b/src/paperscout/sources.py similarity index 100% rename from src/paperbot/sources.py rename to src/paperscout/sources.py diff --git a/src/paperbot/storage.py b/src/paperscout/storage.py similarity index 100% rename from src/paperbot/storage.py rename to src/paperscout/storage.py diff --git a/tests/conftest.py b/tests/conftest.py index 7ba6ab1..727ac48 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,4 +1,4 @@ -"""Shared fixtures and helpers for the paperbot test suite.""" +"""Shared fixtures and helpers for the paperscout test suite.""" from __future__ import annotations import json as _json @@ -6,10 +6,10 @@ import pytest -from paperbot.config import Settings -from paperbot.models import Paper -from paperbot.storage import ProbeState, UserWatchlist -from paperbot.sources import WG21Index +from paperscout.config import Settings +from paperscout.models import Paper +from paperscout.storage import ProbeState, UserWatchlist +from paperscout.sources import WG21Index # ── FakePool ───────────────────────────────────────────────────────────────── @@ -200,7 +200,7 @@ def make_test_settings(**overrides) -> Settings: notify_on_frontier_hit=True, notify_on_any_draft=True, notify_on_dp_transition=True, - data_dir=Path("/tmp/paperbot-test"), + data_dir=Path("/tmp/paperscout-test"), cache_ttl_hours=1, ) base.update(overrides) diff --git a/tests/test_health.py b/tests/test_health.py index 11f97f8..bf9b2d1 100644 --- 
a/tests/test_health.py +++ b/tests/test_health.py @@ -1,4 +1,4 @@ -"""Tests for paperbot.health.""" +"""Tests for paperscout.health.""" from __future__ import annotations import json @@ -7,7 +7,7 @@ import pytest -from paperbot.health import start_health_server +from paperscout.health import start_health_server def _find_free_port() -> int: diff --git a/tests/test_models.py b/tests/test_models.py index 5cbca1f..831b1c0 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -1,9 +1,9 @@ -"""Tests for paperbot.models.""" +"""Tests for paperscout.models.""" from __future__ import annotations import pytest -from paperbot.models import FileExt, Paper, PaperPrefix, PaperType +from paperscout.models import FileExt, Paper, PaperPrefix, PaperType # ── Enum sanity ────────────────────────────────────────────────────────────── diff --git a/tests/test_monitor.py b/tests/test_monitor.py index 658a988..4fbcfbe 100644 --- a/tests/test_monitor.py +++ b/tests/test_monitor.py @@ -1,4 +1,4 @@ -"""Tests for paperbot.monitor.""" +"""Tests for paperscout.monitor.""" from __future__ import annotations import asyncio @@ -7,16 +7,16 @@ import pytest -from paperbot.models import Paper -from paperbot.monitor import ( +from paperscout.models import Paper +from paperscout.monitor import ( DiffResult, PerUserMatches, PollResult, Scheduler, diff_snapshots, ) -from paperbot.sources import ISOProber, ProbeHit, WG21Index -from paperbot.storage import ProbeState, UserWatchlist +from paperscout.sources import ISOProber, ProbeHit, WG21Index +from paperscout.storage import ProbeState, UserWatchlist from tests.conftest import make_test_settings @@ -111,7 +111,7 @@ def test_defaults(self): assert result.per_user_matches == {} def test_explicit_dp_transitions(self): - from paperbot.monitor import DPTransition + from paperscout.monitor import DPTransition diff = DiffResult(new_papers=[], updated_papers=[]) paper = Paper(id="P2300R11") tr = DPTransition(paper=paper, draft_url="http://x", 
last_modified=None, discovered_at=0.0) @@ -378,7 +378,7 @@ async def mock_poll_once(): async def capture_sleep(duration: float): slept.append(duration) - with patch("paperbot.monitor.time") as mock_time: + with patch("paperscout.monitor.time") as mock_time: mock_time.monotonic.side_effect = [0.0, 360.0, 0.0] scheduler.poll_once = mock_poll_once with patch("asyncio.sleep", capture_sleep): @@ -404,7 +404,7 @@ async def mock_poll_once(): async def capture_sleep(duration: float): slept.append(duration) - with patch("paperbot.monitor.time") as mock_time: + with patch("paperscout.monitor.time") as mock_time: mock_time.monotonic.side_effect = [0.0, 2000.0, 0.0] scheduler.poll_once = mock_poll_once with patch("asyncio.sleep", capture_sleep): diff --git a/tests/test_bot.py b/tests/test_scout.py similarity index 93% rename from tests/test_bot.py rename to tests/test_scout.py index e731ff6..5f10869 100644 --- a/tests/test_bot.py +++ b/tests/test_scout.py @@ -1,4 +1,4 @@ -"""Tests for paperbot.bot.""" +"""Tests for paperscout.scout.""" from __future__ import annotations from datetime import datetime, timezone, timedelta @@ -6,11 +6,11 @@ import pytest -from paperbot.models import Paper -from paperbot.monitor import DiffResult, DPTransition, PerUserMatches, PollResult -from paperbot.sources import ProbeHit -from paperbot.storage import ProbeState, UserWatchlist -from paperbot.bot import ( +from paperscout.models import Paper +from paperscout.monitor import DiffResult, DPTransition, PerUserMatches, PollResult +from paperscout.sources import ProbeHit +from paperscout.storage import ProbeState, UserWatchlist +from paperscout.scout import ( MessageQueue, _batch_lines, _fmt_lm, @@ -126,14 +126,14 @@ class TestNotifyChannel: def test_no_channel_returns_silently(self): app = MagicMock() mq = MagicMock() - with patch("paperbot.bot.settings", _make_settings(channel="")): + with patch("paperscout.scout.settings", _make_settings(channel="")): notify_channel(app, _make_result(), mq) 
mq.enqueue.assert_not_called() def test_empty_result_posts_nothing(self): app = MagicMock() mq = MagicMock() - with patch("paperbot.bot.settings", _make_settings()): + with patch("paperscout.scout.settings", _make_settings()): notify_channel(app, _make_result(), mq) mq.enqueue.assert_not_called() @@ -144,7 +144,7 @@ def test_frontier_hits_batched_with_count_and_links(self): mq = MagicMock() hits = [_recent_hit(tier="frontier", number=n) for n in (4033, 4034, 4035)] result = _make_result(probe_hits=hits) - with patch("paperbot.bot.settings", _make_settings()): + with patch("paperscout.scout.settings", _make_settings()): notify_channel(app, result, mq) mq.enqueue.assert_called_once() text = mq.enqueue.call_args[0][1] @@ -157,7 +157,7 @@ def test_other_probe_hits_batched(self): mq = MagicMock() hits = [_recent_hit(tier="recent", number=n) for n in (5000, 5001)] result = _make_result(probe_hits=hits) - with patch("paperbot.bot.settings", _make_settings()): + with patch("paperscout.scout.settings", _make_settings()): notify_channel(app, result, mq) text = mq.enqueue.call_args[0][1] assert "2 new draft(s) discovered" in text @@ -167,7 +167,7 @@ def test_cold_hits_batched_with_other(self): mq = MagicMock() hit = _recent_hit(tier="cold") result = _make_result(probe_hits=[hit]) - with patch("paperbot.bot.settings", _make_settings()): + with patch("paperscout.scout.settings", _make_settings()): notify_channel(app, result, mq) assert "1 new draft(s) discovered" in mq.enqueue.call_args[0][1] @@ -175,7 +175,7 @@ def test_frontier_suppressed_when_disabled(self): app = MagicMock() mq = MagicMock() result = _make_result(probe_hits=[_recent_hit(tier="frontier")]) - with patch("paperbot.bot.settings", _make_settings(notify_on_frontier_hit=False)): + with patch("paperscout.scout.settings", _make_settings(notify_on_frontier_hit=False)): notify_channel(app, result, mq) mq.enqueue.assert_not_called() @@ -184,7 +184,7 @@ def test_any_draft_suppressed_when_disabled(self): mq = MagicMock() 
for tier in ("recent", "cold"): result = _make_result(probe_hits=[_recent_hit(tier=tier)]) - with patch("paperbot.bot.settings", _make_settings(notify_on_any_draft=False)): + with patch("paperscout.scout.settings", _make_settings(notify_on_any_draft=False)): notify_channel(app, result, mq) mq.enqueue.assert_not_called() @@ -194,7 +194,7 @@ def test_last_modified_shown_in_batch(self): lm = datetime.now(timezone.utc) - timedelta(hours=3) hit = _recent_hit(tier="frontier", last_modified=lm) result = _make_result(probe_hits=[hit]) - with patch("paperbot.bot.settings", _make_settings()): + with patch("paperscout.scout.settings", _make_settings()): notify_channel(app, result, mq) assert "3h ago" in mq.enqueue.call_args[0][1] @@ -209,7 +209,7 @@ def test_dp_all_transitions_are_batched(self): draft_url="https://isocpp.org/files/papers/D2300R11.pdf", last_modified=1_700_000_000.0, discovered_at=1_699_900_000.0) result = _make_result(dp_transitions=[tr]) - with patch("paperbot.bot.settings", _make_settings()): + with patch("paperscout.scout.settings", _make_settings()): notify_channel(app, result, mq) text = mq.enqueue.call_args[0][1] assert "draft(s) now published" in text @@ -224,7 +224,7 @@ def test_dp_suppressed_when_disabled(self): draft_url="https://isocpp.org/files/papers/D2300R11.pdf", last_modified=None, discovered_at=0.0) result = _make_result(dp_transitions=[tr]) - with patch("paperbot.bot.settings", _make_settings(notify_on_dp_transition=False)): + with patch("paperscout.scout.settings", _make_settings(notify_on_dp_transition=False)): notify_channel(app, result, mq) mq.enqueue.assert_not_called() @@ -236,7 +236,7 @@ def test_dp_batch_contains_draft_link(self): draft_url="https://isocpp.org/files/papers/D9999R0.pdf", last_modified=None, discovered_at=0.0) result = _make_result(dp_transitions=[tr]) - with patch("paperbot.bot.settings", _make_settings()): + with patch("paperscout.scout.settings", _make_settings()): notify_channel(app, result, mq) text = 
mq.enqueue.call_args[0][1] assert "draft(s) now published" in text @@ -445,7 +445,7 @@ class TestHandleStatus: def test_status_never_polled(self, fake_pool): state = ProbeState(fake_pool) say = MagicMock() - with patch("paperbot.bot.settings", _make_settings()): + with patch("paperscout.scout.settings", _make_settings()): _handle_status(state, lambda: 42, say, {}) text = say.call_args[1]["text"] assert "42" in text and "never" in text @@ -454,7 +454,7 @@ def test_status_after_poll(self, fake_pool): state = ProbeState(fake_pool) state.touch_poll() say = MagicMock() - with patch("paperbot.bot.settings", _make_settings()): + with patch("paperscout.scout.settings", _make_settings()): _handle_status(state, lambda: 100, say, {}) text = say.call_args[1]["text"] assert "100" in text and "never" not in text @@ -482,7 +482,7 @@ def decorator(fn): def test_app_mention_status(self, fake_pool): registered, _, _ = self._setup(fake_pool) say = MagicMock() - with patch("paperbot.bot.settings", _make_settings()): + with patch("paperscout.scout.settings", _make_settings()): registered["app_mention"]( event={"text": "<@U1> status", "ts": "1", "user": "U1"}, context={"bot_user_id": "U1"}, @@ -513,7 +513,7 @@ def test_app_mention_only_mention_no_command(self, fake_pool): def test_message_dm_dispatches(self, fake_pool): registered, _, _ = self._setup(fake_pool) say = MagicMock() - with patch("paperbot.bot.settings", _make_settings()): + with patch("paperscout.scout.settings", _make_settings()): registered["message"]( event={"text": "status", "channel_type": "im", "ts": "1", "user": "U1"}, context={"bot_user_id": "U1"}, @@ -524,7 +524,7 @@ def test_message_dm_dispatches(self, fake_pool): def test_message_dm_strips_mention(self, fake_pool): registered, _, _ = self._setup(fake_pool) say = MagicMock() - with patch("paperbot.bot.settings", _make_settings()): + with patch("paperscout.scout.settings", _make_settings()): registered["message"]( event={"text": "<@U1> status", "channel_type": 
"im", "ts": "1", "user": "U1"}, context={"bot_user_id": "U1"}, @@ -578,7 +578,7 @@ def test_message_mpim_watchlist_gets_error(self, fake_pool): def test_message_mpim_status_responds(self, fake_pool): registered, _, _ = self._setup(fake_pool) say = MagicMock() - with patch("paperbot.bot.settings", _make_settings()): + with patch("paperscout.scout.settings", _make_settings()): registered["message"]( event={"text": "<@U1> status", "channel_type": "mpim", "ts": "1", "user": "U1"}, @@ -666,11 +666,11 @@ def test_dispatch_empty_text(self, fake_pool): class TestHandleVersion: def test_version_contains_version_string(self): say = MagicMock() - with patch("paperbot.__version__", "1.2.3"): + with patch("paperscout.__version__", "1.2.3"): _handle_version(say, {}) text = say.call_args[1]["text"] assert "1.2.3" in text - assert "Paperbot" in text + assert "Paperscout" in text def test_version_forwards_reply_opts(self): say = MagicMock() @@ -701,7 +701,7 @@ def test_handle_uptime_with_launch_time(self): say = MagicMock() launch = datetime(2026, 3, 16, 10, 0, 0, tzinfo=timezone.utc) now = datetime(2026, 3, 16, 13, 30, 0, tzinfo=timezone.utc) - with patch("paperbot.bot.datetime") as mock_dt: + with patch("paperscout.scout.datetime") as mock_dt: mock_dt.now.return_value = now mock_dt.side_effect = lambda *a, **kw: datetime(*a, **kw) _handle_uptime(launch, say, {}) @@ -750,7 +750,7 @@ def test_dispatch_version(self, fake_pool): say=say, ) say.assert_called_once() - assert "Paperbot" in say.call_args[1]["text"] + assert "Paperscout" in say.call_args[1]["text"] def test_dispatch_uptime(self, fake_pool): launch = datetime.now(timezone.utc) - timedelta(hours=2, minutes=15) @@ -783,12 +783,12 @@ def test_help_mentions_version_and_uptime(self, fake_pool): class TestCreateApp: def test_create_app_uses_settings(self): - from paperbot.bot import create_app + from paperscout.scout import create_app mock_settings = MagicMock() mock_settings.slack_bot_token = "xoxb-test" 
mock_settings.slack_signing_secret = "secret" - with patch("paperbot.bot.settings", mock_settings): - with patch("paperbot.bot.App") as mock_app_cls: + with patch("paperscout.scout.settings", mock_settings): + with patch("paperscout.scout.App") as mock_app_cls: create_app() mock_app_cls.assert_called_once_with( token="xoxb-test", diff --git a/tests/test_sources.py b/tests/test_sources.py index 8fe9397..a08c684 100644 --- a/tests/test_sources.py +++ b/tests/test_sources.py @@ -1,4 +1,4 @@ -"""Tests for paperbot.sources.""" +"""Tests for paperscout.sources.""" from __future__ import annotations import asyncio @@ -9,8 +9,8 @@ import httpx import pytest -from paperbot.models import Paper -from paperbot.sources import ( +from paperscout.models import Paper +from paperscout.sources import ( ISOProber, OpenStdEntry, ProbeHit, @@ -20,7 +20,7 @@ _parse_open_std_html, scrape_open_std, ) -from paperbot.storage import ProbeState, UserWatchlist +from paperscout.storage import ProbeState, UserWatchlist from tests.conftest import SAMPLE_INDEX_DATA, make_test_settings @@ -120,7 +120,7 @@ async def test_download_success(self, fake_pool): index = WG21Index(fake_pool) mock_resp = _make_response(200, json_data=SAMPLE_INDEX_DATA) mock_client = _make_async_client(get_resp=mock_resp) - with patch("paperbot.sources.httpx.AsyncClient") as mock_cls: + with patch("paperscout.sources.httpx.AsyncClient") as mock_cls: mock_cls.return_value.__aenter__ = AsyncMock(return_value=mock_client) mock_cls.return_value.__aexit__ = AsyncMock(return_value=False) result = await index._download() @@ -130,7 +130,7 @@ async def test_download_non_dict_response(self, fake_pool): index = WG21Index(fake_pool) mock_resp = _make_response(200, json_data=[1, 2, 3]) mock_client = _make_async_client(get_resp=mock_resp) - with patch("paperbot.sources.httpx.AsyncClient") as mock_cls: + with patch("paperscout.sources.httpx.AsyncClient") as mock_cls: mock_cls.return_value.__aenter__ = AsyncMock(return_value=mock_client) 
mock_cls.return_value.__aexit__ = AsyncMock(return_value=False) result = await index._download() @@ -140,7 +140,7 @@ async def test_download_http_error(self, fake_pool): index = WG21Index(fake_pool) mock_client = AsyncMock() mock_client.get = AsyncMock(side_effect=httpx.HTTPError("connect failed")) - with patch("paperbot.sources.httpx.AsyncClient") as mock_cls: + with patch("paperscout.sources.httpx.AsyncClient") as mock_cls: mock_cls.return_value.__aenter__ = AsyncMock(return_value=mock_client) mock_cls.return_value.__aexit__ = AsyncMock(return_value=False) result = await index._download() @@ -279,7 +279,7 @@ async def test_returns_empty_on_stream_exception(self): async def test_respects_byte_cap(self): """stream() should be cut off after _PDF_MAX_BYTES; no crash.""" - from paperbot.sources import _PDF_MAX_BYTES + from paperscout.sources import _PDF_MAX_BYTES big_chunk = b"x" * (_PDF_MAX_BYTES + 1) # Even though the chunk exceeds the cap, _fetch_pdf_text must not raise. # Passing invalid PDF bytes → fitz raises → caught → returns "". 
@@ -842,7 +842,7 @@ async def raising_head(*args, **kwargs): mock_client = AsyncMock() mock_client.head = raising_head - with patch("paperbot.sources.httpx.AsyncClient") as mock_cls: + with patch("paperscout.sources.httpx.AsyncClient") as mock_cls: mock_cls.return_value.__aenter__ = AsyncMock(return_value=mock_client) mock_cls.return_value.__aexit__ = AsyncMock(return_value=False) with caplog.at_level(logging.DEBUG): @@ -871,7 +871,7 @@ async def test_stats_reset_each_cycle(self, fake_pool): prober._stats["miss"] = 999 # manually dirty mock_client = _make_async_client(head_resp=_make_response(404)) - with patch("paperbot.sources.httpx.AsyncClient") as mock_cls: + with patch("paperscout.sources.httpx.AsyncClient") as mock_cls: mock_cls.return_value.__aenter__ = AsyncMock(return_value=mock_client) mock_cls.return_value.__aexit__ = AsyncMock(return_value=False) await prober.run_cycle() @@ -914,7 +914,7 @@ async def mock_get(url, **kwargs): mock_client.head = mock_head mock_client.get = mock_get - with patch("paperbot.sources.httpx.AsyncClient") as mock_cls: + with patch("paperscout.sources.httpx.AsyncClient") as mock_cls: mock_cls.return_value.__aenter__ = AsyncMock(return_value=mock_client) mock_cls.return_value.__aexit__ = AsyncMock(return_value=False) hits = await prober.run_cycle() @@ -949,7 +949,7 @@ async def mock_head(url, **_): mock_client.head = mock_head mock_client.get = AsyncMock(return_value=_make_response(404)) - with patch("paperbot.sources.httpx.AsyncClient") as mock_cls: + with patch("paperscout.sources.httpx.AsyncClient") as mock_cls: mock_cls.return_value.__aenter__ = AsyncMock(return_value=mock_client) mock_cls.return_value.__aexit__ = AsyncMock(return_value=False) hits = await prober.run_cycle() @@ -1009,7 +1009,7 @@ def test_parse_open_std_html_skips_no_paper_link(self): async def test_scrape_open_std_success(self): mock_resp = _make_response(200, text=OPEN_STD_HTML) mock_client = _make_async_client(get_resp=mock_resp) - with 
patch("paperbot.sources.httpx.AsyncClient") as mock_cls: + with patch("paperscout.sources.httpx.AsyncClient") as mock_cls: mock_cls.return_value.__aenter__ = AsyncMock(return_value=mock_client) mock_cls.return_value.__aexit__ = AsyncMock(return_value=False) entries = await scrape_open_std(2024) @@ -1018,7 +1018,7 @@ async def test_scrape_open_std_success(self): async def test_scrape_open_std_http_error(self): mock_client = AsyncMock() mock_client.get = AsyncMock(side_effect=httpx.HTTPError("fail")) - with patch("paperbot.sources.httpx.AsyncClient") as mock_cls: + with patch("paperscout.sources.httpx.AsyncClient") as mock_cls: mock_cls.return_value.__aenter__ = AsyncMock(return_value=mock_client) mock_cls.return_value.__aexit__ = AsyncMock(return_value=False) entries = await scrape_open_std(2024) @@ -1027,7 +1027,7 @@ async def test_scrape_open_std_http_error(self): async def test_scrape_open_std_uses_current_year_by_default(self): mock_resp = _make_response(200, text="
") mock_client = _make_async_client(get_resp=mock_resp) - with patch("paperbot.sources.httpx.AsyncClient") as mock_cls: + with patch("paperscout.sources.httpx.AsyncClient") as mock_cls: mock_cls.return_value.__aenter__ = AsyncMock(return_value=mock_client) mock_cls.return_value.__aexit__ = AsyncMock(return_value=False) await scrape_open_std() diff --git a/tests/test_storage.py b/tests/test_storage.py index 39b9e48..55fec1b 100644 --- a/tests/test_storage.py +++ b/tests/test_storage.py @@ -1,4 +1,4 @@ -"""Tests for paperbot.storage (PostgreSQL-backed via FakePool).""" +"""Tests for paperscout.storage (PostgreSQL-backed via FakePool).""" from __future__ import annotations import time @@ -6,8 +6,8 @@ import pytest -from paperbot.models import Paper -from paperbot.storage import ( +from paperscout.models import Paper +from paperscout.storage import ( PaperCache, ProbeState, UserWatchlist, @@ -36,7 +36,7 @@ def test_is_stale_with_zero_ttl(self, fake_pool): def test_is_stale_when_old(self, fake_pool): cache = PaperCache(fake_pool, ttl_hours=1.0) cache.write({"x": 1}) - with patch("paperbot.storage.time") as mock_time: + with patch("paperscout.storage.time") as mock_time: mock_time.time.return_value = 1e12 assert not cache.is_fresh() @@ -58,7 +58,7 @@ def test_read_if_fresh_returns_data_when_fresh(self, fake_pool): def test_read_if_fresh_returns_none_when_stale(self, fake_pool): cache = PaperCache(fake_pool, ttl_hours=1.0) cache.write({"a": 1}) - with patch("paperbot.storage.time") as mock_time: + with patch("paperscout.storage.time") as mock_time: mock_time.time.return_value = 1e12 assert cache.read_if_fresh() is None @@ -297,7 +297,7 @@ def test_get_all_watched_paper_nums_union(self, fake_pool): assert nums == {2300, 2301} def test_matches_for_users_author_match(self, fake_pool): - from paperbot.monitor import PerUserMatches + from paperscout.monitor import PerUserMatches wl = UserWatchlist(fake_pool) wl.add("U1", "niebler") paper = Paper(id="P2300R11", title="X", 
author="Eric Niebler") @@ -307,7 +307,7 @@ def test_matches_for_users_author_match(self, fake_pool): assert paper in matched_papers def test_matches_for_users_paper_match(self, fake_pool): - from paperbot.monitor import PerUserMatches + from paperscout.monitor import PerUserMatches wl = UserWatchlist(fake_pool) wl.add("U1", "2300") paper = Paper(id="P2300R11", title="X", author="Unknown") @@ -327,7 +327,7 @@ def test_matches_for_users_empty_watchlist(self, fake_pool): assert wl.matches_for_users([paper], []) == {} def test_matches_for_users_probe_hit_author(self, fake_pool): - from paperbot.sources import ProbeHit + from paperscout.sources import ProbeHit wl = UserWatchlist(fake_pool) wl.add("U1", "niebler") hit = ProbeHit( @@ -340,7 +340,7 @@ def test_matches_for_users_probe_hit_author(self, fake_pool): assert len(result["U1"].probe_hits) == 1 def test_matches_for_users_probe_hit_paper_number(self, fake_pool): - from paperbot.sources import ProbeHit + from paperscout.sources import ProbeHit wl = UserWatchlist(fake_pool) wl.add("U1", "9999") hit = ProbeHit( From 59e9eec6b8bc8f5829854c7166b7515bc936575b Mon Sep 17 00:00:00 2001 From: mac Date: Sat, 2 May 2026 03:44:58 +0800 Subject: [PATCH 2/2] #02-addressed coderabbitai review --- .github/workflows/db-backup.yml | 5 ++-- deploy/SERVER_SETUP.md | 22 ++++++++--------- tests/conftest.py | 43 +++++++++++++++++++++------------ 3 files changed, 41 insertions(+), 29 deletions(-) diff --git a/.github/workflows/db-backup.yml b/.github/workflows/db-backup.yml index 8a62644..e317d24 100644 --- a/.github/workflows/db-backup.yml +++ b/.github/workflows/db-backup.yml @@ -19,8 +19,9 @@ jobs: port: ${{ secrets.SERVER_PORT || 22 }} script: | set -euo pipefail - DUMP="/tmp/paperscout-$(date +%Y%m%d).dump" + STAMP="$(date +%Y%m%d)" + DUMP="/tmp/paperscout-${STAMP}.dump" sudo -u postgres pg_dump -Fc paperscout > "$DUMP" - gsutil cp "$DUMP" "gs://paperscout-backups/paperscout-$(date +%Y%m%d).dump" + gsutil cp "$DUMP" 
"gs://paperscout-backups/paperscout-${STAMP}.dump" rm -f "$DUMP" diff --git a/deploy/SERVER_SETUP.md b/deploy/SERVER_SETUP.md index 2486248..8f278df 100644 --- a/deploy/SERVER_SETUP.md +++ b/deploy/SERVER_SETUP.md @@ -92,7 +92,7 @@ scp /tmp/paperscout.dump @:/tmp/paperscout.dump ```bash # --- On the NEW server (after creating the database above) --- -pg_restore -U paperscout -d paperscout --no-owner paperscout.dump +pg_restore -U paperscout -d paperscout --no-owner /tmp/paperscout.dump rm /tmp/paperscout.dump ``` @@ -100,7 +100,7 @@ If the dump is stored in GCS (from the daily backup workflow), download it directly on the new server instead: ```bash -gsutil cp gs://paperscout-backup/paperscout-.dump /tmp/paperscout.dump +gsutil cp gs://paperscout-backups/paperscout-.dump /tmp/paperscout.dump pg_restore -U paperscout -h localhost -d paperscout --no-owner /tmp/paperscout.dump rm /tmp/paperscout.dump ``` @@ -222,7 +222,7 @@ docker compose logs -f paperscout If migrating from another server with an existing database: ```bash -gsutil cp gs://paperscout-backup/paperscout-.dump /tmp/paperscout.dump +gsutil cp gs://paperscout-backups/paperscout-.dump /tmp/paperscout.dump pg_restore -U paperscout -h localhost -d paperscout -c /tmp/paperscout.dump rm /tmp/paperscout.dump ``` @@ -235,7 +235,7 @@ The `db-backup.yml` GitHub Actions workflow SSHes into the server daily and runs `pg_dump` + `gsutil cp` to upload to GCS. The VM's service account handles authentication automatically — no credentials needed. -The GCS bucket `paperscout-backup` should have a lifecycle rule to +The GCS bucket `paperscout-backups` should have a lifecycle rule to auto-delete objects older than 30 days (configured in the Cloud Console under the bucket's **Lifecycle** tab). @@ -245,12 +245,12 @@ under the bucket's **Lifecycle** tab). 
Configure these in the repo under **Settings → Secrets and variables → Actions**: -| Secret | Purpose | -| ---------------- | ------------------------------------ | -| `SERVER_HOST` | Server IP or hostname | -| `SERVER_USER` | SSH username (e.g. `gcp-cppdigest`) | -| `SERVER_SSH_KEY` | Private SSH key for the deploy user | -| `SERVER_PORT` | SSH port (optional, defaults to 22) | +| Secret | Purpose | +| ---------------- | ----------------------------------- | +| `SERVER_HOST` | Server IP or hostname | +| `SERVER_USER` | SSH username (e.g. `gcp-cppdigest`) | +| `SERVER_SSH_KEY` | Private SSH key for the deploy user | +| `SERVER_PORT` | SSH port (optional, defaults to 22) | `GITHUB_TOKEN` is provided automatically by GitHub Actions. -GCS authentication uses the VM's service account — no extra secrets needed. \ No newline at end of file +GCS authentication uses the VM's service account — no extra secrets needed. diff --git a/tests/conftest.py b/tests/conftest.py index 727ac48..80dda73 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,4 +1,5 @@ """Shared fixtures and helpers for the paperscout test suite.""" + from __future__ import annotations import json as _json @@ -10,20 +11,21 @@ from paperscout.models import Paper from paperscout.storage import ProbeState, UserWatchlist from paperscout.sources import WG21Index - +import tempfile # ── FakePool ───────────────────────────────────────────────────────────────── # An in-memory substitute for psycopg2.pool.ThreadedConnectionPool that # interprets the exact SQL patterns used by PaperCache, ProbeState, and # UserWatchlist. No real PostgreSQL server is required. 
+ class _FakeStore: def __init__(self): - self.paper_cache: dict = {} # key -> (data_dict, written_at) - self.discovered: dict = {} # url -> (last_modified, discovered_at) - self.misses: dict = {} # paper_num -> count + self.paper_cache: dict = {} # key -> (data_dict, written_at) + self.discovered: dict = {} # url -> (last_modified, discovered_at) + self.misses: dict = {} # paper_num -> count self.last_poll: float = 0.0 - self.watchlist: dict = {} # (user_id, entry) -> entry_type + self.watchlist: dict = {} # (user_id, entry) -> entry_type class _FakeCursor: @@ -33,8 +35,11 @@ def __init__(self, store: _FakeStore): self._row = None self._rows: list = [] - def __enter__(self): return self - def __exit__(self, *_): pass + def __enter__(self): + return self + + def __exit__(self, *_): + pass def execute(self, sql: str, params=()): self._row = None @@ -75,10 +80,7 @@ def execute(self, sql: str, params=()): self._row = (r[0], r[1]) if r else None elif "SELECT URL, LAST_MODIFIED, DISCOVERED_AT FROM DISCOVERED_URLS" in su: - self._rows = [ - (url, lm, da) - for url, (lm, da) in self._s.discovered.items() - ] + self._rows = [(url, lm, da) for url, (lm, da) in self._s.discovered.items()] elif "SELECT URL FROM DISCOVERED_URLS" in su and "LAST_MODIFIED" not in su: self._rows = [(url,) for url in self._s.discovered] @@ -114,7 +116,9 @@ def execute(self, sql: str, params=()): self._s.watchlist[key] = etype self.rowcount = 1 - elif "DELETE FROM USER_WATCHLIST WHERE SLACK_USER_ID" in su and "AND ENTRY" in su: + elif ( + "DELETE FROM USER_WATCHLIST WHERE SLACK_USER_ID" in su and "AND ENTRY" in su + ): uid, entry = params[0], params[1] key = (uid, entry) if key in self._s.watchlist: @@ -127,7 +131,9 @@ def execute(self, sql: str, params=()): self._rows = sorted(rows, key=lambda x: (x[1], x[0])) elif "SELECT ENTRY FROM USER_WATCHLIST WHERE ENTRY_TYPE" in su: - self._rows = [(e,) for (_, e), t in self._s.watchlist.items() if t == "paper"] + self._rows = [ + (e,) for (_, e), t in 
self._s.watchlist.items() if t == "paper" + ] elif "SELECT SLACK_USER_ID, ENTRY, ENTRY_TYPE FROM USER_WATCHLIST" in su: self._rows = [(u, e, t) for (u, e), t in self._s.watchlist.items()] @@ -146,8 +152,11 @@ def __init__(self, store: _FakeStore): def cursor(self): return self._cur - def commit(self): pass - def rollback(self): pass + def commit(self): + pass + + def rollback(self): + pass class FakePool: @@ -169,6 +178,7 @@ def putconn(self, conn): # ── Settings factory ────────────────────────────────────────────────────────── + def make_test_settings(**overrides) -> Settings: """Build a Settings instance with safe test defaults (no I/O, no credentials).""" base: dict = dict( @@ -200,7 +210,7 @@ def make_test_settings(**overrides) -> Settings: notify_on_frontier_hit=True, notify_on_any_draft=True, notify_on_dp_transition=True, - data_dir=Path("/tmp/paperscout-test"), + data_dir=Path(tempfile.mkdtemp(prefix="paperscout-test-")), cache_ttl_hours=1, ) base.update(overrides) @@ -241,6 +251,7 @@ def make_test_settings(**overrides) -> Settings: # ── Fixtures ────────────────────────────────────────────────────────────────── + @pytest.fixture def fake_pool() -> FakePool: """Fresh in-memory pool for each test."""