From 2f5e85fe8815c9671b88ce690c3926af03889555 Mon Sep 17 00:00:00 2001 From: Hans Heinemann Date: Sun, 15 Mar 2026 18:57:49 -0400 Subject: [PATCH] fix(gh-monitor): use >= cursor + seen_ids dedup to avoid missing same-second events --- tools/gh-monitor/poll.py | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/tools/gh-monitor/poll.py b/tools/gh-monitor/poll.py index 8f2bd80..638a29d 100644 --- a/tools/gh-monitor/poll.py +++ b/tools/gh-monitor/poll.py @@ -91,6 +91,7 @@ def _normalize(event_type: str, items: list[dict]) -> list[dict]: # review state maps to a human-readable action action = item.get("state", "").lower() if event_type == "review_submitted" else event_type events.append({ + "id": item.get("id"), "event_type": event_type, "action": action or event_type, "created_at": created_at, @@ -120,8 +121,11 @@ def get_issue_comments(owner: str, repo: str, pr_number: int) -> list[dict]: # STEP 6 — event diffing # --------------------------------------------------------------------------- -def new_events_since(events: list[dict], cursor_ts: str) -> list[dict]: - filtered = [e for e in events if e["created_at"] > cursor_ts] +def new_events_since(events: list[dict], cursor_ts: str, seen_ids: set) -> list[dict]: + filtered = [ + e for e in events + if e["created_at"] >= cursor_ts and e.get("id") not in seen_ids + ] return sorted(filtered, key=lambda e: e["created_at"]) @@ -187,6 +191,7 @@ def poll_repo(repo_cfg: dict, state: dict) -> dict: repo_state = state.setdefault(repo_slug, {}) now_ts = datetime.now(timezone.utc).isoformat() cursor = repo_state.get("cursor", now_ts) + seen_ids: set = set(repo_state.get("seen_ids", [])) # First run: cursor = now (no backfill) if "cursor" not in repo_state: @@ -216,7 +221,7 @@ def poll_repo(repo_cfg: dict, state: dict) -> dict: for fetcher in fetchers: try: events = fetcher(owner, repo, pr_number) - new = new_events_since(events, cursor) + new = new_events_since(events, cursor, seen_ids) for 
event in new: all_new_events.append((pr, event)) except GHAPIError as e: @@ -230,12 +235,13 @@ def poll_repo(repo_cfg: dict, state: dict) -> dict: closed_data = gh_api(f"/repos/{owner}/{repo}/pulls?state=closed") for pr_raw in closed_data: closed_at = pr_raw.get("closed_at") or "" - if closed_at > cursor: + if closed_at >= cursor and pr_raw.get("id") not in seen_ids: pr = {"number": pr_raw["number"], "title": pr_raw["title"], "html_url": pr_raw["html_url"]} actor = (pr_raw.get("user") or {}).get("login", "unknown") merged = pr_raw.get("merged_at") is not None action = "merged" if merged else "closed" event = { + "id": pr_raw.get("id"), "event_type": "pr_closed", "action": action, "created_at": closed_at, @@ -256,13 +262,18 @@ def poll_repo(repo_cfg: dict, state: dict) -> dict: text = format_notification(repo_slug, pr, event) notify(text) log.info("[%s] Notified: PR #%d %s by %s", repo_slug, pr["number"], event["action"], event["actor"]) + if event.get("id"): + seen_ids.add(event["id"]) - # Update cursor + # Update cursor and seen_ids if all_new_events: repo_state["cursor"] = max(e["created_at"] for _, e in all_new_events) else: repo_state["cursor"] = now_ts + # Persist seen_ids for the next poll (NOTE: not pruned yet — IDs older than the cursor are still retained, so the list grows over time) + repo_state["seen_ids"] = list(seen_ids) + reset_errors(repo_slug, state) return state