fix(gh-monitor): use >= cursor + seen_ids dedup to avoid missing same-second events

This commit is contained in:
2026-03-15 18:57:49 -04:00
parent 43ab5dcc3a
commit 2f5e85fe88

View File

@@ -91,6 +91,7 @@ def _normalize(event_type: str, items: list[dict]) -> list[dict]:
# review state maps to a human-readable action
action = item.get("state", "").lower() if event_type == "review_submitted" else event_type
events.append({
"id": item.get("id"),
"event_type": event_type,
"action": action or event_type,
"created_at": created_at,
@@ -120,8 +121,11 @@ def get_issue_comments(owner: str, repo: str, pr_number: int) -> list[dict]:
# STEP 6 — event diffing
# ---------------------------------------------------------------------------
def new_events_since(events: list[dict], cursor_ts: str) -> list[dict]:
filtered = [e for e in events if e["created_at"] > cursor_ts]
def new_events_since(
    events: list[dict],
    cursor_ts: str,
    seen_ids: "set | None" = None,
) -> list[dict]:
    """Return the events not yet reported, oldest first.

    Args:
        events: Normalized event dicts. Each must have a "created_at" key;
            "id" is optional.
        cursor_ts: Timestamp string of the newest event already handled.
            It is compared to "created_at" as a plain string, so both must
            use the same sortable format.
        seen_ids: IDs of events already reported. When omitted, the function
            falls back to a strict "newer than the cursor" filter.

    Returns:
        A new list of the matching events sorted by "created_at" (ties keep
        their input order).
    """

    def _is_new(event: dict) -> bool:
        created_at = event["created_at"]
        event_id = event.get("id")
        # An event without a usable id can never be recorded in seen_ids, so
        # admitting it at created_at == cursor_ts would re-report it on every
        # poll. Require it to be strictly newer instead. The same strict rule
        # applies when the caller supplies no seen_ids at all.
        if seen_ids is None or not event_id:
            return created_at > cursor_ts
        # Events sharing the cursor's timestamp are admitted so same-second
        # events are not missed; seen_ids filters out the ones already sent.
        return created_at >= cursor_ts and event_id not in seen_ids

    return sorted(
        (event for event in events if _is_new(event)),
        key=lambda event: event["created_at"],
    )
@@ -187,6 +191,7 @@ def poll_repo(repo_cfg: dict, state: dict) -> dict:
repo_state = state.setdefault(repo_slug, {})
now_ts = datetime.now(timezone.utc).isoformat()
cursor = repo_state.get("cursor", now_ts)
seen_ids: set = set(repo_state.get("seen_ids", []))
# First run: cursor = now (no backfill)
if "cursor" not in repo_state:
@@ -216,7 +221,7 @@ def poll_repo(repo_cfg: dict, state: dict) -> dict:
for fetcher in fetchers:
try:
events = fetcher(owner, repo, pr_number)
new = new_events_since(events, cursor)
new = new_events_since(events, cursor, seen_ids)
for event in new:
all_new_events.append((pr, event))
except GHAPIError as e:
@@ -230,12 +235,13 @@ def poll_repo(repo_cfg: dict, state: dict) -> dict:
closed_data = gh_api(f"/repos/{owner}/{repo}/pulls?state=closed")
for pr_raw in closed_data:
closed_at = pr_raw.get("closed_at") or ""
if closed_at > cursor:
if closed_at >= cursor and pr_raw.get("id") not in seen_ids:
pr = {"number": pr_raw["number"], "title": pr_raw["title"], "html_url": pr_raw["html_url"]}
actor = (pr_raw.get("user") or {}).get("login", "unknown")
merged = pr_raw.get("merged_at") is not None
action = "merged" if merged else "closed"
event = {
"id": pr_raw.get("id"),
"event_type": "pr_closed",
"action": action,
"created_at": closed_at,
@@ -256,13 +262,18 @@ def poll_repo(repo_cfg: dict, state: dict) -> dict:
text = format_notification(repo_slug, pr, event)
notify(text)
log.info("[%s] Notified: PR #%d %s by %s", repo_slug, pr["number"], event["action"], event["actor"])
if event.get("id"):
seen_ids.add(event["id"])
# Update cursor
# Update cursor and seen_ids
if all_new_events:
repo_state["cursor"] = max(e["created_at"] for _, e in all_new_events)
else:
repo_state["cursor"] = now_ts
# TODO: keep seen_ids bounded — the intent is to retain only IDs from events at or after the cursor, but no pruning happens yet; the full set is persisted as-is
repo_state["seen_ids"] = list(seen_ids)
reset_errors(repo_slug, state)
return state