fix(gh-monitor): use >= cursor + seen_ids dedup to avoid missing same-second events
This commit is contained in:
@@ -91,6 +91,7 @@ def _normalize(event_type: str, items: list[dict]) -> list[dict]:
|
||||
# review state maps to a human-readable action
|
||||
action = item.get("state", "").lower() if event_type == "review_submitted" else event_type
|
||||
events.append({
|
||||
"id": item.get("id"),
|
||||
"event_type": event_type,
|
||||
"action": action or event_type,
|
||||
"created_at": created_at,
|
||||
@@ -120,8 +121,11 @@ def get_issue_comments(owner: str, repo: str, pr_number: int) -> list[dict]:
|
||||
# STEP 6 — event diffing
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def new_events_since(events: list[dict], cursor_ts: str) -> list[dict]:
|
||||
filtered = [e for e in events if e["created_at"] > cursor_ts]
|
||||
def new_events_since(
    events: list[dict],
    cursor_ts: str,
    seen_ids: "set | None" = None,
) -> list[dict]:
    """Return the events not yet reported, ordered oldest first.

    An event that carries an id is kept when its ``created_at`` is at or
    after ``cursor_ts`` and its id is not in ``seen_ids``; the inclusive
    comparison is what lets several events sharing the cursor's timestamp
    all be delivered.

    An event without a usable id cannot be de-duplicated (the caller only
    records truthy ids), so it is kept only when it is strictly newer than
    the cursor. Otherwise an id-less event sitting exactly on the cursor
    would be returned on every poll and the cursor would never advance.

    Args:
        events: Normalized event dicts; ``created_at`` and ``id`` are read.
        cursor_ts: Timestamp string of the last processed event. Compared
            as a plain string, so it must use the same format as the
            events' ``created_at`` values.
        seen_ids: Ids already reported. ``None`` means nothing has been
            seen yet.

    Returns:
        A new list of the matching event dicts sorted by ``created_at``.
    """
    already_seen = seen_ids if seen_ids is not None else ()

    def timestamp(event: dict) -> str:
        # A missing or None created_at sorts and compares as the empty string.
        return event.get("created_at") or ""

    fresh = []
    for event in events:
        created_at = timestamp(event)
        event_id = event.get("id")
        if not event_id:
            # No id to dedup on: fall back to the strict comparison.
            if created_at > cursor_ts:
                fresh.append(event)
        elif created_at >= cursor_ts and event_id not in already_seen:
            fresh.append(event)
    return sorted(fresh, key=timestamp)
|
||||
|
||||
|
||||
@@ -187,6 +191,7 @@ def poll_repo(repo_cfg: dict, state: dict) -> dict:
|
||||
repo_state = state.setdefault(repo_slug, {})
|
||||
now_ts = datetime.now(timezone.utc).isoformat()
|
||||
cursor = repo_state.get("cursor", now_ts)
|
||||
seen_ids: set = set(repo_state.get("seen_ids", []))
|
||||
|
||||
# First run: cursor = now (no backfill)
|
||||
if "cursor" not in repo_state:
|
||||
@@ -216,7 +221,7 @@ def poll_repo(repo_cfg: dict, state: dict) -> dict:
|
||||
for fetcher in fetchers:
|
||||
try:
|
||||
events = fetcher(owner, repo, pr_number)
|
||||
new = new_events_since(events, cursor)
|
||||
new = new_events_since(events, cursor, seen_ids)
|
||||
for event in new:
|
||||
all_new_events.append((pr, event))
|
||||
except GHAPIError as e:
|
||||
@@ -230,12 +235,13 @@ def poll_repo(repo_cfg: dict, state: dict) -> dict:
|
||||
closed_data = gh_api(f"/repos/{owner}/{repo}/pulls?state=closed")
|
||||
for pr_raw in closed_data:
|
||||
closed_at = pr_raw.get("closed_at") or ""
|
||||
if closed_at > cursor:
|
||||
if closed_at >= cursor and pr_raw.get("id") not in seen_ids:
|
||||
pr = {"number": pr_raw["number"], "title": pr_raw["title"], "html_url": pr_raw["html_url"]}
|
||||
actor = (pr_raw.get("user") or {}).get("login", "unknown")
|
||||
merged = pr_raw.get("merged_at") is not None
|
||||
action = "merged" if merged else "closed"
|
||||
event = {
|
||||
"id": pr_raw.get("id"),
|
||||
"event_type": "pr_closed",
|
||||
"action": action,
|
||||
"created_at": closed_at,
|
||||
@@ -256,13 +262,18 @@ def poll_repo(repo_cfg: dict, state: dict) -> dict:
|
||||
text = format_notification(repo_slug, pr, event)
|
||||
notify(text)
|
||||
log.info("[%s] Notified: PR #%d %s by %s", repo_slug, pr["number"], event["action"], event["actor"])
|
||||
if event.get("id"):
|
||||
seen_ids.add(event["id"])
|
||||
|
||||
# Update cursor
|
||||
# Update cursor and seen_ids
|
||||
if all_new_events:
|
||||
repo_state["cursor"] = max(e["created_at"] for _, e in all_new_events)
|
||||
else:
|
||||
repo_state["cursor"] = now_ts
|
||||
|
||||
# Persist seen_ids. NOTE(review): nothing visible here prunes the set to IDs at or after the cursor, so it is not actually bounded yet — TODO confirm and prune.
|
||||
repo_state["seen_ids"] = list(seen_ids)
|
||||
|
||||
reset_errors(repo_slug, state)
|
||||
return state
|
||||
|
||||
|
||||
Reference in New Issue
Block a user