fix(gh-monitor): use >= cursor + seen_ids dedup to avoid missing same-second events
This commit is contained in:
@@ -91,6 +91,7 @@ def _normalize(event_type: str, items: list[dict]) -> list[dict]:
|
|||||||
# review state maps to a human-readable action
|
# review state maps to a human-readable action
|
||||||
action = item.get("state", "").lower() if event_type == "review_submitted" else event_type
|
action = item.get("state", "").lower() if event_type == "review_submitted" else event_type
|
||||||
events.append({
|
events.append({
|
||||||
|
"id": item.get("id"),
|
||||||
"event_type": event_type,
|
"event_type": event_type,
|
||||||
"action": action or event_type,
|
"action": action or event_type,
|
||||||
"created_at": created_at,
|
"created_at": created_at,
|
||||||
@@ -120,8 +121,11 @@ def get_issue_comments(owner: str, repo: str, pr_number: int) -> list[dict]:
|
|||||||
# STEP 6 — event diffing
|
# STEP 6 — event diffing
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
def new_events_since(events: list[dict], cursor_ts: str) -> list[dict]:
|
def new_events_since(events: list[dict], cursor_ts: str, seen_ids: set) -> list[dict]:
|
||||||
filtered = [e for e in events if e["created_at"] > cursor_ts]
|
filtered = [
|
||||||
|
e for e in events
|
||||||
|
if e["created_at"] >= cursor_ts and e.get("id") not in seen_ids
|
||||||
|
]
|
||||||
return sorted(filtered, key=lambda e: e["created_at"])
|
return sorted(filtered, key=lambda e: e["created_at"])
|
||||||
|
|
||||||
|
|
||||||
@@ -187,6 +191,7 @@ def poll_repo(repo_cfg: dict, state: dict) -> dict:
|
|||||||
repo_state = state.setdefault(repo_slug, {})
|
repo_state = state.setdefault(repo_slug, {})
|
||||||
now_ts = datetime.now(timezone.utc).isoformat()
|
now_ts = datetime.now(timezone.utc).isoformat()
|
||||||
cursor = repo_state.get("cursor", now_ts)
|
cursor = repo_state.get("cursor", now_ts)
|
||||||
|
seen_ids: set = set(repo_state.get("seen_ids", []))
|
||||||
|
|
||||||
# First run: cursor = now (no backfill)
|
# First run: cursor = now (no backfill)
|
||||||
if "cursor" not in repo_state:
|
if "cursor" not in repo_state:
|
||||||
@@ -216,7 +221,7 @@ def poll_repo(repo_cfg: dict, state: dict) -> dict:
|
|||||||
for fetcher in fetchers:
|
for fetcher in fetchers:
|
||||||
try:
|
try:
|
||||||
events = fetcher(owner, repo, pr_number)
|
events = fetcher(owner, repo, pr_number)
|
||||||
new = new_events_since(events, cursor)
|
new = new_events_since(events, cursor, seen_ids)
|
||||||
for event in new:
|
for event in new:
|
||||||
all_new_events.append((pr, event))
|
all_new_events.append((pr, event))
|
||||||
except GHAPIError as e:
|
except GHAPIError as e:
|
||||||
@@ -230,12 +235,13 @@ def poll_repo(repo_cfg: dict, state: dict) -> dict:
|
|||||||
closed_data = gh_api(f"/repos/{owner}/{repo}/pulls?state=closed")
|
closed_data = gh_api(f"/repos/{owner}/{repo}/pulls?state=closed")
|
||||||
for pr_raw in closed_data:
|
for pr_raw in closed_data:
|
||||||
closed_at = pr_raw.get("closed_at") or ""
|
closed_at = pr_raw.get("closed_at") or ""
|
||||||
if closed_at > cursor:
|
if closed_at >= cursor and pr_raw.get("id") not in seen_ids:
|
||||||
pr = {"number": pr_raw["number"], "title": pr_raw["title"], "html_url": pr_raw["html_url"]}
|
pr = {"number": pr_raw["number"], "title": pr_raw["title"], "html_url": pr_raw["html_url"]}
|
||||||
actor = (pr_raw.get("user") or {}).get("login", "unknown")
|
actor = (pr_raw.get("user") or {}).get("login", "unknown")
|
||||||
merged = pr_raw.get("merged_at") is not None
|
merged = pr_raw.get("merged_at") is not None
|
||||||
action = "merged" if merged else "closed"
|
action = "merged" if merged else "closed"
|
||||||
event = {
|
event = {
|
||||||
|
"id": pr_raw.get("id"),
|
||||||
"event_type": "pr_closed",
|
"event_type": "pr_closed",
|
||||||
"action": action,
|
"action": action,
|
||||||
"created_at": closed_at,
|
"created_at": closed_at,
|
||||||
@@ -256,13 +262,18 @@ def poll_repo(repo_cfg: dict, state: dict) -> dict:
|
|||||||
text = format_notification(repo_slug, pr, event)
|
text = format_notification(repo_slug, pr, event)
|
||||||
notify(text)
|
notify(text)
|
||||||
log.info("[%s] Notified: PR #%d %s by %s", repo_slug, pr["number"], event["action"], event["actor"])
|
log.info("[%s] Notified: PR #%d %s by %s", repo_slug, pr["number"], event["action"], event["actor"])
|
||||||
|
if event.get("id"):
|
||||||
|
seen_ids.add(event["id"])
|
||||||
|
|
||||||
# Update cursor
|
# Update cursor and seen_ids
|
||||||
if all_new_events:
|
if all_new_events:
|
||||||
repo_state["cursor"] = max(e["created_at"] for _, e in all_new_events)
|
repo_state["cursor"] = max(e["created_at"] for _, e in all_new_events)
|
||||||
else:
|
else:
|
||||||
repo_state["cursor"] = now_ts
|
repo_state["cursor"] = now_ts
|
||||||
|
|
||||||
|
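# Persist seen_ids as a list so it can be stored in state. NOTE: nothing is pruned here — the set grows with every notified event; IDs of events older than the new cursor would have to be dropped explicitly to keep it bounded.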
|
||||||
|
repo_state["seen_ids"] = list(seen_ids)
|
||||||
|
|
||||||
reset_errors(repo_slug, state)
|
reset_errors(repo_slug, state)
|
||||||
return state
|
return state
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user